init - 初始化项目
This commit is contained in:
34
modules/gapi/samples/api_example.cpp
Normal file
34
modules/gapi/samples/api_example.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
#include <opencv2/videoio.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::VideoCapture cap;
|
||||
if (argc > 1) cap.open(argv[1]);
|
||||
else cap.open(0);
|
||||
CV_Assert(cap.isOpened());
|
||||
|
||||
cv::GMat in;
|
||||
cv::GMat vga = cv::gapi::resize(in, cv::Size(), 0.5, 0.5);
|
||||
cv::GMat gray = cv::gapi::BGR2Gray(vga);
|
||||
cv::GMat blurred = cv::gapi::blur(gray, cv::Size(5,5));
|
||||
cv::GMat edges = cv::gapi::Canny(blurred, 32, 128, 3);
|
||||
cv::GMat b,g,r;
|
||||
std::tie(b,g,r) = cv::gapi::split3(vga);
|
||||
cv::GMat out = cv::gapi::merge3(b, g | edges, r);
|
||||
cv::GComputation ac(in, out);
|
||||
|
||||
cv::Mat input_frame;
|
||||
cv::Mat output_frame;
|
||||
CV_Assert(cap.read(input_frame));
|
||||
do
|
||||
{
|
||||
ac.apply(input_frame, output_frame);
|
||||
cv::imshow("output", output_frame);
|
||||
} while (cap.read(input_frame) && cv::waitKey(30) < 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
122
modules/gapi/samples/api_ref_snippets.cpp
Normal file
122
modules/gapi/samples/api_ref_snippets.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
#include <opencv2/videoio.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
|
||||
#include <opencv2/gapi/fluid/core.hpp>
|
||||
#include <opencv2/gapi/fluid/imgproc.hpp>
|
||||
|
||||
static void typed_example()
|
||||
{
|
||||
const cv::Size sz(32, 32);
|
||||
cv::Mat
|
||||
in_mat1 (sz, CV_8UC1),
|
||||
in_mat2 (sz, CV_8UC1),
|
||||
out_mat_untyped(sz, CV_8UC1),
|
||||
out_mat_typed1 (sz, CV_8UC1),
|
||||
out_mat_typed2 (sz, CV_8UC1);
|
||||
cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
|
||||
cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(255));
|
||||
|
||||
//! [Untyped_Example]
|
||||
// Untyped G-API ///////////////////////////////////////////////////////////
|
||||
cv::GComputation cvtU([]()
|
||||
{
|
||||
cv::GMat in1, in2;
|
||||
cv::GMat out = cv::gapi::add(in1, in2);
|
||||
return cv::GComputation({in1, in2}, {out});
|
||||
});
|
||||
std::vector<cv::Mat> u_ins = {in_mat1, in_mat2};
|
||||
std::vector<cv::Mat> u_outs = {out_mat_untyped};
|
||||
cvtU.apply(u_ins, u_outs);
|
||||
//! [Untyped_Example]
|
||||
|
||||
//! [Typed_Example]
|
||||
// Typed G-API /////////////////////////////////////////////////////////////
|
||||
cv::GComputationT<cv::GMat (cv::GMat, cv::GMat)> cvtT([](cv::GMat m1, cv::GMat m2)
|
||||
{
|
||||
return m1+m2;
|
||||
});
|
||||
cvtT.apply(in_mat1, in_mat2, out_mat_typed1);
|
||||
|
||||
auto cvtTC = cvtT.compile(cv::descr_of(in_mat1), cv::descr_of(in_mat2));
|
||||
cvtTC(in_mat1, in_mat2, out_mat_typed2);
|
||||
//! [Typed_Example]
|
||||
}
|
||||
|
||||
// Stub kernel interfaces used only by the documentation snippets.
// Each one takes a single GMat, produces a single GMat, and reports the
// output metadata as identical to the input metadata.
G_TYPED_KERNEL(IAdd, <cv::GMat(cv::GMat)>, "test.custom.add") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; }
};

G_TYPED_KERNEL(IFilter2D, <cv::GMat(cv::GMat)>, "test.custom.filter2d") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; }
};

// The ID string must differ from IAdd's: it previously reused
// "test.custom.add", giving two distinct interfaces the same identifier.
G_TYPED_KERNEL(IRGB2YUV, <cv::GMat(cv::GMat)>, "test.custom.rgb2yuv") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; }
};
|
||||
// No-op OpenCV-backend implementations of the stub interfaces declared in this
// file; they exist so the kernel-package snippet has something to list.
GAPI_OCV_KERNEL(CustomAdd, IAdd)
{
    static void run(cv::Mat, cv::Mat &)
    {
    }
};

GAPI_OCV_KERNEL(CustomFilter2D, IFilter2D)
{
    static void run(cv::Mat, cv::Mat &)
    {
    }
};

GAPI_OCV_KERNEL(CustomRGB2YUV, IRGB2YUV)
{
    static void run(cv::Mat, cv::Mat &)
    {
    }
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3)
|
||||
return -1;
|
||||
|
||||
cv::Mat input = cv::imread(argv[1]);
|
||||
cv::Mat output;
|
||||
|
||||
{
|
||||
//! [graph_def]
|
||||
cv::GMat in;
|
||||
cv::GMat gx = cv::gapi::Sobel(in, CV_32F, 1, 0);
|
||||
cv::GMat gy = cv::gapi::Sobel(in, CV_32F, 0, 1);
|
||||
cv::GMat g = cv::gapi::sqrt(cv::gapi::mul(gx, gx) + cv::gapi::mul(gy, gy));
|
||||
cv::GMat out = cv::gapi::convertTo(g, CV_8U);
|
||||
//! [graph_def]
|
||||
|
||||
//! [graph_decl_apply]
|
||||
//! [graph_cap_full]
|
||||
cv::GComputation sobelEdge(cv::GIn(in), cv::GOut(out));
|
||||
//! [graph_cap_full]
|
||||
sobelEdge.apply(input, output);
|
||||
//! [graph_decl_apply]
|
||||
|
||||
//! [apply_with_param]
|
||||
cv::gapi::GKernelPackage kernels = cv::gapi::combine
|
||||
(cv::gapi::core::fluid::kernels(),
|
||||
cv::gapi::imgproc::fluid::kernels());
|
||||
sobelEdge.apply(input, output, cv::compile_args(kernels));
|
||||
//! [apply_with_param]
|
||||
|
||||
//! [graph_cap_sub]
|
||||
cv::GComputation sobelEdgeSub(cv::GIn(gx, gy), cv::GOut(out));
|
||||
//! [graph_cap_sub]
|
||||
}
|
||||
//! [graph_gen]
|
||||
cv::GComputation sobelEdgeGen([](){
|
||||
cv::GMat in;
|
||||
cv::GMat gx = cv::gapi::Sobel(in, CV_32F, 1, 0);
|
||||
cv::GMat gy = cv::gapi::Sobel(in, CV_32F, 0, 1);
|
||||
cv::GMat g = cv::gapi::sqrt(cv::gapi::mul(gx, gx) + cv::gapi::mul(gy, gy));
|
||||
cv::GMat out = cv::gapi::convertTo(g, CV_8U);
|
||||
return cv::GComputation(in, out);
|
||||
});
|
||||
//! [graph_gen]
|
||||
|
||||
cv::imwrite(argv[2], output);
|
||||
|
||||
//! [kernels_snippet]
|
||||
cv::gapi::GKernelPackage pkg = cv::gapi::kernels
|
||||
< CustomAdd
|
||||
, CustomFilter2D
|
||||
, CustomRGB2YUV
|
||||
>();
|
||||
//! [kernels_snippet]
|
||||
|
||||
// Just call typed example with no input/output
|
||||
typed_example();
|
||||
return 0;
|
||||
}
|
||||
56
modules/gapi/samples/draw_example.cpp
Normal file
56
modules/gapi/samples/draw_example.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <opencv2/imgproc.hpp> // cv::FONT*, cv::LINE*, cv::FILLED
|
||||
#include <opencv2/highgui.hpp> // imwrite
|
||||
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/render.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
std::cerr << "Filename required" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
const auto font = cv::FONT_HERSHEY_DUPLEX;
|
||||
const auto blue = cv::Scalar{ 255, 0, 0}; // B/G/R
|
||||
const auto green = cv::Scalar{ 0, 255, 0};
|
||||
const auto coral = cv::Scalar{0x81,0x81,0xF1};
|
||||
const auto white = cv::Scalar{ 255, 255, 255};
|
||||
cv::Mat test(cv::Size(480, 160), CV_8UC3, white);
|
||||
|
||||
namespace draw = cv::gapi::wip::draw;
|
||||
std::vector<draw::Prim> prims;
|
||||
prims.emplace_back(draw::Circle{ // CIRCLE primitive
|
||||
{400,72}, // Position (a cv::Point)
|
||||
32, // Radius
|
||||
coral, // Color
|
||||
cv::FILLED, // Thickness/fill type
|
||||
cv::LINE_8, // Line type
|
||||
0 // Shift
|
||||
});
|
||||
prims.emplace_back(draw::Text{ // TEXT primitive
|
||||
"Hello from G-API!", // Text
|
||||
{64,96}, // Position (a cv::Point)
|
||||
font, // Font
|
||||
1.0, // Scale (size)
|
||||
blue, // Color
|
||||
2, // Thickness
|
||||
cv::LINE_8, // Line type
|
||||
false // Bottom left origin flag
|
||||
});
|
||||
prims.emplace_back(draw::Rect{ // RECTANGLE primitive
|
||||
{16,48,400,72}, // Geometry (a cv::Rect)
|
||||
green, // Color
|
||||
2, // Thickness
|
||||
cv::LINE_8, // Line type
|
||||
0 // Shift
|
||||
});
|
||||
prims.emplace_back(draw::Mosaic{ // MOSAIC primitive
|
||||
{320,96,128,32}, // Geometry (a cv::Rect)
|
||||
16, // Cell size
|
||||
0 // Decimation
|
||||
});
|
||||
draw::render(test, prims);
|
||||
cv::imwrite(argv[1], test);
|
||||
return 0;
|
||||
}
|
||||
68
modules/gapi/samples/dynamic_graph.cpp
Normal file
68
modules/gapi/samples/dynamic_graph.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/cpu/core.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
|
||||
bool need_first_conversion = true;
|
||||
bool need_second_conversion = false;
|
||||
|
||||
cv::Size szOut(4, 4);
|
||||
cv::GComputation cc([&](){
|
||||
// ! [GIOProtoArgs usage]
|
||||
auto ins = cv::GIn();
|
||||
cv::GMat in1;
|
||||
if (need_first_conversion)
|
||||
ins += cv::GIn(in1);
|
||||
|
||||
cv::GMat in2;
|
||||
if (need_second_conversion)
|
||||
ins += cv::GIn(in2);
|
||||
|
||||
auto outs = cv::GOut();
|
||||
cv::GMat out1 = cv::gapi::resize(in1, szOut);
|
||||
if (need_first_conversion)
|
||||
outs += cv::GOut(out1);
|
||||
|
||||
cv::GMat out2 = cv::gapi::resize(in2, szOut);
|
||||
if (need_second_conversion)
|
||||
outs += cv::GOut(out2);
|
||||
// ! [GIOProtoArgs usage]
|
||||
return cv::GComputation(std::move(ins), std::move(outs));
|
||||
});
|
||||
|
||||
// ! [GRunArgs usage]
|
||||
auto in_vector = cv::gin();
|
||||
|
||||
cv::Mat in_mat1( 8, 8, CV_8UC3);
|
||||
cv::Mat in_mat2(16, 16, CV_8UC3);
|
||||
cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
|
||||
cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(255));
|
||||
|
||||
if (need_first_conversion)
|
||||
in_vector += cv::gin(in_mat1);
|
||||
if (need_second_conversion)
|
||||
in_vector += cv::gin(in_mat2);
|
||||
// ! [GRunArgs usage]
|
||||
|
||||
// ! [GRunArgsP usage]
|
||||
auto out_vector = cv::gout();
|
||||
cv::Mat out_mat1, out_mat2;
|
||||
if (need_first_conversion)
|
||||
out_vector += cv::gout(out_mat1);
|
||||
if (need_second_conversion)
|
||||
out_vector += cv::gout(out_mat2);
|
||||
// ! [GRunArgsP usage]
|
||||
|
||||
auto stream = cc.compileStreaming(cv::compile_args(cv::gapi::core::cpu::kernels()));
|
||||
stream.setSource(std::move(in_vector));
|
||||
|
||||
stream.start();
|
||||
stream.pull(std::move(out_vector));
|
||||
stream.stop();
|
||||
|
||||
return 0;
|
||||
}
|
||||
432
modules/gapi/samples/gaze_estimation.cpp
Normal file
432
modules/gapi/samples/gaze_estimation.cpp
Normal file
@@ -0,0 +1,432 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cctype>
|
||||
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/infer.hpp>
|
||||
#include <opencv2/gapi/infer/ie.hpp>
|
||||
#include <opencv2/gapi/streaming/cap.hpp>
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
#include <opencv2/highgui.hpp> // CommandLineParser
|
||||
|
||||
// Text passed to cv::CommandLineParser::about() in main().
const std::string about =
    "This is an OpenCV-based version of Gaze Estimation example";

// Command-line option table in cv::CommandLineParser syntax:
// "{ name(s) | default value | help text }".
// The *m options are model paths (.xml), the *d options are target devices.
const std::string keys =
    "{ h help | | Print this help message }"
    "{ input | | Path to the input video file }"
    "{ facem | face-detection-retail-0005.xml | Path to OpenVINO face detection model (.xml) }"
    "{ faced | CPU | Target device for the face detection (e.g. CPU, GPU, VPU, ...) }"
    "{ landm | facial-landmarks-35-adas-0002.xml | Path to OpenVINO landmarks detector model (.xml) }"
    "{ landd | CPU | Target device for the landmarks detector (e.g. CPU, GPU, VPU, ...) }"
    "{ headm | head-pose-estimation-adas-0001.xml | Path to OpenVINO head pose estimation model (.xml) }"
    "{ headd | CPU | Target device for the head pose estimation inference (e.g. CPU, GPU, VPU, ...) }"
    "{ gazem | gaze-estimation-adas-0002.xml | Path to OpenVINO gaze vector estimation model (.xml) }"
    "{ gazed | CPU | Target device for the gaze vector estimation inference (e.g. CPU, GPU, VPU, ...) }"
    ;
|
||||
|
||||
namespace {
|
||||
// Derives the weights file name from a model file name by replacing a
// (case-insensitive) ".xml" suffix with ".bin".
// Asserts that the path is longer than the suffix and that the suffix is ".xml".
std::string weights_path(const std::string &model_path) {
    const std::size_t ext_len  = 4u;
    const std::size_t path_len = model_path.size();
    CV_Assert(path_len > ext_len);

    const std::size_t stem_len = path_len - ext_len;

    // Lower-case the trailing four characters before comparing.
    std::string suffix = model_path.substr(stem_len);
    for (char &ch : suffix) {
        // std::tolower requires a value representable as unsigned char.
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    CV_Assert(suffix == ".xml");

    return model_path.substr(0u, stem_len) + ".bin";
}
|
||||
} // anonymous namespace
|
||||
|
||||
namespace custom {
|
||||
namespace {
|
||||
using GMat3 = std::tuple<cv::GMat,cv::GMat,cv::GMat>;
|
||||
using GMats = cv::GArray<cv::GMat>;
|
||||
using GRects = cv::GArray<cv::Rect>;
|
||||
using GSize = cv::GOpaque<cv::Size>;
|
||||
G_API_NET(Faces, <cv::GMat(cv::GMat)>, "face-detector" );
|
||||
G_API_NET(Landmarks, <cv::GMat(cv::GMat)>, "facial-landmarks");
|
||||
G_API_NET(HeadPose, < GMat3(cv::GMat)>, "head-pose");
|
||||
G_API_NET(Gaze, <cv::GMat(cv::GMat,cv::GMat,cv::GMat)>, "gaze-vector");
|
||||
|
||||
// Operation: GMat -> GOpaque<cv::Size>.
// The output metadata does not depend on the input description.
G_API_OP(Size, <GSize(cv::GMat)>, "custom.gapi.size")
{
    static cv::GOpaqueDesc outMeta(const cv::GMatDesc &)
    {
        return cv::empty_gopaque_desc();
    }
};
|
||||
|
||||
// Operation: (GMat, GOpaque<cv::Size>, bool) -> GArray<cv::Rect>.
// The output metadata does not depend on any of the inputs.
G_API_OP(ParseSSD, <GRects(cv::GMat, GSize, bool)>, "custom.gaze_estimation.parseSSD")
{
    static cv::GArrayDesc outMeta(const cv::GMatDesc &,
                                  const cv::GOpaqueDesc &,
                                  bool)
    {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// Left/Right eye per every face
|
||||
G_API_OP(ParseEyes,
|
||||
<std::tuple<GRects, GRects>(GMats, GRects, GSize)>,
|
||||
"custom.gaze_estimation.parseEyes") {
|
||||
static std::tuple<cv::GArrayDesc, cv::GArrayDesc>
|
||||
outMeta( const cv::GArrayDesc &
|
||||
, const cv::GArrayDesc &
|
||||
, const cv::GOpaqueDesc &) {
|
||||
return std::make_tuple(cv::empty_array_desc(), cv::empty_array_desc());
|
||||
}
|
||||
};
|
||||
|
||||
// Combine three scalars into a 1x3 vector (per every face)
|
||||
G_API_OP(ProcessPoses,
|
||||
<GMats(GMats, GMats, GMats)>,
|
||||
"custom.gaze_estimation.processPoses") {
|
||||
static cv::GArrayDesc outMeta( const cv::GArrayDesc &
|
||||
, const cv::GArrayDesc &
|
||||
, const cv::GArrayDesc &) {
|
||||
return cv::empty_array_desc();
|
||||
}
|
||||
};
|
||||
|
||||
// Enlarges a rectangle in place by fixed fractions of its original size and
// then pads the shorter side so the result is square.
void adjustBoundingBox(cv::Rect& boundingBox) {
    // All four adjustments are fractions of the size on entry.
    const int orig_w = boundingBox.width;
    const int orig_h = boundingBox.height;

    boundingBox.x      -= static_cast<int>(0.067 * orig_w);
    boundingBox.y      -= static_cast<int>(0.028 * orig_h);
    boundingBox.width  += static_cast<int>(0.15  * orig_w);
    boundingBox.height += static_cast<int>(0.13  * orig_h);

    // Positive when the box is taller than wide.
    const int excess_height = boundingBox.height - boundingBox.width;
    if (excess_height > 0) {
        // Widen, shifting left by half of the added amount.
        boundingBox.x     -= excess_height / 2;
        boundingBox.width += excess_height;
    } else {
        // Heighten (possibly by zero), shifting up by half of the added amount.
        const int excess_width = -excess_height;
        boundingBox.y      -= excess_width / 2;
        boundingBox.height += excess_width;
    }
}
|
||||
|
||||
// Converts a 3D gaze vector into two angles in degrees.
//   gazeAngles.x = 90 + atan2(z, x)         (in degrees)
//   gazeAngles.y = 90 - acos(y / |vector|)  (in degrees)
// A zero-length vector makes the second angle NaN (division by zero norm).
void gazeVectorToGazeAngles(const cv::Point3f& gazeVector,
                            cv::Point2f& gazeAngles) {
    const double r = cv::norm(gazeVector);

    const double v0 = static_cast<double>(gazeVector.x);
    const double v1 = static_cast<double>(gazeVector.y);
    const double v2 = static_cast<double>(gazeVector.z);

    // CV_PI is used instead of M_PI / M_PI_2: those macros are not part of
    // standard C++ and are not defined by every toolchain's <cmath>.
    const double rad2deg = 180.0 / CV_PI;
    const double half_pi = CV_PI / 2;

    gazeAngles.x = static_cast<float>(rad2deg * (half_pi + std::atan2(v2, v0)));
    gazeAngles.y = static_cast<float>(rad2deg * (half_pi - std::acos(v1 / r)));
}
|
||||
|
||||
// OpenCV-backend implementation of the Size operation:
// reports the size of the input matrix.
GAPI_OCV_KERNEL(OCVSize, Size)
{
    static void run(const cv::Mat &src, cv::Size &dst)
    {
        dst = src.size();
    }
};
|
||||
|
||||
// OpenCV-backend implementation of ParseSSD.
// Reads a 4-D detection tensor whose last dimension holds 7 floats per
// proposal: [image_id, label, confidence, left, top, right, bottom], with the
// box corners given relative to the frame. Produces rectangles in pixels.
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
    static void run(const cv::Mat &in_ssd_result,
                    const cv::Size &upscale,
                    const bool filter_out_of_bounds,
                    std::vector<cv::Rect> &out_objects) {
        const auto &dims = in_ssd_result.size;
        CV_Assert(dims.dims() == 4u);

        const int num_proposals     = dims[2];
        const int fields_per_object = dims[3];
        CV_Assert(fields_per_object == 7); // fixed SSD object size

        const cv::Rect surface(cv::Point(0, 0), upscale);
        out_objects.clear();

        // `det` always points at the first field of the current proposal.
        const float *det = in_ssd_result.ptr<float>();
        for (int i = 0; i < num_proposals; ++i, det += fields_per_object) {
            const float image_id   = det[0];
            const float confidence = det[2];
            const float rc_left    = det[3];
            const float rc_top     = det[4];
            const float rc_right   = det[5];
            const float rc_bottom  = det[6];

            if (image_id < 0.f) {
                break;    // marks end-of-detections
            }
            if (confidence < 0.5f) {
                continue; // skip objects with low confidence
            }

            // Map relative coordinates to the original image scale.
            const int left   = static_cast<int>(rc_left   * upscale.width);
            const int top    = static_cast<int>(rc_top    * upscale.height);
            const int right  = static_cast<int>(rc_right  * upscale.width);
            const int bottom = static_cast<int>(rc_bottom * upscale.height);
            cv::Rect rc(left, top, right - left, bottom - top);
            adjustBoundingBox(rc); // TODO: new option?

            const cv::Rect clipped_rc = rc & surface; // TODO: new option?
            // When filtering, drop any box that clipping had to shrink.
            if (filter_out_of_bounds && clipped_rc.area() != rc.area()) {
                continue;
            }
            out_objects.emplace_back(clipped_rc);
        }
    }
};
|
||||
|
||||
// Builds a square box around an eye from two landmark points.
// The points (p1, p2) are given relative to the face rectangle; the box is
// centred on their midpoint, its side is `scale` times the distance between
// them, and the result is expressed in the coordinates of the frame that
// `face_rc` belongs to.
cv::Rect eyeBox(const cv::Rect &face_rc,
                float p1_x, float p1_y, float p2_x, float p2_y,
                float scale = 1.8f) {
    const cv::Size face_size = face_rc.size();

    // Landmarks in pixels, relative to the face rectangle's origin.
    const cv::Point corner_a(static_cast<int>(p1_x * face_size.width),
                             static_cast<int>(p1_y * face_size.height));
    const cv::Point corner_b(static_cast<int>(p2_x * face_size.width),
                             static_cast<int>(p2_y * face_size.height));

    const float eye_span     = static_cast<float>(cv::norm(corner_a - corner_b));
    const cv::Point centre   = (corner_a + corner_b) / 2;
    const int side           = static_cast<int>(scale * eye_span);

    // Adding face_rc.x / face_rc.y shifts the box into the frame's coordinates.
    return cv::Rect(face_rc.x + centre.x - (side / 2),
                    face_rc.y + centre.y - (side / 2),
                    side,
                    side);
}
|
||||
|
||||
// OpenCV-backend implementation of ParseEyes.
// For each face, reads the first eight floats of its landmark matrix as four
// (x, y) points and produces one box per eye, clipped to the frame.
GAPI_OCV_KERNEL(OCVParseEyes, ParseEyes) {
    static void run(const std::vector<cv::Mat>  &in_landmarks_per_face,
                    const std::vector<cv::Rect> &in_face_rcs,
                    const cv::Size              &frame_size,
                    std::vector<cv::Rect>       &out_left_eyes,
                    std::vector<cv::Rect>       &out_right_eyes) {
        const std::size_t face_count = in_landmarks_per_face.size();
        const cv::Rect frame_rect(cv::Point(0, 0), frame_size);
        GAPI_Assert(face_count == in_face_rcs.size());

        out_left_eyes.clear();
        out_right_eyes.clear();
        out_left_eyes.reserve(face_count);
        out_right_eyes.reserve(face_count);

        for (std::size_t face = 0u; face < face_count; ++face) {
            const cv::Rect &face_rc = in_face_rcs[face];
            const float *pts = in_landmarks_per_face[face].ptr<float>();

            // Left eye is defined by points 0/1 (x2),
            // Right eye is defined by points 2/3 (x2)
            const cv::Rect left_eye  = eyeBox(face_rc, pts[0], pts[1], pts[2], pts[3]);
            out_left_eyes.push_back(frame_rect & left_eye);

            const cv::Rect right_eye = eyeBox(face_rc, pts[4], pts[5], pts[6], pts[7]);
            out_right_eyes.push_back(frame_rect & right_eye);
        }
    }
};
|
||||
|
||||
// OpenCV-backend implementation of ProcessPoses.
// Takes the first float of each matrix in the three input arrays and packs
// them, per index, into a 1x3 CV_32FC1 matrix.
GAPI_OCV_KERNEL(OCVProcessPoses, ProcessPoses) {
    static void run(const std::vector<cv::Mat> &in_ys,
                    const std::vector<cv::Mat> &in_ps,
                    const std::vector<cv::Mat> &in_rs,
                    std::vector<cv::Mat>       &out_poses) {
        const std::size_t count = in_ys.size();
        GAPI_Assert(count == in_ps.size() && count == in_rs.size());

        out_poses.clear();
        for (std::size_t i = 0u; i < count; ++i) {
            cv::Mat packed(1, 3, CV_32FC1);
            float *dst = packed.ptr<float>();
            dst[0] = *in_ys[i].ptr<float>();
            dst[1] = *in_ps[i].ptr<float>();
            dst[2] = *in_rs[i].ptr<float>();
            out_poses.push_back(std::move(packed));
        }
    }
};
|
||||
} // anonymous namespace
|
||||
} // namespace custom
|
||||
|
||||
namespace vis {
|
||||
namespace {
|
||||
// Returns the midpoint of a rectangle. The halving is done on integer
// coordinates before the conversion to floating point.
cv::Point2f midp(const cv::Rect &rc) {
    const cv::Point centre = (rc.tl() + rc.br()) / 2;
    return centre;
}
|
||||
// Draws a rectangle outline with color (0,255,0) and thickness 2 on `m`.
void bbox(cv::Mat &m, const cv::Rect &rc) {
    const cv::Scalar color(0, 255, 0);
    const int thickness = 2;
    const int shift     = 0;
    cv::rectangle(m, rc, color, thickness, cv::LINE_8, shift);
}
|
||||
// Draws three head-orientation axes from the centre of the face rectangle.
//   m       - image to draw on
//   p       - matrix whose first three floats are read as yaw, pitch, roll;
//             the values are treated as degrees
//   face_rc - face rectangle; its centre is the origin of the axes and its
//             width sets their length
void pose(cv::Mat &m, const cv::Mat &p, const cv::Rect &face_rc) {
    const float *posePtr = p.ptr<float>();
    const double yaw   = static_cast<double>(posePtr[0]);
    const double pitch = static_cast<double>(posePtr[1]);
    const double roll  = static_cast<double>(posePtr[2]);

    // CV_PI is used instead of M_PI: that macro is not part of standard C++
    // and is not defined by every toolchain's <cmath>.
    const auto deg2rad = [](double deg) { return deg * CV_PI / 180.0; };

    const double sinY = std::sin(deg2rad(yaw));
    const double sinP = std::sin(deg2rad(pitch));
    const double sinR = std::sin(deg2rad(roll));

    const double cosY = std::cos(deg2rad(yaw));
    const double cosP = std::cos(deg2rad(pitch));
    const double cosR = std::cos(deg2rad(roll));

    const double axisLength = 0.4 * face_rc.width;
    const int xCenter = face_rc.x + face_rc.width  / 2;
    const int yCenter = face_rc.y + face_rc.height / 2;

    const cv::Point   center(xCenter, yCenter);
    const cv::Point2d axisln(axisLength, axisLength);

    // 2x2 matrices mapping the (length, length) vector to each axis end offset.
    const cv::Matx<double,2,2> ctr(cosR*cosY,      sinY*sinP*sinR, 0.0,  cosP*sinR);
    const cv::Matx<double,2,2> ctt(cosR*sinY*sinP, cosY*sinR,      0.0, -cosP*cosR);
    const cv::Matx<double,2,2> ctf(sinY*cosP,      0.0,            0.0,  sinP);

    // center to right
    cv::line(m, center, center + static_cast<cv::Point>(ctr*axisln), cv::Scalar(0, 0, 255), 2);
    // center to top
    cv::line(m, center, center + static_cast<cv::Point>(ctt*axisln), cv::Scalar(0, 255, 0), 2);
    // center to forward
    cv::line(m, center, center + static_cast<cv::Point>(ctf*axisln), cv::Scalar(255, 0, 255), 2);
}
|
||||
// Draws the gaze direction as an arrow from the centre of each eye box and
// prints the gaze angles below the face rectangle.
//   m        - image to draw on
//   v        - matrix whose first three floats are read as the gaze vector
//   face_rc  - face rectangle; its width scales the arrows and the text
//   left_rc, right_rc - eye rectangles
void vvec(cv::Mat &m, const cv::Mat &v, const cv::Rect &face_rc,
          const cv::Rect &left_rc, const cv::Rect &right_rc) {
    const double text_scale  = 0.002 * face_rc.width;
    const double arrowLength = 0.4   * face_rc.width;

    // Read the vector and normalise it to unit length.
    const float *raw = v.ptr<float>();
    cv::Point3f gazeVector(raw[0], raw[1], raw[2]);
    gazeVector = gazeVector / cv::norm(gazeVector);

    const cv::Point2f left_mid  = midp(left_rc);
    const cv::Point2f right_mid = midp(right_rc);

    // Only x and y are drawn; y is negated before scaling.
    cv::Point2f gazeArrow(gazeVector.x, -gazeVector.y);
    gazeArrow *= arrowLength;

    const cv::Scalar arrow_color(255, 0, 0);
    cv::arrowedLine(m, left_mid,  left_mid  + gazeArrow, arrow_color, 2);
    cv::arrowedLine(m, right_mid, right_mid + gazeArrow, arrow_color, 2);

    cv::Point2f gazeAngles;
    custom::gazeVectorToGazeAngles(gazeVector, gazeAngles);

    const cv::String label =
        cv::format("gaze angles: (h=%0.0f, v=%0.0f)",
                   static_cast<double>(std::round(gazeAngles.x)),
                   static_cast<double>(std::round(gazeAngles.y)));

    // Text origin: left edge of the face box, a little below its bottom edge.
    const cv::Point label_org(static_cast<int>(face_rc.tl().x),
                              static_cast<int>(face_rc.br().y + 12. * face_rc.width / 100.));

    cv::putText(m, label, label_org,
                cv::FONT_HERSHEY_PLAIN, text_scale * 2, cv::Scalar::all(255), 1);
}
|
||||
} // anonymous namespace
|
||||
} // namespace vis
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
cmd.about(about);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
cv::GMat in;
|
||||
cv::GMat faces = cv::gapi::infer<custom::Faces>(in);
|
||||
cv::GOpaque<cv::Size> sz = custom::Size::on(in); // FIXME
|
||||
cv::GArray<cv::Rect> faces_rc = custom::ParseSSD::on(faces, sz, true);
|
||||
cv::GArray<cv::GMat> angles_y, angles_p, angles_r;
|
||||
std::tie(angles_y, angles_p, angles_r) = cv::gapi::infer<custom::HeadPose>(faces_rc, in);
|
||||
cv::GArray<cv::GMat> heads_pos = custom::ProcessPoses::on(angles_y, angles_p, angles_r);
|
||||
cv::GArray<cv::GMat> landmarks = cv::gapi::infer<custom::Landmarks>(faces_rc, in);
|
||||
cv::GArray<cv::Rect> left_eyes, right_eyes;
|
||||
std::tie(left_eyes, right_eyes) = custom::ParseEyes::on(landmarks, faces_rc, sz);
|
||||
cv::GArray<cv::GMat> gaze_vectors = cv::gapi::infer2<custom::Gaze>( in
|
||||
, left_eyes
|
||||
, right_eyes
|
||||
, heads_pos);
|
||||
cv::GComputation graph(cv::GIn(in),
|
||||
cv::GOut( cv::gapi::copy(in)
|
||||
, faces_rc
|
||||
, left_eyes
|
||||
, right_eyes
|
||||
, heads_pos
|
||||
, gaze_vectors));
|
||||
|
||||
const auto input_file_name = cmd.get<std::string>("input");
|
||||
const auto face_model_path = cmd.get<std::string>("facem");
|
||||
const auto head_model_path = cmd.get<std::string>("headm");
|
||||
const auto lmrk_model_path = cmd.get<std::string>("landm");
|
||||
const auto gaze_model_path = cmd.get<std::string>("gazem");
|
||||
|
||||
auto face_net = cv::gapi::ie::Params<custom::Faces> {
|
||||
face_model_path, // path to topology IR
|
||||
weights_path(face_model_path), // path to weights
|
||||
cmd.get<std::string>("faced"), /// device specifier
|
||||
};
|
||||
auto head_net = cv::gapi::ie::Params<custom::HeadPose> {
|
||||
head_model_path, // path to topology IR
|
||||
weights_path(head_model_path), // path to weights
|
||||
cmd.get<std::string>("headd"), // device specifier
|
||||
}.cfgOutputLayers({"angle_y_fc", "angle_p_fc", "angle_r_fc"});
|
||||
auto landmarks_net = cv::gapi::ie::Params<custom::Landmarks> {
|
||||
lmrk_model_path, // path to topology IR
|
||||
weights_path(lmrk_model_path), // path to weights
|
||||
cmd.get<std::string>("landd"), // device specifier
|
||||
};
|
||||
auto gaze_net = cv::gapi::ie::Params<custom::Gaze> {
|
||||
gaze_model_path, // path to topology IR
|
||||
weights_path(gaze_model_path), // path to weights
|
||||
cmd.get<std::string>("gazed"), // device specifier
|
||||
}.cfgInputLayers({"left_eye_image", "right_eye_image", "head_pose_angles"});
|
||||
|
||||
auto kernels = cv::gapi::kernels< custom::OCVSize
|
||||
, custom::OCVParseSSD
|
||||
, custom::OCVParseEyes
|
||||
, custom::OCVProcessPoses>();
|
||||
auto networks = cv::gapi::networks(face_net, head_net, landmarks_net, gaze_net);
|
||||
auto pipeline = graph.compileStreaming(cv::compile_args(networks, kernels));
|
||||
|
||||
cv::TickMeter tm;
|
||||
cv::Mat image;
|
||||
std::vector<cv::Rect> out_faces, out_right_eyes, out_left_eyes;
|
||||
std::vector<cv::Mat> out_poses;
|
||||
std::vector<cv::Mat> out_gazes;
|
||||
std::size_t frames = 0u;
|
||||
std::cout << "Reading " << input_file_name << std::endl;
|
||||
|
||||
pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input_file_name));
|
||||
pipeline.start();
|
||||
tm.start();
|
||||
while (pipeline.pull(cv::gout( image
|
||||
, out_faces
|
||||
, out_left_eyes
|
||||
, out_right_eyes
|
||||
, out_poses
|
||||
, out_gazes))) {
|
||||
frames++;
|
||||
// Visualize results on the frame
|
||||
for (auto &&rc : out_faces) vis::bbox(image, rc);
|
||||
for (auto &&rc : out_left_eyes) vis::bbox(image, rc);
|
||||
for (auto &&rc : out_right_eyes) vis::bbox(image, rc);
|
||||
for (std::size_t i = 0u; i < out_faces.size(); i++) {
|
||||
vis::pose(image, out_poses[i], out_faces[i]);
|
||||
vis::vvec(image, out_gazes[i], out_faces[i], out_left_eyes[i], out_right_eyes[i]);
|
||||
}
|
||||
tm.stop();
|
||||
const auto fps_str = std::to_string(frames / tm.getTimeSec()) + " FPS";
|
||||
cv::putText(image, fps_str, {0,32}, cv::FONT_HERSHEY_SIMPLEX, 1.0, {0,255,0}, 2);
|
||||
cv::imshow("Out", image);
|
||||
cv::waitKey(1);
|
||||
tm.start();
|
||||
}
|
||||
tm.stop();
|
||||
std::cout << "Processed " << frames << " frames"
|
||||
<< " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
195
modules/gapi/samples/infer_ie_onnx_hybrid.cpp
Normal file
195
modules/gapi/samples/infer_ie_onnx_hybrid.cpp
Normal file
@@ -0,0 +1,195 @@
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
|
||||
#include "opencv2/gapi.hpp"
|
||||
#include "opencv2/gapi/core.hpp"
|
||||
#include "opencv2/gapi/imgproc.hpp"
|
||||
#include "opencv2/gapi/infer.hpp"
|
||||
#include "opencv2/gapi/infer/ie.hpp"
|
||||
#include "opencv2/gapi/infer/onnx.hpp"
|
||||
#include "opencv2/gapi/cpu/gcpukernel.hpp"
|
||||
#include "opencv2/gapi/streaming/cap.hpp"
|
||||
|
||||
namespace {
|
||||
// Command-line option table in cv::CommandLineParser syntax:
// "{ name(s) | default value | help text }". No option has a default value.
const std::string keys =
    "{ h help | | print this help message }"
    "{ input | | Path to an input video file }"
    "{ fdm | | IE face detection model IR }"
    "{ fdw | | IE face detection model weights }"
    "{ fdd | | IE face detection device }"
    "{ emom | | ONNX emotions recognition model }"
    "{ output | | (Optional) Path to an output video file }"
    ;
|
||||
} // namespace
|
||||
|
||||
namespace custom {
|
||||
// Network declarations: name, signature (outputs(inputs)), and tag string.
G_API_NET(Faces,    <cv::GMat(cv::GMat)>, "face-detector");
G_API_NET(Emotions, <cv::GMat(cv::GMat)>, "emotions-recognition");

// Operation: (GMat, GMat) -> GArray<cv::Rect>.
// The output metadata does not depend on either input.
G_API_OP(PostProc, <cv::GArray<cv::Rect>(cv::GMat, cv::GMat)>, "custom.fd_postproc")
{
    static cv::GArrayDesc outMeta(const cv::GMatDesc &,
                                  const cv::GMatDesc &)
    {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// OpenCV-backend implementation of PostProc.
// Reads a 4-D detection tensor whose last dimension holds 7 floats per
// proposal: [image_id, label, confidence, left, top, right, bottom], with the
// box corners given relative to the frame. Produces face rectangles in pixels,
// clipped to the frame.
//   in_ssd_result - detector output
//   in_frame      - frame the detections refer to; only its size is used
//   out_faces     - cleared, then filled with the accepted rectangles
GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
    static void run(const cv::Mat &in_ssd_result,
                    const cv::Mat &in_frame,
                    std::vector<cv::Rect> &out_faces) {
        // Take the proposal count from the tensor itself rather than assuming
        // 200, so a smaller detector output is never read out of bounds.
        const auto &in_ssd_dims = in_ssd_result.size;
        CV_Assert(in_ssd_dims.dims() == 4);

        const int MAX_PROPOSALS = in_ssd_dims[2];
        const int OBJECT_SIZE   = in_ssd_dims[3];
        CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size

        const cv::Size upscale = in_frame.size();
        const cv::Rect surface(cv::Point(0, 0), upscale);

        out_faces.clear();

        const float *data = in_ssd_result.ptr<float>();
        for (int i = 0; i < MAX_PROPOSALS; i++) {
            const float *object = data + i * OBJECT_SIZE;
            const float image_id   = object[0]; // batch id
            const float confidence = object[2];
            const float rc_left    = object[3];
            const float rc_top     = object[4];
            const float rc_right   = object[5];
            const float rc_bottom  = object[6];

            if (image_id < 0.f) {  // indicates end of detections
                break;
            }
            if (confidence < 0.5f) {
                continue;
            }

            // Map relative coordinates to pixels of the input frame.
            cv::Rect rc;
            rc.x      = static_cast<int>(rc_left   * upscale.width);
            rc.y      = static_cast<int>(rc_top    * upscale.height);
            rc.width  = static_cast<int>(rc_right  * upscale.width)  - rc.x;
            rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
            out_faces.push_back(rc & surface);
        }
    }
};
|
||||
//! [Postproc]
|
||||
|
||||
} // namespace custom
|
||||
|
||||
namespace labels {
|
||||
// Emotion class names, indexed by the position of the winning score.
// Labels as defined in
// https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus
const std::string emotions[] = {
    "neutral",
    "happiness",
    "surprise",
    "sadness",
    "anger",
    "disgust",
    "fear",
    "contempt"
};
|
||||
namespace {
|
||||
// Computes the softmax of the values in [begin, end).
// Returns a vector of the same length whose elements are exp(x_i) / sum_j exp(x_j).
// An empty range yields an empty vector.
template<typename Iter>
std::vector<float> softmax(Iter begin, Iter end) {
    std::vector<float> prob(end - begin, 0.f);
    if (prob.empty()) {
        return prob;
    }

    // Subtracting the largest input before exponentiating leaves the result
    // mathematically unchanged but keeps exp() from overflowing to infinity
    // (which would turn every output into NaN) for large scores.
    const float peak = *std::max_element(begin, end);
    std::transform(begin, end, prob.begin(),
                   [peak](float x) { return std::exp(x - peak); });

    // The largest element contributes exp(0) == 1, so the sum is never zero.
    const float sum = std::accumulate(prob.begin(), prob.end(), 0.0f);
    for (float &p : prob) {
        p /= sum;
    }
    return prob;
}
|
||||
|
||||
// Draws every face rectangle on the frame, labels it with the emotion whose
// softmax score is highest, and logs the label and rectangle to stdout.
//   frame        - image to draw on
//   faces        - face rectangles
//   out_emotions - one score matrix per face; its first 8 floats are read
// Asserts that both vectors have the same length.
void DrawResults(cv::Mat &frame,
                 const std::vector<cv::Rect> &faces,
                 const std::vector<cv::Mat> &out_emotions) {
    CV_Assert(faces.size() == out_emotions.size());

    const int ATTRIB_OFFSET = 15;
    const std::size_t face_count = faces.size();

    for (std::size_t idx = 0; idx < face_count; ++idx) {
        const cv::Rect &rc = faces[idx];

        // Turn the raw scores into probabilities and pick the best class.
        const float *scores = out_emotions[idx].ptr<float>();
        const std::vector<float> sm = softmax(scores, scores + 8);
        const auto best = std::max_element(sm.begin(), sm.end());
        const auto emo_id = std::distance(sm.begin(), best);

        cv::rectangle(frame, rc, {0, 255, 0}, 4);
        // The label is placed ATTRIB_OFFSET pixels above the box's top edge.
        cv::putText(frame, emotions[emo_id],
                    cv::Point(rc.x, rc.y - ATTRIB_OFFSET),
                    cv::FONT_HERSHEY_COMPLEX_SMALL,
                    1,
                    cv::Scalar(0, 0, 255));

        std::cout << emotions[emo_id] << " at " << rc << std::endl;
    }
}
|
||||
} // anonymous namespace
|
||||
} // namespace labels
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
const std::string input = cmd.get<std::string>("input");
|
||||
const std::string output = cmd.get<std::string>("output");
|
||||
|
||||
// OpenVINO FD parameters here
|
||||
auto det_net = cv::gapi::ie::Params<custom::Faces> {
|
||||
cmd.get<std::string>("fdm"), // read cmd args: path to topology IR
|
||||
cmd.get<std::string>("fdw"), // read cmd args: path to weights
|
||||
cmd.get<std::string>("fdd"), // read cmd args: device specifier
|
||||
};
|
||||
|
||||
// ONNX Emotions parameters here
|
||||
auto emo_net = cv::gapi::onnx::Params<custom::Emotions> {
|
||||
cmd.get<std::string>("emom"), // read cmd args: path to the ONNX model
|
||||
}.cfgNormalize({false}); // model accepts 0..255 range in FP32
|
||||
|
||||
auto kernels = cv::gapi::kernels<custom::OCVPostProc>();
|
||||
auto networks = cv::gapi::networks(det_net, emo_net);
|
||||
|
||||
cv::GMat in;
|
||||
cv::GMat bgr = cv::gapi::copy(in);
|
||||
cv::GMat frame = cv::gapi::streaming::desync(bgr);
|
||||
cv::GMat detections = cv::gapi::infer<custom::Faces>(frame);
|
||||
cv::GArray<cv::Rect> faces = custom::PostProc::on(detections, frame);
|
||||
cv::GArray<cv::GMat> emotions = cv::gapi::infer<custom::Emotions>(faces, frame);
|
||||
auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, faces, emotions))
|
||||
.compileStreaming(cv::compile_args(kernels, networks));
|
||||
|
||||
auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input);
|
||||
pipeline.setSource(cv::gin(in_src));
|
||||
pipeline.start();
|
||||
|
||||
cv::util::optional<cv::Mat> out_frame;
|
||||
cv::util::optional<std::vector<cv::Rect>> out_faces;
|
||||
cv::util::optional<std::vector<cv::Mat>> out_emotions;
|
||||
|
||||
cv::Mat last_mat;
|
||||
std::vector<cv::Rect> last_faces;
|
||||
std::vector<cv::Mat> last_emotions;
|
||||
|
||||
cv::VideoWriter writer;
|
||||
|
||||
while (pipeline.pull(cv::gout(out_frame, out_faces, out_emotions))) {
|
||||
if (out_faces && out_emotions) {
|
||||
last_faces = *out_faces;
|
||||
last_emotions = *out_emotions;
|
||||
}
|
||||
if (out_frame) {
|
||||
last_mat = *out_frame;
|
||||
labels::DrawResults(last_mat, last_faces, last_emotions);
|
||||
|
||||
if (!output.empty()) {
|
||||
if (!writer.isOpened()) {
|
||||
const auto sz = cv::Size{last_mat.cols, last_mat.rows};
|
||||
writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
|
||||
CV_Assert(writer.isOpened());
|
||||
}
|
||||
writer << last_mat;
|
||||
}
|
||||
}
|
||||
if (!last_mat.empty()) {
|
||||
cv::imshow("Out", last_mat);
|
||||
cv::waitKey(1);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
264
modules/gapi/samples/infer_single_roi.cpp
Normal file
264
modules/gapi/samples/infer_single_roi.cpp
Normal file
@@ -0,0 +1,264 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
#include <opencv2/gapi/infer.hpp>
|
||||
#include <opencv2/gapi/render.hpp>
|
||||
#include <opencv2/gapi/infer/ie.hpp>
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
#include <opencv2/gapi/streaming/cap.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
const std::string keys =
|
||||
"{ h help | | Print this help message }"
|
||||
"{ input | | Path to the input video file }"
|
||||
"{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }"
|
||||
"{ faced | CPU | Target device for face detection model (e.g. CPU, GPU, VPU, ...) }"
|
||||
"{ r roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. Identified automatically when not set }";
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds the path to the weights file (.bin) from the path to the model
// topology file (.xml): the 4-character extension is replaced, the rest of
// the path is kept as is. The extension check is case-insensitive.
// Asserts if the path is not longer than the extension or does not end
// with ".xml".
std::string weights_path(const std::string &model_path) {
    const auto EXT_LEN = 4u;
    const auto path_len = model_path.size();
    CV_Assert(path_len > EXT_LEN);

    std::string suffix = model_path.substr(path_len - EXT_LEN);
    for (char &ch : suffix) {
        // std::tolower() must be given a value representable as unsigned char
        ch = static_cast<unsigned char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    CV_Assert(suffix == ".xml");

    const std::string stem = model_path.substr(0u, path_len - EXT_LEN);
    return stem + ".bin";
}
|
||||
|
||||
// Parses a region of interest given as "x,y,width,height".
//
// Returns an empty optional if
// - any of the four numbers or three delimiters could not be extracted;
// - a delimiter is not a comma;
// - the origin is negative or the size is not positive.
// Otherwise returns the parsed rectangle.
cv::util::optional<cv::Rect> parse_roi(const std::string &rc) {
    cv::Rect rv;
    char delim[3] = {}; // zero-initialized so it is never read uninitialized

    std::stringstream is(rc);
    is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height;
    // fail() reports both parse errors (failbit) and stream errors (badbit);
    // bad() alone would let malformed input through
    if (is.fail()) {
        return cv::util::optional<cv::Rect>(); // empty value
    }
    const auto is_delim = [](char c) {
        return c == ',';
    };
    if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) {
        return cv::util::optional<cv::Rect>(); // empty value
    }
    if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) {
        return cv::util::optional<cv::Rect>(); // empty value
    }
    return cv::util::make_optional(std::move(rv));
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace custom {
|
||||
|
||||
G_API_NET(FaceDetector, <cv::GMat(cv::GMat)>, "face-detector");
|
||||
|
||||
using GDetections = cv::GArray<cv::Rect>;
|
||||
using GRect = cv::GOpaque<cv::Rect>;
|
||||
using GSize = cv::GOpaque<cv::Size>;
|
||||
using GPrims = cv::GArray<cv::gapi::wip::draw::Prim>;
|
||||
|
||||
// Operation declaration: GMat -> GOpaque<cv::Size>.
G_API_OP(GetSize, <GSize(cv::GMat)>, "sample.custom.get-size") {
    // The output descriptor does not depend on the input one
    static cv::GOpaqueDesc outMeta(const cv::GMatDesc & /* in */) {
        return cv::empty_gopaque_desc();
    }
};
|
||||
|
||||
// Operation declaration: GMat -> GOpaque<cv::Rect>.
G_API_OP(LocateROI, <GRect(cv::GMat)>, "sample.custom.locate-roi") {
    // The output descriptor does not depend on the input one
    static cv::GOpaqueDesc outMeta(const cv::GMatDesc & /* in */) {
        return cv::empty_gopaque_desc();
    }
};
|
||||
|
||||
// Operation declaration: (GMat, GOpaque<cv::Rect>, GOpaque<cv::Size>) -> GArray<cv::Rect>.
G_API_OP(ParseSSD, <GDetections(cv::GMat, GRect, GSize)>, "sample.custom.parse-ssd") {
    // The output descriptor does not depend on the input ones
    static cv::GArrayDesc outMeta(const cv::GMatDesc &    /* blob */,
                                  const cv::GOpaqueDesc & /* roi  */,
                                  const cv::GOpaqueDesc & /* size */) {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// Operation declaration: (GArray<cv::Rect>, GOpaque<cv::Rect>) -> GArray<draw::Prim>.
G_API_OP(BBoxes, <GPrims(GDetections, GRect)>, "sample.custom.b-boxes") {
    // The output descriptor does not depend on the input ones
    static cv::GArrayDesc outMeta(const cv::GArrayDesc &  /* detections */,
                                  const cv::GOpaqueDesc & /* roi        */) {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// OpenCV implementation of GetSize: reports the input image dimensions.
GAPI_OCV_KERNEL(OCVGetSize, GetSize) {
    static void run(const cv::Mat &in, cv::Size &out) {
        out = cv::Size(in.cols, in.rows); // (width, height)
    }
};
|
||||
|
||||
// OpenCV implementation of LocateROI.
//
// This is the extension point for extra analytics selecting the region of
// interest where objects should be detected (or any other inference run).
// At the moment nothing intelligent happens here: the ROI is simply the
// largest square placed around the image center (square is the most
// convenient aspect ratio for detectors to use). No padding is applied.
GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) {
    static void run(const cv::Mat &in_mat, cv::Rect &out_rect) {
        // Square side is the smaller image dimension
        const int side     = std::min(in_mat.cols, in_mat.rows);
        const int center_x = in_mat.cols / 2;
        const int center_y = in_mat.rows / 2;

        out_rect = cv::Rect(center_x - side / 2,
                            center_y - side / 2,
                            side,
                            side);
    }
};
|
||||
|
||||
// OpenCV implementation of ParseSSD for inference run on a region of interest.
//
// The input blob is expected to be 4-dimensional with dims[2] records of
// dims[3] == 7 floats each:
//   [image_id, label, confidence, left, top, right, bottom]
// Box coordinates are relative; they are scaled by the ROI size, shifted by
// the ROI origin and finally clipped by the parent image rectangle.
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
    static void run(const cv::Mat &in_ssd_result,
                    const cv::Rect &in_roi,
                    const cv::Size &in_parent_size,
                    std::vector<cv::Rect> &out_objects) {
        const auto &blob_dims = in_ssd_result.size;
        CV_Assert(blob_dims.dims() == 4u);

        const int num_proposals = blob_dims[2];
        const int record_len    = blob_dims[3];
        CV_Assert(record_len == 7); // fixed SSD object size

        const cv::Size roi_size = in_roi.size();
        const cv::Rect frame_rect({0,0}, in_parent_size);

        out_objects.clear();

        const float *rec = in_ssd_result.ptr<float>();
        for (int i = 0; i < num_proposals; ++i, rec += record_len) {
            const float image_id   = rec[0];
            const float confidence = rec[2];

            if (image_id < 0.f) {
                break;    // marks end-of-detections
            }
            if (confidence < 0.5f) {
                continue; // skip objects with low confidence
            }

            // Relative -> absolute coordinates within the ROI
            const int left   = static_cast<int>(rec[3] * roi_size.width);
            const int top    = static_cast<int>(rec[4] * roi_size.height);
            const int right  = static_cast<int>(rec[5] * roi_size.width);
            const int bottom = static_cast<int>(rec[6] * roi_size.height);

            // ROI -> parent image coordinates
            const cv::Rect object(left + in_roi.x,
                                  top  + in_roi.y,
                                  right  - left,
                                  bottom - top);
            out_objects.emplace_back(object & frame_rect);
        }
    }
};
|
||||
|
||||
// OpenCV implementation of BBoxes: converts rectangles into G-API's
// rendering primitives. The ROI goes first (cyan), the detected faces
// follow (green); all rectangles are drawn with thickness 2.
GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) {
    static void run(const std::vector<cv::Rect> &in_face_rcs,
                    const cv::Rect &in_roi,
                    std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
        namespace draw = cv::gapi::wip::draw;
        const int THICKNESS = 2;

        out_prims.clear();
        out_prims.emplace_back(draw::Rect(in_roi, CV_RGB(0,255,255), THICKNESS)); // cyan
        for (const cv::Rect &face : in_face_rcs) {
            out_prims.emplace_back(draw::Rect(face, CV_RGB(0,255,0), THICKNESS)); // green
        }
    }
};
|
||||
|
||||
} // namespace custom
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Prepare parameters first
|
||||
const std::string input = cmd.get<std::string>("input");
|
||||
const auto opt_roi = parse_roi(cmd.get<std::string>("roi"));
|
||||
|
||||
const auto face_model_path = cmd.get<std::string>("facem");
|
||||
auto face_net = cv::gapi::ie::Params<custom::FaceDetector> {
|
||||
face_model_path, // path to topology IR
|
||||
weights_path(face_model_path), // path to weights
|
||||
cmd.get<std::string>("faced"), // device specifier
|
||||
};
|
||||
auto kernels = cv::gapi::kernels
|
||||
< custom::OCVGetSize
|
||||
, custom::OCVLocateROI
|
||||
, custom::OCVParseSSD
|
||||
, custom::OCVBBoxes>();
|
||||
auto networks = cv::gapi::networks(face_net);
|
||||
|
||||
// Now build the graph. The graph structure may vary
|
||||
// pased on the input parameters
|
||||
cv::GStreamingCompiled pipeline;
|
||||
auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
|
||||
|
||||
if (opt_roi.has_value()) {
|
||||
// Use the value provided by user
|
||||
std::cout << "Will run inference for static region "
|
||||
<< opt_roi.value()
|
||||
<< " only"
|
||||
<< std::endl;
|
||||
cv::GMat in;
|
||||
cv::GOpaque<cv::Rect> in_roi;
|
||||
auto blob = cv::gapi::infer<custom::FaceDetector>(in_roi, in);
|
||||
auto rcs = custom::ParseSSD::on(blob, in_roi, custom::GetSize::on(in));
|
||||
auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, in_roi));
|
||||
pipeline = cv::GComputation(cv::GIn(in, in_roi), cv::GOut(out))
|
||||
.compileStreaming(cv::compile_args(kernels, networks));
|
||||
|
||||
// Since the ROI to detect is manual, make it part of the input vector
|
||||
inputs.push_back(cv::gin(opt_roi.value())[0]);
|
||||
} else {
|
||||
// Automatically detect ROI to infer. Make it output parameter
|
||||
std::cout << "ROI is not set or invalid. Locating it automatically"
|
||||
<< std::endl;
|
||||
cv::GMat in;
|
||||
cv::GOpaque<cv::Rect> roi = custom::LocateROI::on(in);
|
||||
auto blob = cv::gapi::infer<custom::FaceDetector>(roi, in);
|
||||
auto rcs = custom::ParseSSD::on(blob, roi, custom::GetSize::on(in));
|
||||
auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, roi));
|
||||
pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out))
|
||||
.compileStreaming(cv::compile_args(kernels, networks));
|
||||
}
|
||||
|
||||
// The execution part
|
||||
pipeline.setSource(std::move(inputs));
|
||||
pipeline.start();
|
||||
|
||||
cv::Mat out;
|
||||
while (pipeline.pull(cv::gout(out))) {
|
||||
cv::imshow("Out", out);
|
||||
cv::waitKey(1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
213
modules/gapi/samples/infer_ssd_onnx.cpp
Normal file
213
modules/gapi/samples/infer_ssd_onnx.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
#include <opencv2/gapi/infer.hpp>
|
||||
#include <opencv2/gapi/render.hpp>
|
||||
#include <opencv2/gapi/infer/onnx.hpp>
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
#include <opencv2/gapi/streaming/cap.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
namespace custom {
|
||||
|
||||
G_API_NET(ObjDetector, <cv::GMat(cv::GMat)>, "object-detector");
|
||||
|
||||
using GDetections = cv::GArray<cv::Rect>;
|
||||
using GSize = cv::GOpaque<cv::Size>;
|
||||
using GPrims = cv::GArray<cv::gapi::wip::draw::Prim>;
|
||||
|
||||
// Operation declaration: GMat -> GOpaque<cv::Size>.
G_API_OP(GetSize, <GSize(cv::GMat)>, "sample.custom.get-size") {
    // The output descriptor does not depend on the input one
    static cv::GOpaqueDesc outMeta(const cv::GMatDesc & /* in */) {
        return cv::empty_gopaque_desc();
    }
};
|
||||
// Operation declaration: (GMat, GOpaque<cv::Size>) -> GArray<cv::Rect>.
G_API_OP(ParseSSD, <GDetections(cv::GMat, GSize)>, "sample.custom.parse-ssd") {
    // The output descriptor does not depend on the input ones
    static cv::GArrayDesc outMeta(const cv::GMatDesc &    /* blob */,
                                  const cv::GOpaqueDesc & /* size */) {
        return cv::empty_array_desc();
    }
};
|
||||
// Operation declaration: GArray<cv::Rect> -> GArray<draw::Prim>.
G_API_OP(BBoxes, <GPrims(GDetections)>, "sample.custom.b-boxes") {
    // The output descriptor does not depend on the input one
    static cv::GArrayDesc outMeta(const cv::GArrayDesc & /* detections */) {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// OpenCV implementation of GetSize: reports the input image dimensions.
GAPI_OCV_KERNEL(OCVGetSize, GetSize) {
    static void run(const cv::Mat &in, cv::Size &out) {
        out = cv::Size(in.cols, in.rows); // (width, height)
    }
};
|
||||
// OpenCV implementation of ParseSSD.
//
// The input blob is expected to be 4-dimensional with dims[2] records of
// dims[3] == 7 floats each:
//   [image_id, label, confidence, left, top, right, bottom]
// Box coordinates are relative; they are scaled by the parent image size
// and clipped by the parent image rectangle.
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
    static void run(const cv::Mat &in_ssd_result,
                    const cv::Size &in_parent_size,
                    std::vector<cv::Rect> &out_objects) {
        const auto &blob_dims = in_ssd_result.size;
        CV_Assert(blob_dims.dims() == 4u);

        const int num_proposals = blob_dims[2];
        const int record_len    = blob_dims[3];
        CV_Assert(record_len == 7); // fixed SSD object size

        const cv::Rect frame_rect({0,0}, in_parent_size);

        out_objects.clear();

        const float *rec = in_ssd_result.ptr<float>();
        for (int i = 0; i < num_proposals; ++i, rec += record_len) {
            const float image_id   = rec[0];
            const float confidence = rec[2];

            if (image_id < 0.f) {
                break;    // marks end-of-detections
            }
            if (confidence < 0.5f) {
                continue; // skip objects with low confidence
            }

            // map relative coordinates to the original image scale
            const int left   = static_cast<int>(rec[3] * in_parent_size.width);
            const int top    = static_cast<int>(rec[4] * in_parent_size.height);
            const int right  = static_cast<int>(rec[5] * in_parent_size.width);
            const int bottom = static_cast<int>(rec[6] * in_parent_size.height);

            const cv::Rect object(left, top, right - left, bottom - top);
            out_objects.emplace_back(object & frame_rect);
        }
    }
};
|
||||
// OpenCV implementation of BBoxes: converts the rectangles into G-API's
// rendering primitives (green, thickness 2) and prints the list of
// rectangles to stdout.
GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) {
    static void run(const std::vector<cv::Rect> &in_obj_rcs,
                    std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
        namespace draw = cv::gapi::wip::draw;
        const int THICKNESS = 2;

        out_prims.clear();
        for (const cv::Rect &object : in_obj_rcs) {
            out_prims.emplace_back(draw::Rect(object, CV_RGB(0,255,0), THICKNESS)); // green
        }

        std::cout << "Detections:";
        for (const cv::Rect &object : in_obj_rcs) {
            std::cout << ' ' << object;
        }
        std::cout << std::endl;
    }
};
|
||||
|
||||
} // namespace custom
|
||||
|
||||
namespace {
|
||||
// Post-processing function for the ONNX SSD model.
//
// Assembles ONNX-processed outputs back to a single blob of 7-float records
//   [image_id, label, confidence, left, top, right, bottom]
// to preserve compatibility with OpenVINO-based SSD pipeline.
//
// onnx - model outputs by layer name; "num_detections:0",
//        "detection_boxes:0", "detection_scores:0" and
//        "detection_classes:0" must be present and hold FP32 data
// gapi - G-API outputs by name; "detection_output" must be present,
//        its dimension #2 is taken as the record capacity
//
// At most as many records as the output blob can hold are written. If
// there is room left, the record following the last one gets image_id -1
// as the end-of-detections mark.
void remap_ssd_ports(const std::unordered_map<std::string, cv::Mat> &onnx,
                           std::unordered_map<std::string, cv::Mat> &gapi) {
    const cv::Mat &num_detections    = onnx.at("num_detections:0");
    const cv::Mat &detection_boxes   = onnx.at("detection_boxes:0");
    const cv::Mat &detection_scores  = onnx.at("detection_scores:0");
    const cv::Mat &detection_classes = onnx.at("detection_classes:0");

    GAPI_Assert(num_detections.depth()    == CV_32F);
    GAPI_Assert(detection_boxes.depth()   == CV_32F);
    GAPI_Assert(detection_scores.depth()  == CV_32F);
    GAPI_Assert(detection_classes.depth() == CV_32F);

    cv::Mat &ssd_output = gapi.at("detection_output");

    // Never write more records than the output blob can hold
    const int max_objects = ssd_output.size[2];
    const int num_objects = std::min(static_cast<int>(num_detections.ptr<float>()[0]),
                                     max_objects);

    const float *in_boxes   = detection_boxes.ptr<float>();
    const float *in_scores  = detection_scores.ptr<float>();
    const float *in_classes = detection_classes.ptr<float>();
    float *ptr = ssd_output.ptr<float>();

    for (int i = 0; i < num_objects; i++) {
        // Box #i occupies 4 consecutive floats; judging by the mapping
        // below, their order is [top, left, bottom, right]
        const float *box = in_boxes + 4*i;

        ptr[0] = 0.f;           // "image_id"
        ptr[1] = in_classes[i]; // "label"
        ptr[2] = in_scores[i];  // "confidence"
        ptr[3] = box[1];        // left
        ptr[4] = box[0];        // top
        ptr[5] = box[3];        // right
        ptr[6] = box[2];        // bottom

        ptr += 7;
    }
    if (num_objects < max_objects) {
        // put a -1 mark at the end of output blob if there is space left
        ptr[0] = -1.f;
    }
}
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
const std::string keys =
|
||||
"{ h help | | Print this help message }"
|
||||
"{ input | | Path to the input video file }"
|
||||
"{ output | | (Optional) path to output video file }"
|
||||
"{ detm | | Path to an ONNX SSD object detection model (.onnx) }"
|
||||
;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Prepare parameters first
|
||||
const std::string input = cmd.get<std::string>("input");
|
||||
const std::string output = cmd.get<std::string>("output");
|
||||
const auto obj_model_path = cmd.get<std::string>("detm");
|
||||
|
||||
auto obj_net = cv::gapi::onnx::Params<custom::ObjDetector>{obj_model_path}
|
||||
.cfgOutputLayers({"detection_output"})
|
||||
.cfgPostProc({cv::GMatDesc{CV_32F, {1,1,200,7}}}, remap_ssd_ports);
|
||||
auto kernels = cv::gapi::kernels< custom::OCVGetSize
|
||||
, custom::OCVParseSSD
|
||||
, custom::OCVBBoxes>();
|
||||
auto networks = cv::gapi::networks(obj_net);
|
||||
|
||||
// Now build the graph
|
||||
cv::GMat in;
|
||||
auto blob = cv::gapi::infer<custom::ObjDetector>(in);
|
||||
auto rcs = custom::ParseSSD::on(blob, custom::GetSize::on(in));
|
||||
auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs));
|
||||
cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out))
|
||||
.compileStreaming(cv::compile_args(kernels, networks));
|
||||
|
||||
auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
|
||||
|
||||
// The execution part
|
||||
pipeline.setSource(std::move(inputs));
|
||||
pipeline.start();
|
||||
|
||||
cv::VideoWriter writer;
|
||||
|
||||
cv::Mat outMat;
|
||||
while (pipeline.pull(cv::gout(outMat))) {
|
||||
cv::imshow("Out", outMat);
|
||||
cv::waitKey(1);
|
||||
if (!output.empty()) {
|
||||
if (!writer.isOpened()) {
|
||||
const auto sz = cv::Size{outMat.cols, outMat.rows};
|
||||
writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
|
||||
CV_Assert(writer.isOpened());
|
||||
}
|
||||
writer << outMat;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
157
modules/gapi/samples/kernel_api_snippets.cpp
Normal file
157
modules/gapi/samples/kernel_api_snippets.cpp
Normal file
@@ -0,0 +1,157 @@
|
||||
// [filter2d_api]
|
||||
#include <opencv2/gapi.hpp>
|
||||
|
||||
// Kernel interface declaration for a 2D filter operation.
G_TYPED_KERNEL(GFilter2D,
               <cv::GMat(cv::GMat,int,cv::Mat,cv::Point,double,int,cv::Scalar)>,
               "org.opencv.imgproc.filters.filter2D")
{
    // Output metadata: same as the input one but with the requested depth.
    // Only the first two arguments take part in the calculation.
    static cv::GMatDesc                 // outMeta's return value type
    outMeta(cv::GMatDesc in,            // descriptor of input GMat
            int          ddepth,        // depth parameter
            cv::Mat      /* coeffs */,  // (unused)
            cv::Point    /* anchor */,  // (unused)
            double       /* scale  */,  // (unused)
            int          /* border */,  // (unused)
            cv::Scalar   /* bvalue */)  // (unused)
    {
        return in.withDepth(ddepth);
    }
};
|
||||
// [filter2d_api]
|
||||
|
||||
// Forward declaration of the filter2D() wrapper (no default arguments here).
cv::GMat filter2D(cv::GMat   in,
                  int        ddepth,
                  cv::Mat    k,
                  cv::Point  anchor,
                  double     scale,
                  int        border,
                  cv::Scalar bval);
|
||||
|
||||
// [filter2d_wrap]
|
||||
// Convenience wrapper over GFilter2D::on(): supplies default values for
// the trailing arguments and forwards everything in the same order.
cv::GMat filter2D(cv::GMat   in,
                  int        ddepth,
                  cv::Mat    k,
                  cv::Point  anchor  = cv::Point(-1,-1),
                  double     scale   = 0.,
                  int        border  = cv::BORDER_DEFAULT,
                  cv::Scalar bval    = cv::Scalar(0))
{
    return GFilter2D::on(in, ddepth, k, anchor, scale, border, bval);
}
|
||||
// [filter2d_wrap]
|
||||
|
||||
// [compound]
|
||||
#include <opencv2/gapi/gcompoundkernel.hpp> // GAPI_COMPOUND_KERNEL()
|
||||
|
||||
using PointArray2f = cv::GArray<cv::Point2f>;
|
||||
|
||||
// Kernel interface declaration: (GMat, int, double, double, int, double)
// -> GArray<cv::Point2f>.
G_TYPED_KERNEL(HarrisCorners,
               <PointArray2f(cv::GMat,int,double,double,int,double)>,
               "org.opencv.imgproc.harris_corner")
{
    static cv::GArrayDesc outMeta(const cv::GMatDesc &, int, double, double, int, double)
    {
        // No special metadata for arrays in G-API (yet)
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// Define Fluid-backend-local kernels which form GoodFeatures
|
||||
// Kernel interface declaration: (GMat, double, int, double) -> GMat.
G_TYPED_KERNEL(HarrisResponse,
               <cv::GMat(cv::GMat,double,int,double)>,
               "org.opencv.fluid.harris_response")
{
    // Output metadata: the input one with type changed to 1-channel CV_32F
    static cv::GMatDesc outMeta(const cv::GMatDesc &in, double, int, double)
    {
        return in.withType(CV_32F, 1);
    }
};
|
||||
|
||||
// Kernel interface declaration: (GMat, int, double) -> GArray<cv::Point2f>.
G_TYPED_KERNEL(ArrayNMS,
               <PointArray2f(cv::GMat,int,double)>,
               "org.opencv.cpu.nms_array")
{
    // The output descriptor does not depend on the inputs
    static cv::GArrayDesc outMeta(const cv::GMatDesc &, int, double)
    {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// Compound implementation of HarrisCorners: the operation is expanded
// into HarrisResponse followed by ArrayNMS.
GAPI_COMPOUND_KERNEL(GFluidHarrisCorners, HarrisCorners)
{
    static PointArray2f
    expand(cv::GMat in,
           int      maxCorners,
           double   quality,
           double   minDist,
           int      blockSize,
           double   k)
    {
        // Response map first, then point selection on top of it
        cv::GMat response = HarrisResponse::on(in, quality, blockSize, k);
        PointArray2f corners = ArrayNMS::on(response, maxCorners, minDist);
        return corners;
    }
};
|
||||
|
||||
// Then implement HarrisResponse as Fluid kernel and ArrayNMS
|
||||
// as a generic (OpenCV) kernel
|
||||
// [compound]
|
||||
|
||||
// [filter2d_ocv]
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp> // GAPI_OCV_KERNEL()
|
||||
#include <opencv2/imgproc.hpp> // cv::filter2D()
|
||||
|
||||
// OpenCV implementation of GFilter2D: forwards the arguments to
// cv::filter2D(). The border value (Scalar) argument is not used.
GAPI_OCV_KERNEL(GCPUFilter2D, GFilter2D)
{
    static void
    run(const cv::Mat    &in,       // in - derived from GMat
        const int         ddepth,   // opaque (passed as-is)
        const cv::Mat    &k,        // opaque (passed as-is)
        const cv::Point  &anchor,   // opaque (passed as-is)
        const double      delta,    // opaque (passed as-is)
        const int         border,   // opaque (passed as-is)
        const cv::Scalar &,         // opaque (passed as-is)
        cv::Mat          &out)      // out - derived from GMat (retval)
    {
        cv::filter2D(in, out, ddepth, k, anchor, delta, border);
    }
};
|
||||
// [filter2d_ocv]
|
||||
|
||||
int main(int, char *[])
|
||||
{
|
||||
std::cout << "This sample is non-complete. It is used as code snippents in documentation." << std::endl;
|
||||
|
||||
cv::Mat conv_kernel_mat;
|
||||
|
||||
{
|
||||
// [filter2d_on]
|
||||
cv::GMat in;
|
||||
cv::GMat out = GFilter2D::on(/* GMat */ in,
|
||||
/* int */ -1,
|
||||
/* Mat */ conv_kernel_mat,
|
||||
/* Point */ cv::Point(-1,-1),
|
||||
/* double */ 0.,
|
||||
/* int */ cv::BORDER_DEFAULT,
|
||||
/* Scalar */ cv::Scalar(0));
|
||||
// [filter2d_on]
|
||||
}
|
||||
|
||||
{
|
||||
// [filter2d_wrap_call]
|
||||
cv::GMat in;
|
||||
cv::GMat out = filter2D(in, -1, conv_kernel_mat);
|
||||
// [filter2d_wrap_call]
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
216
modules/gapi/samples/privacy_masking_camera.cpp
Normal file
216
modules/gapi/samples/privacy_masking_camera.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cctype>
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/imgcodecs.hpp>
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
#include <opencv2/gapi/infer.hpp>
|
||||
#include <opencv2/gapi/render.hpp>
|
||||
#include <opencv2/gapi/infer/ie.hpp>
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
#include <opencv2/gapi/streaming/cap.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
const std::string about =
|
||||
"This is an OpenCV-based version of Privacy Masking Camera example";
|
||||
const std::string keys =
|
||||
"{ h help | | Print this help message }"
|
||||
"{ input | | Path to the input video file }"
|
||||
"{ platm | vehicle-license-plate-detection-barrier-0106.xml | Path to OpenVINO IE vehicle/plate detection model (.xml) }"
|
||||
"{ platd | CPU | Target device for vehicle/plate detection model (e.g. CPU, GPU, VPU, ...) }"
|
||||
"{ facem | face-detection-retail-0005.xml | Path to OpenVINO IE face detection model (.xml) }"
|
||||
"{ faced | CPU | Target device for face detection model (e.g. CPU, GPU, VPU, ...) }"
|
||||
"{ trad | false | Run processing in a traditional (non-pipelined) way }"
|
||||
"{ noshow | false | Don't display UI (improves performance) }";
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds the path to the weights file (.bin) from the path to the model
// topology file (.xml): the 4-character extension is replaced, the rest of
// the path is kept as is. The extension check is case-insensitive.
// Asserts if the path is not longer than the extension or does not end
// with ".xml".
std::string weights_path(const std::string &model_path) {
    const auto EXT_LEN = 4u;
    const auto path_len = model_path.size();
    CV_Assert(path_len > EXT_LEN);

    std::string suffix = model_path.substr(path_len - EXT_LEN);
    for (char &ch : suffix) {
        // std::tolower() must be given a value representable as unsigned char
        ch = static_cast<unsigned char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    CV_Assert(suffix == ".xml");

    const std::string stem = model_path.substr(0u, path_len - EXT_LEN);
    return stem + ".bin";
}
|
||||
} // namespace
|
||||
|
||||
namespace custom {
|
||||
|
||||
G_API_NET(VehLicDetector, <cv::GMat(cv::GMat)>, "vehicle-license-plate-detector");
|
||||
G_API_NET(FaceDetector, <cv::GMat(cv::GMat)>, "face-detector");
|
||||
|
||||
using GDetections = cv::GArray<cv::Rect>;
|
||||
|
||||
// Operation declaration: (GMat, GMat, int) -> GArray<cv::Rect>.
G_API_OP(ParseSSD, <GDetections(cv::GMat, cv::GMat, int)>, "custom.privacy_masking.postproc") {
    // The output descriptor does not depend on the inputs
    static cv::GArrayDesc outMeta(const cv::GMatDesc & /* blob  */,
                                  const cv::GMatDesc & /* frame */,
                                  int                  /* label */) {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
using GPrims = cv::GArray<cv::gapi::wip::draw::Prim>;
|
||||
|
||||
// Operation declaration: (GArray<cv::Rect>, GArray<cv::Rect>) -> GArray<draw::Prim>.
G_API_OP(ToMosaic, <GPrims(GDetections, GDetections)>, "custom.privacy_masking.to_mosaic") {
    // The output descriptor does not depend on the inputs
    static cv::GArrayDesc outMeta(const cv::GArrayDesc & /* first  */,
                                  const cv::GArrayDesc & /* second */) {
        return cv::empty_array_desc();
    }
};
|
||||
|
||||
// OpenCV implementation of ParseSSD.
//
// The input blob is expected to be 4-dimensional with dims[2] records of
// dims[3] == 7 floats each:
//   [image_id, label, confidence, left, top, right, bottom]
// Box coordinates are relative; they are scaled by the size of `in_frame`
// and clipped by its rectangle. When `filter_label` is not -1, only the
// objects with this label are kept.
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
    static void run(const cv::Mat &in_ssd_result,
                    const cv::Mat &in_frame,
                    const int filter_label,
                    std::vector<cv::Rect> &out_objects) {
        const auto &blob_dims = in_ssd_result.size;
        CV_Assert(blob_dims.dims() == 4u);

        const int num_proposals = blob_dims[2];
        const int record_len    = blob_dims[3];
        CV_Assert(record_len == 7); // fixed SSD object size

        const cv::Size frame_size = in_frame.size();
        const cv::Rect frame_rect({0,0}, frame_size);
        const bool any_label = (filter_label == -1);

        out_objects.clear();

        const float *rec = in_ssd_result.ptr<float>();
        for (int i = 0; i < num_proposals; ++i, rec += record_len) {
            const float image_id   = rec[0];
            const float label      = rec[1];
            const float confidence = rec[2];

            if (image_id < 0.f) {
                break;    // marks end-of-detections
            }
            if (confidence < 0.5f) {
                continue; // skip objects with low confidence
            }
            if (!any_label && static_cast<int>(label) != filter_label) {
                continue; // filter out object classes if filter is specified
            }

            // map relative coordinates to the original image scale
            const int left   = static_cast<int>(rec[3] * frame_size.width);
            const int top    = static_cast<int>(rec[4] * frame_size.height);
            const int right  = static_cast<int>(rec[5] * frame_size.width);
            const int bottom = static_cast<int>(rec[6] * frame_size.height);

            const cv::Rect object(left, top, right - left, bottom - top);
            out_objects.emplace_back(object & frame_rect);
        }
    }
};
|
||||
|
||||
// OpenCV implementation of ToMosaic: turns every plate and face rectangle
// into a Mosaic rendering primitive (plates first, faces next).
GAPI_OCV_KERNEL(OCVToMosaic, ToMosaic) {
    static void run(const std::vector<cv::Rect> &in_plate_rcs,
                    const std::vector<cv::Rect> &in_face_rcs,
                    std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
        out_prims.clear();

        // Align the mosaic region to mosaic block size: the rectangle is
        // grown to the next multiple of BLOCK_SIZE in both dimensions and
        // shifted by half of the growth to stay centered.
        // Note a dimension which already is a multiple of BLOCK_SIZE is
        // still grown by one full block.
        const auto cvt = [](const cv::Rect &src) {
            const int BLOCK_SIZE = 24;
            const int grow_w = BLOCK_SIZE - (src.width  % BLOCK_SIZE);
            const int grow_h = BLOCK_SIZE - (src.height % BLOCK_SIZE);
            const cv::Rect aligned(src.x - grow_w / 2,
                                   src.y - grow_h / 2,
                                   src.width  + grow_w,
                                   src.height + grow_h);
            return cv::gapi::wip::draw::Mosaic{aligned, BLOCK_SIZE, 0};
        };

        for (const cv::Rect &plate : in_plate_rcs) {
            out_prims.emplace_back(cvt(plate));
        }
        for (const cv::Rect &face : in_face_rcs) {
            out_prims.emplace_back(cvt(face));
        }
    }
};
|
||||
|
||||
} // namespace custom
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
cmd.about(about);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
const std::string input = cmd.get<std::string>("input");
|
||||
const bool no_show = cmd.get<bool>("noshow");
|
||||
const bool run_trad = cmd.get<bool>("trad");
|
||||
|
||||
cv::GMat in;
|
||||
cv::GMat blob_plates = cv::gapi::infer<custom::VehLicDetector>(in);
|
||||
cv::GMat blob_faces = cv::gapi::infer<custom::FaceDetector>(in);
|
||||
// VehLicDetector from Open Model Zoo marks vehicles with label "1" and
|
||||
// license plates with label "2", filter out license plates only.
|
||||
cv::GArray<cv::Rect> rc_plates = custom::ParseSSD::on(blob_plates, in, 2);
|
||||
// Face detector produces faces only so there's no need to filter by label,
|
||||
// pass "-1".
|
||||
cv::GArray<cv::Rect> rc_faces = custom::ParseSSD::on(blob_faces, in, -1);
|
||||
cv::GMat out = cv::gapi::wip::draw::render3ch(in, custom::ToMosaic::on(rc_plates, rc_faces));
|
||||
cv::GComputation graph(in, out);
|
||||
|
||||
const auto plate_model_path = cmd.get<std::string>("platm");
|
||||
auto plate_net = cv::gapi::ie::Params<custom::VehLicDetector> {
|
||||
plate_model_path, // path to topology IR
|
||||
weights_path(plate_model_path), // path to weights
|
||||
cmd.get<std::string>("platd"), // device specifier
|
||||
};
|
||||
const auto face_model_path = cmd.get<std::string>("facem");
|
||||
auto face_net = cv::gapi::ie::Params<custom::FaceDetector> {
|
||||
face_model_path, // path to topology IR
|
||||
weights_path(face_model_path), // path to weights
|
||||
cmd.get<std::string>("faced"), // device specifier
|
||||
};
|
||||
auto kernels = cv::gapi::kernels<custom::OCVParseSSD, custom::OCVToMosaic>();
|
||||
auto networks = cv::gapi::networks(plate_net, face_net);
|
||||
|
||||
cv::TickMeter tm;
|
||||
cv::Mat out_frame;
|
||||
std::size_t frames = 0u;
|
||||
std::cout << "Reading " << input << std::endl;
|
||||
|
||||
if (run_trad) {
|
||||
cv::Mat in_frame;
|
||||
cv::VideoCapture cap(input);
|
||||
cap >> in_frame;
|
||||
|
||||
auto exec = graph.compile(cv::descr_of(in_frame), cv::compile_args(kernels, networks));
|
||||
tm.start();
|
||||
do {
|
||||
exec(in_frame, out_frame);
|
||||
if (!no_show) {
|
||||
cv::imshow("Out", out_frame);
|
||||
cv::waitKey(1);
|
||||
}
|
||||
frames++;
|
||||
} while (cap.read(in_frame));
|
||||
tm.stop();
|
||||
} else {
|
||||
auto pipeline = graph.compileStreaming(cv::compile_args(kernels, networks));
|
||||
pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
|
||||
pipeline.start();
|
||||
tm.start();
|
||||
|
||||
while (pipeline.pull(cv::gout(out_frame))) {
|
||||
frames++;
|
||||
if (!no_show) {
|
||||
cv::imshow("Out", out_frame);
|
||||
cv::waitKey(1);
|
||||
}
|
||||
}
|
||||
|
||||
tm.stop();
|
||||
}
|
||||
|
||||
std::cout << "Processed " << frames << " frames"
|
||||
<< " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
133
modules/gapi/samples/semantic_segmentation.cpp
Normal file
133
modules/gapi/samples/semantic_segmentation.cpp
Normal file
@@ -0,0 +1,133 @@
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/gapi/infer/ie.hpp>
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
#include <opencv2/gapi/streaming/cap.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
|
||||
// Command line description in the cv::CommandLineParser key format:
// one "{ name | default | help }" group per option.
const std::string keys =
    "{ h help | | Print this help message }"
    "{ input | | Path to the input video file }"
    "{ output | | Path to the output video file }"
    "{ ssm | semantic-segmentation-adas-0001.xml | Path to OpenVINO IE semantic segmentation model (.xml) }";
|
||||
|
||||
// 20 colors for 20 classes of semantic-segmentation-adas-0001
|
||||
const std::vector<cv::Vec3b> colors = {
|
||||
{ 128, 64, 128 },
|
||||
{ 232, 35, 244 },
|
||||
{ 70, 70, 70 },
|
||||
{ 156, 102, 102 },
|
||||
{ 153, 153, 190 },
|
||||
{ 153, 153, 153 },
|
||||
{ 30, 170, 250 },
|
||||
{ 0, 220, 220 },
|
||||
{ 35, 142, 107 },
|
||||
{ 152, 251, 152 },
|
||||
{ 180, 130, 70 },
|
||||
{ 60, 20, 220 },
|
||||
{ 0, 0, 255 },
|
||||
{ 142, 0, 0 },
|
||||
{ 70, 0, 0 },
|
||||
{ 100, 60, 0 },
|
||||
{ 90, 0, 0 },
|
||||
{ 230, 0, 0 },
|
||||
{ 32, 11, 119 },
|
||||
{ 0, 74, 111 },
|
||||
};
|
||||
|
||||
namespace {
|
||||
std::string get_weights_path(const std::string &model_path) {
|
||||
const auto EXT_LEN = 4u;
|
||||
const auto sz = model_path.size();
|
||||
CV_Assert(sz > EXT_LEN);
|
||||
|
||||
auto ext = model_path.substr(sz - EXT_LEN);
|
||||
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){
|
||||
return static_cast<unsigned char>(std::tolower(c));
|
||||
});
|
||||
CV_Assert(ext == ".xml");
|
||||
return model_path.substr(0u, sz - EXT_LEN) + ".bin";
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
namespace custom {
|
||||
// Operation taking two matrices and producing one. Its output metadata is
// a copy of the metadata of the first input; the second input does not
// influence it.
G_API_OP(PostProcessing, <cv::GMat(cv::GMat, cv::GMat)>, "sample.custom.post_processing") {
    static cv::GMatDesc outMeta(const cv::GMatDesc &first_desc,
                                const cv::GMatDesc & /* second_desc */) {
        return first_desc;
    }
};
|
||||
|
||||
GAPI_OCV_KERNEL(OCVPostProcessing, PostProcessing) {
    // Builds a color mask from the per-pixel class table, resizes it to the
    // size of the input frame and blends it with the frame.
    //   in               - original frame
    //   detected_classes - network output, read as a flat table of int class ids
    //   out              - blended result, same size as `in`
    static void run(const cv::Mat &in, const cv::Mat &detected_classes, cv::Mat &out) {
        // This kernel constructs output image by class table and colors vector

        // The semantic-segmentation-adas-0001 output a blob with the shape
        // [B, C=1, H=1024, W=2048]
        const int outHeight = 1024;
        const int outWidth = 2048;

        // The table below is read as outHeight*outWidth ints; refuse a blob
        // which does not hold that much data instead of reading past its end.
        const size_t required_bytes = static_cast<size_t>(outHeight)
                                    * static_cast<size_t>(outWidth)
                                    * sizeof(int);
        CV_Assert(detected_classes.total() * detected_classes.elemSize() >= required_bytes);

        cv::Mat maskImg(outHeight, outWidth, CV_8UC3);
        const int* const classes = detected_classes.ptr<int>();
        for (int rowId = 0; rowId < outHeight; ++rowId) {
            cv::Vec3b *maskRow = maskImg.ptr<cv::Vec3b>(rowId);
            const int *classRow = classes + static_cast<size_t>(rowId) * outWidth;
            for (int colId = 0; colId < outWidth; ++colId) {
                // Negative ids wrap to a huge value and fall into the
                // "unknown class" branch as well
                const size_t classId = static_cast<size_t>(classRow[colId]);
                maskRow[colId] =
                    classId < colors.size()
                    ? colors[classId]
                    : cv::Vec3b{0, 0, 0}; // sample detects 20 classes
            }
        }
        cv::resize(maskImg, out, in.size());
        const float blending = 0.3f;
        out = in * blending + out * (1 - blending);
    }
};
|
||||
} // namespace custom
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Prepare parameters first
|
||||
const std::string input = cmd.get<std::string>("input");
|
||||
const std::string output = cmd.get<std::string>("output");
|
||||
const auto model_path = cmd.get<std::string>("ssm");
|
||||
const auto weights_path = get_weights_path(model_path);
|
||||
const auto device = "CPU";
|
||||
G_API_NET(SemSegmNet, <cv::GMat(cv::GMat)>, "semantic-segmentation");
|
||||
const auto net = cv::gapi::ie::Params<SemSegmNet> {
|
||||
model_path, weights_path, device
|
||||
};
|
||||
const auto kernels = cv::gapi::kernels<custom::OCVPostProcessing>();
|
||||
const auto networks = cv::gapi::networks(net);
|
||||
|
||||
// Now build the graph
|
||||
cv::GMat in;
|
||||
cv::GMat detected_classes = cv::gapi::infer<SemSegmNet>(in);
|
||||
cv::GMat out = custom::PostProcessing::on(in, detected_classes);
|
||||
|
||||
cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out))
|
||||
.compileStreaming(cv::compile_args(kernels, networks));
|
||||
auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
|
||||
|
||||
// The execution part
|
||||
pipeline.setSource(std::move(inputs));
|
||||
pipeline.start();
|
||||
|
||||
cv::VideoWriter writer;
|
||||
cv::Mat outMat;
|
||||
while (pipeline.pull(cv::gout(outMat))) {
|
||||
cv::imshow("Out", outMat);
|
||||
cv::waitKey(1);
|
||||
if (!output.empty()) {
|
||||
if (!writer.isOpened()) {
|
||||
const auto sz = cv::Size{outMat.cols, outMat.rows};
|
||||
writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
|
||||
CV_Assert(writer.isOpened());
|
||||
}
|
||||
writer << outMat;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
19
modules/gapi/samples/slides_blur_gapi.cpp
Normal file
19
modules/gapi/samples/slides_blur_gapi.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <opencv2/gapi.hpp> // G-API framework header
|
||||
#include <opencv2/gapi/imgproc.hpp> // cv::gapi::blur()
|
||||
#include <opencv2/highgui.hpp> // cv::imread/imwrite
|
||||
|
||||
// Blurs the image given as argv[1] with a 3x3 box filter expressed as a
// G-API graph and writes the result to argv[2].
// Returns 0 on success and 1 if arguments are missing, the input image
// cannot be read or the output image cannot be written.
int main(int argc, char *argv[]) {
    if (argc < 3) return 1;

    cv::GMat in;                                           // Express the graph:
    cv::GMat out = cv::gapi::blur(in, cv::Size(3,3));      // `out` is a result of `blur` of `in`

    cv::Mat in_mat = cv::imread(argv[1]);                  // Get the real data
    if (in_mat.empty()) return 1;                          // imread() gives an empty matrix on failure
    cv::Mat out_mat;                                       // Output buffer (may be empty)

    cv::GComputation(cv::GIn(in), cv::GOut(out))           // Declare a graph from `in` to `out`
        .apply(cv::gin(in_mat), cv::gout(out_mat));        // ...and run it immediately

    return cv::imwrite(argv[2], out_mat) ? 0 : 1;          // Save the result, report a failed write
}
|
||||
27
modules/gapi/samples/slides_sobel_cv.cpp
Normal file
27
modules/gapi/samples/slides_sobel_cv.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <opencv2/videoio.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
|
||||
using namespace cv;
|
||||
Mat in_mat = imread("lena.png");
|
||||
Mat gx, gy;
|
||||
|
||||
Sobel(in_mat, gx, CV_32F, 1, 0);
|
||||
Sobel(in_mat, gy, CV_32F, 0, 1);
|
||||
|
||||
Mat mag;
|
||||
sqrt(gx.mul(gx) + gy.mul(gy), mag);
|
||||
|
||||
Mat out_mat;
|
||||
mag.convertTo(out_mat, CV_8U);
|
||||
|
||||
imwrite("lena-out.png", out_mat);
|
||||
return 0;
|
||||
}
|
||||
28
modules/gapi/samples/slides_sobel_gapi.cpp
Normal file
28
modules/gapi/samples/slides_sobel_gapi.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <opencv2/videoio.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/imgproc.hpp>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
|
||||
using namespace cv;
|
||||
Mat in_mat = imread("lena.png");
|
||||
Mat out_mat;
|
||||
|
||||
GMat in;
|
||||
GMat gx = gapi::Sobel(in, CV_32F, 1, 0);
|
||||
GMat gy = gapi::Sobel(in, CV_32F, 0, 1);
|
||||
GMat mag = gapi::sqrt( gapi::mul(gx, gx)
|
||||
+ gapi::mul(gy, gy));
|
||||
GMat out = gapi::convertTo(mag, CV_8U);
|
||||
|
||||
GComputation sobel(GIn(in), GOut(out));
|
||||
sobel.apply(in_mat, out_mat);
|
||||
|
||||
imwrite("lena-out.png", out_mat);
|
||||
return 0;
|
||||
}
|
||||
698
modules/gapi/samples/text_detection.cpp
Normal file
698
modules/gapi/samples/text_detection.cpp
Normal file
@@ -0,0 +1,698 @@
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <opencv2/gapi.hpp>
|
||||
#include <opencv2/gapi/core.hpp>
|
||||
#include <opencv2/gapi/cpu/gcpukernel.hpp>
|
||||
#include <opencv2/gapi/infer.hpp>
|
||||
#include <opencv2/gapi/infer/ie.hpp>
|
||||
#include <opencv2/gapi/streaming/cap.hpp>
|
||||
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/core/utility.hpp>
|
||||
|
||||
// Short description of the sample (registered via CommandLineParser::about()
// elsewhere in this file)
const std::string about = "This is an OpenCV-based version of OMZ Text Detection example";
|
||||
// Command line description in the cv::CommandLineParser key format:
// one "{ name | default | help }" group per option.
const std::string keys =
    // general
    "{ h help | | Print this help message }"
    "{ input | | Path to the input video file }"
    // text detection network
    "{ tdm | text-detection-0004.xml | Path to OpenVINO text detection model (.xml), versions 0003 and 0004 work }"
    "{ tdd | CPU | Target device for the text detector (e.g. CPU, GPU, VPU, ...) }"
    // text recognition network
    "{ trm | text-recognition-0012.xml | Path to OpenVINO text recognition model (.xml) }"
    "{ trd | CPU | Target device for the text recognition (e.g. CPU, GPU, VPU, ...) }"
    // decoder settings
    "{ bw | 0 | CTC beam search decoder bandwidth, if 0, a CTC greedy decoder is used}"
    "{ sset | 0123456789abcdefghijklmnopqrstuvwxyz | Symbol set to use with text recognition decoder. Shouldn't contain symbol #. }"
    "{ thr | 0.2 | Text recognition confidence threshold}"
    ;
|
||||
|
||||
namespace {
|
||||
// Maps the path of a topology file ("<name>.xml", extension matched
// case-insensitively) to the path of its weights file ("<name>.bin").
// Asserts (CV_Assert) if the path is not longer than the extension or does
// not end with ".xml".
std::string weights_path(const std::string &model_path) {
    const std::size_t kExtLen = 4u;
    CV_Assert(model_path.size() > kExtLen);

    const std::size_t stem_len = model_path.size() - kExtLen;
    CV_Assert(cv::toLowerCase(model_path.substr(stem_len)) == ".xml");
    return model_path.substr(0u, stem_len) + ".bin";
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Taken from OMZ samples as-is
|
||||
// Finds the largest element of [begin, end) and its softmax probability.
//   argmax - receives the index of the largest element
//   prob   - receives softmax(largest), i.e. 1 / sum(exp(x_i - max))
// The range must not be empty. Throws std::logic_error if the accumulated
// sum is (numerically) zero.
template<typename Iter>
void softmax_and_choose(Iter begin, Iter end, int *argmax, float *prob) {
    const Iter best = std::max_element(begin, end);
    *argmax = static_cast<int>(std::distance(begin, best));

    // Shifting by the maximum keeps every exponent <= 0
    const float peak = *best;
    const double denominator = std::accumulate(
        begin, end, 0.0,
        [peak](double acc, decltype(*begin) value) {
            return acc + std::exp(value - peak);
        });

    if (std::fabs(denominator) < std::numeric_limits<double>::epsilon()) {
        throw std::logic_error("sum can't be equal to zero");
    }
    *prob = 1.0f / static_cast<float>(denominator);
}
|
||||
|
||||
// Returns the softmax of the values in [begin, end) as a vector of the
// same length (empty for an empty range).
// The maximum is subtracted before exponentiation: the result is
// mathematically unchanged, but exp() can no longer overflow to infinity
// (which made the plain exp(x)/sum form return NaN for large inputs).
template<typename Iter>
std::vector<float> softmax(Iter begin, Iter end) {
    std::vector<float> prob(end - begin, 0.f);
    if (prob.empty()) {
        return prob;
    }
    const float max_val = *std::max_element(begin, end);
    std::transform(begin, end, prob.begin(),
                   [max_val](float x) { return std::exp(x - max_val); });
    // The largest element contributes exp(0) == 1, so the sum is never zero
    const float sum = std::accumulate(prob.begin(), prob.end(), 0.0f);
    for (float &p : prob) {
        p /= sum;
    }
    return prob;
}
|
||||
|
||||
// One hypothesis kept by the CTC beam search decoder.
struct BeamElement {
    //! The sequence of chars that will be a result of the beam element
    std::vector<int> sentence;

    //! The probability that the last char in CTC sequence for the beam
    //! element is the special blank char
    float prob_blank;

    //! The probability that the last char in CTC sequence for the beam
    //! element is NOT the special blank char
    float prob_not_blank;

    //! The probability of the beam element: sum of both probabilities above
    float prob() const {
        return prob_blank + prob_not_blank;
    }
};
|
||||
|
||||
std::string CTCGreedyDecoder(const float *data,
|
||||
const std::size_t sz,
|
||||
const std::string &alphabet,
|
||||
const char pad_symbol,
|
||||
double *conf) {
|
||||
std::string res = "";
|
||||
bool prev_pad = false;
|
||||
*conf = 1;
|
||||
|
||||
const auto num_classes = alphabet.length();
|
||||
for (auto it = data; it != (data+sz); it += num_classes) {
|
||||
int argmax = 0;
|
||||
float prob = 0.f;
|
||||
|
||||
softmax_and_choose(it, it + num_classes, &argmax, &prob);
|
||||
(*conf) *= prob;
|
||||
|
||||
auto symbol = alphabet[argmax];
|
||||
if (symbol != pad_symbol) {
|
||||
if (res.empty() || prev_pad || (!res.empty() && symbol != res.back())) {
|
||||
prev_pad = false;
|
||||
res += symbol;
|
||||
}
|
||||
} else {
|
||||
prev_pad = true;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string CTCBeamSearchDecoder(const float *data,
|
||||
const std::size_t sz,
|
||||
const std::string &alphabet,
|
||||
double *conf,
|
||||
int bandwidth) {
|
||||
const auto num_classes = alphabet.length();
|
||||
|
||||
std::vector<BeamElement> curr;
|
||||
std::vector<BeamElement> last;
|
||||
|
||||
last.push_back(BeamElement{std::vector<int>(), 1.f, 0.f});
|
||||
|
||||
for (auto it = data; it != (data+sz); it += num_classes) {
|
||||
curr.clear();
|
||||
|
||||
std::vector<float> prob = softmax(it, it + num_classes);
|
||||
|
||||
for(const auto& candidate: last) {
|
||||
float prob_not_blank = 0.f;
|
||||
const std::vector<int>& candidate_sentence = candidate.sentence;
|
||||
if (!candidate_sentence.empty()) {
|
||||
int n = candidate_sentence.back();
|
||||
prob_not_blank = candidate.prob_not_blank * prob[n];
|
||||
}
|
||||
float prob_blank = candidate.prob() * prob[num_classes - 1];
|
||||
|
||||
auto check_res = std::find_if(curr.begin(),
|
||||
curr.end(),
|
||||
[&candidate_sentence](const BeamElement& n) {
|
||||
return n.sentence == candidate_sentence;
|
||||
});
|
||||
if (check_res == std::end(curr)) {
|
||||
curr.push_back(BeamElement{candidate.sentence, prob_blank, prob_not_blank});
|
||||
} else {
|
||||
check_res->prob_not_blank += prob_not_blank;
|
||||
if (check_res->prob_blank != 0.f) {
|
||||
throw std::logic_error("Probability that the last char in CTC-sequence "
|
||||
"is the special blank char must be zero here");
|
||||
}
|
||||
check_res->prob_blank = prob_blank;
|
||||
}
|
||||
|
||||
for (int i = 0; i < static_cast<int>(num_classes) - 1; i++) {
|
||||
auto extend = candidate_sentence;
|
||||
extend.push_back(i);
|
||||
|
||||
if (candidate_sentence.size() > 0 && candidate.sentence.back() == i) {
|
||||
prob_not_blank = prob[i] * candidate.prob_blank;
|
||||
} else {
|
||||
prob_not_blank = prob[i] * candidate.prob();
|
||||
}
|
||||
|
||||
auto check_res2 = std::find_if(curr.begin(),
|
||||
curr.end(),
|
||||
[&extend](const BeamElement &n) {
|
||||
return n.sentence == extend;
|
||||
});
|
||||
if (check_res2 == std::end(curr)) {
|
||||
curr.push_back(BeamElement{extend, 0.f, prob_not_blank});
|
||||
} else {
|
||||
check_res2->prob_not_blank += prob_not_blank;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort(curr.begin(), curr.end(), [](const BeamElement &a, const BeamElement &b) -> bool {
|
||||
return a.prob() > b.prob();
|
||||
});
|
||||
|
||||
last.clear();
|
||||
int num_to_copy = std::min(bandwidth, static_cast<int>(curr.size()));
|
||||
for (int b = 0; b < num_to_copy; b++) {
|
||||
last.push_back(curr[b]);
|
||||
}
|
||||
}
|
||||
|
||||
*conf = last[0].prob();
|
||||
std::string res="";
|
||||
for (const auto& idx: last[0].sentence) {
|
||||
res += alphabet[idx];
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
} // anonymous namespace
|
||||
|
||||
namespace custom {
|
||||
namespace {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Define networks for this sample
|
||||
using GMat2 = std::tuple<cv::GMat, cv::GMat>;
|
||||
G_API_NET(TextDetection,
|
||||
<GMat2(cv::GMat)>,
|
||||
"sample.custom.text_detect");
|
||||
|
||||
G_API_NET(TextRecognition,
|
||||
<cv::GMat(cv::GMat)>,
|
||||
"sample.custom.text_recogn");
|
||||
|
||||
// Define custom operations
|
||||
using GSize = cv::GOpaque<cv::Size>;
|
||||
using GRRects = cv::GArray<cv::RotatedRect>;
|
||||
G_API_OP(PostProcess,
|
||||
<GRRects(cv::GMat,cv::GMat,GSize,float,float)>,
|
||||
"sample.custom.text.post_proc") {
|
||||
static cv::GArrayDesc outMeta(const cv::GMatDesc &,
|
||||
const cv::GMatDesc &,
|
||||
const cv::GOpaqueDesc &,
|
||||
float,
|
||||
float) {
|
||||
return cv::empty_array_desc();
|
||||
}
|
||||
};
|
||||
|
||||
using GMats = cv::GArray<cv::GMat>;
|
||||
G_API_OP(CropLabels,
|
||||
<GMats(cv::GMat,GRRects,GSize)>,
|
||||
"sample.custom.text.crop") {
|
||||
static cv::GArrayDesc outMeta(const cv::GMatDesc &,
|
||||
const cv::GArrayDesc &,
|
||||
const cv::GOpaqueDesc &) {
|
||||
return cv::empty_array_desc();
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Implement custom operations
|
||||
GAPI_OCV_KERNEL(OCVPostProcess, PostProcess) {
|
||||
// Converts the two raw network outputs into rotated text boxes.
//   link, segm      - 4D float blobs; both go through the same chain:
//                     transpose with axes {0,2,3,1}, pairwise softmax,
//                     keep the second value of every pair
//   img_size        - size the boxes are scaled to
//   link_threshold, segm_threshold - confidence thresholds for the
//                     respective blob
//   out             - receives the boxes (previous content is replaced)
static void run(const cv::Mat &link,
                const cv::Mat &segm,
                const cv::Size &img_size,
                const float link_threshold,
                const float segm_threshold,
                std::vector<cv::RotatedRect> &out) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    const int kMinArea = 300;
    const int kMinHeight = 10;

    // Shared preparation of a blob: flat copy -> transpose -> softmax -> slice
    const auto toProbabilities = [](const cv::Mat &blob) -> std::vector<float> {
        const float *first = blob.ptr<float>();
        std::vector<float> values(first, first + blob.total());
        values = transpose4d(values, dimsToShape(blob.size), {0, 2, 3, 1});
        softmax(values);
        return sliceAndGetSecondChannel(values);
    };
    // Shape of the prepared data: transposed order, last dimension halved
    const auto preparedShape = [](const cv::Mat &blob) -> std::vector<int> {
        return std::vector<int>{
            blob.size[0],
            blob.size[2],
            blob.size[3],
            blob.size[1]/2,
        };
    };

    const std::vector<float> link_data = toProbabilities(link);
    const std::vector<int> new_link_data_shape = preparedShape(link);

    const std::vector<float> cls_data = toProbabilities(segm);
    const std::vector<int> new_cls_data_shape = preparedShape(segm);

    const cv::Mat mask = decodeImageByJoin(cls_data, new_cls_data_shape,
                                           link_data, new_link_data_shape,
                                           segm_threshold, link_threshold);
    out = maskToBoxes(mask,
                      static_cast<float>(kMinArea),
                      static_cast<float>(kMinHeight),
                      img_size);
}
|
||||
|
||||
// Copies the per-dimension extents of a cv::MatSize into a vector.
static std::vector<std::size_t> dimsToShape(const cv::MatSize &sz) {
    const int dim_count = sz.dims();
    std::vector<std::size_t> shape(static_cast<std::size_t>(dim_count));

    // cv::MatSize offers indexed access only, hence the index loop
    for (int d = 0; d < dim_count; ++d) {
        shape[static_cast<std::size_t>(d)] = static_cast<std::size_t>(sz[d]);
    }
    return shape;
}
|
||||
|
||||
// In-place softmax over consecutive pairs of values:
// (rdata[0], rdata[1]), (rdata[2], rdata[3]), ...
// If the vector has an odd number of elements, the unpaired last element
// is left untouched (it used to trigger an out-of-bounds access).
static void softmax(std::vector<float> &rdata) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    const size_t last_dim = 2;
    for (size_t i = 0; i + 1 < rdata.size(); i += last_dim) {
        // Subtract the maximum first so that exp() cannot overflow
        const float m = std::max(rdata[i], rdata[i + 1]);
        rdata[i]     = std::exp(rdata[i] - m);
        rdata[i + 1] = std::exp(rdata[i + 1] - m);
        const float s = rdata[i] + rdata[i + 1];
        rdata[i]     /= s;
        rdata[i + 1] /= s;
    }
}
|
||||
|
||||
// Returns a copy of a dense 4D array with its dimensions permuted.
//   data  - source values, the last dimension of `shape` varies fastest
//   shape - extents of the four source dimensions
//   axes  - axes[k] is the source dimension which becomes dimension k of
//           the result; must be a permutation of {0, 1, 2, 3}
// Throws std::runtime_error if shape/axes are not four-dimensional, if
// axes is not a valid permutation, or if `data` holds fewer elements than
// `shape` describes (all of which previously led to out-of-bounds access).
static std::vector<float> transpose4d(const std::vector<float> &data,
                                      const std::vector<size_t> &shape,
                                      const std::vector<size_t> &axes) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    if (shape.size() != axes.size())
        throw std::runtime_error("Shape and axes must have the same dimension.");
    if (shape.size() != 4u)
        throw std::runtime_error("Shape and axes must have exactly four dimensions.");

    bool seen[4] = {false, false, false, false};
    for (size_t a : axes) {
        if (a >= shape.size())
            throw std::runtime_error("Axis must be less than dimension of shape.");
        if (seen[a])
            throw std::runtime_error("Axes must not contain duplicates.");
        seen[a] = true;
    }

    const size_t total_size = shape[0]*shape[1]*shape[2]*shape[3];
    if (data.size() < total_size)
        throw std::runtime_error("Data is smaller than the given shape.");

    // Strides of the result, expressed through the permuted extents
    const std::vector<size_t> steps {
        shape[axes[1]]*shape[axes[2]]*shape[axes[3]],
        shape[axes[2]]*shape[axes[3]],
        shape[axes[3]],
        1
    };

    size_t source_data_idx = 0;
    std::vector<float> new_data(total_size, 0);
    std::vector<size_t> ids(shape.size());
    // Walk the source linearly and scatter every value to its new position
    for (ids[0] = 0; ids[0] < shape[0]; ids[0]++) {
        for (ids[1] = 0; ids[1] < shape[1]; ids[1]++) {
            for (ids[2] = 0; ids[2] < shape[2]; ids[2]++) {
                for (ids[3] = 0; ids[3] < shape[3]; ids[3]++) {
                    const size_t new_data_idx = ids[axes[0]]*steps[0] + ids[axes[1]]*steps[1] +
                                                ids[axes[2]]*steps[2] + ids[axes[3]]*steps[3];
                    new_data[new_data_idx] = data[source_data_idx++];
                }
            }
        }
    }
    return new_data;
}
|
||||
|
||||
// Treats `data` as a sequence of value pairs and returns the second value
// of every pair (the elements at odd positions). The result has
// data.size() / 2 elements.
static std::vector<float> sliceAndGetSecondChannel(const std::vector<float> &data) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    std::vector<float> second_values;
    second_values.reserve(data.size() / 2);
    for (size_t odd_idx = 1; odd_idx < data.size(); odd_idx += 2) {
        second_values.push_back(data[odd_idx]);
    }
    return second_values;
}
|
||||
|
||||
static void join(const int p1,
|
||||
const int p2,
|
||||
std::unordered_map<int, int> &group_mask) {
|
||||
// NOTE: Taken from the OMZ text detection sample almost as-is
|
||||
const int root1 = findRoot(p1, group_mask);
|
||||
const int root2 = findRoot(p2, group_mask);
|
||||
if (root1 != root2) {
|
||||
group_mask[root1] = root2;
|
||||
}
|
||||
}
|
||||
|
||||
// Groups text pixels into connected components.
//   cls_data / cls_data_shape   - per-pixel text score; height and width
//                                 are taken from shape[1] and shape[2]
//   link_data / link_data_shape - per-pixel scores of the links to the 8
//                                 neighbours; shape[3] is the number of
//                                 link values stored per pixel
//   cls_conf_threshold, link_conf_threshold - minimal scores to accept a
//                                 pixel / a link
// Two accepted pixels end up in the same group if they are neighbours and
// the link from the first to the second is accepted. The labelled mask is
// produced by get_all().
static cv::Mat decodeImageByJoin(const std::vector<float> &cls_data,
                                 const std::vector<int> &cls_data_shape,
                                 const std::vector<float> &link_data,
                                 const std::vector<int> &link_data_shape,
                                 float cls_conf_threshold,
                                 float link_conf_threshold) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    const int rows = cls_data_shape[1];
    const int cols = cls_data_shape[2];

    // Accepted pixels: every one starts as the root (-1) of its own group
    std::vector<uchar> text_mask(rows * cols, 0);
    std::unordered_map<int, int> group_mask;
    std::vector<cv::Point> text_pixels;
    const int pixel_count = static_cast<int>(text_mask.size());
    for (int idx = 0; idx < pixel_count; ++idx) {
        if (cls_data[idx] >= cls_conf_threshold) {
            text_mask[idx] = 1;
            text_pixels.emplace_back(idx % cols, idx / cols);
            group_mask[idx] = -1;
        }
    }

    // Accepted links
    std::vector<uchar> link_mask(link_data.size(), 0);
    std::transform(link_data.begin(), link_data.end(), link_mask.begin(),
                   [link_conf_threshold](float score) -> uchar {
                       return score >= link_conf_threshold;
                   });

    // Neighbour offsets in row-major order without the center; the position
    // in this table is also the index of the link value of that neighbour
    const int kDx[8] = {-1,  0,  1, -1, 1, -1, 0, 1};
    const int kDy[8] = {-1, -1, -1,  0, 0,  1, 1, 1};

    const size_t neighbours = size_t(link_data_shape[3]);
    for (const auto &px : text_pixels) {
        const size_t link_base = (size_t(px.y) * size_t(cols) + size_t(px.x)) * neighbours;
        for (size_t k = 0; k < 8; ++k) {
            const int nx = px.x + kDx[k];
            const int ny = px.y + kDy[k];
            if (nx < 0 || nx >= cols || ny < 0 || ny >= rows) {
                continue; // neighbour is outside of the image
            }
            const uchar pixel_value = text_mask[size_t(ny) * size_t(cols) + size_t(nx)];
            const uchar link_value  = link_mask[link_base + k];
            if (pixel_value && link_value) {
                join(px.x + px.y * cols, nx + ny * cols, group_mask);
            }
        }
    }
    return get_all(text_pixels, cols, rows, group_mask);
}
|
||||
|
||||
// Builds an h x w CV_32S mask in which every given point carries the label
// of its group. Labels are consecutive integers starting from 1, assigned
// in the order in which group roots are first met; all other pixels are 0.
static cv::Mat get_all(const std::vector<cv::Point> &points,
                       const int w,
                       const int h,
                       std::unordered_map<int, int> &group_mask) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    std::unordered_map<int, int> label_of_root;
    cv::Mat mask(h, w, CV_32S, cv::Scalar(0));
    for (const auto &pt : points) {
        const int flat_idx = pt.x + pt.y * w;
        const int root = findRoot(flat_idx, group_mask);

        auto found = label_of_root.find(root);
        if (found == label_of_root.end()) {
            // First pixel of a new group: hand out the next free label
            const int next_label = static_cast<int>(label_of_root.size() + 1);
            found = label_of_root.emplace(root, next_label).first;
        }
        mask.at<int>(flat_idx) = found->second;
    }
    return mask;
}
|
||||
|
||||
// Follows the parent links in group_mask from `point` until an entry with
// the value -1 (a root) is reached and returns the key of that root.
// If `point` is not a root itself, its own entry is redirected straight to
// the root. Throws std::out_of_range if a visited key is missing.
static int findRoot(const int point,
                    std::unordered_map<int, int> &group_mask) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    int current = point;
    for (int parent = group_mask.at(current); parent != -1; parent = group_mask.at(current)) {
        current = parent;
    }
    if (current != point) {
        group_mask[point] = current;
    }
    return current;
}
|
||||
|
||||
// Converts a labelled mask (labels 1..max) into rotated bounding boxes.
// The mask is resized to image_size with nearest-neighbour interpolation;
// for every label the minimal-area rectangle of its first contour is
// computed. Boxes whose shorter side is below min_height or whose area is
// below min_area are dropped.
static std::vector<cv::RotatedRect> maskToBoxes(const cv::Mat &mask,
                                                const float min_area,
                                                const float min_height,
                                                const cv::Size &image_size) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    double lowest = 0.;
    double highest = 0.;
    cv::minMaxLoc(mask, &lowest, &highest);
    const int label_count = static_cast<int>(highest);

    cv::Mat scaled_mask;
    cv::resize(mask, scaled_mask, image_size, 0, 0, cv::INTER_NEAREST);

    std::vector<cv::RotatedRect> boxes;
    for (int label = 1; label <= label_count; ++label) {
        cv::Mat component = (scaled_mask == label);
        std::vector<std::vector<cv::Point>> outlines;
        cv::findContours(component, outlines, cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE);
        if (outlines.empty()) {
            continue;
        }

        const cv::RotatedRect box = cv::minAreaRect(outlines[0]);
        const bool too_small = std::min(box.size.width, box.size.height) < min_height
                            || box.size.area() < min_area;
        if (!too_small) {
            boxes.emplace_back(box);
        }
    }
    return boxes;
}
|
||||
}; // GAPI_OCV_KERNEL(PostProcess)
|
||||
|
||||
GAPI_OCV_KERNEL(OCVCropLabels, CropLabels) {
|
||||
// Cuts every detected rotated rectangle out of the image.
// Each rectangle is warped (affine transform built from three of its
// corners, starting at the one chosen by topLeftPointIdx) to an upright
// patch of outSize, converted to gray and to CV_32F, and stored in `out`
// reshaped to {1, 1, height, width}. Previous content of `out` is dropped.
static void run(const cv::Mat &image,
                const std::vector<cv::RotatedRect> &detections,
                const cv::Size &outSize,
                std::vector<cv::Mat> &out) {
    out.clear();
    out.reserve(detections.size());

    // Scratch buffers shared by all detections
    cv::Mat crop(outSize, CV_8UC3, cv::Scalar(0));
    cv::Mat gray(outSize, CV_8UC1, cv::Scalar(0));
    const std::vector<int> blob_shape{1, 1, outSize.height, outSize.width};

    // The target triangle is the same for every detection:
    // top-left, top-right and bottom-right corner of the patch
    const float right  = static_cast<float>(outSize.width - 1);
    const float bottom = static_cast<float>(outSize.height - 1);
    const std::vector<cv::Point2f> patch_corners{
        cv::Point2f(0.0f, 0.0f),
        cv::Point2f(right, 0.0f),
        cv::Point2f(right, bottom)
    };

    for (const cv::RotatedRect &detection : detections) {
        std::vector<cv::Point2f> corners(4);
        detection.points(corners.data());

        const auto first = topLeftPointIdx(corners);
        const std::vector<cv::Point2f> rect_corners{
            corners[static_cast<size_t>(first)],
            corners[(first + 1) % 4],
            corners[(first + 2) % 4]
        };

        const cv::Mat M = cv::getAffineTransform(rect_corners, patch_corners);
        cv::warpAffine(image, crop, M, outSize);
        cv::cvtColor(crop, gray, cv::COLOR_BGR2GRAY);

        cv::Mat blob;
        gray.convertTo(blob, CV_32F);
        out.push_back(blob.reshape(1, blob_shape)); // pass as 1,1,H,W instead of H,W
    }
}
|
||||
|
||||
// Picks the index of the "top-left" point: among the two points with the
// smallest x coordinate, the one with the smaller y wins (the leftmost one
// on equal y). Returns -1 if no point qualifies, e.g. for an empty vector.
static int topLeftPointIdx(const std::vector<cv::Point2f> &points) {
    // NOTE: Taken from the OMZ text detection sample almost as-is
    const float kUnset = std::numeric_limits<float>::max();
    cv::Point2f leftmost(kUnset, kUnset);
    cv::Point2f runner_up(kUnset, kUnset);
    int leftmost_idx = -1;
    int runner_up_idx = -1;

    for (size_t i = 0; i < points.size(); i++) {
        const cv::Point2f &pt = points[i];
        if (pt.x < leftmost.x) {
            // The previous leader (if there was one) becomes the runner-up
            if (leftmost.x < kUnset) {
                runner_up = leftmost;
                runner_up_idx = leftmost_idx;
            }
            leftmost = pt;
            leftmost_idx = static_cast<int>(i);
        }
        if (pt.x < runner_up.x && pt != leftmost) {
            runner_up = pt;
            runner_up_idx = static_cast<int>(i);
        }
    }

    return (runner_up.y < leftmost.y) ? runner_up_idx : leftmost_idx;
}
|
||||
|
||||
}; // GAPI_OCV_KERNEL(CropLabels)
|
||||
|
||||
} // anonymous namespace
|
||||
} // namespace custom
|
||||
|
||||
namespace vis {
|
||||
namespace {
|
||||
|
||||
// Draws the outline of a rotated rectangle: four lines of thickness 2
// connecting its consecutive corners, the last one closing the shape.
void drawRotatedRect(cv::Mat &m, const cv::RotatedRect &rc) {
    cv::Point2f corners[4];
    rc.points(corners);

    const cv::Scalar outline_color(50, 205, 50);
    for (int i = 0; i < 4; ++i) {
        cv::line(m, corners[i], corners[(i + 1) % 4], outline_color, 2);
    }
}
|
||||
|
||||
// Draws `str` in white on a filled background box, anchored at the
// top-left corner of the rotated rectangle. The anchor is moved so that it
// is not left of x == 0 and not above y == text height.
void drawText(cv::Mat &m, const cv::RotatedRect &rc, const std::string &str) {
    const int    font      = cv::FONT_HERSHEY_SIMPLEX;
    const double font_size = 0.7;
    const int    stroke    = 1;
    int baseline = 0;
    const cv::Size label_size = cv::getTextSize(str, font, font_size, stroke, &baseline);

    std::vector<cv::Point2f> corners(4);
    rc.points(corners.data());
    const auto anchor_idx = custom::OCVCropLabels::topLeftPointIdx(corners);

    cv::Point origin = corners[anchor_idx];
    origin.x = std::max(0, origin.x);
    origin.y = std::max(label_size.height, origin.y);

    // Background: from the baseline below the text up to the text height
    const cv::Point box_bottom_left = origin + cv::Point{0, baseline};
    const cv::Point box_top_right   = origin + cv::Point{label_size.width, -label_size.height};
    cv::rectangle(m, box_bottom_left, box_top_right, CV_RGB(50, 205, 50), cv::FILLED);

    cv::putText(m, str, origin, font, font_size, CV_RGB(255, 255, 255), stroke, 8);
}
|
||||
|
||||
} // anonymous namespace
|
||||
} // namespace vis
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
cmd.about(about);
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
const auto input_file_name = cmd.get<std::string>("input");
|
||||
const auto tdet_model_path = cmd.get<std::string>("tdm");
|
||||
const auto trec_model_path = cmd.get<std::string>("trm");
|
||||
const auto tdet_target_dev = cmd.get<std::string>("tdd");
|
||||
const auto trec_target_dev = cmd.get<std::string>("trd");
|
||||
const auto ctc_beam_dec_bw = cmd.get<int>("bw");
|
||||
const auto dec_conf_thresh = cmd.get<double>("thr");
|
||||
|
||||
const auto pad_symbol = '#';
|
||||
const auto symbol_set = cmd.get<std::string>("sset") + pad_symbol;
|
||||
|
||||
cv::GMat in;
|
||||
cv::GOpaque<cv::Size> in_rec_sz;
|
||||
cv::GMat link, segm;
|
||||
std::tie(link, segm) = cv::gapi::infer<custom::TextDetection>(in);
|
||||
cv::GOpaque<cv::Size> size = cv::gapi::streaming::size(in);
|
||||
cv::GArray<cv::RotatedRect> rrs = custom::PostProcess::on(link, segm, size, 0.8f, 0.8f);
|
||||
cv::GArray<cv::GMat> labels = custom::CropLabels::on(in, rrs, in_rec_sz);
|
||||
cv::GArray<cv::GMat> text = cv::gapi::infer2<custom::TextRecognition>(in, labels);
|
||||
|
||||
cv::GComputation graph(cv::GIn(in, in_rec_sz),
|
||||
cv::GOut(cv::gapi::copy(in), rrs, text));
|
||||
|
||||
// Text detection network
|
||||
auto tdet_net = cv::gapi::ie::Params<custom::TextDetection> {
|
||||
tdet_model_path, // path to topology IR
|
||||
weights_path(tdet_model_path), // path to weights
|
||||
tdet_target_dev, // device specifier
|
||||
}.cfgOutputLayers({"model/link_logits_/add", "model/segm_logits/add"});
|
||||
|
||||
auto trec_net = cv::gapi::ie::Params<custom::TextRecognition> {
|
||||
trec_model_path, // path to topology IR
|
||||
weights_path(trec_model_path), // path to weights
|
||||
trec_target_dev, // device specifier
|
||||
};
|
||||
auto networks = cv::gapi::networks(tdet_net, trec_net);
|
||||
|
||||
auto kernels = cv::gapi::kernels< custom::OCVPostProcess
|
||||
, custom::OCVCropLabels
|
||||
>();
|
||||
auto pipeline = graph.compileStreaming(cv::compile_args(kernels, networks));
|
||||
|
||||
std::cout << "Reading " << input_file_name << std::endl;
|
||||
|
||||
// Input stream
|
||||
auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input_file_name);
|
||||
|
||||
// Text recognition input size (also an input parameter to the graph)
|
||||
auto in_rsz = cv::Size{ 120, 32 };
|
||||
|
||||
// Set the pipeline source & start the pipeline
|
||||
pipeline.setSource(cv::gin(in_src, in_rsz));
|
||||
pipeline.start();
|
||||
|
||||
// Declare the output data & run the processing loop
|
||||
cv::TickMeter tm;
|
||||
cv::Mat image;
|
||||
std::vector<cv::RotatedRect> out_rcs;
|
||||
std::vector<cv::Mat> out_text;
|
||||
|
||||
tm.start();
|
||||
int frames = 0;
|
||||
while (pipeline.pull(cv::gout(image, out_rcs, out_text))) {
|
||||
frames++;
|
||||
|
||||
CV_Assert(out_rcs.size() == out_text.size());
|
||||
const auto num_labels = out_rcs.size();
|
||||
|
||||
std::vector<cv::Point2f> tmp_points(4);
|
||||
for (std::size_t l = 0; l < num_labels; l++) {
|
||||
// Decode the recognized text in the rectangle
|
||||
const auto &blob = out_text[l];
|
||||
const float *data = blob.ptr<float>();
|
||||
const auto sz = blob.total();
|
||||
double conf = 1.0;
|
||||
const std::string res = ctc_beam_dec_bw == 0
|
||||
? CTCGreedyDecoder(data, sz, symbol_set, pad_symbol, &conf)
|
||||
: CTCBeamSearchDecoder(data, sz, symbol_set, &conf, ctc_beam_dec_bw);
|
||||
|
||||
// Draw a bounding box for this rotated rectangle
|
||||
const auto &rc = out_rcs[l];
|
||||
vis::drawRotatedRect(image, rc);
|
||||
|
||||
// Draw text, if decoded
|
||||
if (conf >= dec_conf_thresh) {
|
||||
vis::drawText(image, rc, res);
|
||||
}
|
||||
}
|
||||
tm.stop();
|
||||
cv::imshow("Out", image);
|
||||
cv::waitKey(1);
|
||||
tm.start();
|
||||
}
|
||||
tm.stop();
|
||||
std::cout << "Processed " << frames << " frames"
|
||||
<< " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user