init - initialize the project

Lee Nony
2022-05-06 01:58:53 +08:00
commit 90a5cc7cb6
6772 changed files with 2837787 additions and 0 deletions


@@ -0,0 +1,34 @@
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
int main(int argc, char *argv[])
{
cv::VideoCapture cap;
if (argc > 1) cap.open(argv[1]);
else cap.open(0);
CV_Assert(cap.isOpened());
cv::GMat in;
cv::GMat vga = cv::gapi::resize(in, cv::Size(), 0.5, 0.5);
cv::GMat gray = cv::gapi::BGR2Gray(vga);
cv::GMat blurred = cv::gapi::blur(gray, cv::Size(5,5));
cv::GMat edges = cv::gapi::Canny(blurred, 32, 128, 3);
cv::GMat b,g,r;
std::tie(b,g,r) = cv::gapi::split3(vga);
cv::GMat out = cv::gapi::merge3(b, g | edges, r);
cv::GComputation ac(in, out);
cv::Mat input_frame;
cv::Mat output_frame;
CV_Assert(cap.read(input_frame));
do
{
ac.apply(input_frame, output_frame);
cv::imshow("output", output_frame);
} while (cap.read(input_frame) && cv::waitKey(30) < 0);
return 0;
}
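
The per-frame loop above relies on cv::GComputation::apply(), which compiles the pipeline lazily on the first call and reuses the compiled object while the input metadata stays unchanged. A minimal sketch of the same step made explicit (hypothetical variable names, reusing ac and input_frame from the sample):

cv::GCompiled edges = ac.compile(cv::descr_of(input_frame)); // compile once for this frame format
cv::Mat out_frame;
edges(cv::gin(input_frame), cv::gout(out_frame));            // run the precompiled graph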


@@ -0,0 +1,122 @@
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/gapi/fluid/core.hpp>
#include <opencv2/gapi/fluid/imgproc.hpp>
static void typed_example()
{
const cv::Size sz(32, 32);
cv::Mat
in_mat1 (sz, CV_8UC1),
in_mat2 (sz, CV_8UC1),
out_mat_untyped(sz, CV_8UC1),
out_mat_typed1 (sz, CV_8UC1),
out_mat_typed2 (sz, CV_8UC1);
cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(255));
//! [Untyped_Example]
// Untyped G-API ///////////////////////////////////////////////////////////
cv::GComputation cvtU([]()
{
cv::GMat in1, in2;
cv::GMat out = cv::gapi::add(in1, in2);
return cv::GComputation({in1, in2}, {out});
});
std::vector<cv::Mat> u_ins = {in_mat1, in_mat2};
std::vector<cv::Mat> u_outs = {out_mat_untyped};
cvtU.apply(u_ins, u_outs);
//! [Untyped_Example]
//! [Typed_Example]
// Typed G-API /////////////////////////////////////////////////////////////
cv::GComputationT<cv::GMat (cv::GMat, cv::GMat)> cvtT([](cv::GMat m1, cv::GMat m2)
{
return m1+m2;
});
cvtT.apply(in_mat1, in_mat2, out_mat_typed1);
auto cvtTC = cvtT.compile(cv::descr_of(in_mat1), cv::descr_of(in_mat2));
cvtTC(in_mat1, in_mat2, out_mat_typed2);
//! [Typed_Example]
}
G_TYPED_KERNEL(IAdd, <cv::GMat(cv::GMat)>, "test.custom.add") {
static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; }
};
G_TYPED_KERNEL(IFilter2D, <cv::GMat(cv::GMat)>, "test.custom.filter2d") {
static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; }
};
G_TYPED_KERNEL(IRGB2YUV, <cv::GMat(cv::GMat)>, "test.custom.rgb2yuv") {
static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; }
};
GAPI_OCV_KERNEL(CustomAdd, IAdd) { static void run(cv::Mat, cv::Mat &) {} };
GAPI_OCV_KERNEL(CustomFilter2D, IFilter2D) { static void run(cv::Mat, cv::Mat &) {} };
GAPI_OCV_KERNEL(CustomRGB2YUV, IRGB2YUV) { static void run(cv::Mat, cv::Mat &) {} };
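// NB: the run() bodies above are deliberately empty. These kernels exist only so
// that the kernel-package snippet later in main() has concrete implementations
// (CustomAdd, CustomFilter2D, CustomRGB2YUV) to register.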
int main(int argc, char *argv[])
{
if (argc < 3)
return -1;
cv::Mat input = cv::imread(argv[1]);
cv::Mat output;
{
//! [graph_def]
cv::GMat in;
cv::GMat gx = cv::gapi::Sobel(in, CV_32F, 1, 0);
cv::GMat gy = cv::gapi::Sobel(in, CV_32F, 0, 1);
cv::GMat g = cv::gapi::sqrt(cv::gapi::mul(gx, gx) + cv::gapi::mul(gy, gy));
cv::GMat out = cv::gapi::convertTo(g, CV_8U);
//! [graph_def]
//! [graph_decl_apply]
//! [graph_cap_full]
cv::GComputation sobelEdge(cv::GIn(in), cv::GOut(out));
//! [graph_cap_full]
sobelEdge.apply(input, output);
//! [graph_decl_apply]
//! [apply_with_param]
cv::gapi::GKernelPackage kernels = cv::gapi::combine
(cv::gapi::core::fluid::kernels(),
cv::gapi::imgproc::fluid::kernels());
sobelEdge.apply(input, output, cv::compile_args(kernels));
//! [apply_with_param]
//! [graph_cap_sub]
cv::GComputation sobelEdgeSub(cv::GIn(gx, gy), cv::GOut(out));
//! [graph_cap_sub]
}
//! [graph_gen]
cv::GComputation sobelEdgeGen([](){
cv::GMat in;
cv::GMat gx = cv::gapi::Sobel(in, CV_32F, 1, 0);
cv::GMat gy = cv::gapi::Sobel(in, CV_32F, 0, 1);
cv::GMat g = cv::gapi::sqrt(cv::gapi::mul(gx, gx) + cv::gapi::mul(gy, gy));
cv::GMat out = cv::gapi::convertTo(g, CV_8U);
return cv::GComputation(in, out);
});
//! [graph_gen]
cv::imwrite(argv[2], output);
//! [kernels_snippet]
cv::gapi::GKernelPackage pkg = cv::gapi::kernels
< CustomAdd
, CustomFilter2D
, CustomRGB2YUV
>();
//! [kernels_snippet]
// Just call typed example with no input/output
typed_example();
return 0;
}


@@ -0,0 +1,56 @@
#include <opencv2/imgproc.hpp> // cv::FONT*, cv::LINE*, cv::FILLED
#include <opencv2/highgui.hpp> // imwrite
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/render.hpp>
int main(int argc, char *argv[])
{
if (argc < 2) {
std::cerr << "Filename required" << std::endl;
return 1;
}
const auto font = cv::FONT_HERSHEY_DUPLEX;
const auto blue = cv::Scalar{ 255, 0, 0}; // B/G/R
const auto green = cv::Scalar{ 0, 255, 0};
const auto coral = cv::Scalar{0x81,0x81,0xF1};
const auto white = cv::Scalar{ 255, 255, 255};
cv::Mat test(cv::Size(480, 160), CV_8UC3, white);
namespace draw = cv::gapi::wip::draw;
std::vector<draw::Prim> prims;
prims.emplace_back(draw::Circle{ // CIRCLE primitive
{400,72}, // Position (a cv::Point)
32, // Radius
coral, // Color
cv::FILLED, // Thickness/fill type
cv::LINE_8, // Line type
0 // Shift
});
prims.emplace_back(draw::Text{ // TEXT primitive
"Hello from G-API!", // Text
{64,96}, // Position (a cv::Point)
font, // Font
1.0, // Scale (size)
blue, // Color
2, // Thickness
cv::LINE_8, // Line type
false // Bottom left origin flag
});
prims.emplace_back(draw::Rect{ // RECTANGLE primitive
{16,48,400,72}, // Geometry (a cv::Rect)
green, // Color
2, // Thickness
cv::LINE_8, // Line type
0 // Shift
});
prims.emplace_back(draw::Mosaic{ // MOSAIC primitive
{320,96,128,32}, // Geometry (a cv::Rect)
16, // Cell size
0 // Decimation
});
draw::render(test, prims);
cv::imwrite(argv[1], test);
return 0;
}
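
draw::render() above rasterizes the primitives directly into a host cv::Mat. The same primitive list can also be consumed inside a G-API graph through cv::gapi::wip::draw::render3ch(); a minimal sketch, assuming a 3-channel BGR input and reusing the draw alias and the test/prims variables from the sample:

cv::GMat g_in;
cv::GArray<draw::Prim> g_prims;
cv::GMat g_out = draw::render3ch(g_in, g_prims);
cv::Mat rendered;
cv::GComputation(cv::GIn(g_in, g_prims), cv::GOut(g_out))
    .apply(cv::gin(test, prims), cv::gout(rendered));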


@@ -0,0 +1,68 @@
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/cpu/core.hpp>
int main(int argc, char *argv[])
{
(void) argc;
(void) argv;
bool need_first_conversion = true;
bool need_second_conversion = false;
cv::Size szOut(4, 4);
cv::GComputation cc([&](){
// ! [GIOProtoArgs usage]
auto ins = cv::GIn();
cv::GMat in1;
if (need_first_conversion)
ins += cv::GIn(in1);
cv::GMat in2;
if (need_second_conversion)
ins += cv::GIn(in2);
auto outs = cv::GOut();
cv::GMat out1 = cv::gapi::resize(in1, szOut);
if (need_first_conversion)
outs += cv::GOut(out1);
cv::GMat out2 = cv::gapi::resize(in2, szOut);
if (need_second_conversion)
outs += cv::GOut(out2);
// ! [GIOProtoArgs usage]
return cv::GComputation(std::move(ins), std::move(outs));
});
// ! [GRunArgs usage]
auto in_vector = cv::gin();
cv::Mat in_mat1( 8, 8, CV_8UC3);
cv::Mat in_mat2(16, 16, CV_8UC3);
cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255));
cv::randu(in_mat2, cv::Scalar::all(0), cv::Scalar::all(255));
if (need_first_conversion)
in_vector += cv::gin(in_mat1);
if (need_second_conversion)
in_vector += cv::gin(in_mat2);
// ! [GRunArgs usage]
// ! [GRunArgsP usage]
auto out_vector = cv::gout();
cv::Mat out_mat1, out_mat2;
if (need_first_conversion)
out_vector += cv::gout(out_mat1);
if (need_second_conversion)
out_vector += cv::gout(out_mat2);
// ! [GRunArgsP usage]
auto stream = cc.compileStreaming(cv::compile_args(cv::gapi::core::cpu::kernels()));
stream.setSource(std::move(in_vector));
stream.start();
stream.pull(std::move(out_vector));
stream.stop();
return 0;
}


@@ -0,0 +1,432 @@
#include <algorithm>
#include <cctype>
#include <cmath>     // std::sin, std::cos, std::atan2, std::acos
#include <iostream>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/infer/ie.hpp>
#include <opencv2/gapi/streaming/cap.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/highgui.hpp> // CommandLineParser
const std::string about =
"This is an OpenCV-based version of Gaze Estimation example";
const std::string keys =
"{ h help | | Print this help message }"
"{ input | | Path to the input video file }"
"{ facem | face-detection-retail-0005.xml | Path to OpenVINO face detection model (.xml) }"
"{ faced | CPU | Target device for the face detection (e.g. CPU, GPU, VPU, ...) }"
"{ landm | facial-landmarks-35-adas-0002.xml | Path to OpenVINO landmarks detector model (.xml) }"
"{ landd | CPU | Target device for the landmarks detector (e.g. CPU, GPU, VPU, ...) }"
"{ headm | head-pose-estimation-adas-0001.xml | Path to OpenVINO head pose estimation model (.xml) }"
"{ headd | CPU | Target device for the head pose estimation inference (e.g. CPU, GPU, VPU, ...) }"
"{ gazem | gaze-estimation-adas-0002.xml | Path to OpenVINO gaze vector estimaiton model (.xml) }"
"{ gazed | CPU | Target device for the gaze vector estimation inference (e.g. CPU, GPU, VPU, ...) }"
;
namespace {
std::string weights_path(const std::string &model_path) {
const auto EXT_LEN = 4u;
const auto sz = model_path.size();
CV_Assert(sz > EXT_LEN);
auto ext = model_path.substr(sz - EXT_LEN);
auto lower = [](unsigned char c) {
return static_cast<unsigned char>(std::tolower(c));
};
std::transform(ext.begin(), ext.end(), ext.begin(), lower);
CV_Assert(ext == ".xml");
return model_path.substr(0u, sz - EXT_LEN) + ".bin";
}
} // anonymous namespace
namespace custom {
namespace {
using GMat3 = std::tuple<cv::GMat,cv::GMat,cv::GMat>;
using GMats = cv::GArray<cv::GMat>;
using GRects = cv::GArray<cv::Rect>;
using GSize = cv::GOpaque<cv::Size>;
G_API_NET(Faces, <cv::GMat(cv::GMat)>, "face-detector" );
G_API_NET(Landmarks, <cv::GMat(cv::GMat)>, "facial-landmarks");
G_API_NET(HeadPose, < GMat3(cv::GMat)>, "head-pose");
G_API_NET(Gaze, <cv::GMat(cv::GMat,cv::GMat,cv::GMat)>, "gaze-vector");
G_API_OP(Size, <GSize(cv::GMat)>, "custom.gapi.size") {
static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
return cv::empty_gopaque_desc();
}
};
G_API_OP(ParseSSD,
<GRects(cv::GMat, GSize, bool)>,
"custom.gaze_estimation.parseSSD") {
static cv::GArrayDesc outMeta( const cv::GMatDesc &
, const cv::GOpaqueDesc &
, bool) {
return cv::empty_array_desc();
}
};
// Left/Right eye per every face
G_API_OP(ParseEyes,
<std::tuple<GRects, GRects>(GMats, GRects, GSize)>,
"custom.gaze_estimation.parseEyes") {
static std::tuple<cv::GArrayDesc, cv::GArrayDesc>
outMeta( const cv::GArrayDesc &
, const cv::GArrayDesc &
, const cv::GOpaqueDesc &) {
return std::make_tuple(cv::empty_array_desc(), cv::empty_array_desc());
}
};
// Combine three scalars into a 1x3 vector (per every face)
G_API_OP(ProcessPoses,
<GMats(GMats, GMats, GMats)>,
"custom.gaze_estimation.processPoses") {
static cv::GArrayDesc outMeta( const cv::GArrayDesc &
, const cv::GArrayDesc &
, const cv::GArrayDesc &) {
return cv::empty_array_desc();
}
};
void adjustBoundingBox(cv::Rect& boundingBox) {
auto w = boundingBox.width;
auto h = boundingBox.height;
boundingBox.x -= static_cast<int>(0.067 * w);
boundingBox.y -= static_cast<int>(0.028 * h);
boundingBox.width += static_cast<int>(0.15 * w);
boundingBox.height += static_cast<int>(0.13 * h);
if (boundingBox.width < boundingBox.height) {
auto dx = (boundingBox.height - boundingBox.width);
boundingBox.x -= dx / 2;
boundingBox.width += dx;
} else {
auto dy = (boundingBox.width - boundingBox.height);
boundingBox.y -= dy / 2;
boundingBox.height += dy;
}
}
void gazeVectorToGazeAngles(const cv::Point3f& gazeVector,
cv::Point2f& gazeAngles) {
auto r = cv::norm(gazeVector);
double v0 = static_cast<double>(gazeVector.x);
double v1 = static_cast<double>(gazeVector.y);
double v2 = static_cast<double>(gazeVector.z);
gazeAngles.x = static_cast<float>(180.0 / M_PI * (M_PI_2 + std::atan2(v2, v0)));
gazeAngles.y = static_cast<float>(180.0 / M_PI * (M_PI_2 - std::acos(v1 / r)));
}
GAPI_OCV_KERNEL(OCVSize, Size) {
static void run(const cv::Mat &in, cv::Size &out) {
out = in.size();
}
};
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
static void run(const cv::Mat &in_ssd_result,
const cv::Size &upscale,
const bool filter_out_of_bounds,
std::vector<cv::Rect> &out_objects) {
const auto &in_ssd_dims = in_ssd_result.size;
CV_Assert(in_ssd_dims.dims() == 4u);
const int MAX_PROPOSALS = in_ssd_dims[2];
const int OBJECT_SIZE = in_ssd_dims[3];
CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
const cv::Rect surface({0,0}, upscale);
out_objects.clear();
const float *data = in_ssd_result.ptr<float>();
for (int i = 0; i < MAX_PROPOSALS; i++) {
const float image_id = data[i * OBJECT_SIZE + 0];
const float label = data[i * OBJECT_SIZE + 1];
const float confidence = data[i * OBJECT_SIZE + 2];
const float rc_left = data[i * OBJECT_SIZE + 3];
const float rc_top = data[i * OBJECT_SIZE + 4];
const float rc_right = data[i * OBJECT_SIZE + 5];
const float rc_bottom = data[i * OBJECT_SIZE + 6];
(void) label;
if (image_id < 0.f) {
break; // marks end-of-detections
}
if (confidence < 0.5f) {
continue; // skip objects with low confidence
}
cv::Rect rc; // map relative coordinates to the original image scale
rc.x = static_cast<int>(rc_left * upscale.width);
rc.y = static_cast<int>(rc_top * upscale.height);
rc.width = static_cast<int>(rc_right * upscale.width) - rc.x;
rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
adjustBoundingBox(rc); // TODO: new option?
const auto clipped_rc = rc & surface; // TODO: new option?
if (filter_out_of_bounds) {
if (clipped_rc.area() != rc.area()) {
continue;
}
}
out_objects.emplace_back(clipped_rc);
}
}
};
cv::Rect eyeBox(const cv::Rect &face_rc,
float p1_x, float p1_y, float p2_x, float p2_y,
float scale = 1.8f) {
const auto &up = face_rc.size();
const cv::Point p1 = {
static_cast<int>(p1_x*up.width),
static_cast<int>(p1_y*up.height)
};
const cv::Point p2 = {
static_cast<int>(p2_x*up.width),
static_cast<int>(p2_y*up.height)
};
cv::Rect result;
const auto size = static_cast<float>(cv::norm(p1 - p2));
const auto midpoint = (p1 + p2) / 2;
result.width = static_cast<int>(scale * size);
result.height = result.width;
result.x = face_rc.x + midpoint.x - (result.width / 2);
result.y = face_rc.y + midpoint.y - (result.height / 2);
// Shift result to the original frame's absolute coordinates
return result;
}
GAPI_OCV_KERNEL(OCVParseEyes, ParseEyes) {
static void run(const std::vector<cv::Mat> &in_landmarks_per_face,
const std::vector<cv::Rect> &in_face_rcs,
const cv::Size &frame_size,
std::vector<cv::Rect> &out_left_eyes,
std::vector<cv::Rect> &out_right_eyes) {
const size_t numFaces = in_landmarks_per_face.size();
const cv::Rect surface(cv::Point(0,0), frame_size);
GAPI_Assert(numFaces == in_face_rcs.size());
out_left_eyes.clear();
out_right_eyes.clear();
out_left_eyes.reserve(numFaces);
out_right_eyes.reserve(numFaces);
for (std::size_t i = 0u; i < numFaces; i++) {
const auto &lm = in_landmarks_per_face[i];
const auto &rc = in_face_rcs[i];
// Left eye is defined by points 0/1 (x2),
// Right eye is defined by points 2/3 (x2)
const float *data = lm.ptr<float>();
out_left_eyes .push_back(surface & eyeBox(rc, data[0], data[1], data[2], data[3]));
out_right_eyes.push_back(surface & eyeBox(rc, data[4], data[5], data[6], data[7]));
}
}
};
GAPI_OCV_KERNEL(OCVProcessPoses, ProcessPoses) {
static void run(const std::vector<cv::Mat> &in_ys,
const std::vector<cv::Mat> &in_ps,
const std::vector<cv::Mat> &in_rs,
std::vector<cv::Mat> &out_poses) {
const std::size_t sz = in_ys.size();
GAPI_Assert(sz == in_ps.size() && sz == in_rs.size());
out_poses.clear();
for (std::size_t idx = 0u; idx < sz; idx++) {
cv::Mat pose(1, 3, CV_32FC1);
float *ptr = pose.ptr<float>();
ptr[0] = in_ys[idx].ptr<float>()[0];
ptr[1] = in_ps[idx].ptr<float>()[0];
ptr[2] = in_rs[idx].ptr<float>()[0];
out_poses.push_back(std::move(pose));
}
}
};
} // anonymous namespace
} // namespace custom
namespace vis {
namespace {
cv::Point2f midp(const cv::Rect &rc) {
return (rc.tl() + rc.br()) / 2;
};
void bbox(cv::Mat &m, const cv::Rect &rc) {
cv::rectangle(m, rc, cv::Scalar{0,255,0}, 2, cv::LINE_8, 0);
};
void pose(cv::Mat &m, const cv::Mat &p, const cv::Rect &face_rc) {
const auto *posePtr = p.ptr<float>();
const auto yaw = static_cast<double>(posePtr[0]);
const auto pitch = static_cast<double>(posePtr[1]);
const auto roll = static_cast<double>(posePtr[2]);
const auto sinY = std::sin(yaw * M_PI / 180.0);
const auto sinP = std::sin(pitch * M_PI / 180.0);
const auto sinR = std::sin(roll * M_PI / 180.0);
const auto cosY = std::cos(yaw * M_PI / 180.0);
const auto cosP = std::cos(pitch * M_PI / 180.0);
const auto cosR = std::cos(roll * M_PI / 180.0);
const auto axisLength = 0.4 * face_rc.width;
const auto xCenter = face_rc.x + face_rc.width / 2;
const auto yCenter = face_rc.y + face_rc.height / 2;
const auto center = cv::Point{xCenter, yCenter};
const auto axisln = cv::Point2d{axisLength, axisLength};
const auto ctr = cv::Matx<double,2,2>(cosR*cosY, sinY*sinP*sinR, 0.f, cosP*sinR);
const auto ctt = cv::Matx<double,2,2>(cosR*sinY*sinP, cosY*sinR, 0.f, -cosP*cosR);
const auto ctf = cv::Matx<double,2,2>(sinY*cosP, 0.f, 0.f, sinP);
// center to right
cv::line(m, center, center + static_cast<cv::Point>(ctr*axisln), cv::Scalar(0, 0, 255), 2);
// center to top
cv::line(m, center, center + static_cast<cv::Point>(ctt*axisln), cv::Scalar(0, 255, 0), 2);
// center to forward
cv::line(m, center, center + static_cast<cv::Point>(ctf*axisln), cv::Scalar(255, 0, 255), 2);
}
void vvec(cv::Mat &m, const cv::Mat &v, const cv::Rect &face_rc,
const cv::Rect &left_rc, const cv::Rect &right_rc) {
const auto scale = 0.002 * face_rc.width;
cv::Point3f gazeVector;
const auto *gazePtr = v.ptr<float>();
gazeVector.x = gazePtr[0];
gazeVector.y = gazePtr[1];
gazeVector.z = gazePtr[2];
gazeVector = gazeVector / cv::norm(gazeVector);
const double arrowLength = 0.4 * face_rc.width;
const auto left_mid = midp(left_rc);
const auto right_mid = midp(right_rc);
cv::Point2f gazeArrow;
gazeArrow.x = gazeVector.x;
gazeArrow.y = -gazeVector.y;
gazeArrow *= arrowLength;
cv::arrowedLine(m, left_mid, left_mid + gazeArrow, cv::Scalar(255, 0, 0), 2);
cv::arrowedLine(m, right_mid, right_mid + gazeArrow, cv::Scalar(255, 0, 0), 2);
cv::Point2f gazeAngles;
custom::gazeVectorToGazeAngles(gazeVector, gazeAngles);
cv::putText(m,
cv::format("gaze angles: (h=%0.0f, v=%0.0f)",
static_cast<double>(std::round(gazeAngles.x)),
static_cast<double>(std::round(gazeAngles.y))),
cv::Point(static_cast<int>(face_rc.tl().x),
static_cast<int>(face_rc.br().y + 12. * face_rc.width / 100.)),
cv::FONT_HERSHEY_PLAIN, scale * 2, cv::Scalar::all(255), 1);
};
} // anonymous namespace
} // namespace vis
int main(int argc, char *argv[])
{
cv::CommandLineParser cmd(argc, argv, keys);
cmd.about(about);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
cv::GMat in;
cv::GMat faces = cv::gapi::infer<custom::Faces>(in);
cv::GOpaque<cv::Size> sz = custom::Size::on(in); // FIXME
cv::GArray<cv::Rect> faces_rc = custom::ParseSSD::on(faces, sz, true);
cv::GArray<cv::GMat> angles_y, angles_p, angles_r;
std::tie(angles_y, angles_p, angles_r) = cv::gapi::infer<custom::HeadPose>(faces_rc, in);
cv::GArray<cv::GMat> heads_pos = custom::ProcessPoses::on(angles_y, angles_p, angles_r);
cv::GArray<cv::GMat> landmarks = cv::gapi::infer<custom::Landmarks>(faces_rc, in);
cv::GArray<cv::Rect> left_eyes, right_eyes;
std::tie(left_eyes, right_eyes) = custom::ParseEyes::on(landmarks, faces_rc, sz);
cv::GArray<cv::GMat> gaze_vectors = cv::gapi::infer2<custom::Gaze>( in
, left_eyes
, right_eyes
, heads_pos);
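// Note: infer<Net>(roi_list, frame) above is the ROI-list form of inference: the
// network runs once per rectangle and the per-ROI results are collected into
// GArrays. infer2<Net>(frame, ...) iterates its GArray arguments in lock-step,
// so the gaze network runs once per (left eye, right eye, head pose) triple.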
cv::GComputation graph(cv::GIn(in),
cv::GOut( cv::gapi::copy(in)
, faces_rc
, left_eyes
, right_eyes
, heads_pos
, gaze_vectors));
const auto input_file_name = cmd.get<std::string>("input");
const auto face_model_path = cmd.get<std::string>("facem");
const auto head_model_path = cmd.get<std::string>("headm");
const auto lmrk_model_path = cmd.get<std::string>("landm");
const auto gaze_model_path = cmd.get<std::string>("gazem");
auto face_net = cv::gapi::ie::Params<custom::Faces> {
face_model_path, // path to topology IR
weights_path(face_model_path), // path to weights
cmd.get<std::string>("faced"), /// device specifier
};
auto head_net = cv::gapi::ie::Params<custom::HeadPose> {
head_model_path, // path to topology IR
weights_path(head_model_path), // path to weights
cmd.get<std::string>("headd"), // device specifier
}.cfgOutputLayers({"angle_y_fc", "angle_p_fc", "angle_r_fc"});
auto landmarks_net = cv::gapi::ie::Params<custom::Landmarks> {
lmrk_model_path, // path to topology IR
weights_path(lmrk_model_path), // path to weights
cmd.get<std::string>("landd"), // device specifier
};
auto gaze_net = cv::gapi::ie::Params<custom::Gaze> {
gaze_model_path, // path to topology IR
weights_path(gaze_model_path), // path to weights
cmd.get<std::string>("gazed"), // device specifier
}.cfgInputLayers({"left_eye_image", "right_eye_image", "head_pose_angles"});
auto kernels = cv::gapi::kernels< custom::OCVSize
, custom::OCVParseSSD
, custom::OCVParseEyes
, custom::OCVProcessPoses>();
auto networks = cv::gapi::networks(face_net, head_net, landmarks_net, gaze_net);
auto pipeline = graph.compileStreaming(cv::compile_args(networks, kernels));
cv::TickMeter tm;
cv::Mat image;
std::vector<cv::Rect> out_faces, out_right_eyes, out_left_eyes;
std::vector<cv::Mat> out_poses;
std::vector<cv::Mat> out_gazes;
std::size_t frames = 0u;
std::cout << "Reading " << input_file_name << std::endl;
pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input_file_name));
pipeline.start();
tm.start();
while (pipeline.pull(cv::gout( image
, out_faces
, out_left_eyes
, out_right_eyes
, out_poses
, out_gazes))) {
frames++;
// Visualize results on the frame
for (auto &&rc : out_faces) vis::bbox(image, rc);
for (auto &&rc : out_left_eyes) vis::bbox(image, rc);
for (auto &&rc : out_right_eyes) vis::bbox(image, rc);
for (std::size_t i = 0u; i < out_faces.size(); i++) {
vis::pose(image, out_poses[i], out_faces[i]);
vis::vvec(image, out_gazes[i], out_faces[i], out_left_eyes[i], out_right_eyes[i]);
}
tm.stop();
const auto fps_str = std::to_string(frames / tm.getTimeSec()) + " FPS";
cv::putText(image, fps_str, {0,32}, cv::FONT_HERSHEY_SIMPLEX, 1.0, {0,255,0}, 2);
cv::imshow("Out", image);
cv::waitKey(1);
tm.start();
}
tm.stop();
std::cout << "Processed " << frames << " frames"
<< " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
return 0;
}


@@ -0,0 +1,195 @@
#include <algorithm>   // std::transform, std::max_element
#include <chrono>
#include <cmath>       // std::exp
#include <iomanip>
#include <numeric>     // std::accumulate
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/gapi.hpp"
#include "opencv2/gapi/core.hpp"
#include "opencv2/gapi/imgproc.hpp"
#include "opencv2/gapi/infer.hpp"
#include "opencv2/gapi/infer/ie.hpp"
#include "opencv2/gapi/infer/onnx.hpp"
#include "opencv2/gapi/cpu/gcpukernel.hpp"
#include "opencv2/gapi/streaming/cap.hpp"
namespace {
const std::string keys =
"{ h help | | print this help message }"
"{ input | | Path to an input video file }"
"{ fdm | | IE face detection model IR }"
"{ fdw | | IE face detection model weights }"
"{ fdd | | IE face detection device }"
"{ emom | | ONNX emotions recognition model }"
"{ output | | (Optional) Path to an output video file }"
;
} // namespace
namespace custom {
G_API_NET(Faces, <cv::GMat(cv::GMat)>, "face-detector");
G_API_NET(Emotions, <cv::GMat(cv::GMat)>, "emotions-recognition");
G_API_OP(PostProc, <cv::GArray<cv::Rect>(cv::GMat, cv::GMat)>, "custom.fd_postproc") {
static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &) {
return cv::empty_array_desc();
}
};
GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
static void run(const cv::Mat &in_ssd_result,
const cv::Mat &in_frame,
std::vector<cv::Rect> &out_faces) {
const int MAX_PROPOSALS = 200;
const int OBJECT_SIZE = 7;
const cv::Size upscale = in_frame.size();
const cv::Rect surface({0,0}, upscale);
out_faces.clear();
const float *data = in_ssd_result.ptr<float>();
for (int i = 0; i < MAX_PROPOSALS; i++) {
const float image_id = data[i * OBJECT_SIZE + 0]; // batch id
const float confidence = data[i * OBJECT_SIZE + 2];
const float rc_left = data[i * OBJECT_SIZE + 3];
const float rc_top = data[i * OBJECT_SIZE + 4];
const float rc_right = data[i * OBJECT_SIZE + 5];
const float rc_bottom = data[i * OBJECT_SIZE + 6];
if (image_id < 0.f) { // indicates end of detections
break;
}
if (confidence < 0.5f) {
continue;
}
cv::Rect rc;
rc.x = static_cast<int>(rc_left * upscale.width);
rc.y = static_cast<int>(rc_top * upscale.height);
rc.width = static_cast<int>(rc_right * upscale.width) - rc.x;
rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
out_faces.push_back(rc & surface);
}
}
};
//! [Postproc]
} // namespace custom
namespace labels {
// Labels as defined in
// https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus
//
const std::string emotions[] = {
"neutral", "happiness", "surprise", "sadness", "anger", "disgust", "fear", "contempt"
};
namespace {
template<typename Iter>
std::vector<float> softmax(Iter begin, Iter end) {
std::vector<float> prob(end - begin, 0.f);
std::transform(begin, end, prob.begin(), [](float x) { return std::exp(x); });
float sum = std::accumulate(prob.begin(), prob.end(), 0.0f);
for (int i = 0; i < static_cast<int>(prob.size()); i++)
prob[i] /= sum;
return prob;
}
void DrawResults(cv::Mat &frame,
const std::vector<cv::Rect> &faces,
const std::vector<cv::Mat> &out_emotions) {
CV_Assert(faces.size() == out_emotions.size());
for (auto it = faces.begin(); it != faces.end(); ++it) {
const auto idx = std::distance(faces.begin(), it);
const auto &rc = *it;
const float *emotions_data = out_emotions[idx].ptr<float>();
auto sm = softmax(emotions_data, emotions_data + 8);
const auto emo_id = std::max_element(sm.begin(), sm.end()) - sm.begin();
const int ATTRIB_OFFSET = 15;
cv::rectangle(frame, rc, {0, 255, 0}, 4);
cv::putText(frame, emotions[emo_id],
cv::Point(rc.x, rc.y - ATTRIB_OFFSET),
cv::FONT_HERSHEY_COMPLEX_SMALL,
1,
cv::Scalar(0, 0, 255));
std::cout << emotions[emo_id] << " at " << rc << std::endl;
}
}
} // anonymous namespace
} // namespace labels
int main(int argc, char *argv[])
{
cv::CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
const std::string input = cmd.get<std::string>("input");
const std::string output = cmd.get<std::string>("output");
// OpenVINO FD parameters here
auto det_net = cv::gapi::ie::Params<custom::Faces> {
cmd.get<std::string>("fdm"), // read cmd args: path to topology IR
cmd.get<std::string>("fdw"), // read cmd args: path to weights
cmd.get<std::string>("fdd"), // read cmd args: device specifier
};
// ONNX Emotions parameters here
auto emo_net = cv::gapi::onnx::Params<custom::Emotions> {
cmd.get<std::string>("emom"), // read cmd args: path to the ONNX model
}.cfgNormalize({false}); // model accepts 0..255 range in FP32
auto kernels = cv::gapi::kernels<custom::OCVPostProc>();
auto networks = cv::gapi::networks(det_net, emo_net);
cv::GMat in;
cv::GMat bgr = cv::gapi::copy(in);
cv::GMat frame = cv::gapi::streaming::desync(bgr);
cv::GMat detections = cv::gapi::infer<custom::Faces>(frame);
cv::GArray<cv::Rect> faces = custom::PostProc::on(detections, frame);
cv::GArray<cv::GMat> emotions = cv::gapi::infer<custom::Emotions>(faces, frame);
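// Note: desync() above splits the graph into a main path (bgr, produced for every
// input frame) and a desynchronized path (frame -> detections -> emotions) which
// runs at its own pace. Desynchronized outputs may be absent for a given frame,
// which is why they are pulled below as cv::util::optional values and the last
// known results are cached before drawing.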
auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(bgr, faces, emotions))
.compileStreaming(cv::compile_args(kernels, networks));
auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input);
pipeline.setSource(cv::gin(in_src));
pipeline.start();
cv::util::optional<cv::Mat> out_frame;
cv::util::optional<std::vector<cv::Rect>> out_faces;
cv::util::optional<std::vector<cv::Mat>> out_emotions;
cv::Mat last_mat;
std::vector<cv::Rect> last_faces;
std::vector<cv::Mat> last_emotions;
cv::VideoWriter writer;
while (pipeline.pull(cv::gout(out_frame, out_faces, out_emotions))) {
if (out_faces && out_emotions) {
last_faces = *out_faces;
last_emotions = *out_emotions;
}
if (out_frame) {
last_mat = *out_frame;
labels::DrawResults(last_mat, last_faces, last_emotions);
if (!output.empty()) {
if (!writer.isOpened()) {
const auto sz = cv::Size{last_mat.cols, last_mat.rows};
writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
CV_Assert(writer.isOpened());
}
writer << last_mat;
}
}
if (!last_mat.empty()) {
cv::imshow("Out", last_mat);
cv::waitKey(1);
}
}
return 0;
}


@@ -0,0 +1,264 @@
#include <algorithm>
#include <iostream>
#include <sstream>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/render.hpp>
#include <opencv2/gapi/infer/ie.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/gapi/streaming/cap.hpp>
#include <opencv2/highgui.hpp>
const std::string keys =
"{ h help | | Print this help message }"
"{ input | | Path to the input video file }"
"{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }"
"{ faced | CPU | Target device for face detection model (e.g. CPU, GPU, VPU, ...) }"
"{ r roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. Identified automatically when not set }";
namespace {
std::string weights_path(const std::string &model_path) {
const auto EXT_LEN = 4u;
const auto sz = model_path.size();
CV_Assert(sz > EXT_LEN);
auto ext = model_path.substr(sz - EXT_LEN);
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){
return static_cast<unsigned char>(std::tolower(c));
});
CV_Assert(ext == ".xml");
return model_path.substr(0u, sz - EXT_LEN) + ".bin";
}
cv::util::optional<cv::Rect> parse_roi(const std::string &rc) {
cv::Rect rv;
char delim[3];
std::stringstream is(rc);
is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height;
if (is.bad()) {
return cv::util::optional<cv::Rect>(); // empty value
}
const auto is_delim = [](char c) {
return c == ',';
};
if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) {
return cv::util::optional<cv::Rect>(); // empty value
}
if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) {
return cv::util::optional<cv::Rect>(); // empty value
}
return cv::util::make_optional(std::move(rv));
}
} // namespace
namespace custom {
G_API_NET(FaceDetector, <cv::GMat(cv::GMat)>, "face-detector");
using GDetections = cv::GArray<cv::Rect>;
using GRect = cv::GOpaque<cv::Rect>;
using GSize = cv::GOpaque<cv::Size>;
using GPrims = cv::GArray<cv::gapi::wip::draw::Prim>;
G_API_OP(GetSize, <GSize(cv::GMat)>, "sample.custom.get-size") {
static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
return cv::empty_gopaque_desc();
}
};
G_API_OP(LocateROI, <GRect(cv::GMat)>, "sample.custom.locate-roi") {
static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
return cv::empty_gopaque_desc();
}
};
G_API_OP(ParseSSD, <GDetections(cv::GMat, GRect, GSize)>, "sample.custom.parse-ssd") {
static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) {
return cv::empty_array_desc();
}
};
G_API_OP(BBoxes, <GPrims(GDetections, GRect)>, "sample.custom.b-boxes") {
static cv::GArrayDesc outMeta(const cv::GArrayDesc &, const cv::GOpaqueDesc &) {
return cv::empty_array_desc();
}
};
GAPI_OCV_KERNEL(OCVGetSize, GetSize) {
static void run(const cv::Mat &in, cv::Size &out) {
out = {in.cols, in.rows};
}
};
GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) {
// This is the place where we can run extra analytics
// on the input image frame and select the ROI (region
// of interest) where we want to detect our objects (or
// run any other inference).
//
// Currently it doesn't do anything intelligent and simply
// crops the input image to a square (the most convenient
// aspect ratio for detectors to use).
static void run(const cv::Mat &in_mat, cv::Rect &out_rect) {
// Identify the central point & square size (- some padding)
const auto center = cv::Point{in_mat.cols/2, in_mat.rows/2};
auto sqside = std::min(in_mat.cols, in_mat.rows);
// Now build the central square ROI
out_rect = cv::Rect{ center.x - sqside/2
, center.y - sqside/2
, sqside
, sqside
};
}
};
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
static void run(const cv::Mat &in_ssd_result,
const cv::Rect &in_roi,
const cv::Size &in_parent_size,
std::vector<cv::Rect> &out_objects) {
const auto &in_ssd_dims = in_ssd_result.size;
CV_Assert(in_ssd_dims.dims() == 4u);
const int MAX_PROPOSALS = in_ssd_dims[2];
const int OBJECT_SIZE = in_ssd_dims[3];
CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
const cv::Size up_roi = in_roi.size();
const cv::Rect surface({0,0}, in_parent_size);
out_objects.clear();
const float *data = in_ssd_result.ptr<float>();
for (int i = 0; i < MAX_PROPOSALS; i++) {
const float image_id = data[i * OBJECT_SIZE + 0];
const float label = data[i * OBJECT_SIZE + 1];
const float confidence = data[i * OBJECT_SIZE + 2];
const float rc_left = data[i * OBJECT_SIZE + 3];
const float rc_top = data[i * OBJECT_SIZE + 4];
const float rc_right = data[i * OBJECT_SIZE + 5];
const float rc_bottom = data[i * OBJECT_SIZE + 6];
(void) label; // unused
if (image_id < 0.f) {
break; // marks end-of-detections
}
if (confidence < 0.5f) {
continue; // skip objects with low confidence
}
// map relative coordinates to the original image scale
// taking the ROI into account
cv::Rect rc;
rc.x = static_cast<int>(rc_left * up_roi.width);
rc.y = static_cast<int>(rc_top * up_roi.height);
rc.width = static_cast<int>(rc_right * up_roi.width) - rc.x;
rc.height = static_cast<int>(rc_bottom * up_roi.height) - rc.y;
rc.x += in_roi.x;
rc.y += in_roi.y;
out_objects.emplace_back(rc & surface);
}
}
};
GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) {
// This kernel converts the rectangles into G-API's
// rendering primitives
static void run(const std::vector<cv::Rect> &in_face_rcs,
const cv::Rect &in_roi,
std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
out_prims.clear();
const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) {
return cv::gapi::wip::draw::Rect(rc, clr, 2);
};
out_prims.emplace_back(cvt(in_roi, CV_RGB(0,255,255))); // cyan
for (auto &&rc : in_face_rcs) {
out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green
}
}
};
} // namespace custom
int main(int argc, char *argv[])
{
cv::CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
// Prepare parameters first
const std::string input = cmd.get<std::string>("input");
const auto opt_roi = parse_roi(cmd.get<std::string>("roi"));
const auto face_model_path = cmd.get<std::string>("facem");
auto face_net = cv::gapi::ie::Params<custom::FaceDetector> {
face_model_path, // path to topology IR
weights_path(face_model_path), // path to weights
cmd.get<std::string>("faced"), // device specifier
};
auto kernels = cv::gapi::kernels
< custom::OCVGetSize
, custom::OCVLocateROI
, custom::OCVParseSSD
, custom::OCVBBoxes>();
auto networks = cv::gapi::networks(face_net);
// Now build the graph. The graph structure may vary
// based on the input parameters
cv::GStreamingCompiled pipeline;
auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
if (opt_roi.has_value()) {
// Use the value provided by user
std::cout << "Will run inference for static region "
<< opt_roi.value()
<< " only"
<< std::endl;
cv::GMat in;
cv::GOpaque<cv::Rect> in_roi;
auto blob = cv::gapi::infer<custom::FaceDetector>(in_roi, in);
auto rcs = custom::ParseSSD::on(blob, in_roi, custom::GetSize::on(in));
auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, in_roi));
pipeline = cv::GComputation(cv::GIn(in, in_roi), cv::GOut(out))
.compileStreaming(cv::compile_args(kernels, networks));
// Since the ROI to detect is manual, make it part of the input vector
inputs.push_back(cv::gin(opt_roi.value())[0]);
} else {
// Automatically detect ROI to infer. Make it output parameter
std::cout << "ROI is not set or invalid. Locating it automatically"
<< std::endl;
cv::GMat in;
cv::GOpaque<cv::Rect> roi = custom::LocateROI::on(in);
auto blob = cv::gapi::infer<custom::FaceDetector>(roi, in);
auto rcs = custom::ParseSSD::on(blob, roi, custom::GetSize::on(in));
auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, roi));
pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out))
.compileStreaming(cv::compile_args(kernels, networks));
}
// The execution part
pipeline.setSource(std::move(inputs));
pipeline.start();
cv::Mat out;
while (pipeline.pull(cv::gout(out))) {
cv::imshow("Out", out);
cv::waitKey(1);
}
return 0;
}


@@ -0,0 +1,213 @@
#include <algorithm>
#include <iostream>
#include <sstream>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/render.hpp>
#include <opencv2/gapi/infer/onnx.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/gapi/streaming/cap.hpp>
#include <opencv2/highgui.hpp>
namespace custom {
G_API_NET(ObjDetector, <cv::GMat(cv::GMat)>, "object-detector");
using GDetections = cv::GArray<cv::Rect>;
using GSize = cv::GOpaque<cv::Size>;
using GPrims = cv::GArray<cv::gapi::wip::draw::Prim>;
G_API_OP(GetSize, <GSize(cv::GMat)>, "sample.custom.get-size") {
static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
return cv::empty_gopaque_desc();
}
};
G_API_OP(ParseSSD, <GDetections(cv::GMat, GSize)>, "sample.custom.parse-ssd") {
static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &) {
return cv::empty_array_desc();
}
};
G_API_OP(BBoxes, <GPrims(GDetections)>, "sample.custom.b-boxes") {
static cv::GArrayDesc outMeta(const cv::GArrayDesc &) {
return cv::empty_array_desc();
}
};
GAPI_OCV_KERNEL(OCVGetSize, GetSize) {
static void run(const cv::Mat &in, cv::Size &out) {
out = {in.cols, in.rows};
}
};
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
static void run(const cv::Mat &in_ssd_result,
const cv::Size &in_parent_size,
std::vector<cv::Rect> &out_objects) {
const auto &in_ssd_dims = in_ssd_result.size;
CV_Assert(in_ssd_dims.dims() == 4u);
const int MAX_PROPOSALS = in_ssd_dims[2];
const int OBJECT_SIZE = in_ssd_dims[3];
CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
const cv::Rect surface({0,0}, in_parent_size);
out_objects.clear();
const float *data = in_ssd_result.ptr<float>();
for (int i = 0; i < MAX_PROPOSALS; i++) {
const float image_id = data[i * OBJECT_SIZE + 0];
const float label = data[i * OBJECT_SIZE + 1];
const float confidence = data[i * OBJECT_SIZE + 2];
const float rc_left = data[i * OBJECT_SIZE + 3];
const float rc_top = data[i * OBJECT_SIZE + 4];
const float rc_right = data[i * OBJECT_SIZE + 5];
const float rc_bottom = data[i * OBJECT_SIZE + 6];
(void) label; // unused
if (image_id < 0.f) {
break; // marks end-of-detections
}
if (confidence < 0.5f) {
continue; // skip objects with low confidence
}
// map relative coordinates to the original image scale
cv::Rect rc;
rc.x = static_cast<int>(rc_left * in_parent_size.width);
rc.y = static_cast<int>(rc_top * in_parent_size.height);
rc.width = static_cast<int>(rc_right * in_parent_size.width) - rc.x;
rc.height = static_cast<int>(rc_bottom * in_parent_size.height) - rc.y;
out_objects.emplace_back(rc & surface);
}
}
};
GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) {
// This kernel converts the rectangles into G-API's
// rendering primitives
static void run(const std::vector<cv::Rect> &in_obj_rcs,
std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
out_prims.clear();
const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) {
return cv::gapi::wip::draw::Rect(rc, clr, 2);
};
for (auto &&rc : in_obj_rcs) {
out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green
}
std::cout << "Detections:";
for (auto &&rc : in_obj_rcs) std::cout << ' ' << rc;
std::cout << std::endl;
}
};
} // namespace custom
namespace {
void remap_ssd_ports(const std::unordered_map<std::string, cv::Mat> &onnx,
std::unordered_map<std::string, cv::Mat> &gapi) {
// Assemble ONNX-processed outputs back to a single 1x1x200x7 blob
// to preserve compatibility with OpenVINO-based SSD pipeline
const cv::Mat &num_detections = onnx.at("num_detections:0");
const cv::Mat &detection_boxes = onnx.at("detection_boxes:0");
const cv::Mat &detection_scores = onnx.at("detection_scores:0");
const cv::Mat &detection_classes = onnx.at("detection_classes:0");
GAPI_Assert(num_detections.depth() == CV_32F);
GAPI_Assert(detection_boxes.depth() == CV_32F);
GAPI_Assert(detection_scores.depth() == CV_32F);
GAPI_Assert(detection_classes.depth() == CV_32F);
cv::Mat &ssd_output = gapi.at("detection_output");
const int num_objects = static_cast<int>(num_detections.ptr<float>()[0]);
const float *in_boxes = detection_boxes.ptr<float>();
const float *in_scores = detection_scores.ptr<float>();
const float *in_classes = detection_classes.ptr<float>();
float *ptr = ssd_output.ptr<float>();
for (int i = 0; i < num_objects; i++) {
ptr[0] = 0.f; // "image_id"
ptr[1] = in_classes[i]; // "label"
ptr[2] = in_scores[i]; // "confidence"
ptr[3] = in_boxes[4*i + 1]; // left
ptr[4] = in_boxes[4*i + 0]; // top
ptr[5] = in_boxes[4*i + 3]; // right
ptr[6] = in_boxes[4*i + 2]; // bottom
ptr += 7;
in_boxes += 4;
}
if (num_objects < ssd_output.size[2]-1) {
// put a -1 mark at the end of output blob if there is space left
ptr[0] = -1.f;
}
}
} // anonymous namespace
const std::string keys =
"{ h help | | Print this help message }"
"{ input | | Path to the input video file }"
"{ output | | (Optional) path to output video file }"
"{ detm | | Path to an ONNX SSD object detection model (.onnx) }"
;
int main(int argc, char *argv[])
{
cv::CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
// Prepare parameters first
const std::string input = cmd.get<std::string>("input");
const std::string output = cmd.get<std::string>("output");
const auto obj_model_path = cmd.get<std::string>("detm");
auto obj_net = cv::gapi::onnx::Params<custom::ObjDetector>{obj_model_path}
.cfgOutputLayers({"detection_output"})
.cfgPostProc({cv::GMatDesc{CV_32F, {1,1,200,7}}}, remap_ssd_ports);
auto kernels = cv::gapi::kernels< custom::OCVGetSize
, custom::OCVParseSSD
, custom::OCVBBoxes>();
auto networks = cv::gapi::networks(obj_net);
// Now build the graph
cv::GMat in;
auto blob = cv::gapi::infer<custom::ObjDetector>(in);
auto rcs = custom::ParseSSD::on(blob, custom::GetSize::on(in));
auto out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs));
cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out))
.compileStreaming(cv::compile_args(kernels, networks));
auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
// The execution part
pipeline.setSource(std::move(inputs));
pipeline.start();
cv::VideoWriter writer;
cv::Mat outMat;
while (pipeline.pull(cv::gout(outMat))) {
cv::imshow("Out", outMat);
cv::waitKey(1);
if (!output.empty()) {
if (!writer.isOpened()) {
const auto sz = cv::Size{outMat.cols, outMat.rows};
writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
CV_Assert(writer.isOpened());
}
writer << outMat;
}
}
return 0;
}


@@ -0,0 +1,157 @@
// [filter2d_api]
#include <opencv2/gapi.hpp>
G_TYPED_KERNEL(GFilter2D,
<cv::GMat(cv::GMat,int,cv::Mat,cv::Point,double,int,cv::Scalar)>,
"org.opencv.imgproc.filters.filter2D")
{
static cv::GMatDesc // outMeta's return value type
outMeta(cv::GMatDesc in , // descriptor of input GMat
int ddepth , // depth parameter
cv::Mat /* coeffs */, // (unused)
cv::Point /* anchor */, // (unused)
double /* scale */, // (unused)
int /* border */, // (unused)
cv::Scalar /* bvalue */ ) // (unused)
{
return in.withDepth(ddepth);
}
};
// [filter2d_api]
#include <iostream>    // std::cout in main()
cv::GMat filter2D(cv::GMat ,
int ,
cv::Mat ,
cv::Point ,
double ,
int ,
cv::Scalar);
// [filter2d_wrap]
cv::GMat filter2D(cv::GMat in,
int ddepth,
cv::Mat k,
cv::Point anchor = cv::Point(-1,-1),
double scale = 0.,
int border = cv::BORDER_DEFAULT,
cv::Scalar bval = cv::Scalar(0))
{
return GFilter2D::on(in, ddepth, k, anchor, scale, border, bval);
}
// [filter2d_wrap]
// [compound]
#include <opencv2/gapi/gcompoundkernel.hpp> // GAPI_COMPOUND_KERNEL()
using PointArray2f = cv::GArray<cv::Point2f>;
G_TYPED_KERNEL(HarrisCorners,
<PointArray2f(cv::GMat,int,double,double,int,double)>,
"org.opencv.imgproc.harris_corner")
{
static cv::GArrayDesc outMeta(const cv::GMatDesc &,
int,
double,
double,
int,
double)
{
// No special metadata for arrays in G-API (yet)
return cv::empty_array_desc();
}
};
// Define Fluid-backend-local kernels which form GoodFeatures
G_TYPED_KERNEL(HarrisResponse,
<cv::GMat(cv::GMat,double,int,double)>,
"org.opencv.fluid.harris_response")
{
static cv::GMatDesc outMeta(const cv::GMatDesc &in,
double,
int,
double)
{
return in.withType(CV_32F, 1);
}
};
G_TYPED_KERNEL(ArrayNMS,
<PointArray2f(cv::GMat,int,double)>,
"org.opencv.cpu.nms_array")
{
static cv::GArrayDesc outMeta(const cv::GMatDesc &,
int,
double)
{
return cv::empty_array_desc();
}
};
GAPI_COMPOUND_KERNEL(GFluidHarrisCorners, HarrisCorners)
{
static PointArray2f
expand(cv::GMat in,
int maxCorners,
double quality,
double minDist,
int blockSize,
double k)
{
cv::GMat response = HarrisResponse::on(in, quality, blockSize, k);
return ArrayNMS::on(response, maxCorners, minDist);
}
};
// Then implement HarrisResponse as a Fluid kernel and ArrayNMS
// as a generic (OpenCV) kernel (a sketch of the latter follows the filter2D kernel below)
// [compound]
// [filter2d_ocv]
#include <opencv2/gapi/cpu/gcpukernel.hpp> // GAPI_OCV_KERNEL()
#include <opencv2/imgproc.hpp> // cv::filter2D()
GAPI_OCV_KERNEL(GCPUFilter2D, GFilter2D)
{
static void
run(const cv::Mat &in, // in - derived from GMat
const int ddepth, // opaque (passed as-is)
const cv::Mat &k, // opaque (passed as-is)
const cv::Point &anchor, // opaque (passed as-is)
const double delta, // opaque (passed as-is)
const int border, // opaque (passed as-is)
const cv::Scalar &, // opaque (passed as-is)
cv::Mat &out) // out - derived from GMat (retval)
{
cv::filter2D(in, out, ddepth, k, anchor, delta, border);
}
};
// [filter2d_ocv]
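// A hedged sketch (not part of the original snippet): the "generic (OpenCV)
// kernel" mentioned in the compound-kernel section could implement ArrayNMS as a
// naive greedy non-maximum suppression over the response map.
#include <algorithm>   // std::sort (for the sketch below)
#include <vector>
GAPI_OCV_KERNEL(GCPUArrayNMS, ArrayNMS)
{
    static void run(const cv::Mat &response,        // CV_32F response map
                    int maxCorners,
                    double minDist,
                    std::vector<cv::Point2f> &out)
    {
        struct Cand { cv::Point2f pt; float val; };
        std::vector<Cand> cands;
        // Collect all positive responses as corner candidates
        for (int y = 0; y < response.rows; y++)
            for (int x = 0; x < response.cols; x++)
            {
                const float v = response.at<float>(y, x);
                if (v > 0.f)
                    cands.push_back({cv::Point2f(static_cast<float>(x),
                                                 static_cast<float>(y)), v});
            }
        // Strongest responses first
        std::sort(cands.begin(), cands.end(),
                  [](const Cand &a, const Cand &b){ return a.val > b.val; });
        out.clear();
        for (const auto &c : cands)
        {
            if (static_cast<int>(out.size()) >= maxCorners) break;
            bool far_enough = true;
            for (const auto &p : out)
            {
                const cv::Point2f d = p - c.pt;
                if (static_cast<double>(d.dot(d)) < minDist * minDist)
                {
                    far_enough = false;
                    break;
                }
            }
            if (far_enough) out.push_back(c.pt);
        }
    }
};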
int main(int, char *[])
{
std::cout << "This sample is non-complete. It is used as code snippents in documentation." << std::endl;
cv::Mat conv_kernel_mat;
{
// [filter2d_on]
cv::GMat in;
cv::GMat out = GFilter2D::on(/* GMat */ in,
/* int */ -1,
/* Mat */ conv_kernel_mat,
/* Point */ cv::Point(-1,-1),
/* double */ 0.,
/* int */ cv::BORDER_DEFAULT,
/* Scalar */ cv::Scalar(0));
// [filter2d_on]
}
{
// [filter2d_wrap_call]
cv::GMat in;
cv::GMat out = filter2D(in, -1, conv_kernel_mat);
// [filter2d_wrap_call]
}
return 0;
}


@@ -0,0 +1,216 @@
#include <algorithm>
#include <iostream>
#include <cctype>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/render.hpp>
#include <opencv2/gapi/infer/ie.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/gapi/streaming/cap.hpp>
#include <opencv2/highgui.hpp>
const std::string about =
"This is an OpenCV-based version of Privacy Masking Camera example";
const std::string keys =
"{ h help | | Print this help message }"
"{ input | | Path to the input video file }"
"{ platm | vehicle-license-plate-detection-barrier-0106.xml | Path to OpenVINO IE vehicle/plate detection model (.xml) }"
"{ platd | CPU | Target device for vehicle/plate detection model (e.g. CPU, GPU, VPU, ...) }"
"{ facem | face-detection-retail-0005.xml | Path to OpenVINO IE face detection model (.xml) }"
"{ faced | CPU | Target device for face detection model (e.g. CPU, GPU, VPU, ...) }"
"{ trad | false | Run processing in a traditional (non-pipelined) way }"
"{ noshow | false | Don't display UI (improves performance) }";
namespace {
std::string weights_path(const std::string &model_path) {
const auto EXT_LEN = 4u;
const auto sz = model_path.size();
CV_Assert(sz > EXT_LEN);
auto ext = model_path.substr(sz - EXT_LEN);
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ return static_cast<unsigned char>(std::tolower(c)); });
CV_Assert(ext == ".xml");
return model_path.substr(0u, sz - EXT_LEN) + ".bin";
}
} // namespace
namespace custom {
G_API_NET(VehLicDetector, <cv::GMat(cv::GMat)>, "vehicle-license-plate-detector");
G_API_NET(FaceDetector, <cv::GMat(cv::GMat)>, "face-detector");
using GDetections = cv::GArray<cv::Rect>;
G_API_OP(ParseSSD, <GDetections(cv::GMat, cv::GMat, int)>, "custom.privacy_masking.postproc") {
static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GMatDesc &, int) {
return cv::empty_array_desc();
}
};
using GPrims = cv::GArray<cv::gapi::wip::draw::Prim>;
G_API_OP(ToMosaic, <GPrims(GDetections, GDetections)>, "custom.privacy_masking.to_mosaic") {
static cv::GArrayDesc outMeta(const cv::GArrayDesc &, const cv::GArrayDesc &) {
return cv::empty_array_desc();
}
};
GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
static void run(const cv::Mat &in_ssd_result,
const cv::Mat &in_frame,
const int filter_label,
std::vector<cv::Rect> &out_objects) {
const auto &in_ssd_dims = in_ssd_result.size;
CV_Assert(in_ssd_dims.dims() == 4u);
const int MAX_PROPOSALS = in_ssd_dims[2];
const int OBJECT_SIZE = in_ssd_dims[3];
CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
const cv::Size upscale = in_frame.size();
const cv::Rect surface({0,0}, upscale);
out_objects.clear();
const float *data = in_ssd_result.ptr<float>();
for (int i = 0; i < MAX_PROPOSALS; i++) {
const float image_id = data[i * OBJECT_SIZE + 0];
const float label = data[i * OBJECT_SIZE + 1];
const float confidence = data[i * OBJECT_SIZE + 2];
const float rc_left = data[i * OBJECT_SIZE + 3];
const float rc_top = data[i * OBJECT_SIZE + 4];
const float rc_right = data[i * OBJECT_SIZE + 5];
const float rc_bottom = data[i * OBJECT_SIZE + 6];
if (image_id < 0.f) {
break; // marks end-of-detections
}
if (confidence < 0.5f) {
continue; // skip objects with low confidence
}
if (filter_label != -1 && static_cast<int>(label) != filter_label) {
continue; // filter out object classes if filter is specified
}
cv::Rect rc; // map relative coordinates to the original image scale
rc.x = static_cast<int>(rc_left * upscale.width);
rc.y = static_cast<int>(rc_top * upscale.height);
rc.width = static_cast<int>(rc_right * upscale.width) - rc.x;
rc.height = static_cast<int>(rc_bottom * upscale.height) - rc.y;
out_objects.emplace_back(rc & surface);
}
}
};
GAPI_OCV_KERNEL(OCVToMosaic, ToMosaic) {
static void run(const std::vector<cv::Rect> &in_plate_rcs,
const std::vector<cv::Rect> &in_face_rcs,
std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
out_prims.clear();
const auto cvt = [](cv::Rect rc) {
// Align the mosaic region to mosaic block size
const int BLOCK_SIZE = 24;
const int dw = BLOCK_SIZE - (rc.width % BLOCK_SIZE);
const int dh = BLOCK_SIZE - (rc.height % BLOCK_SIZE);
rc.width += dw;
rc.height += dh;
rc.x -= dw / 2;
rc.y -= dh / 2;
return cv::gapi::wip::draw::Mosaic{rc, BLOCK_SIZE, 0};
};
for (auto &&rc : in_plate_rcs) { out_prims.emplace_back(cvt(rc)); }
for (auto &&rc : in_face_rcs) { out_prims.emplace_back(cvt(rc)); }
}
};
} // namespace custom
int main(int argc, char *argv[])
{
cv::CommandLineParser cmd(argc, argv, keys);
cmd.about(about);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
const std::string input = cmd.get<std::string>("input");
const bool no_show = cmd.get<bool>("noshow");
const bool run_trad = cmd.get<bool>("trad");
cv::GMat in;
cv::GMat blob_plates = cv::gapi::infer<custom::VehLicDetector>(in);
cv::GMat blob_faces = cv::gapi::infer<custom::FaceDetector>(in);
// VehLicDetector from Open Model Zoo marks vehicles with label "1" and
// license plates with label "2", filter out license plates only.
cv::GArray<cv::Rect> rc_plates = custom::ParseSSD::on(blob_plates, in, 2);
// Face detector produces faces only so there's no need to filter by label,
// pass "-1".
cv::GArray<cv::Rect> rc_faces = custom::ParseSSD::on(blob_faces, in, -1);
cv::GMat out = cv::gapi::wip::draw::render3ch(in, custom::ToMosaic::on(rc_plates, rc_faces));
cv::GComputation graph(in, out);
const auto plate_model_path = cmd.get<std::string>("platm");
auto plate_net = cv::gapi::ie::Params<custom::VehLicDetector> {
plate_model_path, // path to topology IR
weights_path(plate_model_path), // path to weights
cmd.get<std::string>("platd"), // device specifier
};
const auto face_model_path = cmd.get<std::string>("facem");
auto face_net = cv::gapi::ie::Params<custom::FaceDetector> {
face_model_path, // path to topology IR
weights_path(face_model_path), // path to weights
cmd.get<std::string>("faced"), // device specifier
};
auto kernels = cv::gapi::kernels<custom::OCVParseSSD, custom::OCVToMosaic>();
auto networks = cv::gapi::networks(plate_net, face_net);
cv::TickMeter tm;
cv::Mat out_frame;
std::size_t frames = 0u;
std::cout << "Reading " << input << std::endl;
if (run_trad) {
cv::Mat in_frame;
cv::VideoCapture cap(input);
cap >> in_frame;
auto exec = graph.compile(cv::descr_of(in_frame), cv::compile_args(kernels, networks));
tm.start();
do {
exec(in_frame, out_frame);
if (!no_show) {
cv::imshow("Out", out_frame);
cv::waitKey(1);
}
frames++;
} while (cap.read(in_frame));
tm.stop();
} else {
auto pipeline = graph.compileStreaming(cv::compile_args(kernels, networks));
pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
pipeline.start();
tm.start();
while (pipeline.pull(cv::gout(out_frame))) {
frames++;
if (!no_show) {
cv::imshow("Out", out_frame);
cv::waitKey(1);
}
}
tm.stop();
}
std::cout << "Processed " << frames << " frames"
<< " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
return 0;
}


@@ -0,0 +1,133 @@
#include <algorithm>   // std::transform
#include <cctype>      // std::tolower
#include <opencv2/imgproc.hpp>
#include <opencv2/gapi/infer/ie.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/gapi/streaming/cap.hpp>
#include <opencv2/highgui.hpp>
const std::string keys =
"{ h help | | Print this help message }"
"{ input | | Path to the input video file }"
"{ output | | Path to the output video file }"
"{ ssm | semantic-segmentation-adas-0001.xml | Path to OpenVINO IE semantic segmentation model (.xml) }";
// 20 colors for 20 classes of semantic-segmentation-adas-0001
const std::vector<cv::Vec3b> colors = {
{ 128, 64, 128 },
{ 232, 35, 244 },
{ 70, 70, 70 },
{ 156, 102, 102 },
{ 153, 153, 190 },
{ 153, 153, 153 },
{ 30, 170, 250 },
{ 0, 220, 220 },
{ 35, 142, 107 },
{ 152, 251, 152 },
{ 180, 130, 70 },
{ 60, 20, 220 },
{ 0, 0, 255 },
{ 142, 0, 0 },
{ 70, 0, 0 },
{ 100, 60, 0 },
{ 90, 0, 0 },
{ 230, 0, 0 },
{ 32, 11, 119 },
{ 0, 74, 111 },
};
namespace {
std::string get_weights_path(const std::string &model_path) {
const auto EXT_LEN = 4u;
const auto sz = model_path.size();
CV_Assert(sz > EXT_LEN);
auto ext = model_path.substr(sz - EXT_LEN);
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){
return static_cast<unsigned char>(std::tolower(c));
});
CV_Assert(ext == ".xml");
return model_path.substr(0u, sz - EXT_LEN) + ".bin";
}
} // anonymous namespace
namespace custom {
G_API_OP(PostProcessing, <cv::GMat(cv::GMat, cv::GMat)>, "sample.custom.post_processing") {
static cv::GMatDesc outMeta(const cv::GMatDesc &in, const cv::GMatDesc &) {
return in;
}
};
GAPI_OCV_KERNEL(OCVPostProcessing, PostProcessing) {
static void run(const cv::Mat &in, const cv::Mat &detected_classes, cv::Mat &out) {
// This kernel builds the output image from the class-index table and the colors vector.
// The semantic-segmentation-adas-0001 network outputs a blob with the shape
// [B, C=1, H=1024, W=2048]
const int outHeight = 1024;
const int outWidth = 2048;
cv::Mat maskImg(outHeight, outWidth, CV_8UC3);
const int* const classes = detected_classes.ptr<int>();
for (int rowId = 0; rowId < outHeight; ++rowId) {
for (int colId = 0; colId < outWidth; ++colId) {
size_t classId = static_cast<size_t>(classes[rowId * outWidth + colId]);
maskImg.at<cv::Vec3b>(rowId, colId) =
classId < colors.size()
? colors[classId]
: cv::Vec3b{0, 0, 0}; // sample detects 20 classes
}
}
cv::resize(maskImg, out, in.size());
const float blending = 0.3f;
out = in * blending + out * (1 - blending);
}
};
} // namespace custom
int main(int argc, char *argv[]) {
cv::CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
// Prepare parameters first
const std::string input = cmd.get<std::string>("input");
const std::string output = cmd.get<std::string>("output");
const auto model_path = cmd.get<std::string>("ssm");
const auto weights_path = get_weights_path(model_path);
const auto device = "CPU";
G_API_NET(SemSegmNet, <cv::GMat(cv::GMat)>, "semantic-segmentation");
const auto net = cv::gapi::ie::Params<SemSegmNet> {
model_path, weights_path, device
};
const auto kernels = cv::gapi::kernels<custom::OCVPostProcessing>();
const auto networks = cv::gapi::networks(net);
// Now build the graph
cv::GMat in;
cv::GMat detected_classes = cv::gapi::infer<SemSegmNet>(in);
cv::GMat out = custom::PostProcessing::on(in, detected_classes);
cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out))
.compileStreaming(cv::compile_args(kernels, networks));
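// compileStreaming() builds a pipelined executable: video decoding, inference
// and the custom CPU kernel overlap across frames once the pipeline is started.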
auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
// The execution part
pipeline.setSource(std::move(inputs));
pipeline.start();
cv::VideoWriter writer;
cv::Mat outMat;
while (pipeline.pull(cv::gout(outMat))) {
cv::imshow("Out", outMat);
cv::waitKey(1);
if (!output.empty()) {
if (!writer.isOpened()) {
const auto sz = cv::Size{outMat.cols, outMat.rows};
writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz);
CV_Assert(writer.isOpened());
}
writer << outMat;
}
}
return 0;
}

View File

@@ -0,0 +1,19 @@
#include <opencv2/gapi.hpp> // G-API framework header
#include <opencv2/gapi/imgproc.hpp> // cv::gapi::blur()
#include <opencv2/highgui.hpp> // cv::imread/imwrite
int main(int argc, char *argv[]) {
if (argc < 3) return 1;
cv::GMat in; // Express the graph:
cv::GMat out = cv::gapi::blur(in, cv::Size(3,3)); // `out` is a result of `blur` of `in`
cv::Mat in_mat = cv::imread(argv[1]); // Get the real data
cv::Mat out_mat; // Output buffer (may be empty)
cv::GComputation(cv::GIn(in), cv::GOut(out)) // Declare a graph from `in` to `out`
.apply(cv::gin(in_mat), cv::gout(out_mat)); // ...and run it immediately
cv::imwrite(argv[2], out_mat); // Save the result
return 0;
}

View File

@@ -0,0 +1,27 @@
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
int main(int argc, char *argv[])
{
(void) argc;
(void) argv;
using namespace cv;
Mat in_mat = imread("lena.png");
Mat gx, gy;
Sobel(in_mat, gx, CV_32F, 1, 0);
Sobel(in_mat, gy, CV_32F, 0, 1);
Mat mag;
sqrt(gx.mul(gx) + gy.mul(gy), mag);
Mat out_mat;
mag.convertTo(out_mat, CV_8U);
imwrite("lena-out.png", out_mat);
return 0;
}

View File

@@ -0,0 +1,28 @@
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/imgproc.hpp>
int main(int argc, char *argv[])
{
(void) argc;
(void) argv;
using namespace cv;
Mat in_mat = imread("lena.png");
Mat out_mat;
GMat in;
GMat gx = gapi::Sobel(in, CV_32F, 1, 0);
GMat gy = gapi::Sobel(in, CV_32F, 0, 1);
GMat mag = gapi::sqrt( gapi::mul(gx, gx)
+ gapi::mul(gy, gy));
GMat out = gapi::convertTo(mag, CV_8U);
GComputation sobel(GIn(in), GOut(out));
sobel.apply(in_mat, out_mat);
imwrite("lena-out.png", out_mat);
return 0;
}

View File

@@ -0,0 +1,698 @@
#include <algorithm>
#include <cctype>
#include <cmath>
#include <iostream>
#include <limits>
#include <numeric>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
#include <opencv2/imgproc.hpp>
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/core.hpp>
#include <opencv2/gapi/cpu/gcpukernel.hpp>
#include <opencv2/gapi/infer.hpp>
#include <opencv2/gapi/infer/ie.hpp>
#include <opencv2/gapi/streaming/cap.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utility.hpp>
const std::string about =
"This is an OpenCV-based version of OMZ Text Detection example";
const std::string keys =
"{ h help | | Print this help message }"
"{ input | | Path to the input video file }"
"{ tdm | text-detection-0004.xml | Path to OpenVINO text detection model (.xml), versions 0003 and 0004 work }"
"{ tdd | CPU | Target device for the text detector (e.g. CPU, GPU, VPU, ...) }"
"{ trm | text-recognition-0012.xml | Path to OpenVINO text recognition model (.xml) }"
"{ trd | CPU | Target device for the text recognition (e.g. CPU, GPU, VPU, ...) }"
"{ bw | 0 | CTC beam search decoder bandwidth, if 0, a CTC greedy decoder is used}"
"{ sset | 0123456789abcdefghijklmnopqrstuvwxyz | Symbol set to use with text recognition decoder. Shouldn't contain symbol #. }"
"{ thr | 0.2 | Text recognition confidence threshold}"
;
namespace {
std::string weights_path(const std::string &model_path) {
const auto EXT_LEN = 4u;
const auto sz = model_path.size();
CV_Assert(sz > EXT_LEN);
const auto ext = model_path.substr(sz - EXT_LEN);
CV_Assert(cv::toLowerCase(ext) == ".xml");
return model_path.substr(0u, sz - EXT_LEN) + ".bin";
}
//////////////////////////////////////////////////////////////////////
// Taken from OMZ samples as-is
template<typename Iter>
void softmax_and_choose(Iter begin, Iter end, int *argmax, float *prob) {
auto max_element = std::max_element(begin, end);
*argmax = static_cast<int>(std::distance(begin, max_element));
float max_val = *max_element;
double sum = 0;
for (auto i = begin; i != end; i++) {
sum += std::exp((*i) - max_val);
}
if (std::fabs(sum) < std::numeric_limits<double>::epsilon()) {
throw std::logic_error("sum can't be equal to zero");
}
*prob = 1.0f / static_cast<float>(sum);
}
template<typename Iter>
std::vector<float> softmax(Iter begin, Iter end) {
std::vector<float> prob(end - begin, 0.f);
std::transform(begin, end, prob.begin(), [](float x) { return std::exp(x); });
float sum = std::accumulate(prob.begin(), prob.end(), 0.0f);
for (int i = 0; i < static_cast<int>(prob.size()); i++)
prob[i] /= sum;
return prob;
}
struct BeamElement {
std::vector<int> sentence; //!< The sequence of chars that will be a result of the beam element
float prob_blank; //!< The probability that the last char in CTC sequence
//!< for the beam element is the special blank char
float prob_not_blank; //!< The probability that the last char in CTC sequence
//!< for the beam element is NOT the special blank char
float prob() const { //!< The probability of the beam element.
return prob_blank + prob_not_blank;
}
};
std::string CTCGreedyDecoder(const float *data,
const std::size_t sz,
const std::string &alphabet,
const char pad_symbol,
double *conf) {
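// Greedy CTC decoding: at every time step pick the most likely symbol,
// collapse consecutive repeats and drop the pad (blank) symbol; *conf
// accumulates the product of the per-step softmax maxima.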
std::string res = "";
bool prev_pad = false;
*conf = 1;
const auto num_classes = alphabet.length();
for (auto it = data; it != (data+sz); it += num_classes) {
int argmax = 0;
float prob = 0.f;
softmax_and_choose(it, it + num_classes, &argmax, &prob);
(*conf) *= prob;
auto symbol = alphabet[argmax];
if (symbol != pad_symbol) {
if (res.empty() || prev_pad || (!res.empty() && symbol != res.back())) {
prev_pad = false;
res += symbol;
}
} else {
prev_pad = true;
}
}
return res;
}
std::string CTCBeamSearchDecoder(const float *data,
const std::size_t sz,
const std::string &alphabet,
double *conf,
int bandwidth) {
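// CTC beam search: keep at most `bandwidth` candidate sentences per time step.
// Every candidate tracks separately the probability that its last emitted
// symbol was the blank vs. a real character; candidates that decode to the
// same sentence are merged.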
const auto num_classes = alphabet.length();
std::vector<BeamElement> curr;
std::vector<BeamElement> last;
last.push_back(BeamElement{std::vector<int>(), 1.f, 0.f});
for (auto it = data; it != (data+sz); it += num_classes) {
curr.clear();
std::vector<float> prob = softmax(it, it + num_classes);
for(const auto& candidate: last) {
float prob_not_blank = 0.f;
const std::vector<int>& candidate_sentence = candidate.sentence;
if (!candidate_sentence.empty()) {
int n = candidate_sentence.back();
prob_not_blank = candidate.prob_not_blank * prob[n];
}
float prob_blank = candidate.prob() * prob[num_classes - 1];
auto check_res = std::find_if(curr.begin(),
curr.end(),
[&candidate_sentence](const BeamElement& n) {
return n.sentence == candidate_sentence;
});
if (check_res == std::end(curr)) {
curr.push_back(BeamElement{candidate.sentence, prob_blank, prob_not_blank});
} else {
check_res->prob_not_blank += prob_not_blank;
if (check_res->prob_blank != 0.f) {
throw std::logic_error("Probability that the last char in CTC-sequence "
"is the special blank char must be zero here");
}
check_res->prob_blank = prob_blank;
}
for (int i = 0; i < static_cast<int>(num_classes) - 1; i++) {
auto extend = candidate_sentence;
extend.push_back(i);
if (candidate_sentence.size() > 0 && candidate.sentence.back() == i) {
prob_not_blank = prob[i] * candidate.prob_blank;
} else {
prob_not_blank = prob[i] * candidate.prob();
}
auto check_res2 = std::find_if(curr.begin(),
curr.end(),
[&extend](const BeamElement &n) {
return n.sentence == extend;
});
if (check_res2 == std::end(curr)) {
curr.push_back(BeamElement{extend, 0.f, prob_not_blank});
} else {
check_res2->prob_not_blank += prob_not_blank;
}
}
}
sort(curr.begin(), curr.end(), [](const BeamElement &a, const BeamElement &b) -> bool {
return a.prob() > b.prob();
});
last.clear();
int num_to_copy = std::min(bandwidth, static_cast<int>(curr.size()));
for (int b = 0; b < num_to_copy; b++) {
last.push_back(curr[b]);
}
}
*conf = last[0].prob();
std::string res="";
for (const auto& idx: last[0].sentence) {
res += alphabet[idx];
}
return res;
}
//////////////////////////////////////////////////////////////////////
} // anonymous namespace
namespace custom {
namespace {
//////////////////////////////////////////////////////////////////////
// Define networks for this sample
using GMat2 = std::tuple<cv::GMat, cv::GMat>;
G_API_NET(TextDetection,
<GMat2(cv::GMat)>,
"sample.custom.text_detect");
G_API_NET(TextRecognition,
<cv::GMat(cv::GMat)>,
"sample.custom.text_recogn");
// Define custom operations
using GSize = cv::GOpaque<cv::Size>;
using GRRects = cv::GArray<cv::RotatedRect>;
G_API_OP(PostProcess,
<GRRects(cv::GMat,cv::GMat,GSize,float,float)>,
"sample.custom.text.post_proc") {
static cv::GArrayDesc outMeta(const cv::GMatDesc &,
const cv::GMatDesc &,
const cv::GOpaqueDesc &,
float,
float) {
return cv::empty_array_desc();
}
};
using GMats = cv::GArray<cv::GMat>;
G_API_OP(CropLabels,
<GMats(cv::GMat,GRRects,GSize)>,
"sample.custom.text.crop") {
static cv::GArrayDesc outMeta(const cv::GMatDesc &,
const cv::GArrayDesc &,
const cv::GOpaqueDesc &) {
return cv::empty_array_desc();
}
};
//////////////////////////////////////////////////////////////////////
// Implement custom operations
GAPI_OCV_KERNEL(OCVPostProcess, PostProcess) {
static void run(const cv::Mat &link,
const cv::Mat &segm,
const cv::Size &img_size,
const float link_threshold,
const float segm_threshold,
std::vector<cv::RotatedRect> &out) {
// NOTE: Taken from the OMZ text detection sample almost as-is
const int kMinArea = 300;
const int kMinHeight = 10;
const float *link_data_pointer = link.ptr<float>();
std::vector<float> link_data(link_data_pointer, link_data_pointer + link.total());
link_data = transpose4d(link_data, dimsToShape(link.size), {0, 2, 3, 1});
softmax(link_data);
link_data = sliceAndGetSecondChannel(link_data);
std::vector<int> new_link_data_shape = {
link.size[0],
link.size[2],
link.size[3],
link.size[1]/2,
};
const float *cls_data_pointer = segm.ptr<float>();
std::vector<float> cls_data(cls_data_pointer, cls_data_pointer + segm.total());
cls_data = transpose4d(cls_data, dimsToShape(segm.size), {0, 2, 3, 1});
softmax(cls_data);
cls_data = sliceAndGetSecondChannel(cls_data);
std::vector<int> new_cls_data_shape = {
segm.size[0],
segm.size[2],
segm.size[3],
segm.size[1]/2,
};
out = maskToBoxes(decodeImageByJoin(cls_data, new_cls_data_shape,
link_data, new_link_data_shape,
segm_threshold, link_threshold),
static_cast<float>(kMinArea),
static_cast<float>(kMinHeight),
img_size);
}
static std::vector<std::size_t> dimsToShape(const cv::MatSize &sz) {
const int n_dims = sz.dims();
std::vector<std::size_t> result;
result.reserve(n_dims);
// cv::MatSize is not iterable...
for (int i = 0; i < n_dims; i++) {
result.emplace_back(static_cast<std::size_t>(sz[i]));
}
return result;
}
static void softmax(std::vector<float> &rdata) {
// NOTE: Taken from the OMZ text detection sample almost as-is
const size_t last_dim = 2;
for (size_t i = 0 ; i < rdata.size(); i+=last_dim) {
float m = std::max(rdata[i], rdata[i+1]);
rdata[i] = std::exp(rdata[i] - m);
rdata[i + 1] = std::exp(rdata[i + 1] - m);
float s = rdata[i] + rdata[i + 1];
rdata[i] /= s;
rdata[i + 1] /= s;
}
}
static std::vector<float> transpose4d(const std::vector<float> &data,
const std::vector<size_t> &shape,
const std::vector<size_t> &axes) {
// NOTE: Taken from the OMZ text detection sample almost as-is
if (shape.size() != axes.size())
throw std::runtime_error("Shape and axes must have the same dimension.");
for (size_t a : axes) {
if (a >= shape.size())
throw std::runtime_error("Axis must be less than dimension of shape.");
}
size_t total_size = shape[0]*shape[1]*shape[2]*shape[3];
std::vector<size_t> steps {
shape[axes[1]]*shape[axes[2]]*shape[axes[3]],
shape[axes[2]]*shape[axes[3]],
shape[axes[3]],
1
};
size_t source_data_idx = 0;
std::vector<float> new_data(total_size, 0);
std::vector<size_t> ids(shape.size());
for (ids[0] = 0; ids[0] < shape[0]; ids[0]++) {
for (ids[1] = 0; ids[1] < shape[1]; ids[1]++) {
for (ids[2] = 0; ids[2] < shape[2]; ids[2]++) {
for (ids[3]= 0; ids[3] < shape[3]; ids[3]++) {
size_t new_data_idx = ids[axes[0]]*steps[0] + ids[axes[1]]*steps[1] +
ids[axes[2]]*steps[2] + ids[axes[3]]*steps[3];
new_data[new_data_idx] = data[source_data_idx++];
}
}
}
}
return new_data;
}
static std::vector<float> sliceAndGetSecondChannel(const std::vector<float> &data) {
// NOTE: Taken from the OMZ text detection sample almost as-is
std::vector<float> new_data(data.size() / 2, 0);
for (size_t i = 0; i < data.size() / 2; i++) {
new_data[i] = data[2 * i + 1];
}
return new_data;
}
static void join(const int p1,
const int p2,
std::unordered_map<int, int> &group_mask) {
// NOTE: Taken from the OMZ text detection sample almost as-is
const int root1 = findRoot(p1, group_mask);
const int root2 = findRoot(p2, group_mask);
if (root1 != root2) {
group_mask[root1] = root2;
}
}
static cv::Mat decodeImageByJoin(const std::vector<float> &cls_data,
const std::vector<int> &cls_data_shape,
const std::vector<float> &link_data,
const std::vector<int> &link_data_shape,
float cls_conf_threshold,
float link_conf_threshold) {
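// Decode the detector output: threshold the per-pixel text scores and the
// neighbour link scores, then group linked pixels with a union-find
// (join/findRoot); get_all() renders every group as an instance-id mask.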
// NOTE: Taken from the OMZ text detection sample almost as-is
const int h = cls_data_shape[1];
const int w = cls_data_shape[2];
std::vector<uchar> pixel_mask(h * w, 0);
std::unordered_map<int, int> group_mask;
std::vector<cv::Point> points;
for (int i = 0; i < static_cast<int>(pixel_mask.size()); i++) {
pixel_mask[i] = cls_data[i] >= cls_conf_threshold;
if (pixel_mask[i]) {
points.emplace_back(i % w, i / w);
group_mask[i] = -1;
}
}
std::vector<uchar> link_mask(link_data.size(), 0);
for (size_t i = 0; i < link_mask.size(); i++) {
link_mask[i] = link_data[i] >= link_conf_threshold;
}
size_t neighbours = size_t(link_data_shape[3]);
for (const auto &point : points) {
size_t neighbour = 0;
for (int ny = point.y - 1; ny <= point.y + 1; ny++) {
for (int nx = point.x - 1; nx <= point.x + 1; nx++) {
if (nx == point.x && ny == point.y)
continue;
if (nx >= 0 && nx < w && ny >= 0 && ny < h) {
uchar pixel_value = pixel_mask[size_t(ny) * size_t(w) + size_t(nx)];
uchar link_value = link_mask[(size_t(point.y) * size_t(w) + size_t(point.x))
*neighbours + neighbour];
if (pixel_value && link_value) {
join(point.x + point.y * w, nx + ny * w, group_mask);
}
}
neighbour++;
}
}
}
return get_all(points, w, h, group_mask);
}
static cv::Mat get_all(const std::vector<cv::Point> &points,
const int w,
const int h,
std::unordered_map<int, int> &group_mask) {
// NOTE: Taken from the OMZ text detection sample almost as-is
std::unordered_map<int, int> root_map;
cv::Mat mask(h, w, CV_32S, cv::Scalar(0));
for (const auto &point : points) {
int point_root = findRoot(point.x + point.y * w, group_mask);
if (root_map.find(point_root) == root_map.end()) {
root_map.emplace(point_root, static_cast<int>(root_map.size() + 1));
}
mask.at<int>(point.x + point.y * w) = root_map[point_root];
}
return mask;
}
static int findRoot(const int point,
std::unordered_map<int, int> &group_mask) {
// NOTE: Taken from the OMZ text detection sample almost as-is
int root = point;
bool update_parent = false;
while (group_mask.at(root) != -1) {
root = group_mask.at(root);
update_parent = true;
}
if (update_parent) {
group_mask[point] = root;
}
return root;
}
static std::vector<cv::RotatedRect> maskToBoxes(const cv::Mat &mask,
const float min_area,
const float min_height,
const cv::Size &image_size) {
// NOTE: Taken from the OMZ text detection sample almost as-is
std::vector<cv::RotatedRect> bboxes;
double min_val = 0.;
double max_val = 0.;
cv::minMaxLoc(mask, &min_val, &max_val);
int max_bbox_idx = static_cast<int>(max_val);
cv::Mat resized_mask;
cv::resize(mask, resized_mask, image_size, 0, 0, cv::INTER_NEAREST);
for (int i = 1; i <= max_bbox_idx; i++) {
cv::Mat bbox_mask = resized_mask == i;
std::vector<std::vector<cv::Point>> contours;
cv::findContours(bbox_mask, contours, cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE);
if (contours.empty())
continue;
cv::RotatedRect r = cv::minAreaRect(contours[0]);
if (std::min(r.size.width, r.size.height) < min_height)
continue;
if (r.size.area() < min_area)
continue;
bboxes.emplace_back(r);
}
return bboxes;
}
}; // GAPI_OCV_KERNEL(PostProcess)
GAPI_OCV_KERNEL(OCVCropLabels, CropLabels) {
static void run(const cv::Mat &image,
const std::vector<cv::RotatedRect> &detections,
const cv::Size &outSize,
std::vector<cv::Mat> &out) {
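// For each rotated rectangle: map three of its corners onto an upright
// outSize rectangle with an affine transform, warp the frame, convert the
// crop to grayscale float and reshape it to the 1x1xHxW blob layout the
// recognition network expects.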
out.clear();
out.reserve(detections.size());
cv::Mat crop(outSize, CV_8UC3, cv::Scalar(0));
cv::Mat gray(outSize, CV_8UC1, cv::Scalar(0));
std::vector<int> blob_shape = {1,1,outSize.height,outSize.width};
for (auto &&rr : detections) {
std::vector<cv::Point2f> points(4);
rr.points(points.data());
const auto top_left_point_idx = topLeftPointIdx(points);
cv::Point2f point0 = points[static_cast<size_t>(top_left_point_idx)];
cv::Point2f point1 = points[(top_left_point_idx + 1) % 4];
cv::Point2f point2 = points[(top_left_point_idx + 2) % 4];
std::vector<cv::Point2f> from{point0, point1, point2};
std::vector<cv::Point2f> to{
cv::Point2f(0.0f, 0.0f),
cv::Point2f(static_cast<float>(outSize.width-1), 0.0f),
cv::Point2f(static_cast<float>(outSize.width-1),
static_cast<float>(outSize.height-1))
};
cv::Mat M = cv::getAffineTransform(from, to);
cv::warpAffine(image, crop, M, outSize);
cv::cvtColor(crop, gray, cv::COLOR_BGR2GRAY);
cv::Mat blob;
gray.convertTo(blob, CV_32F);
out.push_back(blob.reshape(1, blob_shape)); // pass as 1,1,H,W instead of H,W
}
}
static int topLeftPointIdx(const std::vector<cv::Point2f> &points) {
// NOTE: Taken from the OMZ text detection sample almost as-is
cv::Point2f most_left(std::numeric_limits<float>::max(),
std::numeric_limits<float>::max());
cv::Point2f almost_most_left(std::numeric_limits<float>::max(),
std::numeric_limits<float>::max());
int most_left_idx = -1;
int almost_most_left_idx = -1;
for (size_t i = 0; i < points.size() ; i++) {
if (most_left.x > points[i].x) {
if (most_left.x < std::numeric_limits<float>::max()) {
almost_most_left = most_left;
almost_most_left_idx = most_left_idx;
}
most_left = points[i];
most_left_idx = static_cast<int>(i);
}
if (almost_most_left.x > points[i].x && points[i] != most_left) {
almost_most_left = points[i];
almost_most_left_idx = static_cast<int>(i);
}
}
if (almost_most_left.y < most_left.y) {
most_left = almost_most_left;
most_left_idx = almost_most_left_idx;
}
return most_left_idx;
}
}; // GAPI_OCV_KERNEL(CropLabels)
} // anonymous namespace
} // namespace custom
namespace vis {
namespace {
void drawRotatedRect(cv::Mat &m, const cv::RotatedRect &rc) {
std::vector<cv::Point2f> tmp_points(5);
rc.points(tmp_points.data());
tmp_points[4] = tmp_points[0];
auto prev = tmp_points.begin(), it = prev+1;
for (; it != tmp_points.end(); ++it) {
cv::line(m, *prev, *it, cv::Scalar(50, 205, 50), 2);
prev = it;
}
}
void drawText(cv::Mat &m, const cv::RotatedRect &rc, const std::string &str) {
const int fface = cv::FONT_HERSHEY_SIMPLEX;
const double scale = 0.7;
const int thick = 1;
int base = 0;
const auto text_size = cv::getTextSize(str, fface, scale, thick, &base);
std::vector<cv::Point2f> tmp_points(4);
rc.points(tmp_points.data());
const auto tl_point_idx = custom::OCVCropLabels::topLeftPointIdx(tmp_points);
cv::Point text_pos = tmp_points[tl_point_idx];
text_pos.x = std::max(0, text_pos.x);
text_pos.y = std::max(text_size.height, text_pos.y);
cv::rectangle(m,
text_pos + cv::Point{0, base},
text_pos + cv::Point{text_size.width, -text_size.height},
CV_RGB(50, 205, 50),
cv::FILLED);
const auto white = CV_RGB(255, 255, 255);
cv::putText(m, str, text_pos, fface, scale, white, thick, 8);
}
} // anonymous namespace
} // namespace vis
int main(int argc, char *argv[])
{
cv::CommandLineParser cmd(argc, argv, keys);
cmd.about(about);
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
const auto input_file_name = cmd.get<std::string>("input");
const auto tdet_model_path = cmd.get<std::string>("tdm");
const auto trec_model_path = cmd.get<std::string>("trm");
const auto tdet_target_dev = cmd.get<std::string>("tdd");
const auto trec_target_dev = cmd.get<std::string>("trd");
const auto ctc_beam_dec_bw = cmd.get<int>("bw");
const auto dec_conf_thresh = cmd.get<double>("thr");
const auto pad_symbol = '#';
const auto symbol_set = cmd.get<std::string>("sset") + pad_symbol;
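// Build the graph: detect text on the full frame, turn the detector's two
// outputs into rotated rectangles (the two 0.8f values are the link and
// segmentation confidence thresholds), crop and normalize every detected
// region, and run the recognition network on each crop.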
cv::GMat in;
cv::GOpaque<cv::Size> in_rec_sz;
cv::GMat link, segm;
std::tie(link, segm) = cv::gapi::infer<custom::TextDetection>(in);
cv::GOpaque<cv::Size> size = cv::gapi::streaming::size(in);
cv::GArray<cv::RotatedRect> rrs = custom::PostProcess::on(link, segm, size, 0.8f, 0.8f);
cv::GArray<cv::GMat> labels = custom::CropLabels::on(in, rrs, in_rec_sz);
cv::GArray<cv::GMat> text = cv::gapi::infer2<custom::TextRecognition>(in, labels);
cv::GComputation graph(cv::GIn(in, in_rec_sz),
cv::GOut(cv::gapi::copy(in), rrs, text));
// Text detection network
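// text-detection-0004 exposes two output layers (link logits and segmentation
// logits); cfgOutputLayers() lists them explicitly so their order matches the
// GMat2 tuple returned by infer<TextDetection>.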
auto tdet_net = cv::gapi::ie::Params<custom::TextDetection> {
tdet_model_path, // path to topology IR
weights_path(tdet_model_path), // path to weights
tdet_target_dev, // device specifier
}.cfgOutputLayers({"model/link_logits_/add", "model/segm_logits/add"});
auto trec_net = cv::gapi::ie::Params<custom::TextRecognition> {
trec_model_path, // path to topology IR
weights_path(trec_model_path), // path to weights
trec_target_dev, // device specifier
};
auto networks = cv::gapi::networks(tdet_net, trec_net);
auto kernels = cv::gapi::kernels< custom::OCVPostProcess
, custom::OCVCropLabels
>();
auto pipeline = graph.compileStreaming(cv::compile_args(kernels, networks));
std::cout << "Reading " << input_file_name << std::endl;
// Input stream
auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input_file_name);
// Text recognition input size (also an input parameter to the graph)
auto in_rsz = cv::Size{ 120, 32 };
// Set the pipeline source & start the pipeline
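// cv::gin() mixes a streaming source (the video file) with a plain value
// (the recognition input size); the value stays the same for every frame.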
pipeline.setSource(cv::gin(in_src, in_rsz));
pipeline.start();
// Declare the output data & run the processing loop
cv::TickMeter tm;
cv::Mat image;
std::vector<cv::RotatedRect> out_rcs;
std::vector<cv::Mat> out_text;
tm.start();
int frames = 0;
while (pipeline.pull(cv::gout(image, out_rcs, out_text))) {
frames++;
CV_Assert(out_rcs.size() == out_text.size());
const auto num_labels = out_rcs.size();
std::vector<cv::Point2f> tmp_points(4);
for (std::size_t l = 0; l < num_labels; l++) {
// Decode the recognized text in the rectangle
const auto &blob = out_text[l];
const float *data = blob.ptr<float>();
const auto sz = blob.total();
double conf = 1.0;
const std::string res = ctc_beam_dec_bw == 0
? CTCGreedyDecoder(data, sz, symbol_set, pad_symbol, &conf)
: CTCBeamSearchDecoder(data, sz, symbol_set, &conf, ctc_beam_dec_bw);
// Draw a bounding box for this rotated rectangle
const auto &rc = out_rcs[l];
vis::drawRotatedRect(image, rc);
// Draw text, if decoded
if (conf >= dec_conf_thresh) {
vis::drawText(image, rc, res);
}
}
tm.stop();
cv::imshow("Out", image);
cv::waitKey(1);
tm.start();
}
tm.stop();
std::cout << "Processed " << frames << " frames"
<< " (" << frames / tm.getTimeSec() << " FPS)" << std::endl;
return 0;
}