Skip to content

Commit b083c20

Browse files
author
AsyaPronina
committed
Enable stateful kernels in G-API OCV Backend
1 parent c3e8a82 commit b083c20

15 files changed

Lines changed: 527 additions & 44 deletions

modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp

Lines changed: 107 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <opencv2/gapi/gcommon.hpp>
1818
#include <opencv2/gapi/gkernel.hpp>
1919
#include <opencv2/gapi/garg.hpp>
20+
#include <opencv2/gapi/gmetaarg.hpp>
2021
#include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
2122
#include <opencv2/gapi/util/util.hpp>
2223

@@ -109,11 +110,17 @@ class GAPI_EXPORTS GCPUContext
109110
return outOpaqueRef(output).wref<T>();
110111
}
111112

113+
GArg state()
114+
{
115+
return m_state;
116+
}
117+
112118
protected:
113119
detail::VectorRef& outVecRef(int output);
114120
detail::OpaqueRef& outOpaqueRef(int output);
115121

116122
std::vector<GArg> m_args;
123+
GArg m_state;
117124

118125
//FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
119126
//to OCV kernel. (This can be achieved by two one-time conversions in GCPUExecutable::run,
@@ -127,16 +134,18 @@ class GAPI_EXPORTS GCPUContext
127134
class GAPI_EXPORTS GCPUKernel
128135
{
129136
public:
130-
// This function is kernel's execution entry point (does the processing work)
131-
using F = std::function<void(GCPUContext &)>;
137+
// This function is a kernel's execution entry point (does the processing work)
138+
using RunF = std::function<void(GCPUContext &)>;
139+
// This function is a stateful kernel's setup routine (configures state)
140+
using SetupF = std::function<void(const GMetaArgs &, const GArgs &, GArg &)>;
132141

133142
GCPUKernel();
134-
explicit GCPUKernel(const F& f);
143+
GCPUKernel(const RunF& runF, const SetupF& setupF = nullptr);
135144

136-
void apply(GCPUContext &ctx);
145+
RunF m_runF = nullptr;
146+
SetupF m_setupF = nullptr;
137147

138-
protected:
139-
F m_f;
148+
bool m_isStateful = false;
140149
};
141150

142151
// FIXME: This is an ugly ad-hoc implementation. TODO: refactor
@@ -269,12 +278,38 @@ template<typename U> struct get_out<cv::GOpaque<U>>
269278
}
270279
};
271280

281+
template<typename, typename>
282+
struct OCVSetupHelper;
283+
284+
template<typename Impl, typename... Ins>
285+
struct OCVSetupHelper<Impl, std::tuple<Ins...>>
286+
{
287+
template<int... IIs>
288+
static void setup_impl(const GMetaArgs &metaArgs, const GArgs &args, GArg &state,
289+
detail::Seq<IIs...>)
290+
{
291+
// TODO: unique_ptr <-> shared_ptr conversion ?
292+
// To check: Conversion is possible only if the state which should be passed to
293+
// the 'setup' user callback isn't required to have a previous value
294+
std::shared_ptr<typename Impl::State> stPtr;
295+
Impl::setup(detail::get_in_meta<Ins>(metaArgs, args, IIs)..., stPtr);
296+
state = GArg(stPtr);
297+
}
298+
299+
static void setup(const GMetaArgs &metaArgs, const GArgs &args, GArg& state)
300+
{
301+
setup_impl(metaArgs, args, state,
302+
typename detail::MkSeq<sizeof...(Ins)>::type());
303+
}
304+
};
305+
306+
// OCVCallHelper is a helper class to call stateless OCV kernels and OCV kernel functors.
272307
template<typename, typename, typename>
273308
struct OCVCallHelper;
274309

275310
// FIXME: probably can be simplified with std::apply or analogue.
276311
template<typename Impl, typename... Ins, typename... Outs>
277-
struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
312+
struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>>
278313
{
279314
template<typename... Inputs>
280315
struct call_and_postprocess
@@ -302,19 +337,16 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
302337
//by comparing its state (data ptr) before and after the call.
303338
//This is done by converting each output Mat into tracked_cv_mat object, and binding
304339
//them to parameters of ad-hoc function
305-
//Convert own::Scalar to cv::Scalar before call kernel and run kernel
306-
//convert cv::Scalar to own::Scalar after call kernel and write back results
307340
call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
308-
::call(get_in<Ins>::get(ctx, IIs)...,
309-
get_out<Outs>::get(ctx, OIs)...);
341+
::call(get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...);
310342
}
311343

312344
template<int... IIs, int... OIs>
313-
static void call_impl(cv::GCPUContext &ctx, Impl& impl, detail::Seq<IIs...>, detail::Seq<OIs...>)
345+
static void call_impl(cv::GCPUContext &ctx, Impl& impl,
346+
detail::Seq<IIs...>, detail::Seq<OIs...>)
314347
{
315-
call_and_postprocess<decltype(cv::detail::get_in<Ins>::get(ctx, IIs))...>
316-
::call(impl, cv::detail::get_in<Ins>::get(ctx, IIs)...,
317-
cv::detail::get_out<Outs>::get(ctx, OIs)...);
348+
call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
349+
::call(impl, get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...);
318350
}
319351

320352
static void call(GCPUContext &ctx)
@@ -335,23 +367,78 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
335367
}
336368
};
337369

370+
// OCVStCallHelper is a helper class to call stateful OCV kernels.
371+
template<typename, typename, typename>
372+
struct OCVStCallHelper;
373+
374+
template<typename Impl, typename... Ins, typename... Outs>
375+
struct OCVStCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>> :
376+
OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>>
377+
{
378+
template<typename... Inputs>
379+
struct call_and_postprocess
380+
{
381+
template<typename... Outputs>
382+
static void call(typename Impl::State& st, Inputs&&... ins, Outputs&&... outs)
383+
{
384+
Impl::run(std::forward<Inputs>(ins)..., outs..., st);
385+
postprocess(outs...);
386+
}
387+
};
388+
389+
template<int... IIs, int... OIs>
390+
static void call_impl(GCPUContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
391+
{
392+
auto& st = *ctx.state().get<std::shared_ptr<typename Impl::State>>();
393+
call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
394+
::call(st, get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...);
395+
}
396+
397+
static void call(GCPUContext &ctx)
398+
{
399+
call_impl(ctx,
400+
typename detail::MkSeq<sizeof...(Ins)>::type(),
401+
typename detail::MkSeq<sizeof...(Outs)>::type());
402+
}
403+
};
404+
338405
} // namespace detail
339406

340407
template<class Impl, class K>
341-
class GCPUKernelImpl: public cv::detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
342-
public cv::detail::KernelTag
408+
class GCPUKernelImpl: public cv::detail::KernelTag
409+
{
410+
using CallHelper = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
411+
412+
public:
413+
using API = K;
414+
415+
static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
416+
static cv::GCPUKernel kernel() { return GCPUKernel(&CallHelper::call); }
417+
};
418+
419+
template<class Impl, class K, class S>
420+
class GCPUStKernelImpl: public cv::detail::KernelTag
343421
{
344-
using P = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
422+
using StSetupHelper = detail::OCVSetupHelper<Impl, typename K::InArgs>;
423+
using StCallHelper = detail::OCVStCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
345424

346425
public:
347426
using API = K;
427+
using State = S;
348428

349-
static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
350-
static cv::GCPUKernel kernel() { return GCPUKernel(&P::call); }
429+
static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
430+
static cv::GCPUKernel kernel() { return GCPUKernel(&StCallHelper::call,
431+
&StSetupHelper::setup); }
351432
};
352433

353434
#define GAPI_OCV_KERNEL(Name, API) struct Name: public cv::GCPUKernelImpl<Name, API>
354435

436+
// TODO: Reuse Anatoliy's logic for support of types with commas in macro.
437+
// Retrieve the common part from Anatoliy's logic to the separate place.
438+
#define GAPI_OCV_KERNEL_ST(Name, API, State) \
439+
struct Name:public cv::GCPUStKernelImpl<Name, API, State> \
440+
441+
355442
class gapi::cpu::GOCVFunctor : public gapi::GFunctor
356443
{
357444
public:

modules/gapi/include/opencv2/gapi/gcompiled.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,19 @@ class GAPI_EXPORTS GCompiled
208208
// FIXME: Why it requires compile args?
209209
void reshape(const GMetaArgs& inMetas, const GCompileArgs& args);
210210

211+
/**
212+
* @brief Prepare the internal states of kernels for a new video stream.
213+
*
214+
* GCompiled objects may be used to process video streams frame by frame.
215+
* In this case, a GCompiled is called on every image frame individually.
216+
* Starting with OpenCV 4.4, some kernels in the graph may have their internal
217+
* states (see GAPI_OCV_KERNEL_ST for the OpenCV backend).
218+
* In this case, if the user starts processing another video stream with
219+
* this GCompiled, this method needs to be called to let kernels re-initialize
220+
* their internal states for the new video stream.
221+
*/
222+
void prepareForNewStream();
223+
211224
protected:
212225
/// @private
213226
std::shared_ptr<Priv> m_priv;

modules/gapi/src/backends/cpu/gcpubackend.cpp

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@
3333
//
3434
// If not, we need to introduce that!
3535
using GCPUModel = ade::TypedGraph
36-
< cv::gimpl::Unit
36+
< cv::gimpl::CPUUnit
3737
, cv::gimpl::Protocol
3838
>;
3939

4040
// FIXME: Same issue with Typed and ConstTyped
4141
using GConstGCPUModel = ade::ConstTypedGraph
42-
< cv::gimpl::Unit
42+
< cv::gimpl::CPUUnit
4343
, cv::gimpl::Protocol
4444
>;
4545

@@ -53,7 +53,7 @@ namespace
5353
{
5454
GCPUModel gm(graph);
5555
auto cpu_impl = cv::util::any_cast<cv::GCPUKernel>(impl.opaque);
56-
gm.metadata(op_node).set(cv::gimpl::Unit{cpu_impl});
56+
gm.metadata(op_node).set(cv::gimpl::CPUUnit{cpu_impl});
5757
}
5858

5959
virtual EPtr compile(const ade::Graph &graph,
@@ -78,11 +78,23 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g,
7878
{
7979
// Convert list of operations (which is topologically sorted already)
8080
// into an execution script.
81+
GConstGCPUModel gcm(m_g);
8182
for (auto &nh : nodes)
8283
{
8384
switch (m_gm.metadata(nh).get<NodeType>().t)
8485
{
85-
case NodeType::OP: m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); break;
86+
case NodeType::OP:
87+
{
88+
m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)});
89+
90+
// If kernel is stateful then prepare storage for its state.
91+
GCPUKernel k = gcm.metadata(nh).get<CPUUnit>().k;
92+
if (k.m_isStateful)
93+
{
94+
m_nodesToStates[nh] = GArg{ };
95+
}
96+
break;
97+
}
8698
case NodeType::DATA:
8799
{
88100
m_dataNodes.push_back(nh);
@@ -104,6 +116,9 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g,
104116
default: util::throw_error(std::logic_error("Unsupported NodeType type"));
105117
}
106118
}
119+
120+
// For each stateful kernel, call the user's 'setup' callback to initialize its state.
121+
setupKernelStates();
107122
}
108123

109124
// FIXME: Document what it does
@@ -140,6 +155,26 @@ cv::GArg cv::gimpl::GCPUExecutable::packArg(const GArg &arg)
140155
}
141156
}
142157

158+
void cv::gimpl::GCPUExecutable::setupKernelStates()
159+
{
160+
GConstGCPUModel gcm(m_g);
161+
for (auto& nodeToState : m_nodesToStates)
162+
{
163+
auto& kernelNode = nodeToState.first;
164+
auto& kernelState = nodeToState.second;
165+
166+
const GCPUKernel& kernel = gcm.metadata(kernelNode).get<CPUUnit>().k;
167+
kernel.m_setupF(GModel::collectInputMeta(m_gm, kernelNode),
168+
m_gm.metadata(kernelNode).get<Op>().args,
169+
kernelState);
170+
}
171+
}
172+
173+
void cv::gimpl::GCPUExecutable::handleNewStream()
174+
{
175+
m_newStreamStarted = true;
176+
}
177+
143178
void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
144179
std::vector<OutObj> &&output_objs)
145180
{
@@ -167,6 +202,14 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
167202
}
168203
}
169204

205+
// If a new video stream has started, then for each stateful kernel
206+
// call the user's 'setup' callback to re-initialize its state.
207+
if (m_newStreamStarted)
208+
{
209+
setupKernelStates();
210+
m_newStreamStarted = false;
211+
}
212+
170213
// OpenCV backend execution is not rocket science at all.
171214
// Simply invoke our kernels in the proper order.
172215
GConstGCPUModel gcm(m_g);
@@ -176,7 +219,7 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
176219

177220
// Obtain our real execution unit
178221
// TODO: Should kernels be copyable?
179-
GCPUKernel k = gcm.metadata(op_info.nh).get<Unit>().k;
222+
GCPUKernel k = gcm.metadata(op_info.nh).get<CPUUnit>().k;
180223

181224
// Initialize kernel's execution context:
182225
// - Input parameters
@@ -185,8 +228,8 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
185228

186229
using namespace std::placeholders;
187230
ade::util::transform(op.args,
188-
std::back_inserter(context.m_args),
189-
std::bind(&GCPUExecutable::packArg, this, _1));
231+
std::back_inserter(context.m_args),
232+
std::bind(&GCPUExecutable::packArg, this, _1));
190233

191234
// - Output parameters.
192235
// FIXME: pre-allocate internal Mats, etc, according to the known meta
@@ -198,8 +241,14 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
198241
context.m_results[out_port] = magazine::getObjPtr(m_res, out_desc);
199242
}
200243

244+
// For a stateful kernel, add its state to the execution context
245+
if (k.m_isStateful)
246+
{
247+
context.m_state = m_nodesToStates.at(op_info.nh);
248+
}
249+
201250
// Now trigger the executable unit
202-
k.apply(context);
251+
k.m_runF(context);
203252

204253
//As Kernels are forbidden to allocate memory for (Mat) outputs,
205254
//this code seems redundant, at least for Mats

modules/gapi/src/backends/cpu/gcpubackend.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
namespace cv { namespace gimpl {
2525

26-
struct Unit
26+
struct CPUUnit
2727
{
2828
static const char *name() { return "HostKernel"; }
2929
GCPUKernel k;
@@ -48,6 +48,13 @@ class GCPUExecutable final: public GIslandExecutable
4848
// Actual data of all resources in graph (both internal and external)
4949
Mag m_res;
5050
GArg packArg(const GArg &arg);
51+
void setupKernelStates();
52+
53+
// TODO: Check that it is thread-safe
54+
std::unordered_map<ade::NodeHandle, GArg,
55+
ade::HandleHasher<ade::Node>> m_nodesToStates;
56+
57+
bool m_newStreamStarted = false;
5158

5259
public:
5360
GCPUExecutable(const ade::Graph &graph,
@@ -62,6 +69,8 @@ class GCPUExecutable final: public GIslandExecutable
6269
util::throw_error(std::logic_error("GCPUExecutable::reshape() should never be called"));
6370
}
6471

72+
virtual void handleNewStream() override;
73+
6574
virtual void run(std::vector<InObj> &&input_objs,
6675
std::vector<OutObj> &&output_objs) override;
6776
};

0 commit comments

Comments
 (0)