Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 107 additions & 20 deletions modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <opencv2/gapi/gcommon.hpp>
#include <opencv2/gapi/gkernel.hpp>
#include <opencv2/gapi/garg.hpp>
#include <opencv2/gapi/gmetaarg.hpp>
#include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
#include <opencv2/gapi/util/util.hpp>

Expand Down Expand Up @@ -109,11 +110,17 @@ class GAPI_EXPORTS GCPUContext
return outOpaqueRef(output).wref<T>();
}

// Returns, by value, the state argument (m_state) held by this execution context.
GArg state()
{
return m_state;
}

protected:
detail::VectorRef& outVecRef(int output);
detail::OpaqueRef& outOpaqueRef(int output);

std::vector<GArg> m_args;
GArg m_state;

//FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
//to OCV kernel. (This can be achieved by two one-time conversions in GCPUExecutable::run,
Expand All @@ -127,16 +134,18 @@ class GAPI_EXPORTS GCPUContext
class GAPI_EXPORTS GCPUKernel
{
public:
// This function is kernel's execution entry point (does the processing work)
using F = std::function<void(GCPUContext &)>;
// This function is a kernel's execution entry point (does the processing work)
using RunF = std::function<void(GCPUContext &)>;
// This function is a stateful kernel's setup routine (configures state)
using SetupF = std::function<void(const GMetaArgs &, const GArgs &, GArg &)>;

GCPUKernel();
explicit GCPUKernel(const F& f);
GCPUKernel(const RunF& runF, const SetupF& setupF = nullptr);

void apply(GCPUContext &ctx);
RunF m_runF = nullptr;
SetupF m_setupF = nullptr;

protected:
F m_f;
bool m_isStateful = false;
};

// FIXME: This is an ugly ad-hoc implementation. TODO: refactor
Expand Down Expand Up @@ -269,12 +278,38 @@ template<typename U> struct get_out<cv::GOpaque<U>>
}
};

template<typename, typename>
struct OCVSetupHelper;

template<typename Impl, typename... Ins>
struct OCVSetupHelper<Impl, std::tuple<Ins...>>
{
template<int... IIs>
static void setup_impl(const GMetaArgs &metaArgs, const GArgs &args, GArg &state,
detail::Seq<IIs...>)
{
// TODO: unique_ptr <-> shared_ptr conversion ?
// To check: Conversion is possible only if the state which should be passed to
// 'setup' user callback isn't required to have previous value
std::shared_ptr<typename Impl::State> stPtr;
Impl::setup(detail::get_in_meta<Ins>(metaArgs, args, IIs)..., stPtr);
state = GArg(stPtr);
}

static void setup(const GMetaArgs &metaArgs, const GArgs &args, GArg& state)
{
setup_impl(metaArgs, args, state,
typename detail::MkSeq<sizeof...(Ins)>::type());
}
};

// OCVCallHelper is a helper class to call stateless OCV kernels and OCV kernel functors.
template<typename, typename, typename>
struct OCVCallHelper;

// FIXME: probably can be simplified with std::apply or analogue.
template<typename Impl, typename... Ins, typename... Outs>
struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>>
{
template<typename... Inputs>
struct call_and_postprocess
Expand Down Expand Up @@ -302,19 +337,16 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
//by comparing its state (data ptr) before and after the call.
//This is done by converting each output Mat into tracked_cv_mat object, and binding
//them to parameters of ad-hoc function
//Convert own::Scalar to cv::Scalar before call kernel and run kernel
//convert cv::Scalar to own::Scalar after call kernel and write back results
call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
::call(get_in<Ins>::get(ctx, IIs)...,
get_out<Outs>::get(ctx, OIs)...);
::call(get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...);
}

template<int... IIs, int... OIs>
static void call_impl(cv::GCPUContext &ctx, Impl& impl, detail::Seq<IIs...>, detail::Seq<OIs...>)
static void call_impl(cv::GCPUContext &ctx, Impl& impl,
detail::Seq<IIs...>, detail::Seq<OIs...>)
{
call_and_postprocess<decltype(cv::detail::get_in<Ins>::get(ctx, IIs))...>
::call(impl, cv::detail::get_in<Ins>::get(ctx, IIs)...,
cv::detail::get_out<Outs>::get(ctx, OIs)...);
call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
::call(impl, get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...);
}

static void call(GCPUContext &ctx)
Expand All @@ -335,23 +367,78 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
}
};

// OCVStCallHelper is a helper class to call stateful OCV kernels.
// It fetches the kernel state from the execution context and passes it to
// Impl::run as the last argument, after the inputs and the outputs.
template<typename, typename, typename>
struct OCVStCallHelper;

template<typename Impl, typename... Ins, typename... Outs>
struct OCVStCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>> :
    OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>>
{
    template<typename... Inputs>
    struct call_and_postprocess
    {
        // Runs the kernel body and then post-processes the outputs.
        template<typename... Outputs>
        static void call(typename Impl::State& st, Inputs&&... ins, Outputs&&... outs)
        {
            Impl::run(std::forward<Inputs>(ins)..., outs..., st);
            postprocess(outs...);
        }
    };

    // Extracts the state, inputs (by IIs) and outputs (by OIs) from the context
    // and invokes the kernel through call_and_postprocess.
    template<int... IIs, int... OIs>
    static void call_impl(GCPUContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
    {
        using StatePtr = std::shared_ptr<typename Impl::State>;
        using Invoker  = call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>;

        auto& kernelState = *ctx.state().get<StatePtr>();
        Invoker::call(kernelState,
                      get_in<Ins>::get(ctx, IIs)...,
                      get_out<Outs>::get(ctx, OIs)...);
    }

    // Kernel execution entry point: generates the input/output index sequences.
    static void call(GCPUContext &ctx)
    {
        using InputIndices  = typename detail::MkSeq<sizeof...(Ins)>::type;
        using OutputIndices = typename detail::MkSeq<sizeof...(Outs)>::type;
        call_impl(ctx, InputIndices(), OutputIndices());
    }
};

} // namespace detail

template<class Impl, class K>
class GCPUKernelImpl: public cv::detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
public cv::detail::KernelTag
class GCPUKernelImpl: public cv::detail::KernelTag
{
using CallHelper = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;

public:
using API = K;

static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
static cv::GCPUKernel kernel() { return GCPUKernel(&CallHelper::call); }
};

template<class Impl, class K, class S>
class GCPUStKernelImpl: public cv::detail::KernelTag
{
using P = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
using StSetupHelper = detail::OCVSetupHelper<Impl, typename K::InArgs>;
using StCallHelper = detail::OCVStCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;

public:
using API = K;
using State = S;

static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
static cv::GCPUKernel kernel() { return GCPUKernel(&P::call); }
static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
static cv::GCPUKernel kernel() { return GCPUKernel(&StCallHelper::call,
&StSetupHelper::setup); }
};

// Opens the declaration of a kernel implementation: expands to the head of
// `struct Name`, publicly derived from cv::GCPUKernelImpl<Name, API>.
// Only the struct head is generated; the body is written after the macro invocation.
#define GAPI_OCV_KERNEL(Name, API) struct Name: public cv::GCPUKernelImpl<Name, API>

// TODO: Reuse Anatoliy's logic for support of types with commas in macro.
// Retrieve the common part from Anatoliy's logic to the separate place.
//
// Stateful counterpart of GAPI_OCV_KERNEL: expands to the head of `struct Name`,
// publicly derived from cv::GCPUStKernelImpl<Name, API, State>. Only the struct
// head is generated; the body is written after the macro invocation.
// NOTE: the last line of the definition deliberately has no trailing backslash,
// so the macro never depends on the line that follows it being blank.
#define GAPI_OCV_KERNEL_ST(Name, API, State) \
    struct Name: public cv::GCPUStKernelImpl<Name, API, State>


class gapi::cpu::GOCVFunctor : public gapi::GFunctor
{
public:
Expand Down
13 changes: 13 additions & 0 deletions modules/gapi/include/opencv2/gapi/gcompiled.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,19 @@ class GAPI_EXPORTS GCompiled
// FIXME: Why it requires compile args?
void reshape(const GMetaArgs& inMetas, const GCompileArgs& args);

/**
* @brief Prepare the internal states of kernels for a new video stream.
*
* GCompiled objects may be used to process video streams frame by frame.
* In this case, a GCompiled is called on every image frame individually.
* Starting with OpenCV 4.4, some kernels in the graph may have internal
* states (see GAPI_OCV_KERNEL_ST for the OpenCV backend).
* If the user starts processing another video stream with this GCompiled,
* this method needs to be called so that the kernels re-initialize
* their internal states for the new video stream.
*/
void prepareForNewStream();

protected:
/// @private
std::shared_ptr<Priv> m_priv;
Expand Down
65 changes: 57 additions & 8 deletions modules/gapi/src/backends/cpu/gcpubackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@
//
// If not, we need to introduce that!
using GCPUModel = ade::TypedGraph
< cv::gimpl::Unit
< cv::gimpl::CPUUnit
, cv::gimpl::Protocol
>;

// FIXME: Same issue with Typed and ConstTyped
using GConstGCPUModel = ade::ConstTypedGraph
< cv::gimpl::Unit
< cv::gimpl::CPUUnit
, cv::gimpl::Protocol
>;

Expand All @@ -53,7 +53,7 @@ namespace
{
GCPUModel gm(graph);
auto cpu_impl = cv::util::any_cast<cv::GCPUKernel>(impl.opaque);
gm.metadata(op_node).set(cv::gimpl::Unit{cpu_impl});
gm.metadata(op_node).set(cv::gimpl::CPUUnit{cpu_impl});
}

virtual EPtr compile(const ade::Graph &graph,
Expand All @@ -78,11 +78,23 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g,
{
// Convert list of operations (which is topologically sorted already)
// into an execution script.
GConstGCPUModel gcm(m_g);
for (auto &nh : nodes)
{
switch (m_gm.metadata(nh).get<NodeType>().t)
{
case NodeType::OP: m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); break;
case NodeType::OP:
{
m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)});

// If kernel is stateful then prepare storage for its state.
GCPUKernel k = gcm.metadata(nh).get<CPUUnit>().k;
if (k.m_isStateful)
{
m_nodesToStates[nh] = GArg{ };
}
break;
}
case NodeType::DATA:
{
m_dataNodes.push_back(nh);
Expand All @@ -104,6 +116,9 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g,
default: util::throw_error(std::logic_error("Unsupported NodeType type"));
}
}

// For each stateful kernel call 'setup' user callback to initialize state.
setupKernelStates();
}

// FIXME: Document what it does
Expand Down Expand Up @@ -140,6 +155,26 @@ cv::GArg cv::gimpl::GCPUExecutable::packArg(const GArg &arg)
}
}

void cv::gimpl::GCPUExecutable::setupKernelStates()
{
GConstGCPUModel gcm(m_g);
for (auto& nodeToState : m_nodesToStates)
{
auto& kernelNode = nodeToState.first;
auto& kernelState = nodeToState.second;

const GCPUKernel& kernel = gcm.metadata(kernelNode).get<CPUUnit>().k;
kernel.m_setupF(GModel::collectInputMeta(m_gm, kernelNode),
m_gm.metadata(kernelNode).get<Op>().args,
kernelState);
}
}

// Records that a new stream has started. Kernel states are not touched here:
// the flag is consumed by the next run(), which calls setupKernelStates()
// and then clears the flag.
void cv::gimpl::GCPUExecutable::handleNewStream()
{
m_newStreamStarted = true;
}

void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
std::vector<OutObj> &&output_objs)
{
Expand Down Expand Up @@ -167,6 +202,14 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
}
}

// If a new video stream has started, call the user 'setup' callback
// of each stateful kernel to re-initialize its state.
if (m_newStreamStarted)
{
setupKernelStates();
m_newStreamStarted = false;
}

// OpenCV backend execution is not rocket science at all.
// Simply invoke our kernels in the proper order.
GConstGCPUModel gcm(m_g);
Expand All @@ -176,7 +219,7 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,

// Obtain our real execution unit
// TODO: Should kernels be copyable?
GCPUKernel k = gcm.metadata(op_info.nh).get<Unit>().k;
GCPUKernel k = gcm.metadata(op_info.nh).get<CPUUnit>().k;

// Initialize kernel's execution context:
// - Input parameters
Expand All @@ -185,8 +228,8 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,

using namespace std::placeholders;
ade::util::transform(op.args,
std::back_inserter(context.m_args),
std::bind(&GCPUExecutable::packArg, this, _1));
std::back_inserter(context.m_args),
std::bind(&GCPUExecutable::packArg, this, _1));

// - Output parameters.
// FIXME: pre-allocate internal Mats, etc, according to the known meta
Expand All @@ -198,8 +241,14 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj> &&input_objs,
context.m_results[out_port] = magazine::getObjPtr(m_res, out_desc);
}

// For a stateful kernel, add its state to the execution context
if (k.m_isStateful)
{
context.m_state = m_nodesToStates.at(op_info.nh);
}

// Now trigger the executable unit
k.apply(context);
k.m_runF(context);

//As Kernels are forbidden to allocate memory for (Mat) outputs,
//this code seems redundant, at least for Mats
Expand Down
11 changes: 10 additions & 1 deletion modules/gapi/src/backends/cpu/gcpubackend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

namespace cv { namespace gimpl {

struct Unit
struct CPUUnit
{
static const char *name() { return "HostKernel"; }
GCPUKernel k;
Expand All @@ -48,6 +48,13 @@ class GCPUExecutable final: public GIslandExecutable
// Actual data of all resources in graph (both internal and external)
Mag m_res;
GArg packArg(const GArg &arg);
void setupKernelStates();

// TODO: Check that it is thread-safe
std::unordered_map<ade::NodeHandle, GArg,
ade::HandleHasher<ade::Node>> m_nodesToStates;

bool m_newStreamStarted = false;

public:
GCPUExecutable(const ade::Graph &graph,
Expand All @@ -62,6 +69,8 @@ class GCPUExecutable final: public GIslandExecutable
util::throw_error(std::logic_error("GCPUExecutable::reshape() should never be called"));
}

virtual void handleNewStream() override;

virtual void run(std::vector<InObj> &&input_objs,
std::vector<OutObj> &&output_objs) override;
};
Expand Down
Loading