opencv · alalek · Jun 4, 2020 · Jun 4, 2020
diff --git a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp
@@ -17,6 +17,7 @@
 #include <opencv2/gapi/gcommon.hpp>
 #include <opencv2/gapi/gkernel.hpp>
 #include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gmetaarg.hpp>
 #include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
 #include <opencv2/gapi/util/util.hpp>
 
@@ -109,11 +110,17 @@ class GAPI_EXPORTS GCPUContext
         return outOpaqueRef(output).wref<T>();
     }
 
+    GArg state() // Accessor for the kernel state stored in this context (m_state).
+    {
+        return m_state; // Returned by value: the caller receives a copy of the GArg.
+    }
+
 protected:
     detail::VectorRef& outVecRef(int output);
     detail::OpaqueRef& outOpaqueRef(int output);
 
     std::vector<GArg> m_args;
+    GArg m_state;
 
     //FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
     //to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run,
@@ -127,16 +134,18 @@ class GAPI_EXPORTS GCPUContext
 class GAPI_EXPORTS GCPUKernel
 {
 public:
-    // This function is kernel's execution entry point (does the processing work)
-    using F = std::function<void(GCPUContext &)>;
+    // Kernel execution entry point: called with the kernel's GCPUContext (does the processing work)
+    using RunF = std::function<void(GCPUContext &)>;
+    // Setup routine of a stateful kernel: gets input metadata, kernel arguments and the GArg that receives the state
+    using SetupF = std::function<void(const GMetaArgs &, const GArgs &, GArg &)>;
 
     GCPUKernel();
-    explicit GCPUKernel(const F& f);
+    GCPUKernel(const RunF& runF, const SetupF& setupF = nullptr); // setupF is optional; NOTE(review): m_isStateful is presumably derived from it in the out-of-view definition
 
-    void apply(GCPUContext &ctx);
+    RunF m_runF = nullptr; // invoked directly by GCPUExecutable::run() (takes the place of the removed apply())
+    SetupF m_setupF = nullptr; // invoked by GCPUExecutable::setupKernelStates()
 
-protected:
-    F m_f;
+    bool m_isStateful = false; // checked by GCPUExecutable to decide whether the kernel needs a state slot
 };
 
 // FIXME: This is an ugly ad-hoc implementation. TODO: refactor
@@ -269,12 +278,38 @@ template<typename U> struct get_out<cv::GOpaque<U>>
     }
 };
 
+template<typename, typename> // OCVSetupHelper adapts the user-defined Impl::setup() to the GCPUKernel::SetupF signature.
+struct OCVSetupHelper;
+
+template<typename Impl, typename... Ins>
+struct OCVSetupHelper<Impl, std::tuple<Ins...>>
+{
+    template<int... IIs>
+    static void setup_impl(const GMetaArgs &metaArgs, const GArgs &args, GArg &state,
+                           detail::Seq<IIs...>)
+    {
+        // TODO: consider a unique_ptr <-> shared_ptr conversion for the state.
+        // To check: such a conversion is possible only if the state passed to the
+        // 'setup' user callback is not required to carry its previous value.
+        std::shared_ptr<typename Impl::State> stPtr; // starts empty; Impl::setup() is expected to assign it
+        Impl::setup(detail::get_in_meta<Ins>(metaArgs, args, IIs)..., stPtr); // one get_in_meta<> result per input, state pointer last
+        state = GArg(stPtr); // NOTE(review): stPtr is stored even if still null; OCVStCallHelper::call_impl dereferences it unchecked
+    }
+
+    static void setup(const GMetaArgs &metaArgs, const GArgs &args, GArg& state)
+    {
+        setup_impl(metaArgs, args, state,
+                   typename detail::MkSeq<sizeof...(Ins)>::type()); // sequence sized by the number of inputs; deduced as IIs...
+    }
+};
+
+// OCVCallHelper is a helper class to call stateless OCV kernels and OCV kernel functors.
 template<typename, typename, typename>
 struct OCVCallHelper;
 
 // FIXME: probably can be simplified with std::apply or analogue.
 template<typename Impl, typename... Ins, typename... Outs>
-struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
+struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>>
 {
     template<typename... Inputs>
     struct call_and_postprocess
@@ -302,19 +337,16 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
         //by comparing it's state (data ptr) before and after the call.
         //This is done by converting each output Mat into tracked_cv_mat object, and binding
         //them to parameters of ad-hoc function
-        //Convert own::Scalar to cv::Scalar before call kernel and run kernel
-        //convert cv::Scalar to own::Scalar after call kernel and write back results
         call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
-                                      ::call(get_in<Ins>::get(ctx, IIs)...,
-                                             get_out<Outs>::get(ctx, OIs)...);
+            ::call(get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...);
     }
 
     template<int... IIs, int... OIs>
-    static void call_impl(cv::GCPUContext &ctx, Impl& impl, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    static void call_impl(cv::GCPUContext &ctx, Impl& impl, // overload taking an Impl instance that is forwarded to call()
+                          detail::Seq<IIs...>, detail::Seq<OIs...>) // IIs/OIs: indices used to fetch inputs/outputs from ctx
     {
-        call_and_postprocess<decltype(cv::detail::get_in<Ins>::get(ctx, IIs))...>
-                                      ::call(impl, cv::detail::get_in<Ins>::get(ctx, IIs)...,
-                                                   cv::detail::get_out<Outs>::get(ctx, OIs)...);
+        call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...> // input types are taken from what get_in<>::get returns
+            ::call(impl, get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...); // impl first, then inputs, then outputs
     }
 
     static void call(GCPUContext &ctx)
@@ -335,23 +367,78 @@ struct OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
     }
 };
 
+// OCVStCallHelper is a helper class to call stateful OCV kernels: Impl::run() gets the kernel state as its last argument.
+template<typename, typename, typename>
+struct OCVStCallHelper;
+
+template<typename Impl, typename... Ins, typename... Outs>
+struct OCVStCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>> :
+    OCVCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...>>
+{
+    template<typename... Inputs>
+    struct call_and_postprocess
+    {
+        template<typename... Outputs>
+        static void call(typename Impl::State& st, Inputs&&... ins, Outputs&&... outs)
+        {
+            Impl::run(std::forward<Inputs>(ins)..., outs..., st); // argument order: inputs, outputs, state
+            postprocess(outs...); // same post-call step as in the stateless helper; postprocess is declared outside this view
+        }
+    };
+
+    template<int... IIs, int... OIs>
+    static void call_impl(GCPUContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        auto& st = *ctx.state().get<std::shared_ptr<typename Impl::State>>(); // NOTE(review): no null check; assumes setup stored a non-null shared_ptr<State> that ctx keeps alive
+        call_and_postprocess<decltype(get_in<Ins>::get(ctx, IIs))...>
+            ::call(st, get_in<Ins>::get(ctx, IIs)..., get_out<Outs>::get(ctx, OIs)...); // state first here; call() moves it to the end for Impl::run()
+    }
+
+    static void call(GCPUContext &ctx) // entry point registered as GCPUKernel::RunF by GCPUStKernelImpl::kernel()
+    {
+        call_impl(ctx,
+                  typename detail::MkSeq<sizeof...(Ins)>::type(),
+                  typename detail::MkSeq<sizeof...(Outs)>::type());
+    }
+};
+
 } // namespace detail
 
 template<class Impl, class K>
-class GCPUKernelImpl: public cv::detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>,
-                      public cv::detail::KernelTag
+class GCPUKernelImpl: public cv::detail::KernelTag // stateless OCV kernel base; no longer derives from OCVCallHelper
+{
+    using CallHelper = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+
+public:
+    using API = K;
+
+    static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
+    static cv::GCPUKernel      kernel() { return GCPUKernel(&CallHelper::call); } // run function only, no setup callback
+};
+
+template<class Impl, class K, class S>
+class GCPUStKernelImpl: public cv::detail::KernelTag // stateful OCV kernel base; S is the kernel's state type
 {
-    using P = detail::OCVCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+    using StSetupHelper = detail::OCVSetupHelper<Impl, typename K::InArgs>;
+    using StCallHelper  = detail::OCVStCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
 
 public:
     using API = K;
+    using State = S; // the helpers above read this as `typename Impl::State`
 
-    static cv::gapi::GBackend backend()  { return cv::gapi::cpu::backend(); }
-    static cv::GCPUKernel     kernel()   { return GCPUKernel(&P::call);     }
+    static cv::gapi::GBackend backend() { return cv::gapi::cpu::backend(); }
+    static cv::GCPUKernel     kernel()  { return GCPUKernel(&StCallHelper::call,
+                                                            &StSetupHelper::setup); } // run callback plus setup callback
 };
 
 #define GAPI_OCV_KERNEL(Name, API) struct Name: public cv::GCPUKernelImpl<Name, API>
 
+// Declares a stateful OCV kernel: `struct Name` derived from cv::GCPUStKernelImpl<Name, API, State>.
+// TODO: Reuse Anatoliy's logic for support of types with commas in macro.
+//       Extract the common part of that logic into a separate place.
+#define GAPI_OCV_KERNEL_ST(Name, API, State)                  \
+    struct Name: public cv::GCPUStKernelImpl<Name, API, State>
+
 class gapi::cpu::GOCVFunctor : public gapi::GFunctor
 {
 public:

diff --git a/modules/gapi/include/opencv2/gapi/gcompiled.hpp b/modules/gapi/include/opencv2/gapi/gcompiled.hpp
@@ -208,6 +208,19 @@ class GAPI_EXPORTS GCompiled
     // FIXME: Why it requires compile args?
     void reshape(const GMetaArgs& inMetas, const GCompileArgs& args);
 
+    /**
+     * @brief Prepare internal kernel states for a new video stream.
+     *
+     * GCompiled objects may be used to process video streams frame by frame.
+     * In this case, a GCompiled is called on every image frame individually.
+     * Starting with OpenCV 4.4, some kernels in the graph may have their own
+     * internal states (see GAPI_OCV_KERNEL_ST for the OpenCV backend).
+     * In this case, if the user starts processing another video stream with
+     * this GCompiled, this method needs to be called to let the kernels
+     * re-initialize their internal states for the new video stream.
+     */
+    void prepareForNewStream();
+
 protected:
     /// @private
     std::shared_ptr<Priv> m_priv;

diff --git a/modules/gapi/src/backends/cpu/gcpubackend.cpp b/modules/gapi/src/backends/cpu/gcpubackend.cpp
@@ -33,13 +33,13 @@
 //
 // If not, we need to introduce that!
 using GCPUModel = ade::TypedGraph
-    < cv::gimpl::Unit
+    < cv::gimpl::CPUUnit
     , cv::gimpl::Protocol
     >;
 
 // FIXME: Same issue with Typed and ConstTyped
 using GConstGCPUModel = ade::ConstTypedGraph
-    < cv::gimpl::Unit
+    < cv::gimpl::CPUUnit
     , cv::gimpl::Protocol
     >;
 
@@ -53,7 +53,7 @@ namespace
         {
             GCPUModel gm(graph);
             auto cpu_impl = cv::util::any_cast<cv::GCPUKernel>(impl.opaque);
-            gm.metadata(op_node).set(cv::gimpl::Unit{cpu_impl});
+            gm.metadata(op_node).set(cv::gimpl::CPUUnit{cpu_impl});
         }
 
         virtual EPtr compile(const ade::Graph &graph,
@@ -78,11 +78,23 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g,
 {
     // Convert list of operations (which is topologically sorted already)
     // into an execution script.
+    GConstGCPUModel gcm(m_g);
     for (auto &nh : nodes)
     {
         switch (m_gm.metadata(nh).get<NodeType>().t)
         {
-        case NodeType::OP: m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); break;
+        case NodeType::OP:
+        {
+            m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)});
+
+            // If kernel is stateful then prepare storage for its state.
+            GCPUKernel k = gcm.metadata(nh).get<CPUUnit>().k;
+            if (k.m_isStateful)
+            {
+                m_nodesToStates[nh] = GArg{ };
+            }
+            break;
+        }
         case NodeType::DATA:
         {
             m_dataNodes.push_back(nh);
@@ -104,6 +116,9 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g,
         default: util::throw_error(std::logic_error("Unsupported NodeType type"));
         }
     }
+
+    // For each stateful kernel call 'setup' user callback to initialize state.
+    setupKernelStates();
 }
 
 // FIXME: Document what it does
@@ -140,6 +155,26 @@ cv::GArg cv::gimpl::GCPUExecutable::packArg(const GArg &arg)
     }
 }
 
+void cv::gimpl::GCPUExecutable::setupKernelStates() // Runs the setup callback of every kernel registered in m_nodesToStates.
+{
+    GConstGCPUModel gcm(m_g);
+    for (auto& nodeToState : m_nodesToStates) // the constructor registers only stateful kernels in this map
+    {
+        auto& kernelNode = nodeToState.first;
+        auto& kernelState = nodeToState.second; // mutable reference: the setup callback writes the state in place
+
+        const GCPUKernel& kernel = gcm.metadata(kernelNode).get<CPUUnit>().k;
+        kernel.m_setupF(GModel::collectInputMeta(m_gm, kernelNode), // metadata of the node's inputs
+                        m_gm.metadata(kernelNode).get<Op>().args,   // arguments of the operation
+                        kernelState);                               // out: receives the kernel state
+    }
+}
+
+void cv::gimpl::GCPUExecutable::handleNewStream() // Only raises a flag; run() re-initializes kernel states when it sees the flag.
+{
+    m_newStreamStarted = true; // reset to false by run() after setupKernelStates() has been called
+}
+
 void cv::gimpl::GCPUExecutable::run(std::vector<InObj>  &&input_objs,
                                     std::vector<OutObj> &&output_objs)
 {
@@ -167,6 +202,14 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj>  &&input_objs,
         }
     }
 
+    // If a new video stream has started, call the 'setup' user callback of
+    // each stateful kernel to re-initialize its state.
+    if (m_newStreamStarted)
+    {
+        setupKernelStates();
+        m_newStreamStarted = false;
+    }
+
     // OpenCV backend execution is not a rocket science at all.
     // Simply invoke our kernels in the proper order.
     GConstGCPUModel gcm(m_g);
@@ -176,7 +219,7 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj>  &&input_objs,
 
         // Obtain our real execution unit
         // TODO: Should kernels be copyable?
-        GCPUKernel k = gcm.metadata(op_info.nh).get<Unit>().k;
+        GCPUKernel k = gcm.metadata(op_info.nh).get<CPUUnit>().k;
 
         // Initialize kernel's execution context:
         // - Input parameters
@@ -185,8 +228,8 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj>  &&input_objs,
 
         using namespace std::placeholders;
         ade::util::transform(op.args,
-                          std::back_inserter(context.m_args),
-                          std::bind(&GCPUExecutable::packArg, this, _1));
+                             std::back_inserter(context.m_args),
+                             std::bind(&GCPUExecutable::packArg, this, _1));
 
         // - Output parameters.
         // FIXME: pre-allocate internal Mats, etc, according to the known meta
@@ -198,8 +241,14 @@ void cv::gimpl::GCPUExecutable::run(std::vector<InObj>  &&input_objs,
             context.m_results[out_port] = magazine::getObjPtr(m_res, out_desc);
         }
 
+        // For stateful kernel add state to its execution context
+        if (k.m_isStateful)
+        {
+            context.m_state = m_nodesToStates.at(op_info.nh);
+        }
+
         // Now trigger the executable unit
-        k.apply(context);
+        k.m_runF(context);
 
         //As Kernels are forbidden to allocate memory for (Mat) outputs,
         //this code seems redundant, at least for Mats

diff --git a/modules/gapi/src/backends/cpu/gcpubackend.hpp b/modules/gapi/src/backends/cpu/gcpubackend.hpp
@@ -23,7 +23,7 @@
 
 namespace cv { namespace gimpl {
 
-struct Unit
+struct CPUUnit
 {
     static const char *name() { return "HostKernel"; }
     GCPUKernel k;
@@ -48,6 +48,13 @@ class GCPUExecutable final: public GIslandExecutable
     // Actual data of all resources in graph (both internal and external)
     Mag m_res;
     GArg packArg(const GArg &arg);
+    void setupKernelStates();
+
+    // TODO: Check that it is thread-safe
+    std::unordered_map<ade::NodeHandle, GArg,
+                       ade::HandleHasher<ade::Node>> m_nodesToStates;
+
+    bool m_newStreamStarted = false;
 
 public:
     GCPUExecutable(const ade::Graph                   &graph,
@@ -62,6 +69,8 @@ class GCPUExecutable final: public GIslandExecutable
         util::throw_error(std::logic_error("GCPUExecutable::reshape() should never be called"));
     }
 
+    virtual void handleNewStream() override;
+
     virtual void run(std::vector<InObj>  &&input_objs,
                      std::vector<OutObj> &&output_objs) override;
 };