-
Notifications
You must be signed in to change notification settings - Fork 522
Description
Bug report
Required Info:
- Operating System:
- Ubuntu 20.04 LTS
- Installation type:
- Binaries
- Version or commit hash:
- Foxy, Rolling
- DDS implementation:
- CycloneDDS
- Client library (if applicable):
- rclcpp
Expected behavior
My packages' integration tests should run with the same behavior both locally and in CI.
Actual behavior
When running in an environment with a comparatively small number of CPU cores (for example, 2 cores for Travis and GH Actions runners), my tests sometimes fail because action feedback that I require to pass the test was never received by my action client.
To test this, I'm using the fibonacci_action_server node from the action_tutorials_cpp package, which includes an action server of type action_tutorials_interfaces::action::Fibonacci.
My test node sends a goal to the Fibonacci action with order = 5. I expect the action server to publish 4 feedback messages. In the log output from my tests, I see that the server does publish 4 feedback messages, but the feedback callback registered with my action client is only called for the first 3 messages. Since my test needs to receive 4 feedback messages to succeed, it times out waiting for the 4th message and fails.
taskset -c 0-1 python3 -u /opt/ros/rolling/share/ament_cmake_test/cmake/run_test.py /tmp/asd \
--package-name rclcpp_multicore_ci_test \
--env TEST_LAUNCH_DIR=src/rclcpp_multicore_ci_test/test TEST_EXECUTABLE=build/rclcpp_multicore_ci_test/test_fixture_node \
--command src/rclcpp_multicore_ci_test/test/test_nodes.py
[INFO] [fibonacci_action_server-1]: process started with pid [2053673]
[INFO] [test_node-2]: process started with pid [2053675]
[test_node-2] [==========] Running 1 test from 1 test suite.
[test_node-2] [----------] Global test environment set-up.
[test_node-2] [----------] 1 test from TaskSequenceInterface
[test_node-2] [ RUN ] TaskSequenceInterface.setup_plan_exec_through_ros
[fibonacci_action_server-1] 1621445668.874821 [0] fibonacci_: using network interface wlp0s20f3 (udp/192.168.0.4) selected arbitrarily from: wlp0s20f3, docker0
[test_node-2] 1621445668.875303 [0] test_fixtu: using network interface wlp0s20f3 (udp/192.168.0.4) selected arbitrarily from: wlp0s20f3, docker0
[fibonacci_action_server-1] [INFO] [1621445668.891735102] [fibonacci_action_server]: Received goal request with order 5
[fibonacci_action_server-1] [INFO] [1621445668.892005547] [fibonacci_action_server]: Executing goal
[fibonacci_action_server-1] [INFO] [1621445668.892090501] [fibonacci_action_server]: Publish feedback
[fibonacci_action_server-1] [INFO] [1621445669.892521092] [fibonacci_action_server]: Publish feedback
[test_node-2] [INFO] [1621445669.892611049] [test_node]: Have 1 state feedback messages
[fibonacci_action_server-1] [INFO] [1621445670.892371325] [fibonacci_action_server]: Publish feedback
[test_node-2] [INFO] [1621445670.892701509] [test_node]: Have 2 state feedback messages
[fibonacci_action_server-1] [INFO] [1621445671.892378535] [fibonacci_action_server]: Publish feedback
[test_node-2] [INFO] [1621445671.892631492] [test_node]: Have 3 state feedback messages
[fibonacci_action_server-1] [INFO] [1621445672.892589940] [fibonacci_action_server]: Goal succeeded
[test_node-2] [ERROR] [1621445678.893078911] [test_node]: Timed out waiting for feedback vector to be populated
[test_node-2] terminate called without an active exception
[test_node-2] /home/jschornak/workspaces/test_ws/src/rclcpp_multicore_ci_test/src/test_fixture_node.cpp:99: Failure
[test_node-2] Value of: node->testActionFeebackCount()
[test_node-2] Actual: false
[test_node-2] Expected: true
[ERROR] [test_node-2]: process has died [pid 2053675, exit code -6, cmd 'build/rclcpp_multicore_ci_test/test_fixture_node'].
[INFO] [fibonacci_action_server-1]: sending signal 'SIGINT' to process[fibonacci_action_server-1]
[fibonacci_action_server-1] [INFO] [1621445679.071037031] [rclcpp]: signal_handler(signal_value=2)
[INFO] [fibonacci_action_server-1]: process has finished cleanly [pid 2053673]
I think issues similar to this are causing problems in a variety of ROS2 packages, such as during this PR for MoveIt2.
Steps to reproduce issue
I made a minimal package to reproduce this issue: https://github.com/schornakj/rclcpp_multicore_ci_test/
This mirrors how similar integration tests run in my CI pipelines.
More detailed instructions are in the repo's README.
Additional information
My dev workstation is a Lenovo Thinkpad P15 with an Intel i7-10750H CPU, which has 12 cores, and 16 GB RAM.
Source code for my test executables is below (and also in the repo I linked previously):
test_fixture_node.hpp
#pragma once
#include <rclcpp/rclcpp.hpp>
#include <rclcpp_action/rclcpp_action.hpp>
#include <action_tutorials_interfaces/action/fibonacci.hpp>
namespace rclcpp_multicore_ci_test
{
using action_tutorials_interfaces::action::Fibonacci;
using GoalHandleFibonacci = rclcpp_action::ClientGoalHandle<Fibonacci>;
class TestFixtureNode : public rclcpp::Node
{
public:
TestFixtureNode();
bool testActionFeebackCount();
private:
rclcpp::CallbackGroup::SharedPtr cb_group_me_;
rclcpp_action::Client<Fibonacci>::SharedPtr fib_client_;
};
} // namespace rclcpp_multicore_ci_test
test_fixture_node.cpp
#include <rclcpp_thread_test/test_fixture_node.hpp>
#include <gtest/gtest.h>
static const std::size_t ORDER = 5;
namespace rclcpp_multicore_ci_test
{
TestFixtureNode::TestFixtureNode()
: Node("test_node")
, cb_group_me_(create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive))
, fib_client_(rclcpp_action::create_client<Fibonacci>(this, "/fibonacci", cb_group_me_))
{
}
bool TestFixtureNode::testActionFeebackCount()
{
if(!fib_client_->wait_for_action_server(std::chrono::seconds(3)))
{
RCLCPP_ERROR_STREAM(get_logger(), "Timed out waiting for action server to be available");
return false;
}
auto options = rclcpp_action::Client<Fibonacci>::SendGoalOptions();
options.goal_response_callback = [](std::shared_future<GoalHandleFibonacci::SharedPtr> future) -> void {
(void)future;
};
std::mutex lk;
std::vector<Fibonacci::Feedback> feedback;
std::promise<void> promise;
options.feedback_callback = [=, &feedback, &lk,
&promise](GoalHandleFibonacci::SharedPtr gh,
std::shared_ptr<const Fibonacci::Feedback> fb) -> void {
(void)gh;
std::scoped_lock _(lk);
feedback.push_back(*fb);
RCLCPP_INFO_STREAM(get_logger(), "Have " << feedback.size() << " state feedback messages");
if (feedback.size() == ORDER - 1)
{
promise.set_value();
}
};
options.result_callback = [](const GoalHandleFibonacci::WrappedResult& result) -> void { (void)result; };
auto goal = Fibonacci::Goal();
goal.order = ORDER;
auto fut = fib_client_->async_send_goal(goal, options);
if (fut.wait_for(std::chrono::seconds(1)) == std::future_status::timeout)
{
RCLCPP_ERROR_STREAM(get_logger(), "Timed out waiting for goal response callback");
return false;
}
auto gh = fut.get();
auto res_fut = fib_client_->async_get_result(gh);
if (res_fut.wait_for(std::chrono::seconds(ORDER + 1)) == std::future_status::timeout)
{
RCLCPP_ERROR_STREAM(get_logger(), "Timed out waiting for result callback");
return false;
}
auto received_state_fut = promise.get_future();
if (received_state_fut.wait_for(std::chrono::seconds(ORDER + 1)) == std::future_status::timeout)
{
RCLCPP_ERROR_STREAM(get_logger(), "Timed out waiting for feedback vector to be populated");
return false;
}
if (feedback.size() != ORDER - 1)
{
RCLCPP_ERROR_STREAM(get_logger(), "Wrong number of feedback messages: expected " << ORDER - 1 << ", got " << feedback.size() << ".");
return false;
}
return true;
}
} // namespace rclcpp_multicore_ci_test
TEST(TaskSequenceInterface, setup_plan_exec_through_ros)
{
auto node = std::make_shared<rclcpp_multicore_ci_test::TestFixtureNode>();
rclcpp::executors::MultiThreadedExecutor exec;
auto exec_thread = std::make_unique<std::thread>([&exec, &node]() {
exec.add_node(node);
exec.spin();
exec.remove_node(node);
});
ASSERT_TRUE(node->testActionFeebackCount());
exec.cancel();
exec_thread->join();
}
int main(int argc, char* argv[])
{
rclcpp::init(argc, argv);
testing::InitGoogleTest(&argc, argv);
rclcpp::NodeOptions options;
bool all_successful = RUN_ALL_TESTS();
rclcpp::shutdown();
return all_successful;
}
test_nodes.py
#!/usr/bin/env python3
import os
import sys
from launch import LaunchService, LaunchDescription
from launch.actions import EmitEvent, ExecuteProcess, RegisterEventHandler
from launch.events import Shutdown
from launch.event_handlers import OnProcessExit
from launch_ros.actions import Node
from launch_testing.legacy import LaunchTestService
def main(argv=sys.argv[1:]):
test_executable = os.getenv("TEST_EXECUTABLE")
test_action = ExecuteProcess(
cmd=[test_executable], name="test_node", output="screen"
)
fib_server_node = Node(
package="action_tutorials_cpp",
executable="fibonacci_action_server",
name="fibonacci_action_server",
output="screen",
)
ld = LaunchDescription([])
ld.add_action(fib_server_node)
lts = LaunchTestService()
lts.add_test_action(ld, test_action)
ls = LaunchService(argv=argv)
ls.include_launch_description(ld)
return lts.run(ls)
if __name__ == "__main__":
sys.exit(main())