|
std::shared_ptr<void> |
|
ClientBase::take_data() |
|
{ |
|
if (pimpl_->is_feedback_ready) { |
|
std::shared_ptr<void> feedback_message = this->create_feedback_message(); |
|
rcl_ret_t ret = rcl_action_take_feedback( |
|
pimpl_->client_handle.get(), feedback_message.get()); |
|
return std::static_pointer_cast<void>( |
|
std::make_shared<std::tuple<rcl_ret_t, std::shared_ptr<void>>>( |
|
ret, feedback_message)); |
|
} else if (pimpl_->is_status_ready) { |
|
std::shared_ptr<void> status_message = this->create_status_message(); |
|
rcl_ret_t ret = rcl_action_take_status( |
|
pimpl_->client_handle.get(), status_message.get()); |
|
return std::static_pointer_cast<void>( |
|
std::make_shared<std::tuple<rcl_ret_t, std::shared_ptr<void>>>( |
|
ret, status_message)); |
|
} else if (pimpl_->is_goal_response_ready) { |
|
rmw_request_id_t response_header; |
|
std::shared_ptr<void> goal_response = this->create_goal_response(); |
|
rcl_ret_t ret = rcl_action_take_goal_response( |
|
pimpl_->client_handle.get(), &response_header, goal_response.get()); |
|
return std::static_pointer_cast<void>( |
|
std::make_shared<std::tuple<rcl_ret_t, rmw_request_id_t, std::shared_ptr<void>>>( |
|
ret, response_header, goal_response)); |
|
} else if (pimpl_->is_result_response_ready) { |
|
rmw_request_id_t response_header; |
|
std::shared_ptr<void> result_response = this->create_result_response(); |
|
rcl_ret_t ret = rcl_action_take_result_response( |
|
pimpl_->client_handle.get(), &response_header, result_response.get()); |
|
return std::static_pointer_cast<void>( |
|
std::make_shared<std::tuple<rcl_ret_t, rmw_request_id_t, std::shared_ptr<void>>>( |
|
ret, response_header, result_response)); |
|
} else if (pimpl_->is_cancel_response_ready) { |
|
rmw_request_id_t response_header; |
|
std::shared_ptr<void> cancel_response = this->create_cancel_response(); |
|
rcl_ret_t ret = rcl_action_take_cancel_response( |
|
pimpl_->client_handle.get(), &response_header, cancel_response.get()); |
|
return std::static_pointer_cast<void>( |
|
std::make_shared<std::tuple<rcl_ret_t, rmw_request_id_t, std::shared_ptr<void>>>( |
|
ret, response_header, cancel_response)); |
|
} else { |
|
throw std::runtime_error("Taking data from action client but nothing is ready"); |
|
} |
|
} |
|
|
|
std::shared_ptr<void> |
|
ClientBase::take_data_by_entity_id(size_t id) |
|
{ |
|
// Mark as ready the entity from which we want to take data |
|
switch (static_cast<EntityType>(id)) { |
|
case EntityType::GoalClient: |
|
pimpl_->is_goal_response_ready = true; |
|
break; |
|
case EntityType::ResultClient: |
|
pimpl_->is_result_response_ready = true; |
|
break; |
|
case EntityType::CancelClient: |
|
pimpl_->is_cancel_response_ready = true; |
|
break; |
|
case EntityType::FeedbackSubscription: |
|
pimpl_->is_feedback_ready = true; |
|
break; |
|
case EntityType::StatusSubscription: |
|
pimpl_->is_status_ready = true; |
|
break; |
|
} |
|
|
|
return take_data(); |
|
} |
|
|
|
void |
|
ClientBase::execute(std::shared_ptr<void> & data) |
|
{ |
|
if (!data) { |
|
throw std::runtime_error("'data' is empty"); |
|
} |
|
|
|
if (pimpl_->is_feedback_ready) { |
|
auto shared_ptr = std::static_pointer_cast<std::tuple<rcl_ret_t, std::shared_ptr<void>>>(data); |
|
auto ret = std::get<0>(*shared_ptr); |
|
pimpl_->is_feedback_ready = false; |
|
if (RCL_RET_OK == ret) { |
|
auto feedback_message = std::get<1>(*shared_ptr); |
|
this->handle_feedback_message(feedback_message); |
|
} else if (RCL_RET_ACTION_CLIENT_TAKE_FAILED != ret) { |
|
rclcpp::exceptions::throw_from_rcl_error(ret, "error taking feedback"); |
|
} |
|
} else if (pimpl_->is_status_ready) { |
|
auto shared_ptr = std::static_pointer_cast<std::tuple<rcl_ret_t, std::shared_ptr<void>>>(data); |
|
auto ret = std::get<0>(*shared_ptr); |
|
pimpl_->is_status_ready = false; |
|
if (RCL_RET_OK == ret) { |
|
auto status_message = std::get<1>(*shared_ptr); |
|
this->handle_status_message(status_message); |
|
} else if (RCL_RET_ACTION_CLIENT_TAKE_FAILED != ret) { |
|
rclcpp::exceptions::throw_from_rcl_error(ret, "error taking status"); |
|
} |
|
} else if (pimpl_->is_goal_response_ready) { |
|
auto shared_ptr = std::static_pointer_cast< |
|
std::tuple<rcl_ret_t, rmw_request_id_t, std::shared_ptr<void>>>(data); |
|
auto ret = std::get<0>(*shared_ptr); |
|
pimpl_->is_goal_response_ready = false; |
|
if (RCL_RET_OK == ret) { |
|
auto response_header = std::get<1>(*shared_ptr); |
|
auto goal_response = std::get<2>(*shared_ptr); |
|
this->handle_goal_response(response_header, goal_response); |
|
} else if (RCL_RET_ACTION_CLIENT_TAKE_FAILED != ret) { |
|
rclcpp::exceptions::throw_from_rcl_error(ret, "error taking goal response"); |
|
} |
|
} else if (pimpl_->is_result_response_ready) { |
|
auto shared_ptr = std::static_pointer_cast< |
|
std::tuple<rcl_ret_t, rmw_request_id_t, std::shared_ptr<void>>>(data); |
|
auto ret = std::get<0>(*shared_ptr); |
|
pimpl_->is_result_response_ready = false; |
|
if (RCL_RET_OK == ret) { |
|
auto response_header = std::get<1>(*shared_ptr); |
|
auto result_response = std::get<2>(*shared_ptr); |
|
this->handle_result_response(response_header, result_response); |
|
} else if (RCL_RET_ACTION_CLIENT_TAKE_FAILED != ret) { |
|
rclcpp::exceptions::throw_from_rcl_error(ret, "error taking result response"); |
|
} |
|
} else if (pimpl_->is_cancel_response_ready) { |
|
auto shared_ptr = std::static_pointer_cast< |
|
std::tuple<rcl_ret_t, rmw_request_id_t, std::shared_ptr<void>>>(data); |
|
auto ret = std::get<0>(*shared_ptr); |
|
pimpl_->is_cancel_response_ready = false; |
|
if (RCL_RET_OK == ret) { |
|
auto response_header = std::get<1>(*shared_ptr); |
|
auto cancel_response = std::get<2>(*shared_ptr); |
|
this->handle_cancel_response(response_header, cancel_response); |
|
} else if (RCL_RET_ACTION_CLIENT_TAKE_FAILED != ret) { |
|
rclcpp::exceptions::throw_from_rcl_error(ret, "error taking cancel response"); |
|
} |
|
} else { |
|
throw std::runtime_error("Executing action client but nothing is ready"); |
|
} |
|
} |
Bug report
When running an
rclcpp_action::Client<T>in usingspin_some()and arclcpp::Rate, if the server you connect to publishes feedback at a faster rate than yourrclcpp::Rate, your client waits for the goal to be accepted forever, effectively becoming deadlocked.Required Info:
Note: based on my understanding of the root cause (see
Additional information), I believe this bug exists in rolling too, but I've only tested on Galactic as that's what I have access to at the moment.Steps to reproduce issue
The below server and client reproduce 100% of the time on my machine when running over loopback. Compilation settings shouldn't matter. Once built, they can be run with
ros2 run ...or directly with./path/to/binary. I'm usingaction_tutorials_interfaces/action/Fibonacci, but this should repro with any action interface.Server
Notes:
Client
Notes
SendGoalOptionsas the deadlock is not dependent on one.Expected behavior
The action client will accept the goal, perhaps after working through a short backlog of feedback up to the queue depth in the feedback subscription's QoS.
Actual behavior
The server has accepted and started working on the goal but the client never sees that. The client continuously logs
Waiting for goal response.in the repro and debug logs haveReceived feedback for unknown goal. Ignoring.... Debug logs showClient in wait set is ready, which I presume to be the goal response, but it's never taken, so the client deadlocks.Additional information
I did some digging and think I have tracked down the root cause, some other contributing factors, and some related impacts.
rclcpp_action::Client<T>derives fromrclcpp_action::ClientBasederives fromrclcpp::Waitable, and these are scheduled for work in anrclcpp::Executoras a single executable, not multiple executables of their constituent parts. When usingrclcpp::Executor::spin_some, which only collects work from ready entities once per call, therclcpp::Waitableis scheduled once, regardless of the amount of work it has ready. This should be fine, but therclcpp::ClientBase::take_dataimplementation only yields a single executable per call and therefore the subsequentexecuteinvoked byrclcpp::Executoronly performs one thing on the client rather than everything that's ready.rclcpp/rclcpp_action/src/client.cpp
Lines 550 to 687 in 5e14a28
In most scenarios, this is fine, but it deadlocks in this specific scenario: a server publishing feedback faster than the client is spinning (specifically, faster than it calls
rclcpp::Executor::wait_for_work). This can happen with any of thespin*implementations but happens most readily withspin_some. Why? Becauserclcpp::ClientBase::take_dataonly returns the first ready executable and it prioritizes feedback over responses. The implementation ofrclcpp::ClientBase::take_dataandexecutetake and execute in this order: feedback, status, goal response, result response, cancel response.When you combine the implementation of
rclcpp::Waitablethat only returns execution data representing a single unit of work, the implicit prioritization mechanism built intoClientBase, and an action server whose feedback topic publishes faster than the spin rate of the client, you get a client that always has ready feedback for an unknown goal, ironically never executing the goal response that feedback is for.Some ancillary effects of this: the goal response is never taken, the client never creates the goal handle, and therefore cancellation isn't easy. Cancellation can be done through
rclcpp_action::Client<T>::async_cancel_all_goalsbut that may have adverse side effects depending on your scenario. Additionally, the client never becomes "result aware" for the goal since that occurs when the goal response is processed, so result responses are permanently lost if they occur during the deadlock. Feedback is also always dropped because it's for an unknown goal.