Skip to content

Commit 6fe7480

Browse files
authored
GH-37164: [Python] Attach Python stacktrace to errors in ConvertPyError (#39380)
### Rationale for this change Users might define Python generators that are used in RecordBatchReaders and then exported through the C Data Interface. However, if an error occurs in their generator, the stacktrace and message are currently swallowed in the current `ConvertPyError` implementation, which only provides the type of error. This makes debugging code that passed RBRs difficult. ### What changes are included in this PR? Changes `ConvertPyError` to provide the fully formatted traceback in the error message. ### Are these changes tested? Yes, added one test to validate the errors messages are propagated. ### Are there any user-facing changes? This is a minor change in the error reporting behavior, which will provide more information. * Closes: #37164 Authored-by: Will Jones <willjones127@gmail.com> Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 2b4a703 commit 6fe7480

3 files changed

Lines changed: 92 additions & 8 deletions

File tree

python/pyarrow/src/arrow/python/common.cc

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <cstdlib>
2121
#include <mutex>
22+
#include <sstream>
2223
#include <string>
2324

2425
#include "arrow/memory_pool.h"
@@ -90,9 +91,15 @@ class PythonErrorDetail : public StatusDetail {
9091

9192
std::string ToString() const override {
9293
// This is simple enough not to need the GIL
93-
const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
94-
// XXX Should we also print traceback?
95-
return std::string("Python exception: ") + ty->tp_name;
94+
Result<std::string> result = FormatImpl();
95+
96+
if (result.ok()) {
97+
return result.ValueOrDie();
98+
} else {
99+
// Fallback to just the exception type
100+
const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
101+
return std::string("Python exception: ") + ty->tp_name;
102+
}
96103
}
97104

98105
void RestorePyError() const {
@@ -131,6 +138,42 @@ class PythonErrorDetail : public StatusDetail {
131138
}
132139

133140
protected:
141+
Result<std::string> FormatImpl() const {
142+
PyAcquireGIL lock;
143+
144+
// Use traceback.format_exception()
145+
OwnedRef traceback_module;
146+
RETURN_NOT_OK(internal::ImportModule("traceback", &traceback_module));
147+
148+
OwnedRef fmt_exception;
149+
RETURN_NOT_OK(internal::ImportFromModule(traceback_module.obj(), "format_exception",
150+
&fmt_exception));
151+
152+
OwnedRef formatted;
153+
formatted.reset(PyObject_CallFunctionObjArgs(fmt_exception.obj(), exc_type_.obj(),
154+
exc_value_.obj(), exc_traceback_.obj(),
155+
NULL));
156+
RETURN_IF_PYERROR();
157+
158+
std::stringstream ss;
159+
ss << "Python exception: ";
160+
Py_ssize_t num_lines = PySequence_Length(formatted.obj());
161+
RETURN_IF_PYERROR();
162+
163+
for (Py_ssize_t i = 0; i < num_lines; ++i) {
164+
Py_ssize_t line_size;
165+
166+
PyObject* line = PySequence_GetItem(formatted.obj(), i);
167+
RETURN_IF_PYERROR();
168+
169+
const char* data = PyUnicode_AsUTF8AndSize(line, &line_size);
170+
RETURN_IF_PYERROR();
171+
172+
ss << std::string_view(data, line_size);
173+
}
174+
return ss.str();
175+
}
176+
134177
PythonErrorDetail() = default;
135178

136179
OwnedRefNoGIL exc_type_, exc_value_, exc_traceback_;

python/pyarrow/src/arrow/python/python_test.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,14 @@ Status TestOwnedRefNoGILMoves() {
174174
}
175175
}
176176

177-
std::string FormatPythonException(const std::string& exc_class_name) {
177+
std::string FormatPythonException(const std::string& exc_class_name,
178+
const std::string& exc_value) {
178179
std::stringstream ss;
179180
ss << "Python exception: ";
180181
ss << exc_class_name;
182+
ss << ": ";
183+
ss << exc_value;
184+
ss << "\n";
181185
return ss.str();
182186
}
183187

@@ -205,7 +209,8 @@ Status TestCheckPyErrorStatus() {
205209
}
206210

207211
PyErr_SetString(PyExc_TypeError, "some error");
208-
ASSERT_OK(check_error(st, "some error", FormatPythonException("TypeError")));
212+
ASSERT_OK(
213+
check_error(st, "some error", FormatPythonException("TypeError", "some error")));
209214
ASSERT_TRUE(st.IsTypeError());
210215

211216
PyErr_SetString(PyExc_ValueError, "some error");
@@ -223,7 +228,8 @@ Status TestCheckPyErrorStatus() {
223228
}
224229

225230
PyErr_SetString(PyExc_NotImplementedError, "some error");
226-
ASSERT_OK(check_error(st, "some error", FormatPythonException("NotImplementedError")));
231+
ASSERT_OK(check_error(st, "some error",
232+
FormatPythonException("NotImplementedError", "some error")));
227233
ASSERT_TRUE(st.IsNotImplemented());
228234

229235
// No override if a specific status code is given
@@ -246,7 +252,8 @@ Status TestCheckPyErrorStatusNoGIL() {
246252
lock.release();
247253
ASSERT_TRUE(st.IsUnknownError());
248254
ASSERT_EQ(st.message(), "zzzt");
249-
ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError"));
255+
ASSERT_EQ(st.detail()->ToString(),
256+
FormatPythonException("ZeroDivisionError", "zzzt"));
250257
return Status::OK();
251258
}
252259
}
@@ -257,7 +264,7 @@ Status TestRestorePyErrorBasics() {
257264
ASSERT_FALSE(PyErr_Occurred());
258265
ASSERT_TRUE(st.IsUnknownError());
259266
ASSERT_EQ(st.message(), "zzzt");
260-
ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError"));
267+
ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError", "zzzt"));
261268

262269
RestorePyError(st);
263270
ASSERT_TRUE(PyErr_Occurred());

python/pyarrow/tests/test_cffi.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,40 @@ def test_export_import_batch_reader(reader_factory):
414414
pa.RecordBatchReader._import_from_c(ptr_stream)
415415

416416

417+
@needs_cffi
418+
def test_export_import_exception_reader():
419+
# See: https://github.com/apache/arrow/issues/37164
420+
c_stream = ffi.new("struct ArrowArrayStream*")
421+
ptr_stream = int(ffi.cast("uintptr_t", c_stream))
422+
423+
gc.collect() # Make sure no Arrow data dangles in a ref cycle
424+
old_allocated = pa.total_allocated_bytes()
425+
426+
def gen():
427+
if True:
428+
try:
429+
raise ValueError('foo')
430+
except ValueError as e:
431+
raise NotImplementedError('bar') from e
432+
else:
433+
yield from make_batches()
434+
435+
original = pa.RecordBatchReader.from_batches(make_schema(), gen())
436+
original._export_to_c(ptr_stream)
437+
438+
reader = pa.RecordBatchReader._import_from_c(ptr_stream)
439+
with pytest.raises(OSError) as exc_info:
440+
reader.read_next_batch()
441+
442+
# inner *and* outer exception should be present
443+
assert 'ValueError: foo' in str(exc_info.value)
444+
assert 'NotImplementedError: bar' in str(exc_info.value)
445+
# Stacktrace containing line of the raise statement
446+
assert 'raise ValueError(\'foo\')' in str(exc_info.value)
447+
448+
assert pa.total_allocated_bytes() == old_allocated
449+
450+
417451
@needs_cffi
418452
def test_imported_batch_reader_error():
419453
c_stream = ffi.new("struct ArrowArrayStream*")

0 commit comments

Comments
 (0)