Skip to content

Commit e640ebf

Browse files
committed
fix leak
1 parent f0602c4 commit e640ebf

File tree

5 files changed

+146
-94
lines changed

5 files changed

+146
-94
lines changed

crates/vm/src/stdlib/ctypes/array.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -724,15 +724,9 @@ impl PyCArray {
724724
let ptr_val = if value.is(&vm.ctx.none) {
725725
0usize
726726
} else if let Some(s) = value.downcast_ref::<PyStr>() {
727-
let mut wchars: Vec<libc::wchar_t> = s
728-
.as_str()
729-
.chars()
730-
.map(|c| c as libc::wchar_t)
731-
.chain(std::iter::once(0))
732-
.collect();
733-
let ptr = wchars.as_mut_ptr();
734-
std::mem::forget(wchars);
735-
ptr as usize
727+
let (holder, ptr) = super::base::make_wchar_buffer(s.as_str(), vm);
728+
*zelf.0.buffer_holder.write() = Some(holder);
729+
ptr
736730
} else if let Ok(int_val) = value.try_index(vm) {
737731
int_val.as_bigint().to_usize().unwrap_or(0)
738732
} else {

crates/vm/src/stdlib/ctypes/base.rs

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,29 @@ pub(super) static CDATA_BUFFER_METHODS: BufferMethods = BufferMethods {
360360
retain: |_| {},
361361
};
362362

363+
/// Convert Vec<T> to Vec<u8> by reinterpreting the memory (same allocation).
364+
pub(super) fn vec_to_bytes<T>(vec: Vec<T>) -> Vec<u8> {
365+
let len = vec.len() * std::mem::size_of::<T>();
366+
let cap = vec.capacity() * std::mem::size_of::<T>();
367+
let ptr = vec.as_ptr() as *mut u8;
368+
std::mem::forget(vec);
369+
unsafe { Vec::from_raw_parts(ptr, len, cap) }
370+
}
371+
372+
/// Create a null-terminated wchar_t buffer from str and return (holder, pointer).
373+
/// The holder is a PyBytes that owns the buffer to prevent memory leak.
374+
pub(super) fn make_wchar_buffer(s: &str, vm: &VirtualMachine) -> (PyObjectRef, usize) {
375+
let wchars: Vec<libc::wchar_t> = s
376+
.chars()
377+
.map(|c| c as libc::wchar_t)
378+
.chain(std::iter::once(0))
379+
.collect();
380+
let ptr = wchars.as_ptr() as usize;
381+
let bytes = vec_to_bytes(wchars);
382+
let holder: PyObjectRef = vm.ctx.new_bytes(bytes).into();
383+
(holder, ptr)
384+
}
385+
363386
/// PyCData - base type for all ctypes data types
364387
#[pyclass(name = "_CData", module = "_ctypes")]
365388
#[derive(Debug, PyPayload)]
@@ -380,6 +403,9 @@ pub struct PyCData {
380403
pub objects: PyRwLock<Option<PyObjectRef>>,
381404
/// number of references we need (b_length)
382405
pub length: AtomicCell<usize>,
406+
/// Holds temporary buffer (e.g., null-terminated string) to prevent memory leak
407+
/// This is separate from objects to maintain API compatibility
408+
pub buffer_holder: PyRwLock<Option<PyObjectRef>>,
383409
}
384410

385411
impl PyCData {
@@ -392,6 +418,7 @@ impl PyCData {
392418
index: AtomicCell::new(0),
393419
objects: PyRwLock::new(None),
394420
length: AtomicCell::new(stg_info.length),
421+
buffer_holder: PyRwLock::new(None),
395422
}
396423
}
397424

@@ -404,6 +431,7 @@ impl PyCData {
404431
index: AtomicCell::new(0),
405432
objects: PyRwLock::new(objects),
406433
length: AtomicCell::new(0),
434+
buffer_holder: PyRwLock::new(None),
407435
}
408436
}
409437

@@ -420,6 +448,7 @@ impl PyCData {
420448
index: AtomicCell::new(0),
421449
objects: PyRwLock::new(objects),
422450
length: AtomicCell::new(length),
451+
buffer_holder: PyRwLock::new(None),
423452
}
424453
}
425454

@@ -440,6 +469,7 @@ impl PyCData {
440469
index: AtomicCell::new(0),
441470
objects: PyRwLock::new(None),
442471
length: AtomicCell::new(0),
472+
buffer_holder: PyRwLock::new(None),
443473
}
444474
}
445475

@@ -462,6 +492,7 @@ impl PyCData {
462492
index: AtomicCell::new(idx),
463493
objects: PyRwLock::new(None),
464494
length: AtomicCell::new(length),
495+
buffer_holder: PyRwLock::new(None),
465496
}
466497
}
467498

@@ -488,6 +519,7 @@ impl PyCData {
488519
index: AtomicCell::new(idx),
489520
objects: PyRwLock::new(None),
490521
length: AtomicCell::new(0),
522+
buffer_holder: PyRwLock::new(None),
491523
}
492524
}
493525

@@ -522,6 +554,7 @@ impl PyCData {
522554
index: AtomicCell::new(0),
523555
objects: PyRwLock::new(Some(objects_dict.into())),
524556
length: AtomicCell::new(length),
557+
buffer_holder: PyRwLock::new(None),
525558
}
526559
}
527560

@@ -894,10 +927,10 @@ impl PyCData {
894927
.ok()
895928
.and_then(|attr| attr.downcast_ref::<PyStr>().map(|s| s.to_string()));
896929

897-
let mut bytes = if let Some(type_code) = &field_type_code {
930+
let (mut bytes, wchar_holder) = if let Some(type_code) = &field_type_code {
898931
PyCField::value_to_bytes_for_type(type_code, &value, size, vm)?
899932
} else {
900-
PyCField::value_to_bytes(&value, size, vm)?
933+
(PyCField::value_to_bytes(&value, size, vm)?, None)
901934
};
902935

903936
// Swap bytes for opposite endianness
@@ -907,6 +940,11 @@ impl PyCData {
907940

908941
self.write_bytes_at_offset(offset, &bytes);
909942

943+
// Store wchar holder if present (for c_wchar_p)
944+
if let Some(holder) = wchar_holder {
945+
*self.buffer_holder.write() = Some(holder);
946+
}
947+
910948
// KeepRef
911949
if value.downcast_ref::<PyBytes>().is_some() {
912950
self.keep_ref(index, value, vm)?;
@@ -1360,13 +1398,14 @@ impl PyCField {
13601398
}
13611399
}
13621400

1363-
/// Convert a Python value to bytes with type-specific handling for pointer types
1401+
/// Convert a Python value to bytes with type-specific handling for pointer types.
1402+
/// Returns (bytes, optional holder for wchar buffer).
13641403
fn value_to_bytes_for_type(
13651404
type_code: &str,
13661405
value: &PyObject,
13671406
size: usize,
13681407
vm: &VirtualMachine,
1369-
) -> PyResult<Vec<u8>> {
1408+
) -> PyResult<(Vec<u8>, Option<PyObjectRef>)> {
13701409
match type_code {
13711410
// c_float: always convert to float first (f_set)
13721411
"f" => {
@@ -1381,7 +1420,7 @@ impl PyCField {
13811420
)));
13821421
};
13831422
let val = f as f32;
1384-
Ok(val.to_ne_bytes().to_vec())
1423+
Ok((val.to_ne_bytes().to_vec(), None))
13851424
}
13861425
// c_double: always convert to float first (d_set)
13871426
"d" => {
@@ -1395,7 +1434,7 @@ impl PyCField {
13951434
value.class().name()
13961435
)));
13971436
};
1398-
Ok(f.to_ne_bytes().to_vec())
1437+
Ok((f.to_ne_bytes().to_vec(), None))
13991438
}
14001439
// c_longdouble: convert to float (treated as f64 in RustPython)
14011440
"g" => {
@@ -1409,7 +1448,7 @@ impl PyCField {
14091448
value.class().name()
14101449
)));
14111450
};
1412-
Ok(f.to_ne_bytes().to_vec())
1451+
Ok((f.to_ne_bytes().to_vec(), None))
14131452
}
14141453
"z" => {
14151454
// c_char_p: store pointer to bytes data
@@ -1420,7 +1459,7 @@ impl PyCField {
14201459
let len = std::cmp::min(addr_bytes.len(), size);
14211460
result[..len].copy_from_slice(&addr_bytes[..len]);
14221461
result.push(0);
1423-
return Ok(result);
1462+
return Ok((result, None));
14241463
}
14251464
// Integer address
14261465
if let Ok(int_val) = value.try_index(vm) {
@@ -1429,32 +1468,23 @@ impl PyCField {
14291468
let bytes = v.to_ne_bytes();
14301469
let len = std::cmp::min(bytes.len(), size);
14311470
result[..len].copy_from_slice(&bytes[..len]);
1432-
return Ok(result);
1471+
return Ok((result, None));
14331472
}
14341473
// None -> NULL pointer
14351474
if vm.is_none(value) {
1436-
return Ok(vec![0u8; size]);
1475+
return Ok((vec![0u8; size], None));
14371476
}
1438-
PyCField::value_to_bytes(value, size, vm)
1477+
Ok((PyCField::value_to_bytes(value, size, vm)?, None))
14391478
}
14401479
"Z" => {
14411480
// c_wchar_p: store pointer to wide string
14421481
if let Some(s) = value.downcast_ref::<PyStr>() {
1443-
// Create wchar_t buffer with null terminator and leak it
1444-
let mut wchars: Vec<libc::wchar_t> = s
1445-
.as_str()
1446-
.chars()
1447-
.map(|c| c as libc::wchar_t)
1448-
.chain(std::iter::once(0))
1449-
.collect();
1450-
let ptr = wchars.as_mut_ptr();
1451-
std::mem::forget(wchars); // Leak to keep the pointer valid
1452-
let addr = ptr as usize;
1482+
let (holder, ptr) = make_wchar_buffer(s.as_str(), vm);
14531483
let mut result = vec![0u8; size];
1454-
let addr_bytes = addr.to_ne_bytes();
1484+
let addr_bytes = ptr.to_ne_bytes();
14551485
let len = std::cmp::min(addr_bytes.len(), size);
14561486
result[..len].copy_from_slice(&addr_bytes[..len]);
1457-
return Ok(result);
1487+
return Ok((result, Some(holder)));
14581488
}
14591489
// Integer address
14601490
if let Ok(int_val) = value.try_index(vm) {
@@ -1463,13 +1493,13 @@ impl PyCField {
14631493
let bytes = v.to_ne_bytes();
14641494
let len = std::cmp::min(bytes.len(), size);
14651495
result[..len].copy_from_slice(&bytes[..len]);
1466-
return Ok(result);
1496+
return Ok((result, None));
14671497
}
14681498
// None -> NULL pointer
14691499
if vm.is_none(value) {
1470-
return Ok(vec![0u8; size]);
1500+
return Ok((vec![0u8; size], None));
14711501
}
1472-
PyCField::value_to_bytes(value, size, vm)
1502+
Ok((PyCField::value_to_bytes(value, size, vm)?, None))
14731503
}
14741504
"P" => {
14751505
// c_void_p: store integer as pointer
@@ -1479,15 +1509,15 @@ impl PyCField {
14791509
let bytes = v.to_ne_bytes();
14801510
let len = std::cmp::min(bytes.len(), size);
14811511
result[..len].copy_from_slice(&bytes[..len]);
1482-
return Ok(result);
1512+
return Ok((result, None));
14831513
}
14841514
// None -> NULL pointer
14851515
if vm.is_none(value) {
1486-
return Ok(vec![0u8; size]);
1516+
return Ok((vec![0u8; size], None));
14871517
}
1488-
PyCField::value_to_bytes(value, size, vm)
1518+
Ok((PyCField::value_to_bytes(value, size, vm)?, None))
14891519
}
1490-
_ => PyCField::value_to_bytes(value, size, vm),
1520+
_ => Ok((PyCField::value_to_bytes(value, size, vm)?, None)),
14911521
}
14921522
}
14931523

crates/vm/src/stdlib/ctypes/function.rs

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -149,30 +149,10 @@ fn conv_param(value: &PyObject, vm: &VirtualMachine) -> PyResult<(Type, FfiArgVa
149149
return Ok((ffi_type, carg.value));
150150
}
151151

152-
// 4. Python str -> pointer (c_wchar_p semantics on Windows)
152+
// 4. Python str -> pointer (use internal UTF-8 buffer)
153153
if let Some(s) = value.downcast_ref::<PyStr>() {
154-
// For oledll (Windows), strings are passed as wide char pointers
155-
// The str's internal UTF-8 bytes won't work for LPCWSTR
156-
// Need to encode as UTF-16 for Windows
157-
#[cfg(windows)]
158-
{
159-
// Convert to UTF-16 (null-terminated) and return pointer
160-
// Note: This is a temporary buffer, caller must keep value alive
161-
let wide: Vec<u16> = s
162-
.as_str()
163-
.encode_utf16()
164-
.chain(std::iter::once(0))
165-
.collect();
166-
let addr = wide.as_ptr() as usize;
167-
// FIXME: This leaks memory! Need to store the buffer somewhere
168-
std::mem::forget(wide);
169-
return Ok((Type::pointer(), FfiArgValue::Pointer(addr)));
170-
}
171-
#[cfg(not(windows))]
172-
{
173-
let addr = s.as_str().as_ptr() as usize;
174-
return Ok((Type::pointer(), FfiArgValue::Pointer(addr)));
175-
}
154+
let addr = s.as_str().as_ptr() as usize;
155+
return Ok((Type::pointer(), FfiArgValue::Pointer(addr)));
176156
}
177157

178158
// 9. Python bytes -> pointer to buffer

crates/vm/src/stdlib/ctypes/pointer.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,18 @@ impl PyCPointer {
564564
std::ptr::copy_nonoverlapping(src_buffer.as_ptr(), dest_ptr, copy_len);
565565
}
566566
} else {
567-
Self::write_value_at_address(addr, element_size, &value, type_code.as_deref(), vm)?;
567+
// Handle c_wchar_p (Z) specially to store holder
568+
if type_code.as_deref() == Some("Z")
569+
&& let Some(s) = value.downcast_ref::<PyStr>()
570+
{
571+
let (holder, ptr_val) = super::base::make_wchar_buffer(s.as_str(), vm);
572+
*zelf.0.buffer_holder.write() = Some(holder);
573+
unsafe {
574+
*(addr as *mut usize) = ptr_val;
575+
}
576+
} else {
577+
Self::write_value_at_address(addr, element_size, &value, type_code.as_deref(), vm)?;
578+
}
568579
}
569580

570581
// KeepRef: store reference to keep value alive using actual index

0 commit comments

Comments
 (0)