Skip to content

Commit 2bf7dac

Browse files
committed
fix leak
1 parent 1707861 commit 2bf7dac

File tree

5 files changed

+156
-141
lines changed

5 files changed

+156
-141
lines changed

crates/vm/src/stdlib/ctypes/array.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -703,12 +703,13 @@ impl PyCArray {
703703
}
704704
}
705705
Some("z") => {
706-
let ptr_val = if value.is(&vm.ctx.none) {
707-
0usize
706+
let (ptr_val, converted) = if value.is(&vm.ctx.none) {
707+
(0usize, None)
708708
} else if let Some(bytes) = value.downcast_ref::<PyBytes>() {
709-
bytes.as_bytes().as_ptr() as usize
709+
let (c, ptr) = super::base::ensure_z_null_terminated(bytes, vm);
710+
(ptr, Some(c))
710711
} else if let Ok(int_val) = value.try_index(vm) {
711-
int_val.as_bigint().to_usize().unwrap_or(0)
712+
(int_val.as_bigint().to_usize().unwrap_or(0), None)
712713
} else {
713714
return Err(vm.new_type_error(
714715
"bytes or integer address expected instead of {}".to_owned(),
@@ -717,28 +718,27 @@ impl PyCArray {
717718
if offset + element_size <= buffer.len() {
718719
buffer[offset..offset + element_size].copy_from_slice(&ptr_val.to_ne_bytes());
719720
}
721+
if let Some(c) = converted {
722+
return zelf.0.keep_ref(index, c, vm);
723+
}
720724
}
721725
Some("Z") => {
722-
let ptr_val = if value.is(&vm.ctx.none) {
723-
0usize
726+
let (ptr_val, converted) = if value.is(&vm.ctx.none) {
727+
(0usize, None)
724728
} else if let Some(s) = value.downcast_ref::<PyStr>() {
725-
let mut wchars: Vec<libc::wchar_t> = s
726-
.as_str()
727-
.chars()
728-
.map(|c| c as libc::wchar_t)
729-
.chain(std::iter::once(0))
730-
.collect();
731-
let ptr = wchars.as_mut_ptr();
732-
std::mem::forget(wchars);
733-
ptr as usize
729+
let (holder, ptr) = super::base::str_to_wchar_bytes(s.as_str(), vm);
730+
(ptr, Some(holder))
734731
} else if let Ok(int_val) = value.try_index(vm) {
735-
int_val.as_bigint().to_usize().unwrap_or(0)
732+
(int_val.as_bigint().to_usize().unwrap_or(0), None)
736733
} else {
737734
return Err(vm.new_type_error("unicode string or integer address expected"));
738735
};
739736
if offset + element_size <= buffer.len() {
740737
buffer[offset..offset + element_size].copy_from_slice(&ptr_val.to_ne_bytes());
741738
}
739+
if let Some(c) = converted {
740+
return zelf.0.keep_ref(index, c, vm);
741+
}
742742
}
743743
Some("f") => {
744744
// c_float: convert int/float to f32 bytes
@@ -791,10 +791,8 @@ impl PyCArray {
791791
}
792792
}
793793

794-
// KeepRef for c_char_p/c_wchar_p
795-
if matches!(type_code, Some("z") | Some("Z")) && !value.is(&vm.ctx.none) {
796-
zelf.0.keep_ref(index, value.to_owned(), vm)?;
797-
} else if super::base::PyCData::should_keep_ref(value) {
794+
// KeepRef
795+
if super::base::PyCData::should_keep_ref(value) {
798796
let to_keep = super::base::PyCData::get_kept_objects(value, vm);
799797
zelf.0.keep_ref(index, to_keep, vm)?;
800798
}

crates/vm/src/stdlib/ctypes/base.rs

Lines changed: 72 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,49 @@ pub(super) static CDATA_BUFFER_METHODS: BufferMethods = BufferMethods {
360360
retain: |_| {},
361361
};
362362

363+
/// Convert Vec<T> to Vec<u8> by reinterpreting the memory (same allocation).
364+
fn vec_to_bytes<T>(vec: Vec<T>) -> Vec<u8> {
365+
let len = vec.len() * std::mem::size_of::<T>();
366+
let cap = vec.capacity() * std::mem::size_of::<T>();
367+
let ptr = vec.as_ptr() as *mut u8;
368+
std::mem::forget(vec);
369+
unsafe { Vec::from_raw_parts(ptr, len, cap) }
370+
}
371+
372+
/// Ensure PyBytes is null-terminated. Returns (PyBytes to keep, pointer).
373+
/// If already contains null, returns original. Otherwise creates new with null appended.
374+
pub(super) fn ensure_z_null_terminated(
375+
bytes: &PyBytes,
376+
vm: &VirtualMachine,
377+
) -> (PyObjectRef, usize) {
378+
let data = bytes.as_bytes();
379+
if data.contains(&0) {
380+
// Already has null, use original
381+
let original: PyObjectRef = vm.ctx.new_bytes(data.to_vec()).into();
382+
(original, data.as_ptr() as usize)
383+
} else {
384+
// Create new with null appended
385+
let mut buffer = data.to_vec();
386+
buffer.push(0);
387+
let ptr = buffer.as_ptr() as usize;
388+
let new_bytes: PyObjectRef = vm.ctx.new_bytes(buffer).into();
389+
(new_bytes, ptr)
390+
}
391+
}
392+
393+
/// Convert str to null-terminated wchar_t buffer. Returns (PyBytes holder, pointer).
394+
pub(super) fn str_to_wchar_bytes(s: &str, vm: &VirtualMachine) -> (PyObjectRef, usize) {
395+
let wchars: Vec<libc::wchar_t> = s
396+
.chars()
397+
.map(|c| c as libc::wchar_t)
398+
.chain(std::iter::once(0))
399+
.collect();
400+
let ptr = wchars.as_ptr() as usize;
401+
let bytes = vec_to_bytes(wchars);
402+
let holder: PyObjectRef = vm.ctx.new_bytes(bytes).into();
403+
(holder, ptr)
404+
}
405+
363406
/// PyCData - base type for all ctypes data types
364407
#[pyclass(name = "_CData", module = "_ctypes")]
365408
#[derive(Debug, PyPayload)]
@@ -894,10 +937,10 @@ impl PyCData {
894937
.ok()
895938
.and_then(|attr| attr.downcast_ref::<PyStr>().map(|s| s.to_string()));
896939

897-
let mut bytes = if let Some(type_code) = &field_type_code {
940+
let (mut bytes, converted_value) = if let Some(type_code) = &field_type_code {
898941
PyCField::value_to_bytes_for_type(type_code, &value, size, vm)?
899942
} else {
900-
PyCField::value_to_bytes(&value, size, vm)?
943+
(PyCField::value_to_bytes(&value, size, vm)?, None)
901944
};
902945

903946
// Swap bytes for opposite endianness
@@ -907,9 +950,9 @@ impl PyCData {
907950

908951
self.write_bytes_at_offset(offset, &bytes);
909952

910-
// KeepRef
911-
if value.downcast_ref::<PyBytes>().is_some() {
912-
self.keep_ref(index, value, vm)?;
953+
// KeepRef: for z/Z types use converted value, otherwise use original
954+
if let Some(converted) = converted_value {
955+
self.keep_ref(index, converted, vm)?;
913956
} else if Self::should_keep_ref(&value) {
914957
let to_keep = Self::get_kept_objects(&value, vm);
915958
self.keep_ref(index, to_keep, vm)?;
@@ -1360,13 +1403,14 @@ impl PyCField {
13601403
}
13611404
}
13621405

1363-
/// Convert a Python value to bytes with type-specific handling for pointer types
1406+
/// Convert a Python value to bytes with type-specific handling for pointer types.
1407+
/// Returns (bytes, optional holder for wchar buffer).
13641408
fn value_to_bytes_for_type(
13651409
type_code: &str,
13661410
value: &PyObject,
13671411
size: usize,
13681412
vm: &VirtualMachine,
1369-
) -> PyResult<Vec<u8>> {
1413+
) -> PyResult<(Vec<u8>, Option<PyObjectRef>)> {
13701414
match type_code {
13711415
// c_float: always convert to float first (f_set)
13721416
"f" => {
@@ -1381,7 +1425,7 @@ impl PyCField {
13811425
)));
13821426
};
13831427
let val = f as f32;
1384-
Ok(val.to_ne_bytes().to_vec())
1428+
Ok((val.to_ne_bytes().to_vec(), None))
13851429
}
13861430
// c_double: always convert to float first (d_set)
13871431
"d" => {
@@ -1395,7 +1439,7 @@ impl PyCField {
13951439
value.class().name()
13961440
)));
13971441
};
1398-
Ok(f.to_ne_bytes().to_vec())
1442+
Ok((f.to_ne_bytes().to_vec(), None))
13991443
}
14001444
// c_longdouble: convert to float (treated as f64 in RustPython)
14011445
"g" => {
@@ -1409,18 +1453,17 @@ impl PyCField {
14091453
value.class().name()
14101454
)));
14111455
};
1412-
Ok(f.to_ne_bytes().to_vec())
1456+
Ok((f.to_ne_bytes().to_vec(), None))
14131457
}
14141458
"z" => {
1415-
// c_char_p: store pointer to bytes data
1459+
// c_char_p: store pointer to null-terminated bytes
14161460
if let Some(bytes) = value.downcast_ref::<PyBytes>() {
1417-
let addr = bytes.as_bytes().as_ptr() as usize;
1461+
let (converted, ptr) = ensure_z_null_terminated(bytes, vm);
14181462
let mut result = vec![0u8; size];
1419-
let addr_bytes = addr.to_ne_bytes();
1463+
let addr_bytes = ptr.to_ne_bytes();
14201464
let len = std::cmp::min(addr_bytes.len(), size);
14211465
result[..len].copy_from_slice(&addr_bytes[..len]);
1422-
result.push(0);
1423-
return Ok(result);
1466+
return Ok((result, Some(converted)));
14241467
}
14251468
// Integer address
14261469
if let Ok(int_val) = value.try_index(vm) {
@@ -1429,32 +1472,23 @@ impl PyCField {
14291472
let bytes = v.to_ne_bytes();
14301473
let len = std::cmp::min(bytes.len(), size);
14311474
result[..len].copy_from_slice(&bytes[..len]);
1432-
return Ok(result);
1475+
return Ok((result, None));
14331476
}
14341477
// None -> NULL pointer
14351478
if vm.is_none(value) {
1436-
return Ok(vec![0u8; size]);
1479+
return Ok((vec![0u8; size], None));
14371480
}
1438-
PyCField::value_to_bytes(value, size, vm)
1481+
Ok((PyCField::value_to_bytes(value, size, vm)?, None))
14391482
}
14401483
"Z" => {
1441-
// c_wchar_p: store pointer to wide string
1484+
// c_wchar_p: store pointer to null-terminated wchar_t buffer
14421485
if let Some(s) = value.downcast_ref::<PyStr>() {
1443-
// Create wchar_t buffer with null terminator and leak it
1444-
let mut wchars: Vec<libc::wchar_t> = s
1445-
.as_str()
1446-
.chars()
1447-
.map(|c| c as libc::wchar_t)
1448-
.chain(std::iter::once(0))
1449-
.collect();
1450-
let ptr = wchars.as_mut_ptr();
1451-
std::mem::forget(wchars); // Leak to keep the pointer valid
1452-
let addr = ptr as usize;
1486+
let (holder, ptr) = str_to_wchar_bytes(s.as_str(), vm);
14531487
let mut result = vec![0u8; size];
1454-
let addr_bytes = addr.to_ne_bytes();
1488+
let addr_bytes = ptr.to_ne_bytes();
14551489
let len = std::cmp::min(addr_bytes.len(), size);
14561490
result[..len].copy_from_slice(&addr_bytes[..len]);
1457-
return Ok(result);
1491+
return Ok((result, Some(holder)));
14581492
}
14591493
// Integer address
14601494
if let Ok(int_val) = value.try_index(vm) {
@@ -1463,13 +1497,13 @@ impl PyCField {
14631497
let bytes = v.to_ne_bytes();
14641498
let len = std::cmp::min(bytes.len(), size);
14651499
result[..len].copy_from_slice(&bytes[..len]);
1466-
return Ok(result);
1500+
return Ok((result, None));
14671501
}
14681502
// None -> NULL pointer
14691503
if vm.is_none(value) {
1470-
return Ok(vec![0u8; size]);
1504+
return Ok((vec![0u8; size], None));
14711505
}
1472-
PyCField::value_to_bytes(value, size, vm)
1506+
Ok((PyCField::value_to_bytes(value, size, vm)?, None))
14731507
}
14741508
"P" => {
14751509
// c_void_p: store integer as pointer
@@ -1479,15 +1513,15 @@ impl PyCField {
14791513
let bytes = v.to_ne_bytes();
14801514
let len = std::cmp::min(bytes.len(), size);
14811515
result[..len].copy_from_slice(&bytes[..len]);
1482-
return Ok(result);
1516+
return Ok((result, None));
14831517
}
14841518
// None -> NULL pointer
14851519
if vm.is_none(value) {
1486-
return Ok(vec![0u8; size]);
1520+
return Ok((vec![0u8; size], None));
14871521
}
1488-
PyCField::value_to_bytes(value, size, vm)
1522+
Ok((PyCField::value_to_bytes(value, size, vm)?, None))
14891523
}
1490-
_ => PyCField::value_to_bytes(value, size, vm),
1524+
_ => Ok((PyCField::value_to_bytes(value, size, vm)?, None)),
14911525
}
14921526
}
14931527

crates/vm/src/stdlib/ctypes/function.rs

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -169,30 +169,10 @@ fn conv_param(value: &PyObject, vm: &VirtualMachine) -> PyResult<(Type, FfiArgVa
169169
return Ok((ffi_type, carg.value));
170170
}
171171

172-
// 4. Python str -> pointer (c_wchar_p semantics on Windows)
172+
// 4. Python str -> pointer (use internal UTF-8 buffer)
173173
if let Some(s) = value.downcast_ref::<PyStr>() {
174-
// For oledll (Windows), strings are passed as wide char pointers
175-
// The str's internal UTF-8 bytes won't work for LPCWSTR
176-
// Need to encode as UTF-16 for Windows
177-
#[cfg(windows)]
178-
{
179-
// Convert to UTF-16 (null-terminated) and return pointer
180-
// Note: This is a temporary buffer, caller must keep value alive
181-
let wide: Vec<u16> = s
182-
.as_str()
183-
.encode_utf16()
184-
.chain(std::iter::once(0))
185-
.collect();
186-
let addr = wide.as_ptr() as usize;
187-
// FIXME: This leaks memory! Need to store the buffer somewhere
188-
std::mem::forget(wide);
189-
return Ok((Type::pointer(), FfiArgValue::Pointer(addr)));
190-
}
191-
#[cfg(not(windows))]
192-
{
193-
let addr = s.as_str().as_ptr() as usize;
194-
return Ok((Type::pointer(), FfiArgValue::Pointer(addr)));
195-
}
174+
let addr = s.as_str().as_ptr() as usize;
175+
return Ok((Type::pointer(), FfiArgValue::Pointer(addr)));
196176
}
197177

198178
// 9. Python bytes -> pointer to buffer

crates/vm/src/stdlib/ctypes/pointer.rs

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,26 @@ impl PyCPointer {
564564
std::ptr::copy_nonoverlapping(src_buffer.as_ptr(), dest_ptr, copy_len);
565565
}
566566
} else {
567-
Self::write_value_at_address(addr, element_size, &value, type_code.as_deref(), vm)?;
567+
// Handle z/Z specially to store converted value
568+
if type_code.as_deref() == Some("z")
569+
&& let Some(bytes) = value.downcast_ref::<PyBytes>()
570+
{
571+
let (converted, ptr_val) = super::base::ensure_z_null_terminated(bytes, vm);
572+
unsafe {
573+
*(addr as *mut usize) = ptr_val;
574+
}
575+
return zelf.0.keep_ref(index as usize, converted, vm);
576+
} else if type_code.as_deref() == Some("Z")
577+
&& let Some(s) = value.downcast_ref::<PyStr>()
578+
{
579+
let (holder, ptr_val) = super::base::str_to_wchar_bytes(s.as_str(), vm);
580+
unsafe {
581+
*(addr as *mut usize) = ptr_val;
582+
}
583+
return zelf.0.keep_ref(index as usize, holder, vm);
584+
} else {
585+
Self::write_value_at_address(addr, element_size, &value, type_code.as_deref(), vm)?;
586+
}
568587
}
569588

570589
// KeepRef: store reference to keep value alive using actual index
@@ -616,41 +635,16 @@ impl PyCPointer {
616635
let ptr = addr as *mut u8;
617636

618637
// Handle c_char_p (z) and c_wchar_p (Z) - store pointer address
638+
// Note: PyBytes/PyStr cases are handled by caller (setitem_by_index)
619639
match type_code {
620-
Some("z") => {
621-
// c_char_p: store pointer to bytes buffer
622-
let ptr_val = if vm.is_none(value) {
623-
0usize
624-
} else if let Some(bytes) = value.downcast_ref::<PyBytes>() {
625-
bytes.as_bytes().as_ptr() as usize
626-
} else if let Ok(int_val) = value.try_index(vm) {
627-
int_val.as_bigint().to_usize().unwrap_or(0)
628-
} else {
629-
return Err(vm.new_type_error("bytes or integer address expected"));
630-
};
631-
*(ptr as *mut usize) = ptr_val;
632-
return Ok(());
633-
}
634-
Some("Z") => {
635-
// c_wchar_p: store pointer to wchar_t buffer
640+
Some("z") | Some("Z") => {
636641
let ptr_val = if vm.is_none(value) {
637642
0usize
638-
} else if let Some(s) = value.downcast_ref::<PyStr>() {
639-
// Create wchar_t buffer (UTF-32 on macOS/Linux)
640-
let mut wchars: Vec<libc::wchar_t> = s
641-
.as_str()
642-
.chars()
643-
.map(|c| c as libc::wchar_t)
644-
.chain(std::iter::once(0))
645-
.collect();
646-
let ptr = wchars.as_mut_ptr();
647-
std::mem::forget(wchars);
648-
ptr as usize
649643
} else if let Ok(int_val) = value.try_index(vm) {
650644
int_val.as_bigint().to_usize().unwrap_or(0)
651645
} else {
652646
return Err(vm.new_type_error(
653-
"unicode string or integer address expected".to_owned(),
647+
"bytes/string or integer address expected".to_owned(),
654648
));
655649
};
656650
*(ptr as *mut usize) = ptr_val;

0 commit comments

Comments
 (0)