pyo3/types/bytearray.rs
1use crate::err::{PyErr, PyResult};
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::py_result_ext::PyResultExt;
5use crate::sync::critical_section::with_critical_section;
6use crate::{ffi, PyAny, Python};
7use std::slice;
8
9/// Represents a Python `bytearray`.
10///
11/// Values of this type are accessed via PyO3's smart pointers, e.g. as
12/// [`Py<PyByteArray>`][crate::Py] or [`Bound<'py, PyByteArray>`][Bound].
13///
14/// For APIs available on `bytearray` objects, see the [`PyByteArrayMethods`] trait which is implemented for
15/// [`Bound<'py, PyByteArray>`][Bound].
16#[repr(transparent)]
17pub struct PyByteArray(PyAny);
18
19pyobject_native_type_core!(PyByteArray, pyobject_native_static_type_object!(ffi::PyByteArray_Type), "builtins", "bytearray", #checkfunction=ffi::PyByteArray_Check);
20
21impl PyByteArray {
22 /// Creates a new Python bytearray object.
23 ///
24 /// The byte string is initialized by copying the data from the `&[u8]`.
25 pub fn new<'py>(py: Python<'py>, src: &[u8]) -> Bound<'py, PyByteArray> {
26 let ptr = src.as_ptr().cast();
27 let len = src.len() as ffi::Py_ssize_t;
28 unsafe {
29 ffi::PyByteArray_FromStringAndSize(ptr, len)
30 .assume_owned(py)
31 .cast_into_unchecked()
32 }
33 }
34
35 /// Creates a new Python `bytearray` object with an `init` closure to write its contents.
36 /// Before calling `init` the bytearray is zero-initialised.
37 /// * If Python raises a MemoryError on the allocation, `new_with` will return
38 /// it inside `Err`.
39 /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
40 /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyByteArray)`.
41 ///
42 /// # Examples
43 ///
44 /// ```
45 /// use pyo3::{prelude::*, types::PyByteArray};
46 ///
47 /// # fn main() -> PyResult<()> {
48 /// Python::attach(|py| -> PyResult<()> {
49 /// let py_bytearray = PyByteArray::new_with(py, 10, |bytes: &mut [u8]| {
50 /// bytes.copy_from_slice(b"Hello Rust");
51 /// Ok(())
52 /// })?;
53 /// let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
54 /// assert_eq!(bytearray, b"Hello Rust");
55 /// Ok(())
56 /// })
57 /// # }
58 /// ```
59 pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyByteArray>>
60 where
61 F: FnOnce(&mut [u8]) -> PyResult<()>,
62 {
63 unsafe {
64 // Allocate buffer and check for an error
65 let pybytearray: Bound<'_, Self> =
66 ffi::PyByteArray_FromStringAndSize(std::ptr::null(), len as ffi::Py_ssize_t)
67 .assume_owned_or_err(py)?
68 .cast_into_unchecked();
69
70 let buffer: *mut u8 = ffi::PyByteArray_AsString(pybytearray.as_ptr()).cast();
71 debug_assert!(!buffer.is_null());
72 // Zero-initialise the uninitialised bytearray
73 std::ptr::write_bytes(buffer, 0u8, len);
74 // (Further) Initialise the bytearray in init
75 // If init returns an Err, pypybytearray will automatically deallocate the buffer
76 init(std::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytearray)
77 }
78 }
79
80 /// Creates a new Python `bytearray` object from another Python object that
81 /// implements the buffer protocol.
82 pub fn from<'py>(src: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyByteArray>> {
83 unsafe {
84 ffi::PyByteArray_FromObject(src.as_ptr())
85 .assume_owned_or_err(src.py())
86 .cast_into_unchecked()
87 }
88 }
89}
90
91/// Implementation of functionality for [`PyByteArray`].
92///
93/// These methods are defined for the `Bound<'py, PyByteArray>` smart pointer, so to use method call
94/// syntax these methods are separated into a trait, because stable Rust does not yet support
95/// `arbitrary_self_types`.
96#[doc(alias = "PyByteArray")]
97pub trait PyByteArrayMethods<'py>: crate::sealed::Sealed {
98 /// Gets the length of the bytearray.
99 fn len(&self) -> usize;
100
101 /// Checks if the bytearray is empty.
102 fn is_empty(&self) -> bool;
103
104 /// Gets the start of the buffer containing the contents of the bytearray.
105 ///
106 /// # Safety
107 ///
108 /// See the safety requirements of [`PyByteArrayMethods::as_bytes`] and [`PyByteArrayMethods::as_bytes_mut`].
109 fn data(&self) -> *mut u8;
110
111 /// Extracts a slice of the `ByteArray`'s entire buffer.
112 ///
113 /// # Safety
114 ///
115 /// Mutation of the `bytearray` invalidates the slice. If it is used afterwards, the behavior is
116 /// undefined.
117 ///
118 /// These mutations may occur in Python code as well as from Rust:
119 /// - Calling methods like [`PyByteArrayMethods::as_bytes_mut`] and [`PyByteArrayMethods::resize`] will
120 /// invalidate the slice.
121 /// - Actions like dropping objects or raising exceptions can invoke `__del__`methods or signal
122 /// handlers, which may execute arbitrary Python code. This means that if Python code has a
123 /// reference to the `bytearray` you cannot safely use the vast majority of PyO3's API whilst
124 /// using the slice.
125 ///
126 /// As a result, this slice should only be used for short-lived operations without executing any
127 /// Python code, such as copying into a Vec.
128 /// For free-threaded Python support see also [`with_critical_section`].
129 ///
130 /// # Examples
131 ///
132 /// ```rust
133 /// use pyo3::prelude::*;
134 /// use pyo3::exceptions::PyRuntimeError;
135 /// use pyo3::sync::critical_section::with_critical_section;
136 /// use pyo3::types::PyByteArray;
137 ///
138 /// #[pyfunction]
139 /// fn a_valid_function(bytes: &Bound<'_, PyByteArray>) -> PyResult<()> {
140 /// let section = with_critical_section(bytes, || {
141 /// // SAFETY: We promise to not let the interpreter regain control over the bytearray
142 /// // or invoke any PyO3 APIs while using the slice.
143 /// let slice = unsafe { bytes.as_bytes() };
144 ///
145 /// // Copy only a section of `bytes` while avoiding
146 /// // `to_vec` which copies the entire thing.
147 /// slice.get(6..11)
148 /// .map(Vec::from)
149 /// .ok_or_else(|| PyRuntimeError::new_err("input is not long enough"))
150 /// })?;
151 ///
152 /// // Now we can do things with `section` and call PyO3 APIs again.
153 /// // ...
154 /// # assert_eq!(§ion, b"world");
155 ///
156 /// Ok(())
157 /// }
158 /// # fn main() -> PyResult<()> {
159 /// # Python::attach(|py| -> PyResult<()> {
160 /// # let fun = wrap_pyfunction!(a_valid_function, py)?;
161 /// # let locals = pyo3::types::PyDict::new(py);
162 /// # locals.set_item("a_valid_function", fun)?;
163 /// #
164 /// # py.run(cr#"b = bytearray(b"hello world")
165 /// # a_valid_function(b)
166 /// #
167 /// # try:
168 /// # a_valid_function(bytearray())
169 /// # except RuntimeError as e:
170 /// # assert str(e) == 'input is not long enough'"#,
171 /// # None,
172 /// # Some(&locals),
173 /// # )?;
174 /// #
175 /// # Ok(())
176 /// # })
177 /// # }
178 /// ```
179 ///
180 /// # Incorrect usage
181 ///
182 /// The following `bug` function is unsound ⚠️
183 ///
184 /// ```rust,no_run
185 /// # use pyo3::prelude::*;
186 /// # use pyo3::types::PyByteArray;
187 ///
188 /// # #[allow(dead_code)]
189 /// #[pyfunction]
190 /// fn bug(py: Python<'_>, bytes: &Bound<'_, PyByteArray>) {
191 /// // No critical section is being used.
192 /// // This means that for no-gil Python another thread could be modifying the
193 /// // bytearray concurrently and thus invalidate `slice` any time.
194 /// let slice = unsafe { bytes.as_bytes() };
195 ///
196 /// // This explicitly yields control back to the Python interpreter...
197 /// // ...but it's not always this obvious. Many things do this implicitly.
198 /// py.detach(|| {
199 /// // Python code could be mutating through its handle to `bytes`,
200 /// // which makes reading it a data race, which is undefined behavior.
201 /// println!("{:?}", slice[0]);
202 /// });
203 ///
204 /// // Python code might have mutated it, so we can not rely on the slice
205 /// // remaining valid. As such this is also undefined behavior.
206 /// println!("{:?}", slice[0]);
207 /// }
208 /// ```
209 unsafe fn as_bytes(&self) -> &[u8];
210
211 /// Extracts a mutable slice of the `ByteArray`'s entire buffer.
212 ///
213 /// # Safety
214 ///
215 /// Any other accesses of the `bytearray`'s buffer invalidate the slice. If it is used
216 /// afterwards, the behavior is undefined. The safety requirements of [`PyByteArrayMethods::as_bytes`]
217 /// apply to this function as well.
218 #[expect(clippy::mut_from_ref)]
219 unsafe fn as_bytes_mut(&self) -> &mut [u8];
220
221 /// Copies the contents of the bytearray to a Rust vector.
222 ///
223 /// # Examples
224 ///
225 /// ```
226 /// # use pyo3::prelude::*;
227 /// # use pyo3::types::PyByteArray;
228 /// # Python::attach(|py| {
229 /// let bytearray = PyByteArray::new(py, b"Hello World.");
230 /// let mut copied_message = bytearray.to_vec();
231 /// assert_eq!(b"Hello World.", copied_message.as_slice());
232 ///
233 /// copied_message[11] = b'!';
234 /// assert_eq!(b"Hello World!", copied_message.as_slice());
235 ///
236 /// pyo3::py_run!(py, bytearray, "assert bytearray == b'Hello World.'");
237 /// # });
238 /// ```
239 fn to_vec(&self) -> Vec<u8>;
240
241 /// Resizes the bytearray object to the new length `len`.
242 ///
243 /// Note that this will invalidate any pointers obtained by [PyByteArrayMethods::data], as well as
244 /// any (unsafe) slices obtained from [PyByteArrayMethods::as_bytes] and [PyByteArrayMethods::as_bytes_mut].
245 fn resize(&self, len: usize) -> PyResult<()>;
246}
247
248impl<'py> PyByteArrayMethods<'py> for Bound<'py, PyByteArray> {
249 #[inline]
250 fn len(&self) -> usize {
251 // non-negative Py_ssize_t should always fit into Rust usize
252 unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize }
253 }
254
255 fn is_empty(&self) -> bool {
256 self.len() == 0
257 }
258
259 fn data(&self) -> *mut u8 {
260 self.as_borrowed().data()
261 }
262
263 unsafe fn as_bytes(&self) -> &[u8] {
264 unsafe { self.as_borrowed().as_bytes() }
265 }
266
267 unsafe fn as_bytes_mut(&self) -> &mut [u8] {
268 unsafe { self.as_borrowed().as_bytes_mut() }
269 }
270
271 fn to_vec(&self) -> Vec<u8> {
272 with_critical_section(self, || {
273 // SAFETY:
274 // * `self` is a `Bound` object, which guarantees that the Python GIL is held.
275 // * For no-gil Python, a critical section is used in lieu of the GIL.
276 // * We don't interact with the interpreter
277 // * We don't mutate the underlying slice
278 unsafe { self.as_bytes() }.to_vec()
279 })
280 }
281
282 fn resize(&self, len: usize) -> PyResult<()> {
283 unsafe {
284 let result = ffi::PyByteArray_Resize(self.as_ptr(), len as ffi::Py_ssize_t);
285 if result == 0 {
286 Ok(())
287 } else {
288 Err(PyErr::fetch(self.py()))
289 }
290 }
291 }
292}
293
294impl<'a> Borrowed<'a, '_, PyByteArray> {
295 fn data(&self) -> *mut u8 {
296 unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() }
297 }
298
299 pub(crate) unsafe fn as_bytes(self) -> &'a [u8] {
300 unsafe { slice::from_raw_parts(self.data(), self.len()) }
301 }
302
303 unsafe fn as_bytes_mut(self) -> &'a mut [u8] {
304 unsafe { slice::from_raw_parts_mut(self.data(), self.len()) }
305 }
306}
307
308impl<'py> TryFrom<&Bound<'py, PyAny>> for Bound<'py, PyByteArray> {
309 type Error = crate::PyErr;
310
311 /// Creates a new Python `bytearray` object from another Python object that
312 /// implements the buffer protocol.
313 fn try_from(value: &Bound<'py, PyAny>) -> Result<Self, Self::Error> {
314 PyByteArray::from(value)
315 }
316}
317
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyByteArray, PyByteArrayMethods};
    use crate::{exceptions, Bound, Py, PyAny, Python};

    /// `len` reports the number of bytes the bytearray was created from.
    #[test]
    fn test_len() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);
            assert_eq!(src.len(), bytearray.len());
        });
    }

    /// `as_bytes` exposes the original contents and starts at the `data` pointer.
    #[test]
    fn test_as_bytes() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data() as *const _, slice.as_ptr());
        });
    }

    /// Writes through `as_bytes_mut` are visible from the Python side.
    #[test]
    fn test_as_bytes_mut() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes_mut() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data(), slice.as_mut_ptr());

            slice[0..5].copy_from_slice(b"Hi...");

            assert_eq!(bytearray.str().unwrap(), "bytearray(b'Hi... Python')");
        });
    }

    /// `to_vec` returns a copy of the contents.
    #[test]
    fn test_to_vec() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let vec = bytearray.to_vec();
            assert_eq!(src, vec.as_slice());
        });
    }

    /// `PyByteArray::from` accepts an existing bytearray as its source object.
    #[test]
    fn test_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let ba: Py<PyAny> = bytearray.into();
            let bytearray = PyByteArray::from(ba.bind(py)).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    /// `PyByteArray::from` reports a `TypeError` for `None`.
    #[test]
    fn test_from_err() {
        Python::attach(|py| {
            if let Err(err) = PyByteArray::from(py.None().bind(py)) {
                assert!(err.is_instance_of::<exceptions::PyTypeError>(py));
            } else {
                panic!("error");
            }
        });
    }

    /// The `TryFrom` impl converts a `&Bound<PyAny>` into a bytearray with the same contents.
    #[test]
    fn test_try_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray: &Bound<'_, PyAny> = &PyByteArray::new(py, src);
            let bytearray: Bound<'_, PyByteArray> = TryInto::try_into(bytearray).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    /// `resize` changes the reported length.
    #[test]
    fn test_resize() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            bytearray.resize(20).unwrap();
            assert_eq!(20, bytearray.len());
        });
    }

    /// `new_with` hands the closure a buffer of the requested length and keeps what it writes.
    #[test]
    fn test_byte_array_new_with() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |b: &mut [u8]| {
                b.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, b"Hello Rust");
            Ok(())
        })
    }

    /// A closure that writes nothing leaves the `new_with` buffer zeroed.
    #[test]
    fn test_byte_array_new_with_zero_initialised() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |_b: &mut [u8]| Ok(()))?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, &[0; 10]);
            Ok(())
        })
    }

    /// An error returned by the `new_with` closure is propagated unchanged.
    #[test]
    fn test_byte_array_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let py_bytearray_result = PyByteArray::new_with(py, 10, |_b: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(py_bytearray_result.is_err());
            assert!(py_bytearray_result
                .err()
                .unwrap()
                .is_instance_of::<PyValueError>(py));
        })
    }

    /// Races a thread that keeps extending the shared bytearray against a thread that reads
    /// it inside a critical section, and checks that the reader never sees foreign bytes.
    // * wasm has no threading support
    // * CPython 3.13t is unsound => test fails
    #[cfg(all(
        not(target_family = "wasm"),
        any(Py_3_14, not(all(Py_3_13, Py_GIL_DISABLED)))
    ))]
    #[test]
    fn test_data_integrity_in_critical_section() {
        use crate::instance::Py;
        use crate::sync::{critical_section::with_critical_section, MutexExt};

        use std::sync::atomic::{AtomicBool, Ordering};
        use std::sync::Mutex;
        use std::thread;
        use std::thread::ScopedJoinHandle;
        use std::time::Duration;

        const SIZE: usize = 1_000_000;
        const DATA_VALUE: u8 = 42;

        // Builds a bytearray of `size` bytes, each set to `value`.
        fn make_byte_array(py: Python<'_>, size: usize, value: u8) -> Bound<'_, PyByteArray> {
            PyByteArray::new_with(py, size, |b| {
                b.fill(value);
                Ok(())
            })
            .unwrap()
        }

        let data: Mutex<Py<PyByteArray>> = Mutex::new(Python::attach(|py| {
            make_byte_array(py, SIZE, DATA_VALUE).unbind()
        }));

        // Returns a new bound handle to the bytearray currently stored in `data`.
        fn get_data<'py>(
            data: &Mutex<Py<PyByteArray>>,
            py: Python<'py>,
        ) -> Bound<'py, PyByteArray> {
            data.lock_py_attached(py).unwrap().bind(py).clone()
        }

        // Replaces the bytearray stored in `data` with `new`.
        fn set_data(data: &Mutex<Py<PyByteArray>>, new: Bound<'_, PyByteArray>) {
            let py = new.py();
            *data.lock_py_attached(py).unwrap() = new.unbind()
        }

        let running = AtomicBool::new(true);
        let extending = AtomicBool::new(false);

        // continuously extends and resets the bytearray in data
        let worker1 = || {
            let mut rounds = 0;
            while running.load(Ordering::SeqCst) && rounds < 50 {
                Python::attach(|py| {
                    let byte_array = get_data(&data, py);
                    extending.store(true, Ordering::SeqCst);
                    byte_array
                        .call_method("extend", (&byte_array,), None)
                        .unwrap();
                    extending.store(false, Ordering::SeqCst);
                    set_data(&data, make_byte_array(py, SIZE, DATA_VALUE));
                    rounds += 1;
                });
            }
        };

        // continuously checks the integrity of bytearray in data
        let worker2 = || {
            while running.load(Ordering::SeqCst) {
                if !extending.load(Ordering::SeqCst) {
                    // wait until we have a chance to read inconsistent state
                    continue;
                }
                Python::attach(|py| {
                    let read = get_data(&data, py);
                    if read.len() == SIZE {
                        // extend is still not done => wait even more
                        return;
                    }
                    with_critical_section(&read, || {
                        // SAFETY: we are in a critical section
                        // This is the whole point of the test: make sure that a
                        // critical section is sufficient to ensure that the data
                        // read is consistent.
                        unsafe {
                            let bytes = read.as_bytes();
                            // Check the tail and the head of the buffer independently;
                            // both ends must contain nothing but `DATA_VALUE`.
                            assert!(bytes.iter().rev().take(50).all(|v| *v == DATA_VALUE));
                            assert!(bytes.iter().take(50).all(|v| *v == DATA_VALUE));
                        }
                    });
                });
            }
        };

        thread::scope(|s| {
            let mut handle1 = Some(s.spawn(worker1));
            let mut handle2 = Some(s.spawn(worker2));
            let mut handles = [&mut handle1, &mut handle2];

            // Poll both workers for at most one second, stopping early as soon as one of
            // them has finished (or panicked).
            let t0 = std::time::Instant::now();
            while t0.elapsed() < Duration::from_secs(1) {
                for handle in &mut handles {
                    if handle
                        .as_ref()
                        .map(ScopedJoinHandle::is_finished)
                        .unwrap_or(false)
                    {
                        let res = handle.take().unwrap().join();
                        if res.is_err() {
                            // Tell the other worker to stop before re-raising the panic.
                            running.store(false, Ordering::SeqCst);
                        }
                        res.unwrap();
                    }
                }
                if handles.iter().any(|handle| handle.is_none()) {
                    break;
                }
            }
            // Signal shutdown and wait for whichever workers are still running.
            running.store(false, Ordering::SeqCst);
            for handle in &mut handles {
                if let Some(handle) = handle.take() {
                    handle.join().unwrap()
                }
            }
        });
    }
}