pyo3/types/
bytearray.rs

1use crate::err::{PyErr, PyResult};
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::py_result_ext::PyResultExt;
5use crate::sync::critical_section::with_critical_section;
6use crate::{ffi, PyAny, Python};
7use std::slice;
8
9/// Represents a Python `bytearray`.
10///
11/// Values of this type are accessed via PyO3's smart pointers, e.g. as
12/// [`Py<PyByteArray>`][crate::Py] or [`Bound<'py, PyByteArray>`][Bound].
13///
14/// For APIs available on `bytearray` objects, see the [`PyByteArrayMethods`] trait which is implemented for
15/// [`Bound<'py, PyByteArray>`][Bound].
16#[repr(transparent)]
17pub struct PyByteArray(PyAny);
18
19pyobject_native_type_core!(PyByteArray, pyobject_native_static_type_object!(ffi::PyByteArray_Type), "builtins", "bytearray", #checkfunction=ffi::PyByteArray_Check);
20
21impl PyByteArray {
22    /// Creates a new Python bytearray object.
23    ///
24    /// The byte string is initialized by copying the data from the `&[u8]`.
25    pub fn new<'py>(py: Python<'py>, src: &[u8]) -> Bound<'py, PyByteArray> {
26        let ptr = src.as_ptr().cast();
27        let len = src.len() as ffi::Py_ssize_t;
28        unsafe {
29            ffi::PyByteArray_FromStringAndSize(ptr, len)
30                .assume_owned(py)
31                .cast_into_unchecked()
32        }
33    }
34
35    /// Creates a new Python `bytearray` object with an `init` closure to write its contents.
36    /// Before calling `init` the bytearray is zero-initialised.
37    /// * If Python raises a MemoryError on the allocation, `new_with` will return
38    ///   it inside `Err`.
39    /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
40    /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyByteArray)`.
41    ///
42    /// # Examples
43    ///
44    /// ```
45    /// use pyo3::{prelude::*, types::PyByteArray};
46    ///
47    /// # fn main() -> PyResult<()> {
48    /// Python::attach(|py| -> PyResult<()> {
49    ///     let py_bytearray = PyByteArray::new_with(py, 10, |bytes: &mut [u8]| {
50    ///         bytes.copy_from_slice(b"Hello Rust");
51    ///         Ok(())
52    ///     })?;
53    ///     let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
54    ///     assert_eq!(bytearray, b"Hello Rust");
55    ///     Ok(())
56    /// })
57    /// # }
58    /// ```
59    pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyByteArray>>
60    where
61        F: FnOnce(&mut [u8]) -> PyResult<()>,
62    {
63        unsafe {
64            // Allocate buffer and check for an error
65            let pybytearray: Bound<'_, Self> =
66                ffi::PyByteArray_FromStringAndSize(std::ptr::null(), len as ffi::Py_ssize_t)
67                    .assume_owned_or_err(py)?
68                    .cast_into_unchecked();
69
70            let buffer: *mut u8 = ffi::PyByteArray_AsString(pybytearray.as_ptr()).cast();
71            debug_assert!(!buffer.is_null());
72            // Zero-initialise the uninitialised bytearray
73            std::ptr::write_bytes(buffer, 0u8, len);
74            // (Further) Initialise the bytearray in init
75            // If init returns an Err, pypybytearray will automatically deallocate the buffer
76            init(std::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytearray)
77        }
78    }
79
80    /// Creates a new Python `bytearray` object from another Python object that
81    /// implements the buffer protocol.
82    pub fn from<'py>(src: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyByteArray>> {
83        unsafe {
84            ffi::PyByteArray_FromObject(src.as_ptr())
85                .assume_owned_or_err(src.py())
86                .cast_into_unchecked()
87        }
88    }
89}
90
91/// Implementation of functionality for [`PyByteArray`].
92///
93/// These methods are defined for the `Bound<'py, PyByteArray>` smart pointer, so to use method call
94/// syntax these methods are separated into a trait, because stable Rust does not yet support
95/// `arbitrary_self_types`.
96#[doc(alias = "PyByteArray")]
97pub trait PyByteArrayMethods<'py>: crate::sealed::Sealed {
98    /// Gets the length of the bytearray.
99    fn len(&self) -> usize;
100
101    /// Checks if the bytearray is empty.
102    fn is_empty(&self) -> bool;
103
104    /// Gets the start of the buffer containing the contents of the bytearray.
105    ///
106    /// # Safety
107    ///
108    /// See the safety requirements of [`PyByteArrayMethods::as_bytes`] and [`PyByteArrayMethods::as_bytes_mut`].
109    fn data(&self) -> *mut u8;
110
111    /// Extracts a slice of the `ByteArray`'s entire buffer.
112    ///
113    /// # Safety
114    ///
115    /// Mutation of the `bytearray` invalidates the slice. If it is used afterwards, the behavior is
116    /// undefined.
117    ///
118    /// These mutations may occur in Python code as well as from Rust:
119    /// - Calling methods like [`PyByteArrayMethods::as_bytes_mut`] and [`PyByteArrayMethods::resize`] will
120    ///   invalidate the slice.
121    /// - Actions like dropping objects or raising exceptions can invoke `__del__`methods or signal
122    ///   handlers, which may execute arbitrary Python code. This means that if Python code has a
123    ///   reference to the `bytearray` you cannot safely use the vast majority of PyO3's API whilst
124    ///   using the slice.
125    ///
126    /// As a result, this slice should only be used for short-lived operations without executing any
127    /// Python code, such as copying into a Vec.
128    /// For free-threaded Python support see also [`with_critical_section`].
129    ///
130    /// # Examples
131    ///
132    /// ```rust
133    /// use pyo3::prelude::*;
134    /// use pyo3::exceptions::PyRuntimeError;
135    /// use pyo3::sync::critical_section::with_critical_section;
136    /// use pyo3::types::PyByteArray;
137    ///
138    /// #[pyfunction]
139    /// fn a_valid_function(bytes: &Bound<'_, PyByteArray>) -> PyResult<()> {
140    ///     let section = with_critical_section(bytes, || {
141    ///         // SAFETY: We promise to not let the interpreter regain control over the bytearray
142    ///         // or invoke any PyO3 APIs while using the slice.
143    ///         let slice = unsafe { bytes.as_bytes() };
144    ///
145    ///         // Copy only a section of `bytes` while avoiding
146    ///         // `to_vec` which copies the entire thing.
147    ///         slice.get(6..11)
148    ///             .map(Vec::from)
149    ///             .ok_or_else(|| PyRuntimeError::new_err("input is not long enough"))
150    ///     })?;
151    ///
152    ///     // Now we can do things with `section` and call PyO3 APIs again.
153    ///     // ...
154    ///     # assert_eq!(&section, b"world");
155    ///
156    ///     Ok(())
157    /// }
158    /// # fn main() -> PyResult<()> {
159    /// #     Python::attach(|py| -> PyResult<()> {
160    /// #         let fun = wrap_pyfunction!(a_valid_function, py)?;
161    /// #         let locals = pyo3::types::PyDict::new(py);
162    /// #         locals.set_item("a_valid_function", fun)?;
163    /// #
164    /// #         py.run(cr#"b = bytearray(b"hello world")
165    /// # a_valid_function(b)
166    /// #
167    /// # try:
168    /// #     a_valid_function(bytearray())
169    /// # except RuntimeError as e:
170    /// #     assert str(e) == 'input is not long enough'"#,
171    /// #             None,
172    /// #             Some(&locals),
173    /// #         )?;
174    /// #
175    /// #         Ok(())
176    /// #     })
177    /// # }
178    /// ```
179    ///
180    /// # Incorrect usage
181    ///
182    /// The following `bug` function is unsound ⚠️
183    ///
184    /// ```rust,no_run
185    /// # use pyo3::prelude::*;
186    /// # use pyo3::types::PyByteArray;
187    ///
188    /// # #[allow(dead_code)]
189    /// #[pyfunction]
190    /// fn bug(py: Python<'_>, bytes: &Bound<'_, PyByteArray>) {
191    ///     // No critical section is being used.
192    ///     // This means that for no-gil Python another thread could be modifying the
193    ///     // bytearray concurrently and thus invalidate `slice` any time.
194    ///     let slice = unsafe { bytes.as_bytes() };
195    ///
196    ///     // This explicitly yields control back to the Python interpreter...
197    ///     // ...but it's not always this obvious. Many things do this implicitly.
198    ///     py.detach(|| {
199    ///         // Python code could be mutating through its handle to `bytes`,
200    ///         // which makes reading it a data race, which is undefined behavior.
201    ///         println!("{:?}", slice[0]);
202    ///     });
203    ///
204    ///     // Python code might have mutated it, so we can not rely on the slice
205    ///     // remaining valid. As such this is also undefined behavior.
206    ///     println!("{:?}", slice[0]);
207    /// }
208    /// ```
209    unsafe fn as_bytes(&self) -> &[u8];
210
211    /// Extracts a mutable slice of the `ByteArray`'s entire buffer.
212    ///
213    /// # Safety
214    ///
215    /// Any other accesses of the `bytearray`'s buffer invalidate the slice. If it is used
216    /// afterwards, the behavior is undefined. The safety requirements of [`PyByteArrayMethods::as_bytes`]
217    /// apply to this function as well.
218    #[expect(clippy::mut_from_ref)]
219    unsafe fn as_bytes_mut(&self) -> &mut [u8];
220
221    /// Copies the contents of the bytearray to a Rust vector.
222    ///
223    /// # Examples
224    ///
225    /// ```
226    /// # use pyo3::prelude::*;
227    /// # use pyo3::types::PyByteArray;
228    /// # Python::attach(|py| {
229    /// let bytearray = PyByteArray::new(py, b"Hello World.");
230    /// let mut copied_message = bytearray.to_vec();
231    /// assert_eq!(b"Hello World.", copied_message.as_slice());
232    ///
233    /// copied_message[11] = b'!';
234    /// assert_eq!(b"Hello World!", copied_message.as_slice());
235    ///
236    /// pyo3::py_run!(py, bytearray, "assert bytearray == b'Hello World.'");
237    /// # });
238    /// ```
239    fn to_vec(&self) -> Vec<u8>;
240
241    /// Resizes the bytearray object to the new length `len`.
242    ///
243    /// Note that this will invalidate any pointers obtained by [PyByteArrayMethods::data], as well as
244    /// any (unsafe) slices obtained from [PyByteArrayMethods::as_bytes] and [PyByteArrayMethods::as_bytes_mut].
245    fn resize(&self, len: usize) -> PyResult<()>;
246}
247
248impl<'py> PyByteArrayMethods<'py> for Bound<'py, PyByteArray> {
249    #[inline]
250    fn len(&self) -> usize {
251        // non-negative Py_ssize_t should always fit into Rust usize
252        unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize }
253    }
254
255    fn is_empty(&self) -> bool {
256        self.len() == 0
257    }
258
259    fn data(&self) -> *mut u8 {
260        self.as_borrowed().data()
261    }
262
263    unsafe fn as_bytes(&self) -> &[u8] {
264        unsafe { self.as_borrowed().as_bytes() }
265    }
266
267    unsafe fn as_bytes_mut(&self) -> &mut [u8] {
268        unsafe { self.as_borrowed().as_bytes_mut() }
269    }
270
271    fn to_vec(&self) -> Vec<u8> {
272        with_critical_section(self, || {
273            // SAFETY:
274            //  * `self` is a `Bound` object, which guarantees that the Python GIL is held.
275            //  * For no-gil Python, a critical section is used in lieu of the GIL.
276            //  * We don't interact with the interpreter
277            //  * We don't mutate the underlying slice
278            unsafe { self.as_bytes() }.to_vec()
279        })
280    }
281
282    fn resize(&self, len: usize) -> PyResult<()> {
283        unsafe {
284            let result = ffi::PyByteArray_Resize(self.as_ptr(), len as ffi::Py_ssize_t);
285            if result == 0 {
286                Ok(())
287            } else {
288                Err(PyErr::fetch(self.py()))
289            }
290        }
291    }
292}
293
294impl<'a> Borrowed<'a, '_, PyByteArray> {
295    fn data(&self) -> *mut u8 {
296        unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() }
297    }
298
299    pub(crate) unsafe fn as_bytes(self) -> &'a [u8] {
300        unsafe { slice::from_raw_parts(self.data(), self.len()) }
301    }
302
303    unsafe fn as_bytes_mut(self) -> &'a mut [u8] {
304        unsafe { slice::from_raw_parts_mut(self.data(), self.len()) }
305    }
306}
307
308impl<'py> TryFrom<&Bound<'py, PyAny>> for Bound<'py, PyByteArray> {
309    type Error = crate::PyErr;
310
311    /// Creates a new Python `bytearray` object from another Python object that
312    /// implements the buffer protocol.
313    fn try_from(value: &Bound<'py, PyAny>) -> Result<Self, Self::Error> {
314        PyByteArray::from(value)
315    }
316}
317
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyByteArray, PyByteArrayMethods};
    use crate::{exceptions, Bound, Py, PyAny, Python};

    // `len` matches the length of the source the bytearray was built from.
    #[test]
    fn test_len() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);
            assert_eq!(src.len(), bytearray.len());
        });
    }

    // `as_bytes` exposes the contents and starts at the pointer returned by `data`.
    #[test]
    fn test_as_bytes() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data() as *const _, slice.as_ptr());
        });
    }

    // Writes through `as_bytes_mut` are visible from the Python side.
    #[test]
    fn test_as_bytes_mut() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes_mut() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data(), slice.as_mut_ptr());

            slice[0..5].copy_from_slice(b"Hi...");

            assert_eq!(bytearray.str().unwrap(), "bytearray(b'Hi... Python')");
        });
    }

    // `to_vec` returns a copy of the contents.
    #[test]
    fn test_to_vec() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let vec = bytearray.to_vec();
            assert_eq!(src, vec.as_slice());
        });
    }

    // `PyByteArray::from` accepts an existing bytearray passed as `PyAny`.
    #[test]
    fn test_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let ba: Py<PyAny> = bytearray.into();
            let bytearray = PyByteArray::from(ba.bind(py)).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    // `PyByteArray::from` reports a `TypeError` for `None`.
    #[test]
    fn test_from_err() {
        Python::attach(|py| {
            match PyByteArray::from(py.None().bind(py)) {
                Err(err) => assert!(err.is_instance_of::<exceptions::PyTypeError>(py)),
                Ok(_) => panic!("error"),
            }
        });
    }

    // The `TryFrom` impl behaves like `PyByteArray::from`.
    #[test]
    fn test_try_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray: &Bound<'_, PyAny> = &PyByteArray::new(py, src);
            let bytearray: Bound<'_, PyByteArray> = TryInto::try_into(bytearray).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    // `resize` changes the reported length.
    #[test]
    fn test_resize() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            bytearray.resize(20).unwrap();
            assert_eq!(20, bytearray.len());
        });
    }

    // `new_with` hands the buffer to the closure and keeps what it wrote.
    #[test]
    fn test_byte_array_new_with() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |b: &mut [u8]| {
                b.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, b"Hello Rust");
            Ok(())
        })
    }

    // A closure that writes nothing leaves the zero-initialised buffer in place.
    #[test]
    fn test_byte_array_new_with_zero_initialised() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |_b: &mut [u8]| Ok(()))?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, &[0; 10]);
            Ok(())
        })
    }

    // An error returned by the closure is passed through by `new_with`.
    #[test]
    fn test_byte_array_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let py_bytearray_result = PyByteArray::new_with(py, 10, |_b: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(py_bytearray_result.is_err());
            assert!(py_bytearray_result
                .err()
                .unwrap()
                .is_instance_of::<PyValueError>(py));
        })
    }

    // * wasm has no threading support
    // * CPython 3.13t is unsound => test fails
    #[cfg(all(
        not(target_family = "wasm"),
        any(Py_3_14, not(all(Py_3_13, Py_GIL_DISABLED)))
    ))]
    #[test]
    fn test_data_integrity_in_critical_section() {
        use crate::instance::Py;
        use crate::sync::{critical_section::with_critical_section, MutexExt};

        use std::sync::atomic::{AtomicBool, Ordering};
        use std::sync::Mutex;
        use std::thread;
        use std::thread::ScopedJoinHandle;
        use std::time::Duration;

        const SIZE: usize = 1_000_000;
        const DATA_VALUE: u8 = 42;

        // Builds a bytearray of `size` bytes, all set to `value`.
        fn make_byte_array(py: Python<'_>, size: usize, value: u8) -> Bound<'_, PyByteArray> {
            PyByteArray::new_with(py, size, |b| {
                b.fill(value);
                Ok(())
            })
            .unwrap()
        }

        let data: Mutex<Py<PyByteArray>> = Mutex::new(Python::attach(|py| {
            make_byte_array(py, SIZE, DATA_VALUE).unbind()
        }));

        // Returns a new reference to the bytearray currently stored in `data`.
        fn get_data<'py>(
            data: &Mutex<Py<PyByteArray>>,
            py: Python<'py>,
        ) -> Bound<'py, PyByteArray> {
            data.lock_py_attached(py).unwrap().bind(py).clone()
        }

        // Replaces the bytearray stored in `data` with `new`.
        fn set_data(data: &Mutex<Py<PyByteArray>>, new: Bound<'_, PyByteArray>) {
            let py = new.py();
            *data.lock_py_attached(py).unwrap() = new.unbind()
        }

        let running = AtomicBool::new(true);
        let extending = AtomicBool::new(false);

        // continuously extends and resets the bytearray in data
        let worker1 = || {
            let mut rounds = 0;
            while running.load(Ordering::SeqCst) && rounds < 50 {
                Python::attach(|py| {
                    let byte_array = get_data(&data, py);
                    extending.store(true, Ordering::SeqCst);
                    byte_array
                        .call_method("extend", (&byte_array,), None)
                        .unwrap();
                    extending.store(false, Ordering::SeqCst);
                    set_data(&data, make_byte_array(py, SIZE, DATA_VALUE));
                    rounds += 1;
                });
            }
        };

        // continuously checks the integrity of bytearray in data
        let worker2 = || {
            while running.load(Ordering::SeqCst) {
                if !extending.load(Ordering::SeqCst) {
                    // wait until we have a chance to read inconsistent state
                    continue;
                }
                Python::attach(|py| {
                    let read = get_data(&data, py);
                    if read.len() == SIZE {
                        // extend is still not done => wait even more
                        return;
                    }
                    with_critical_section(&read, || {
                        // SAFETY: we are in a critical section
                        // This is the whole point of the test: make sure that a
                        // critical section is sufficient to ensure that the data
                        // read is consistent.
                        unsafe {
                            let bytes = read.as_bytes();
                            // Check both ends of the buffer, each exactly once.
                            assert!(bytes.iter().rev().take(50).all(|v| *v == DATA_VALUE));
                            assert!(bytes.iter().take(50).all(|v| *v == DATA_VALUE));
                        }
                    });
                });
            }
        };

        thread::scope(|s| {
            let mut handle1 = Some(s.spawn(worker1));
            let mut handle2 = Some(s.spawn(worker2));
            let mut handles = [&mut handle1, &mut handle2];

            // Poll the workers for up to one second, surfacing a panic as soon as one finishes.
            let t0 = std::time::Instant::now();
            while t0.elapsed() < Duration::from_secs(1) {
                for handle in &mut handles {
                    if handle
                        .as_ref()
                        .map(ScopedJoinHandle::is_finished)
                        .unwrap_or(false)
                    {
                        let res = handle.take().unwrap().join();
                        if res.is_err() {
                            // Stop the other worker before propagating the panic.
                            running.store(false, Ordering::SeqCst);
                        }
                        res.unwrap();
                    }
                }
                if handles.iter().any(|handle| handle.is_none()) {
                    break;
                }
            }
            running.store(false, Ordering::SeqCst);
            for handle in &mut handles {
                if let Some(handle) = handle.take() {
                    handle.join().unwrap()
                }
            }
        });
    }
}