Skip to main content

pyo3/types/
bytes.rs

1use crate::byteswriter::PyBytesWriter;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::{ffi, Py, PyAny, PyResult, Python};
5#[cfg(RustPython)]
6use crate::{
7    sync::PyOnceLock,
8    types::{PyType, PyTypeMethods},
9};
10use core::ops::Index;
11use core::slice::SliceIndex;
12use core::str;
13use std::io::Write;
14
/// Represents a Python `bytes` object.
///
/// This type is immutable.
///
/// Values of this type are accessed via PyO3's smart pointers, e.g. as
/// [`Py<PyBytes>`][crate::Py] or [`Bound<'py, PyBytes>`][Bound].
///
/// For APIs available on `bytes` objects, see the [`PyBytesMethods`] trait which is implemented for
/// [`Bound<'py, PyBytes>`][Bound].
///
/// # Equality
///
/// For convenience, [`Bound<'py, PyBytes>`][Bound] implements [`PartialEq<[u8]>`][PartialEq] to allow comparing the
/// data in the Python bytes to a Rust `[u8]` byte slice.
///
/// This is not always the most appropriate way to compare Python bytes, as Python bytes subclasses
/// may have different equality semantics. In situations where subclasses overriding equality might
/// be relevant, use [`PyAnyMethods::eq`](crate::types::any::PyAnyMethods::eq), at the cost of the
/// additional overhead of a Python method call.
///
/// ```rust
/// # use pyo3::prelude::*;
/// use pyo3::types::PyBytes;
///
/// # Python::attach(|py| {
/// let py_bytes = PyBytes::new(py, b"foo".as_slice());
/// // via PartialEq<[u8]>
/// assert_eq!(py_bytes, b"foo".as_slice());
///
/// // via Python equality
/// let other = PyBytes::new(py, b"foo".as_slice());
/// assert!(py_bytes.as_any().eq(other).unwrap());
///
/// // Note that `eq` will convert its argument to Python using `IntoPyObject`.
/// // Byte collections are specialized, so that the following slice will indeed
/// // convert into a `bytes` object and not a `list`:
/// assert!(py_bytes.as_any().eq(b"foo".as_slice()).unwrap());
/// # });
/// ```
// `repr(transparent)` gives `PyBytes` the same layout as the single `PyAny` field it wraps.
#[repr(transparent)]
pub struct PyBytes(PyAny);
56
// Default configuration: the type object is supplied from `ffi::PyBytes_Type` through
// `pyobject_native_static_type_object!`, and `ffi::PyBytes_Check` is registered as the
// type-check function.
#[cfg(not(RustPython))]
pyobject_native_type_core!(PyBytes, pyobject_native_static_type_object!(ffi::PyBytes_Type), "builtins", "bytes", #checkfunction=ffi::PyBytes_Check);

// RustPython configuration: instead of `ffi::PyBytes_Type`, the type object is obtained at
// runtime by importing `builtins.bytes`.
#[cfg(RustPython)]
pyobject_native_type_core!(
    PyBytes,
    |py| {
        // The imported type is kept in a function-local static `PyOnceLock`
        // (presumably so the import only happens once -- confirm against `PyOnceLock::import`).
        static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
        // NOTE: `unwrap` panics if importing `builtins.bytes` fails.
        TYPE.import(py, "builtins", "bytes").unwrap().as_type_ptr()
    },
    "builtins",
    "bytes",
    #checkfunction=ffi::PyBytes_Check
);
71
72impl PyBytes {
73    /// Creates a new Python bytestring object.
74    /// The bytestring is initialized by copying the data from the `&[u8]`.
75    ///
76    /// Panics if out of memory.
77    pub fn new<'p>(py: Python<'p>, s: &[u8]) -> Bound<'p, PyBytes> {
78        let ptr = s.as_ptr().cast();
79        let len = s.len() as ffi::Py_ssize_t;
80        unsafe {
81            ffi::PyBytes_FromStringAndSize(ptr, len)
82                .assume_owned(py)
83                .cast_into_unchecked()
84        }
85    }
86
87    /// Creates a new Python `bytes` object with an `init` closure to write its contents.
88    /// Before calling `init` the bytes' contents are zero-initialised.
89    /// * If Python raises a MemoryError on the allocation, `new_with` will return
90    ///   it inside `Err`.
91    /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
92    /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyBytes)`.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// use pyo3::{prelude::*, types::PyBytes};
98    ///
99    /// # fn main() -> PyResult<()> {
100    /// Python::attach(|py| -> PyResult<()> {
101    ///     let py_bytes = PyBytes::new_with(py, 10, |bytes: &mut [u8]| {
102    ///         bytes.copy_from_slice(b"Hello Rust");
103    ///         Ok(())
104    ///     })?;
105    ///     let bytes: &[u8] = py_bytes.extract()?;
106    ///     assert_eq!(bytes, b"Hello Rust");
107    ///     Ok(())
108    /// })
109    /// # }
110    /// ```
111    #[inline]
112    pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyBytes>>
113    where
114        F: FnOnce(&mut [u8]) -> PyResult<()>,
115    {
116        unsafe {
117            let pyptr = ffi::PyBytes_FromStringAndSize(core::ptr::null(), len as ffi::Py_ssize_t);
118            // Check for an allocation error and return it
119            let pybytes = pyptr.assume_owned_or_err(py)?.cast_into_unchecked();
120            let buffer: *mut u8 = ffi::PyBytes_AsString(pyptr).cast();
121            debug_assert!(!buffer.is_null());
122            // Zero-initialise the uninitialised bytestring
123            core::ptr::write_bytes(buffer, 0u8, len);
124            // (Further) Initialise the bytestring in init
125            // If init returns an Err, pypybytearray will automatically deallocate the buffer
126            init(core::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytes)
127        }
128    }
129
130    /// Creates a new Python `bytes` object using a writer closure.
131    ///
132    /// This function allocates a Python `bytes` object with at least `reserved_capacity` bytes of capacity,
133    /// then provides a mutable writer to the closure `write`. The closure can write any number of bytes,
134    /// even more than the reserved capacity; the buffer will grow dynamically as needed.
135    ///
136    /// If `reserved_capacity` is 0, the buffer will start empty and grow as the writer writes data.
137    ///
138    /// After the closure returns, the resulting bytes object contains the written data.
139    ///
140    /// # Example
141    ///
142    /// ```
143    /// use pyo3::{prelude::*, types::PyBytes};
144    /// use std::io::Write;
145    ///
146    /// # fn main() -> PyResult<()> {
147    /// Python::attach(|py| -> PyResult<()> {
148    ///     let py_bytes = PyBytes::new_with_writer(py, 0, |writer| {
149    ///         writer.write_all(b"hello world")?;
150    ///         Ok(())
151    ///     })?;
152    ///     assert_eq!(py_bytes.as_bytes(), b"hello world");
153    ///     Ok(())
154    /// })
155    /// # }
156    /// ```
157    #[inline]
158    pub fn new_with_writer<F>(
159        py: Python<'_>,
160        reserved_capacity: usize,
161        write: F,
162    ) -> PyResult<Bound<'_, PyBytes>>
163    where
164        F: FnOnce(&mut dyn Write) -> PyResult<()>,
165    {
166        let mut writer = PyBytesWriter::with_capacity(py, reserved_capacity)?;
167        write(&mut writer)?;
168        writer.try_into()
169    }
170
171    /// Creates a new Python byte string object from a raw pointer and length.
172    ///
173    /// Panics if out of memory.
174    ///
175    /// # Safety
176    ///
177    /// This function dereferences the raw pointer `ptr` as the
178    /// leading pointer of a slice of length `len`. [As with
179    /// `core::slice::from_raw_parts`, this is
180    /// unsafe](https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety).
181    pub unsafe fn from_ptr(py: Python<'_>, ptr: *const u8, len: usize) -> Bound<'_, PyBytes> {
182        unsafe {
183            ffi::PyBytes_FromStringAndSize(ptr.cast(), len as isize)
184                .assume_owned(py)
185                .cast_into_unchecked()
186        }
187    }
188}
189
/// Implementation of functionality for [`PyBytes`].
///
/// These methods are defined for the `Bound<'py, PyBytes>` smart pointer, so to use method call
/// syntax these methods are separated into a trait, because stable Rust does not yet support
/// `arbitrary_self_types`.
#[doc(alias = "PyBytes")]
pub trait PyBytesMethods<'py>: crate::sealed::Sealed {
    /// Gets the contents of the Python `bytes` object as a byte slice.
    ///
    /// The returned slice borrows from `self`.
    fn as_bytes(&self) -> &[u8];
}
200
201impl<'py> PyBytesMethods<'py> for Bound<'py, PyBytes> {
202    #[inline]
203    fn as_bytes(&self) -> &[u8] {
204        self.as_borrowed().as_bytes()
205    }
206}
207
208impl<'a> Borrowed<'a, '_, PyBytes> {
209    /// Gets the Python string as a byte slice.
210    #[allow(clippy::wrong_self_convention)]
211    pub(crate) fn as_bytes(self) -> &'a [u8] {
212        unsafe {
213            let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
214            let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
215            debug_assert!(!buffer.is_null());
216            core::slice::from_raw_parts(buffer, length)
217        }
218    }
219}
220
221impl Py<PyBytes> {
222    /// Gets the Python bytes as a byte slice. Because Python bytes are
223    /// immutable, the result may be used for as long as the reference to
224    /// `self` is held, including when the GIL is released.
225    pub fn as_bytes<'a>(&'a self, py: Python<'_>) -> &'a [u8] {
226        self.bind_borrowed(py).as_bytes()
227    }
228}
229
230/// This is the same way [Vec] is indexed.
231impl<I: SliceIndex<[u8]>> Index<I> for Bound<'_, PyBytes> {
232    type Output = I::Output;
233
234    fn index(&self, index: I) -> &Self::Output {
235        &self.as_bytes()[index]
236    }
237}
238
239/// Compares whether the Python bytes object is equal to the [u8].
240///
241/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
242impl PartialEq<[u8]> for Bound<'_, PyBytes> {
243    #[inline]
244    fn eq(&self, other: &[u8]) -> bool {
245        self.as_borrowed() == *other
246    }
247}
248
249/// Compares whether the Python bytes object is equal to the [u8].
250///
251/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
252impl PartialEq<&'_ [u8]> for Bound<'_, PyBytes> {
253    #[inline]
254    fn eq(&self, other: &&[u8]) -> bool {
255        self.as_borrowed() == **other
256    }
257}
258
259/// Compares whether the Python bytes object is equal to the [u8].
260///
261/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
262impl PartialEq<Bound<'_, PyBytes>> for [u8] {
263    #[inline]
264    fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
265        *self == other.as_borrowed()
266    }
267}
268
269/// Compares whether the Python bytes object is equal to the [u8].
270///
271/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
272impl PartialEq<&'_ Bound<'_, PyBytes>> for [u8] {
273    #[inline]
274    fn eq(&self, other: &&Bound<'_, PyBytes>) -> bool {
275        *self == other.as_borrowed()
276    }
277}
278
279/// Compares whether the Python bytes object is equal to the [u8].
280///
281/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
282impl PartialEq<Bound<'_, PyBytes>> for &'_ [u8] {
283    #[inline]
284    fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
285        **self == other.as_borrowed()
286    }
287}
288
289/// Compares whether the Python bytes object is equal to the [u8].
290///
291/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
292impl PartialEq<[u8]> for &'_ Bound<'_, PyBytes> {
293    #[inline]
294    fn eq(&self, other: &[u8]) -> bool {
295        self.as_borrowed() == other
296    }
297}
298
299/// Compares whether the Python bytes object is equal to the [u8].
300///
301/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
302impl PartialEq<[u8]> for Borrowed<'_, '_, PyBytes> {
303    #[inline]
304    fn eq(&self, other: &[u8]) -> bool {
305        self.as_bytes() == other
306    }
307}
308
309/// Compares whether the Python bytes object is equal to the [u8].
310///
311/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
312impl PartialEq<&[u8]> for Borrowed<'_, '_, PyBytes> {
313    #[inline]
314    fn eq(&self, other: &&[u8]) -> bool {
315        *self == **other
316    }
317}
318
319/// Compares whether the Python bytes object is equal to the [u8].
320///
321/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
322impl PartialEq<Borrowed<'_, '_, PyBytes>> for [u8] {
323    #[inline]
324    fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
325        other == self
326    }
327}
328
329/// Compares whether the Python bytes object is equal to the [u8].
330///
331/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
332impl PartialEq<Borrowed<'_, '_, PyBytes>> for &'_ [u8] {
333    #[inline]
334    fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
335        other == self
336    }
337}
338
339impl<'a> AsRef<[u8]> for Borrowed<'a, '_, PyBytes> {
340    #[inline]
341    fn as_ref(&self) -> &'a [u8] {
342        self.as_bytes()
343    }
344}
345
346impl AsRef<[u8]> for Bound<'_, PyBytes> {
347    #[inline]
348    fn as_ref(&self) -> &[u8] {
349        self.as_bytes()
350    }
351}
352
353#[cfg(test)]
354mod tests {
355    use super::*;
356    use crate::types::PyAnyMethods as _;
357
358    #[test]
359    fn test_bytes_index() {
360        Python::attach(|py| {
361            let bytes = PyBytes::new(py, b"Hello World");
362            assert_eq!(bytes[1], b'e');
363        });
364    }
365
366    #[test]
367    fn test_bound_bytes_index() {
368        Python::attach(|py| {
369            let bytes = PyBytes::new(py, b"Hello World");
370            assert_eq!(bytes[1], b'e');
371
372            let bytes = &bytes;
373            assert_eq!(bytes[1], b'e');
374        });
375    }
376
377    #[test]
378    fn test_bytes_new_with() -> super::PyResult<()> {
379        Python::attach(|py| -> super::PyResult<()> {
380            let py_bytes = PyBytes::new_with(py, 10, |b: &mut [u8]| {
381                b.copy_from_slice(b"Hello Rust");
382                Ok(())
383            })?;
384            let bytes: &[u8] = py_bytes.extract()?;
385            assert_eq!(bytes, b"Hello Rust");
386            Ok(())
387        })
388    }
389
390    #[test]
391    fn test_bytes_new_with_zero_initialised() -> super::PyResult<()> {
392        Python::attach(|py| -> super::PyResult<()> {
393            let py_bytes = PyBytes::new_with(py, 10, |_b: &mut [u8]| Ok(()))?;
394            let bytes: &[u8] = py_bytes.extract()?;
395            assert_eq!(bytes, &[0; 10]);
396            Ok(())
397        })
398    }
399
400    #[test]
401    fn test_bytes_new_with_error() {
402        use crate::exceptions::PyValueError;
403        Python::attach(|py| {
404            let py_bytes_result = PyBytes::new_with(py, 10, |_b: &mut [u8]| {
405                Err(PyValueError::new_err("Hello Crustaceans!"))
406            });
407            assert!(py_bytes_result.is_err());
408            assert!(py_bytes_result
409                .err()
410                .unwrap()
411                .is_instance_of::<PyValueError>(py));
412        });
413    }
414
415    #[test]
416    fn test_comparisons() {
417        Python::attach(|py| {
418            let b = b"hello, world".as_slice();
419            let py_bytes = PyBytes::new(py, b);
420
421            assert_eq!(py_bytes, b"hello, world".as_slice());
422
423            assert_eq!(py_bytes, b);
424            assert_eq!(&py_bytes, b);
425            assert_eq!(b, py_bytes);
426            assert_eq!(b, &py_bytes);
427
428            assert_eq!(py_bytes, *b);
429            assert_eq!(&py_bytes, *b);
430            assert_eq!(*b, py_bytes);
431            assert_eq!(*b, &py_bytes);
432
433            let py_string = py_bytes.as_borrowed();
434
435            assert_eq!(py_string, b);
436            assert_eq!(&py_string, b);
437            assert_eq!(b, py_string);
438            assert_eq!(b, &py_string);
439
440            assert_eq!(py_string, *b);
441            assert_eq!(*b, py_string);
442        })
443    }
444
445    #[test]
446    #[cfg(not(Py_LIMITED_API))]
447    fn test_as_string() {
448        Python::attach(|py| {
449            let b = b"hello, world".as_slice();
450            let py_bytes = PyBytes::new(py, b);
451            unsafe {
452                assert_eq!(
453                    ffi::PyBytes_AsString(py_bytes.as_ptr()) as *const core::ffi::c_char,
454                    ffi::PyBytes_AS_STRING(py_bytes.as_ptr()) as *const core::ffi::c_char
455                );
456            }
457        })
458    }
459
460    #[test]
461    fn test_as_ref_slice() {
462        Python::attach(|py| {
463            let b = b"hello, world";
464            let py_bytes = PyBytes::new(py, b);
465            let ref_bound: &[u8] = py_bytes.as_ref();
466            assert_eq!(ref_bound, b);
467            let py_bytes_borrowed = py_bytes.as_borrowed();
468            let ref_borrowed: &[u8] = py_bytes_borrowed.as_ref();
469            assert_eq!(ref_borrowed, b);
470        })
471    }
472
473    #[test]
474    fn test_with_writer() {
475        Python::attach(|py| {
476            let bytes = PyBytes::new_with_writer(py, 0, |writer| {
477                writer.write_all(b"hallo")?;
478                Ok(())
479            })
480            .unwrap();
481
482            assert_eq!(bytes.as_bytes(), b"hallo");
483        })
484    }
485}