pyo3/types/bytes.rs
1use crate::byteswriter::PyBytesWriter;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::{ffi, Py, PyAny, PyResult, Python};
5#[cfg(RustPython)]
6use crate::{
7 sync::PyOnceLock,
8 types::{PyType, PyTypeMethods},
9};
10use core::ops::Index;
11use core::slice::SliceIndex;
12use core::str;
13use std::io::Write;
14
/// Represents a Python `bytes` object.
///
/// This type is immutable.
///
/// Values of this type are accessed via PyO3's smart pointers, e.g. as
/// [`Py<PyBytes>`][crate::Py] or [`Bound<'py, PyBytes>`][Bound].
///
/// For APIs available on `bytes` objects, see the [`PyBytesMethods`] trait which is implemented for
/// [`Bound<'py, PyBytes>`][Bound].
///
/// # Equality
///
/// For convenience, [`Bound<'py, PyBytes>`][Bound] implements [`PartialEq<[u8]>`][PartialEq] to allow comparing the
/// data in the Python bytes to a Rust `[u8]` byte slice.
///
/// This is not always the most appropriate way to compare Python bytes, as Python bytes subclasses
/// may have different equality semantics. In situations where subclasses overriding equality might
/// be relevant, use [`PyAnyMethods::eq`](crate::types::any::PyAnyMethods::eq), at the cost of the
/// additional overhead of a Python method call.
///
/// ```rust
/// # use pyo3::prelude::*;
/// use pyo3::types::PyBytes;
///
/// # Python::attach(|py| {
/// let py_bytes = PyBytes::new(py, b"foo".as_slice());
/// // via PartialEq<[u8]>
/// assert_eq!(py_bytes, b"foo".as_slice());
///
/// // via Python equality
/// let other = PyBytes::new(py, b"foo".as_slice());
/// assert!(py_bytes.as_any().eq(other).unwrap());
///
/// // Note that `eq` will convert its argument to Python using `IntoPyObject`.
/// // Byte collections are specialized, so that the following slice will indeed
/// // convert into a `bytes` object and not a `list`:
/// assert!(py_bytes.as_any().eq(b"foo".as_slice()).unwrap());
/// # });
/// ```
#[repr(transparent)]
pub struct PyBytes(PyAny);
56
// Non-RustPython builds: register `PyBytes` as the native type `builtins.bytes`, using the
// static type object `ffi::PyBytes_Type` and `ffi::PyBytes_Check` as the type check.
// NOTE(review): the exact items generated are determined by the macro, which is defined elsewhere.
#[cfg(not(RustPython))]
pyobject_native_type_core!(PyBytes, pyobject_native_static_type_object!(ffi::PyBytes_Type), "builtins", "bytes", #checkfunction=ffi::PyBytes_Check);
59
// RustPython builds: instead of a static type object, the type pointer comes from a closure
// that imports `builtins.bytes` through a `PyOnceLock` stored in a `static`.
// NOTE(review): presumably the lock caches the imported type after the first call — confirm
// against `PyOnceLock::import`.
#[cfg(RustPython)]
pyobject_native_type_core!(
    PyBytes,
    |py| {
        static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
        // `unwrap` panics if importing `builtins.bytes` fails.
        TYPE.import(py, "builtins", "bytes").unwrap().as_type_ptr()
    },
    "builtins",
    "bytes",
    #checkfunction=ffi::PyBytes_Check
);
71
72impl PyBytes {
73 /// Creates a new Python bytestring object.
74 /// The bytestring is initialized by copying the data from the `&[u8]`.
75 ///
76 /// Panics if out of memory.
77 pub fn new<'p>(py: Python<'p>, s: &[u8]) -> Bound<'p, PyBytes> {
78 let ptr = s.as_ptr().cast();
79 let len = s.len() as ffi::Py_ssize_t;
80 unsafe {
81 ffi::PyBytes_FromStringAndSize(ptr, len)
82 .assume_owned(py)
83 .cast_into_unchecked()
84 }
85 }
86
87 /// Creates a new Python `bytes` object with an `init` closure to write its contents.
88 /// Before calling `init` the bytes' contents are zero-initialised.
89 /// * If Python raises a MemoryError on the allocation, `new_with` will return
90 /// it inside `Err`.
91 /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
92 /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyBytes)`.
93 ///
94 /// # Examples
95 ///
96 /// ```
97 /// use pyo3::{prelude::*, types::PyBytes};
98 ///
99 /// # fn main() -> PyResult<()> {
100 /// Python::attach(|py| -> PyResult<()> {
101 /// let py_bytes = PyBytes::new_with(py, 10, |bytes: &mut [u8]| {
102 /// bytes.copy_from_slice(b"Hello Rust");
103 /// Ok(())
104 /// })?;
105 /// let bytes: &[u8] = py_bytes.extract()?;
106 /// assert_eq!(bytes, b"Hello Rust");
107 /// Ok(())
108 /// })
109 /// # }
110 /// ```
111 #[inline]
112 pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyBytes>>
113 where
114 F: FnOnce(&mut [u8]) -> PyResult<()>,
115 {
116 unsafe {
117 let pyptr = ffi::PyBytes_FromStringAndSize(core::ptr::null(), len as ffi::Py_ssize_t);
118 // Check for an allocation error and return it
119 let pybytes = pyptr.assume_owned_or_err(py)?.cast_into_unchecked();
120 let buffer: *mut u8 = ffi::PyBytes_AsString(pyptr).cast();
121 debug_assert!(!buffer.is_null());
122 // Zero-initialise the uninitialised bytestring
123 core::ptr::write_bytes(buffer, 0u8, len);
124 // (Further) Initialise the bytestring in init
125 // If init returns an Err, pypybytearray will automatically deallocate the buffer
126 init(core::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytes)
127 }
128 }
129
130 /// Creates a new Python `bytes` object using a writer closure.
131 ///
132 /// This function allocates a Python `bytes` object with at least `reserved_capacity` bytes of capacity,
133 /// then provides a mutable writer to the closure `write`. The closure can write any number of bytes,
134 /// even more than the reserved capacity; the buffer will grow dynamically as needed.
135 ///
136 /// If `reserved_capacity` is 0, the buffer will start empty and grow as the writer writes data.
137 ///
138 /// After the closure returns, the resulting bytes object contains the written data.
139 ///
140 /// # Example
141 ///
142 /// ```
143 /// use pyo3::{prelude::*, types::PyBytes};
144 /// use std::io::Write;
145 ///
146 /// # fn main() -> PyResult<()> {
147 /// Python::attach(|py| -> PyResult<()> {
148 /// let py_bytes = PyBytes::new_with_writer(py, 0, |writer| {
149 /// writer.write_all(b"hello world")?;
150 /// Ok(())
151 /// })?;
152 /// assert_eq!(py_bytes.as_bytes(), b"hello world");
153 /// Ok(())
154 /// })
155 /// # }
156 /// ```
157 #[inline]
158 pub fn new_with_writer<F>(
159 py: Python<'_>,
160 reserved_capacity: usize,
161 write: F,
162 ) -> PyResult<Bound<'_, PyBytes>>
163 where
164 F: FnOnce(&mut dyn Write) -> PyResult<()>,
165 {
166 let mut writer = PyBytesWriter::with_capacity(py, reserved_capacity)?;
167 write(&mut writer)?;
168 writer.try_into()
169 }
170
171 /// Creates a new Python byte string object from a raw pointer and length.
172 ///
173 /// Panics if out of memory.
174 ///
175 /// # Safety
176 ///
177 /// This function dereferences the raw pointer `ptr` as the
178 /// leading pointer of a slice of length `len`. [As with
179 /// `core::slice::from_raw_parts`, this is
180 /// unsafe](https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety).
181 pub unsafe fn from_ptr(py: Python<'_>, ptr: *const u8, len: usize) -> Bound<'_, PyBytes> {
182 unsafe {
183 ffi::PyBytes_FromStringAndSize(ptr.cast(), len as isize)
184 .assume_owned(py)
185 .cast_into_unchecked()
186 }
187 }
188}
189
/// Implementation of functionality for [`PyBytes`].
///
/// These methods are defined for the `Bound<'py, PyBytes>` smart pointer, so to use method call
/// syntax these methods are separated into a trait, because stable Rust does not yet support
/// `arbitrary_self_types`.
#[doc(alias = "PyBytes")]
pub trait PyBytesMethods<'py>: crate::sealed::Sealed {
    /// Gets the contents of the Python `bytes` object as a byte slice.
    ///
    /// The returned slice borrows from `self`.
    fn as_bytes(&self) -> &[u8];
}
200
201impl<'py> PyBytesMethods<'py> for Bound<'py, PyBytes> {
202 #[inline]
203 fn as_bytes(&self) -> &[u8] {
204 self.as_borrowed().as_bytes()
205 }
206}
207
208impl<'a> Borrowed<'a, '_, PyBytes> {
209 /// Gets the Python string as a byte slice.
210 #[allow(clippy::wrong_self_convention)]
211 pub(crate) fn as_bytes(self) -> &'a [u8] {
212 unsafe {
213 let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
214 let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
215 debug_assert!(!buffer.is_null());
216 core::slice::from_raw_parts(buffer, length)
217 }
218 }
219}
220
221impl Py<PyBytes> {
222 /// Gets the Python bytes as a byte slice. Because Python bytes are
223 /// immutable, the result may be used for as long as the reference to
224 /// `self` is held, including when the GIL is released.
225 pub fn as_bytes<'a>(&'a self, py: Python<'_>) -> &'a [u8] {
226 self.bind_borrowed(py).as_bytes()
227 }
228}
229
230/// This is the same way [Vec] is indexed.
231impl<I: SliceIndex<[u8]>> Index<I> for Bound<'_, PyBytes> {
232 type Output = I::Output;
233
234 fn index(&self, index: I) -> &Self::Output {
235 &self.as_bytes()[index]
236 }
237}
238
239/// Compares whether the Python bytes object is equal to the [u8].
240///
241/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
242impl PartialEq<[u8]> for Bound<'_, PyBytes> {
243 #[inline]
244 fn eq(&self, other: &[u8]) -> bool {
245 self.as_borrowed() == *other
246 }
247}
248
249/// Compares whether the Python bytes object is equal to the [u8].
250///
251/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
252impl PartialEq<&'_ [u8]> for Bound<'_, PyBytes> {
253 #[inline]
254 fn eq(&self, other: &&[u8]) -> bool {
255 self.as_borrowed() == **other
256 }
257}
258
259/// Compares whether the Python bytes object is equal to the [u8].
260///
261/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
262impl PartialEq<Bound<'_, PyBytes>> for [u8] {
263 #[inline]
264 fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
265 *self == other.as_borrowed()
266 }
267}
268
269/// Compares whether the Python bytes object is equal to the [u8].
270///
271/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
272impl PartialEq<&'_ Bound<'_, PyBytes>> for [u8] {
273 #[inline]
274 fn eq(&self, other: &&Bound<'_, PyBytes>) -> bool {
275 *self == other.as_borrowed()
276 }
277}
278
279/// Compares whether the Python bytes object is equal to the [u8].
280///
281/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
282impl PartialEq<Bound<'_, PyBytes>> for &'_ [u8] {
283 #[inline]
284 fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
285 **self == other.as_borrowed()
286 }
287}
288
289/// Compares whether the Python bytes object is equal to the [u8].
290///
291/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
292impl PartialEq<[u8]> for &'_ Bound<'_, PyBytes> {
293 #[inline]
294 fn eq(&self, other: &[u8]) -> bool {
295 self.as_borrowed() == other
296 }
297}
298
299/// Compares whether the Python bytes object is equal to the [u8].
300///
301/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
302impl PartialEq<[u8]> for Borrowed<'_, '_, PyBytes> {
303 #[inline]
304 fn eq(&self, other: &[u8]) -> bool {
305 self.as_bytes() == other
306 }
307}
308
309/// Compares whether the Python bytes object is equal to the [u8].
310///
311/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
312impl PartialEq<&[u8]> for Borrowed<'_, '_, PyBytes> {
313 #[inline]
314 fn eq(&self, other: &&[u8]) -> bool {
315 *self == **other
316 }
317}
318
319/// Compares whether the Python bytes object is equal to the [u8].
320///
321/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
322impl PartialEq<Borrowed<'_, '_, PyBytes>> for [u8] {
323 #[inline]
324 fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
325 other == self
326 }
327}
328
329/// Compares whether the Python bytes object is equal to the [u8].
330///
331/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
332impl PartialEq<Borrowed<'_, '_, PyBytes>> for &'_ [u8] {
333 #[inline]
334 fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
335 other == self
336 }
337}
338
339impl<'a> AsRef<[u8]> for Borrowed<'a, '_, PyBytes> {
340 #[inline]
341 fn as_ref(&self) -> &'a [u8] {
342 self.as_bytes()
343 }
344}
345
346impl AsRef<[u8]> for Bound<'_, PyBytes> {
347 #[inline]
348 fn as_ref(&self) -> &[u8] {
349 self.as_bytes()
350 }
351}
352
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::PyAnyMethods as _;

    /// Indexing an owned `Bound<PyBytes>` yields the byte at that position.
    #[test]
    fn test_bytes_index() {
        Python::attach(|py| {
            let hello = PyBytes::new(py, b"Hello World");
            assert_eq!(hello[1], b'e');
        });
    }

    /// Indexing works both on the `Bound` itself and through a reference to it.
    #[test]
    fn test_bound_bytes_index() {
        Python::attach(|py| {
            let owned = PyBytes::new(py, b"Hello World");
            assert_eq!(owned[1], b'e');

            let by_ref = &owned;
            assert_eq!(by_ref[1], b'e');
        });
    }

    /// `new_with` hands the closure a buffer of the requested length to fill.
    #[test]
    fn test_bytes_new_with() -> PyResult<()> {
        Python::attach(|py| -> PyResult<()> {
            let filled = PyBytes::new_with(py, 10, |buf: &mut [u8]| {
                buf.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let contents: &[u8] = filled.extract()?;
            assert_eq!(contents, b"Hello Rust");
            Ok(())
        })
    }

    /// A closure that writes nothing leaves the buffer all zeroes.
    #[test]
    fn test_bytes_new_with_zero_initialised() -> PyResult<()> {
        Python::attach(|py| -> PyResult<()> {
            let untouched = PyBytes::new_with(py, 10, |_buf: &mut [u8]| Ok(()))?;
            let contents: &[u8] = untouched.extract()?;
            assert_eq!(contents, &[0; 10]);
            Ok(())
        })
    }

    /// An error returned by the closure is passed through unchanged.
    #[test]
    fn test_bytes_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let outcome = PyBytes::new_with(py, 10, |_buf: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(outcome.is_err());
            let err = outcome.err().unwrap();
            assert!(err.is_instance_of::<PyValueError>(py));
        });
    }

    /// Exercises every `PartialEq` combination between bytes objects and byte slices.
    #[test]
    fn test_comparisons() {
        Python::attach(|py| {
            let b = b"hello, world".as_slice();
            let py_bytes = PyBytes::new(py, b);

            assert_eq!(py_bytes, b"hello, world".as_slice());

            // `Bound` (owned and by reference) against `&[u8]`.
            assert_eq!(py_bytes, b);
            assert_eq!(&py_bytes, b);
            assert_eq!(b, py_bytes);
            assert_eq!(b, &py_bytes);

            // `Bound` (owned and by reference) against `[u8]`.
            assert_eq!(py_bytes, *b);
            assert_eq!(&py_bytes, *b);
            assert_eq!(*b, py_bytes);
            assert_eq!(*b, &py_bytes);

            let borrowed = py_bytes.as_borrowed();

            // `Borrowed` against `&[u8]`.
            assert_eq!(borrowed, b);
            assert_eq!(&borrowed, b);
            assert_eq!(b, borrowed);
            assert_eq!(b, &borrowed);

            // `Borrowed` against `[u8]`.
            assert_eq!(borrowed, *b);
            assert_eq!(*b, borrowed);
        })
    }

    /// The function and macro-style accessors must report the same data pointer.
    #[test]
    #[cfg(not(Py_LIMITED_API))]
    fn test_as_string() {
        Python::attach(|py| {
            let b = b"hello, world".as_slice();
            let py_bytes = PyBytes::new(py, b);
            let raw = py_bytes.as_ptr();
            let via_function = unsafe { ffi::PyBytes_AsString(raw) } as *const core::ffi::c_char;
            let via_macro = unsafe { ffi::PyBytes_AS_STRING(raw) } as *const core::ffi::c_char;
            assert_eq!(via_function, via_macro);
        })
    }

    /// `AsRef<[u8]>` is available on both `Bound` and `Borrowed`.
    #[test]
    fn test_as_ref_slice() {
        Python::attach(|py| {
            let b = b"hello, world";
            let py_bytes = PyBytes::new(py, b);

            let from_bound: &[u8] = py_bytes.as_ref();
            assert_eq!(from_bound, b);

            let borrowed = py_bytes.as_borrowed();
            let from_borrowed: &[u8] = borrowed.as_ref();
            assert_eq!(from_borrowed, b);
        })
    }

    /// Data written through the writer ends up as the contents of the bytes object.
    #[test]
    fn test_with_writer() {
        Python::attach(|py| {
            let written = PyBytes::new_with_writer(py, 0, |writer| {
                writer.write_all(b"hallo")?;
                Ok(())
            })
            .unwrap();

            assert_eq!(written.as_bytes(), b"hallo");
        })
    }
}
485}