Skip to main content

pyo3/conversions/std/
osstr.rs

1use crate::conversion::IntoPyObject;
2#[cfg(not(target_os = "wasi"))]
3use crate::ffi;
4#[cfg(not(target_os = "wasi"))]
5use crate::ffi_ptr_ext::FfiPtrExt;
6#[cfg(feature = "experimental-inspect")]
7use crate::inspect::PyStaticExpr;
8use crate::instance::Bound;
9#[cfg(feature = "experimental-inspect")]
10use crate::type_object::PyTypeInfo;
11use crate::types::PyString;
12#[cfg(any(unix, target_os = "emscripten"))]
13use crate::types::{PyBytes, PyBytesMethods};
14use crate::{Borrowed, FromPyObject, PyAny, PyErr, Python};
15use std::borrow::Cow;
16use std::convert::Infallible;
17use std::ffi::{OsStr, OsString};
18#[cfg(any(unix, target_os = "emscripten"))]
19use std::os::unix::ffi::OsStrExt;
20#[cfg(windows)]
21use std::os::windows::ffi::OsStrExt;
22
23impl<'py> IntoPyObject<'py> for &OsStr {
24    type Target = PyString;
25    type Output = Bound<'py, Self::Target>;
26    type Error = Infallible;
27
28    #[cfg(feature = "experimental-inspect")]
29    const OUTPUT_TYPE: PyStaticExpr = PyString::TYPE_HINT;
30
31    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
32        // If the string is UTF-8, take the quick and easy shortcut
33        #[cfg(not(target_os = "wasi"))]
34        if let Some(valid_utf8_path) = self.to_str() {
35            return valid_utf8_path.into_pyobject(py);
36        }
37
38        #[cfg(target_os = "wasi")]
39        {
40            self.to_str()
41                .expect("wasi strings are UTF8")
42                .into_pyobject(py)
43        }
44
45        #[cfg(any(unix, target_os = "emscripten"))]
46        {
47            let bytes = self.as_bytes();
48            let ptr = bytes.as_ptr().cast();
49            let len = bytes.len() as ffi::Py_ssize_t;
50            unsafe {
51                // DecodeFSDefault automatically chooses an appropriate decoding mechanism to
52                // parse os strings losslessly (i.e. surrogateescape most of the time)
53                Ok(ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len)
54                    .assume_owned(py)
55                    .cast_into_unchecked())
56            }
57        }
58
59        #[cfg(windows)]
60        {
61            let wstr: Vec<u16> = self.encode_wide().collect();
62            unsafe {
63                // This will not panic because the data from encode_wide is well-formed Windows
64                // string data
65
66                Ok(
67                    ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t)
68                        .assume_owned(py)
69                        .cast_into_unchecked(),
70                )
71            }
72        }
73    }
74}
75
76impl<'py> IntoPyObject<'py> for &&OsStr {
77    type Target = PyString;
78    type Output = Bound<'py, Self::Target>;
79    type Error = Infallible;
80
81    #[cfg(feature = "experimental-inspect")]
82    const OUTPUT_TYPE: PyStaticExpr = <&OsStr>::OUTPUT_TYPE;
83
84    #[inline]
85    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
86        (*self).into_pyobject(py)
87    }
88}
89
90impl FromPyObject<'_, '_> for OsString {
91    type Error = PyErr;
92
93    #[cfg(feature = "experimental-inspect")]
94    const INPUT_TYPE: PyStaticExpr = PyString::TYPE_HINT;
95
96    fn extract(ob: Borrowed<'_, '_, PyAny>) -> Result<Self, Self::Error> {
97        let pystring = ob.cast::<PyString>()?;
98
99        #[cfg(target_os = "wasi")]
100        {
101            Ok(pystring.to_cow()?.into_owned().into())
102        }
103
104        #[cfg(any(unix, target_os = "emscripten"))]
105        {
106            // Decode from Python's lossless bytes string representation back into raw bytes
107            // SAFETY: PyUnicode_EncodeFSDefault returns a new reference or null on error, known to
108            // be a `bytes` object, thread is attached to the interpreter
109            let fs_encoded_bytes = unsafe {
110                ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr())
111                    .assume_owned_or_err(ob.py())?
112                    .cast_into_unchecked::<PyBytes>()
113            };
114
115            // Create an OsStr view into the raw bytes from Python
116            let os_str: &OsStr = OsStrExt::from_bytes(fs_encoded_bytes.as_bytes());
117
118            Ok(os_str.to_os_string())
119        }
120
121        #[cfg(windows)]
122        {
123            // Take the quick and easy shortcut if UTF-8
124            if let Ok(utf8_string) = pystring.to_cow() {
125                return Ok(utf8_string.into_owned().into());
126            }
127
128            // Get an owned allocated wide char buffer from PyString, which we have to deallocate
129            // ourselves
130            let size =
131                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
132            crate::err::error_on_minusone(ob.py(), size)?;
133
134            debug_assert!(
135                size > 0,
136                "PyUnicode_AsWideChar should return at least 1 for null terminator"
137            );
138            let size = size - 1; // exclude null terminator
139
140            let mut buffer = vec![0; size as usize];
141            let bytes_read =
142                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
143            assert_eq!(bytes_read, size);
144
145            // Copy wide char buffer into OsString
146            let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer);
147
148            Ok(os_string)
149        }
150    }
151}
152
153impl<'py> IntoPyObject<'py> for Cow<'_, OsStr> {
154    type Target = PyString;
155    type Output = Bound<'py, Self::Target>;
156    type Error = Infallible;
157
158    #[cfg(feature = "experimental-inspect")]
159    const OUTPUT_TYPE: PyStaticExpr = <&OsStr>::OUTPUT_TYPE;
160
161    #[inline]
162    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
163        (*self).into_pyobject(py)
164    }
165}
166
167impl<'py> IntoPyObject<'py> for &Cow<'_, OsStr> {
168    type Target = PyString;
169    type Output = Bound<'py, Self::Target>;
170    type Error = Infallible;
171
172    #[cfg(feature = "experimental-inspect")]
173    const OUTPUT_TYPE: PyStaticExpr = <&OsStr>::OUTPUT_TYPE;
174
175    #[inline]
176    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
177        (&**self).into_pyobject(py)
178    }
179}
180
181impl<'a> FromPyObject<'a, '_> for Cow<'a, OsStr> {
182    type Error = PyErr;
183
184    #[cfg(feature = "experimental-inspect")]
185    const INPUT_TYPE: PyStaticExpr = OsString::INPUT_TYPE;
186
187    fn extract(obj: Borrowed<'a, '_, PyAny>) -> Result<Self, Self::Error> {
188        #[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
189        if let Ok(s) = obj.extract::<&str>() {
190            return Ok(Cow::Borrowed(s.as_ref()));
191        }
192
193        obj.extract::<OsString>().map(Cow::Owned)
194    }
195}
196
197impl<'py> IntoPyObject<'py> for OsString {
198    type Target = PyString;
199    type Output = Bound<'py, Self::Target>;
200    type Error = Infallible;
201
202    #[cfg(feature = "experimental-inspect")]
203    const OUTPUT_TYPE: PyStaticExpr = <&OsStr>::OUTPUT_TYPE;
204
205    #[inline]
206    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
207        self.as_os_str().into_pyobject(py)
208    }
209}
210
211impl<'py> IntoPyObject<'py> for &OsString {
212    type Target = PyString;
213    type Output = Bound<'py, Self::Target>;
214    type Error = Infallible;
215
216    #[cfg(feature = "experimental-inspect")]
217    const OUTPUT_TYPE: PyStaticExpr = <&OsStr>::OUTPUT_TYPE;
218
219    #[inline]
220    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
221        self.as_os_str().into_pyobject(py)
222    }
223}
224
#[cfg(test)]
mod tests {
    #[cfg(target_os = "wasi")]
    use crate::exceptions::PyFileNotFoundError;
    use crate::types::{PyAnyMethods, PyString, PyStringMethods};
    use crate::{Bound, BoundObject, IntoPyObject, Python};
    use std::fmt::Debug;
    #[cfg(any(unix, target_os = "emscripten"))]
    use std::os::unix::ffi::OsStringExt;
    #[cfg(windows)]
    use std::os::windows::ffi::OsStringExt;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    /// Non-UTF-8 bytes must survive a trip through a Python string and back.
    #[test]
    #[cfg(any(unix, target_os = "emscripten"))]
    fn test_non_utf8_conversion() {
        use std::os::unix::ffi::OsStrExt;

        Python::attach(|py| {
            // these bytes do not form valid UTF-8
            let raw: &[u8] = &[250, 251, 252, 253, 254, 255, 0, 255];
            let original = OsStr::from_bytes(raw);

            // go to Python and come back, then compare with what we started from
            let as_python = original.into_pyobject(py).unwrap();
            let back: OsString = as_python.extract().unwrap();
            assert_eq!(original, back);
        });
    }

    /// On wasi, a Python string that is not UTF-8 encodable must fail to extract.
    #[test]
    #[cfg(target_os = "wasi")]
    fn test_extract_non_utf8_wasi_should_error() {
        Python::attach(|py| {
            // Non utf-8 strings are not valid wasi paths
            let open_err = py.run(c"open('\\udcff', 'rb')", None, None).unwrap_err();
            let is_not_found = open_err.is_instance_of::<PyFileNotFoundError>(py);
            assert!(
                !is_not_found,
                "Opening invalid utf8 will error with OSError, not FileNotFoundError"
            );

            // A Python string holding the lone surrogate `\udcff` cannot be encoded as UTF-8
            let lone_surrogate = py.eval(c"'\\udcff'", None, None).unwrap();
            let extracted = lone_surrogate.extract::<OsString>();
            assert!(
                extracted.is_err(),
                "Extracting invalid UTF-8 as OsString should error"
            );
        });
    }

    /// Every owned/borrowed flavour of OS string converts to `str` and extracts back unchanged.
    #[test]
    fn test_intopyobject_roundtrip() {
        Python::attach(|py| {
            fn check_roundtrip<'py, T>(py: Python<'py>, value: T)
            where
                T: IntoPyObject<'py> + AsRef<OsStr> + Debug + Clone,
                T::Error: Debug,
            {
                let as_any = value.clone().into_pyobject(py).unwrap().into_any();
                let as_str = as_any.as_borrowed().cast::<PyString>().unwrap();
                // the lossy text views must agree...
                assert_eq!(as_str.to_string_lossy(), value.as_ref().to_string_lossy());
                // ...and so must the value extracted back out of Python
                let back: OsString = as_str.extract().unwrap();
                assert_eq!(value.as_ref(), back.as_os_str());
            }

            let sample = OsStr::new("Hello\0\n🐍");
            check_roundtrip::<&OsStr>(py, sample);
            check_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(sample));
            check_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(sample.to_os_string()));
            check_roundtrip::<OsString>(py, sample.to_os_string());
        });
    }

    /// A Windows string with an unpaired surrogate must round-trip without losing data.
    #[test]
    #[cfg(windows)]
    fn test_windows_non_utf8_osstring_roundtrip() {
        use std::os::windows::ffi::{OsStrExt, OsStringExt};

        Python::attach(|py| {
            // 'A', unpaired surrogate (0xD800), 'B': not valid UTF-8, but a valid Windows OsString
            let units: &[u16] = &['A' as u16, 0xD800, 'B' as u16];
            let original = OsString::from_wide(units);

            assert_eq!(original.to_string_lossy(), "A�B");

            // No UTF-8 representation exists, so `to_str` has nothing to return
            assert!(original.to_str().is_none());

            // Into Python and back again
            let as_python = original.as_os_str().into_pyobject(py).unwrap();
            let back = as_python.extract::<OsString>().unwrap();

            // The wide data must come back exactly as it went in
            assert_eq!(original, back);

            // `encode_wide` reproduces the input units; a UTF-8 conversion would lose information
            let reencoded: Vec<u16> = original.encode_wide().collect();
            assert_eq!(reencoded, units);
        });
    }

    /// `Cow<OsStr>` extraction borrows for UTF-8 input when it can, and owns otherwise.
    #[test]
    fn test_extract_cow() {
        Python::attach(|py| {
            fn check_extract<'py, T>(py: Python<'py>, input: &T, is_borrowed: bool)
            where
                for<'a> &'a T: IntoPyObject<'py, Output = Bound<'py, PyString>>,
                for<'a> <&'a T as IntoPyObject<'py>>::Error: Debug,
                T: AsRef<OsStr> + ?Sized,
            {
                let as_python = input.into_pyobject(py).unwrap();
                let extracted: Cow<'_, OsStr> = as_python.extract().unwrap();
                assert_eq!(extracted, input.as_ref());
                assert_eq!(is_borrowed, matches!(extracted, Cow::Borrowed(_)));
            }

            // Borrowing a string from Python needs Python 3.10+ or the non-limited API
            let can_borrow_str = cfg!(any(Py_3_10, not(Py_LIMITED_API)));
            // Valid UTF-8, so these are borrowable where borrowing is available
            check_extract::<str>(py, "Hello\0\n🐍", can_borrow_str);
            check_extract::<str>(py, "Hello, world!", can_borrow_str);

            // 'A', unpaired surrogate, 'B'
            #[cfg(windows)]
            let non_utf8 = OsString::from_wide(&['A' as u16, 0xD800, 'B' as u16]);

            #[cfg(any(unix, target_os = "emscripten"))]
            let non_utf8 = OsString::from_vec(vec![250, 251, 252, 253, 254, 255, 0, 255]);

            // Not valid UTF-8, so extraction has to produce an owned value
            #[cfg(any(windows, unix, target_os = "emscripten"))]
            check_extract::<OsStr>(py, &non_utf8, false);
        });
    }
}
363}