Skip to main content

pyo3_ffi/
unicodeobject.rs

1use crate::object::*;
2use crate::pyport::Py_ssize_t;
3use core::ffi::{c_char, c_int, c_void};
4use libc::wchar_t;
5
// Fixed-width unsigned integer aliases used by the declarations in this file:
// `Py_UCS4` is 32 bits wide, `Py_UCS2` is 16 bits wide and `Py_UCS1` is 8 bits wide.
6pub type Py_UCS4 = u32;
7pub type Py_UCS2 = u16;
8pub type Py_UCS1 = u8;
9
// Type-object statics plus the foreign versions of the two type checks.
//
// The statics are compiled out when the `RustPython` cfg is set. The two
// functions are only declared here when `PyPy` or `RustPython` is set; in every
// other configuration the same names are provided by the inline Rust functions
// that follow this block.
10extern_libpython! {
    // Under `PyPy` this links against the `PyPyUnicode_Type` symbol.
11    #[cfg(not(RustPython))]
12    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Type")]
13    pub static mut PyUnicode_Type: PyTypeObject;
    // No PyPy link name is given, so the plain symbol name is used whenever
    // this static is compiled in.
14    #[cfg(not(RustPython))]
15    pub static mut PyUnicodeIter_Type: PyTypeObject;
16
    // Foreign `PyUnicode_Check`; linked as `PyPyUnicode_Check` under `PyPy`.
17    #[cfg(any(PyPy, RustPython))]
18    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Check")]
19    pub fn PyUnicode_Check(op: *mut PyObject) -> c_int;
20
    // Foreign `PyUnicode_CheckExact`; linked as `PyPyUnicode_CheckExact` under `PyPy`.
21    #[cfg(any(PyPy, RustPython))]
22    #[cfg_attr(PyPy, link_name = "PyPyUnicode_CheckExact")]
23    pub fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int;
24}
25
26#[inline]
27#[cfg(not(any(PyPy, RustPython)))]
28pub unsafe fn PyUnicode_Check(op: *mut PyObject) -> c_int {
29    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
30}
31
32#[inline]
33#[cfg(not(any(PyPy, RustPython)))]
34pub unsafe fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int {
35    Py_IS_TYPE(op, &raw mut PyUnicode_Type)
36}
37
// Code point U+FFFD expressed as a `Py_UCS4` value.
38pub const Py_UNICODE_REPLACEMENT_CHARACTER: Py_UCS4 = 0xFFFD;
39
// Foreign declarations for the bulk of the `PyUnicode_*` functions.
//
// How to read the attributes used throughout this block:
// - `#[cfg_attr(PyPy, link_name = "PyPy…")]` makes the item link against the
//   `PyPy`-prefixed symbol when the `PyPy` cfg is set. Items without this
//   attribute keep their plain name in every configuration.
// - `#[cfg(not(Py_3_N))]` compiles the item out once the `Py_3_N` cfg is set.
40extern_libpython! {
41
    // Both take a `*const c_char` and return a `*mut PyObject`; only the sized
    // variant carries a PyPy link name.
42    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromStringAndSize")]
43    pub fn PyUnicode_FromStringAndSize(u: *const c_char, size: Py_ssize_t) -> *mut PyObject;
44    pub fn PyUnicode_FromString(u: *const c_char) -> *mut PyObject;
45
46    pub fn PyUnicode_Substring(
47        str: *mut PyObject,
48        start: Py_ssize_t,
49        end: Py_ssize_t,
50    ) -> *mut PyObject;
    // Both UCS4 conversions return a raw `*mut Py_UCS4`.
51    pub fn PyUnicode_AsUCS4(
52        unicode: *mut PyObject,
53        buffer: *mut Py_UCS4,
54        buflen: Py_ssize_t,
55        copy_null: c_int,
56    ) -> *mut Py_UCS4;
57    pub fn PyUnicode_AsUCS4Copy(unicode: *mut PyObject) -> *mut Py_UCS4;
58    #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetLength")]
59    pub fn PyUnicode_GetLength(unicode: *mut PyObject) -> Py_ssize_t;
    // Compiled out under `Py_3_12`; the deprecation note records that the
    // function was removed in Python 3.12.
60    #[cfg(not(Py_3_12))]
61    #[deprecated(note = "Removed in Python 3.12")]
62    #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetSize")]
63    pub fn PyUnicode_GetSize(unicode: *mut PyObject) -> Py_ssize_t;
64    pub fn PyUnicode_ReadChar(unicode: *mut PyObject, index: Py_ssize_t) -> Py_UCS4;
65    pub fn PyUnicode_WriteChar(
66        unicode: *mut PyObject,
67        index: Py_ssize_t,
68        character: Py_UCS4,
69    ) -> c_int;
    // Takes the object through a `*mut *mut PyObject` rather than a plain pointer.
70    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Resize")]
71    pub fn PyUnicode_Resize(unicode: *mut *mut PyObject, length: Py_ssize_t) -> c_int;
72    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromEncodedObject")]
73    pub fn PyUnicode_FromEncodedObject(
74        obj: *mut PyObject,
75        encoding: *const c_char,
76        errors: *const c_char,
77    ) -> *mut PyObject;
78    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromObject")]
79    pub fn PyUnicode_FromObject(obj: *mut PyObject) -> *mut PyObject;
    // NOTE(review): the `va_list` variant is left commented out, presumably
    // because no `va_list` type is in scope in this file — confirm.
80    // #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromFormatV")]
81    // pub fn PyUnicode_FromFormatV(format: *const c_char, vargs: va_list) -> *mut PyObject;
    // C-variadic: everything after `format` is passed through `...`.
82    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromFormat")]
83    pub fn PyUnicode_FromFormat(format: *const c_char, ...) -> *mut PyObject;
84    #[cfg_attr(PyPy, link_name = "PyPyUnicode_InternInPlace")]
85    pub fn PyUnicode_InternInPlace(arg1: *mut *mut PyObject);
    // Compiled out under `Py_3_12`; marked deprecated when `Py_3_10` is set.
86    #[cfg(not(Py_3_12))]
87    #[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
88    pub fn PyUnicode_InternImmortal(arg1: *mut *mut PyObject);
89    #[cfg_attr(PyPy, link_name = "PyPyUnicode_InternFromString")]
90    pub fn PyUnicode_InternFromString(u: *const c_char) -> *mut PyObject;
    // Wide-character conversions; `wchar_t` is the type imported from `libc`.
91    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromWideChar")]
92    pub fn PyUnicode_FromWideChar(w: *const wchar_t, size: Py_ssize_t) -> *mut PyObject;
93    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsWideChar")]
94    pub fn PyUnicode_AsWideChar(
95        unicode: *mut PyObject,
96        w: *mut wchar_t,
97        size: Py_ssize_t,
98    ) -> Py_ssize_t;
99    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsWideCharString")]
100    pub fn PyUnicode_AsWideCharString(
101        unicode: *mut PyObject,
102        size: *mut Py_ssize_t,
103    ) -> *mut wchar_t;
104    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromOrdinal")]
105    pub fn PyUnicode_FromOrdinal(ordinal: c_int) -> *mut PyObject;
    // Only declared when the `Py_3_9` cfg is not set.
106    #[cfg(not(Py_3_9))]
107    pub fn PyUnicode_ClearFreeList() -> c_int;
108    #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetDefaultEncoding")]
109    pub fn PyUnicode_GetDefaultEncoding() -> *const c_char;
    // Entry points that take an `encoding` and `errors` C string. Those gated
    // on `not(Py_3_15)` are marked deprecated, and their notes name the
    // `PyCodec_*` function to use instead.
110    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Decode")]
111    pub fn PyUnicode_Decode(
112        s: *const c_char,
113        size: Py_ssize_t,
114        encoding: *const c_char,
115        errors: *const c_char,
116    ) -> *mut PyObject;
117    #[cfg(not(Py_3_15))]
118    #[deprecated(note = "use PyCodec_Decode() instead")]
119    pub fn PyUnicode_AsDecodedObject(
120        unicode: *mut PyObject,
121        encoding: *const c_char,
122        errors: *const c_char,
123    ) -> *mut PyObject;
124    #[cfg(not(Py_3_15))]
125    #[deprecated(note = "use PyCodec_Decode() instead")]
126    pub fn PyUnicode_AsDecodedUnicode(
127        unicode: *mut PyObject,
128        encoding: *const c_char,
129        errors: *const c_char,
130    ) -> *mut PyObject;
131    #[cfg(not(Py_3_15))]
132    #[deprecated(note = "use PyCodec_Encode() instead")]
133    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedObject")]
134    pub fn PyUnicode_AsEncodedObject(
135        unicode: *mut PyObject,
136        encoding: *const c_char,
137        errors: *const c_char,
138    ) -> *mut PyObject;
139    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedString")]
140    pub fn PyUnicode_AsEncodedString(
141        unicode: *mut PyObject,
142        encoding: *const c_char,
143        errors: *const c_char,
144    ) -> *mut PyObject;
145    #[cfg(not(Py_3_15))]
146    #[deprecated(note = "use PyCodec_Encode() instead")]
147    pub fn PyUnicode_AsEncodedUnicode(
148        unicode: *mut PyObject,
149        encoding: *const c_char,
150        errors: *const c_char,
151    ) -> *mut PyObject;
152    pub fn PyUnicode_BuildEncodingMap(string: *mut PyObject) -> *mut PyObject;
    // Per-encoding declarations. Each `*Stateful` variant has the same
    // parameters as its plain counterpart plus a trailing
    // `consumed: *mut Py_ssize_t`.
153    pub fn PyUnicode_DecodeUTF7(
154        string: *const c_char,
155        length: Py_ssize_t,
156        errors: *const c_char,
157    ) -> *mut PyObject;
158    pub fn PyUnicode_DecodeUTF7Stateful(
159        string: *const c_char,
160        length: Py_ssize_t,
161        errors: *const c_char,
162        consumed: *mut Py_ssize_t,
163    ) -> *mut PyObject;
164    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF8")]
165    pub fn PyUnicode_DecodeUTF8(
166        string: *const c_char,
167        length: Py_ssize_t,
168        errors: *const c_char,
169    ) -> *mut PyObject;
170    pub fn PyUnicode_DecodeUTF8Stateful(
171        string: *const c_char,
172        length: Py_ssize_t,
173        errors: *const c_char,
174        consumed: *mut Py_ssize_t,
175    ) -> *mut PyObject;
176    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8String")]
177    pub fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject;
    // Declared when `Py_3_10` is set or `Py_LIMITED_API` is not set.
178    #[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
179    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
180    pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
    // The UTF-32 and UTF-16 decoders additionally take `byteorder: *mut c_int`.
    // NOTE(review): the pointer is mutable, so presumably the callee may write
    // through it — confirm in the C API documentation.
181    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF32")]
182    pub fn PyUnicode_DecodeUTF32(
183        string: *const c_char,
184        length: Py_ssize_t,
185        errors: *const c_char,
186        byteorder: *mut c_int,
187    ) -> *mut PyObject;
188    pub fn PyUnicode_DecodeUTF32Stateful(
189        string: *const c_char,
190        length: Py_ssize_t,
191        errors: *const c_char,
192        byteorder: *mut c_int,
193        consumed: *mut Py_ssize_t,
194    ) -> *mut PyObject;
195    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF32String")]
196    pub fn PyUnicode_AsUTF32String(unicode: *mut PyObject) -> *mut PyObject;
197    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF16")]
198    pub fn PyUnicode_DecodeUTF16(
199        string: *const c_char,
200        length: Py_ssize_t,
201        errors: *const c_char,
202        byteorder: *mut c_int,
203    ) -> *mut PyObject;
204    pub fn PyUnicode_DecodeUTF16Stateful(
205        string: *const c_char,
206        length: Py_ssize_t,
207        errors: *const c_char,
208        byteorder: *mut c_int,
209        consumed: *mut Py_ssize_t,
210    ) -> *mut PyObject;
211    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF16String")]
212    pub fn PyUnicode_AsUTF16String(unicode: *mut PyObject) -> *mut PyObject;
213    pub fn PyUnicode_DecodeUnicodeEscape(
214        string: *const c_char,
215        length: Py_ssize_t,
216        errors: *const c_char,
217    ) -> *mut PyObject;
218    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeEscapeString")]
219    pub fn PyUnicode_AsUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
220    pub fn PyUnicode_DecodeRawUnicodeEscape(
221        string: *const c_char,
222        length: Py_ssize_t,
223        errors: *const c_char,
224    ) -> *mut PyObject;
225    pub fn PyUnicode_AsRawUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
226    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeLatin1")]
227    pub fn PyUnicode_DecodeLatin1(
228        string: *const c_char,
229        length: Py_ssize_t,
230        errors: *const c_char,
231    ) -> *mut PyObject;
232    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsLatin1String")]
233    pub fn PyUnicode_AsLatin1String(unicode: *mut PyObject) -> *mut PyObject;
234    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeASCII")]
235    pub fn PyUnicode_DecodeASCII(
236        string: *const c_char,
237        length: Py_ssize_t,
238        errors: *const c_char,
239    ) -> *mut PyObject;
240    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsASCIIString")]
241    pub fn PyUnicode_AsASCIIString(unicode: *mut PyObject) -> *mut PyObject;
    // The charmap pair takes the mapping as a `*mut PyObject`.
242    pub fn PyUnicode_DecodeCharmap(
243        string: *const c_char,
244        length: Py_ssize_t,
245        mapping: *mut PyObject,
246        errors: *const c_char,
247    ) -> *mut PyObject;
248    pub fn PyUnicode_AsCharmapString(
249        unicode: *mut PyObject,
250        mapping: *mut PyObject,
251    ) -> *mut PyObject;
    // The following functions are intentionally not declared in this file.
252    // skipped PyUnicode_DecodeMBCS
253    // skipped PyUnicode_DecodeMBCSStateful
254    // skipped PyUnicode_DecodeCodePageStateful
255    // skipped PyUnicode_AsMBCSString
256    // skipped PyUnicode_EncodeCodePage
257    pub fn PyUnicode_DecodeLocaleAndSize(
258        str: *const c_char,
259        len: Py_ssize_t,
260        errors: *const c_char,
261    ) -> *mut PyObject;
262    pub fn PyUnicode_DecodeLocale(str: *const c_char, errors: *const c_char) -> *mut PyObject;
263    pub fn PyUnicode_EncodeLocale(unicode: *mut PyObject, errors: *const c_char) -> *mut PyObject;
    // Both converters take their second argument as an untyped `*mut c_void`.
264    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FSConverter")]
265    pub fn PyUnicode_FSConverter(arg1: *mut PyObject, arg2: *mut c_void) -> c_int;
266    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FSDecoder")]
267    pub fn PyUnicode_FSDecoder(arg1: *mut PyObject, arg2: *mut c_void) -> c_int;
268    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeFSDefault")]
269    pub fn PyUnicode_DecodeFSDefault(s: *const c_char) -> *mut PyObject;
270    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeFSDefaultAndSize")]
271    pub fn PyUnicode_DecodeFSDefaultAndSize(s: *const c_char, size: Py_ssize_t) -> *mut PyObject;
272    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeFSDefault")]
273    pub fn PyUnicode_EncodeFSDefault(unicode: *mut PyObject) -> *mut PyObject;
    // `Concat` returns a `*mut PyObject`; `Append` and `AppendAndDel` instead
    // take the left operand as `*mut *mut PyObject` and return nothing.
274    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Concat")]
275    pub fn PyUnicode_Concat(left: *mut PyObject, right: *mut PyObject) -> *mut PyObject;
276    pub fn PyUnicode_Append(pleft: *mut *mut PyObject, right: *mut PyObject);
277    pub fn PyUnicode_AppendAndDel(pleft: *mut *mut PyObject, right: *mut PyObject);
278    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Split")]
279    pub fn PyUnicode_Split(
280        s: *mut PyObject,
281        sep: *mut PyObject,
282        maxsplit: Py_ssize_t,
283    ) -> *mut PyObject;
284    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Splitlines")]
285    pub fn PyUnicode_Splitlines(s: *mut PyObject, keepends: c_int) -> *mut PyObject;
286    pub fn PyUnicode_Partition(s: *mut PyObject, sep: *mut PyObject) -> *mut PyObject;
287    pub fn PyUnicode_RPartition(s: *mut PyObject, sep: *mut PyObject) -> *mut PyObject;
288    pub fn PyUnicode_RSplit(
289        s: *mut PyObject,
290        sep: *mut PyObject,
291        maxsplit: Py_ssize_t,
292    ) -> *mut PyObject;
293    pub fn PyUnicode_Translate(
294        str: *mut PyObject,
295        table: *mut PyObject,
296        errors: *const c_char,
297    ) -> *mut PyObject;
298    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Join")]
299    pub fn PyUnicode_Join(separator: *mut PyObject, seq: *mut PyObject) -> *mut PyObject;
300}
301
// Return type used by the `PyUnicode_Tailmatch` declaration: `c_int` when the
// `PyPy` cfg is set, `Py_ssize_t` otherwise. Exactly one of the two aliases is
// compiled in.
302#[cfg(PyPy)]
303type TailmatchResult = c_int;
304
305#[cfg(not(PyPy))]
306type TailmatchResult = Py_ssize_t;
307
// Foreign declarations for the remaining `PyUnicode_*` functions (tail match,
// find, count, replace, comparison, formatting and membership).
//
// `#[cfg_attr(PyPy, link_name = "PyPy…")]` makes an item link against the
// `PyPy`-prefixed symbol when the `PyPy` cfg is set; items without it keep
// their plain name in every configuration.
308extern_libpython! {
    // The return type is the private `TailmatchResult` alias, which resolves to
    // `c_int` under `PyPy` and to `Py_ssize_t` otherwise.
309    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Tailmatch")]
310    pub fn PyUnicode_Tailmatch(
311        str: *mut PyObject,
312        substr: *mut PyObject,
313        start: Py_ssize_t,
314        end: Py_ssize_t,
315        direction: c_int,
316    ) -> TailmatchResult;
    // `Find` takes the needle as an object; `FindChar` takes it as a single
    // `Py_UCS4`. Both return a `Py_ssize_t`.
317    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Find")]
318    pub fn PyUnicode_Find(
319        str: *mut PyObject,
320        substr: *mut PyObject,
321        start: Py_ssize_t,
322        end: Py_ssize_t,
323        direction: c_int,
324    ) -> Py_ssize_t;
325    pub fn PyUnicode_FindChar(
326        str: *mut PyObject,
327        ch: Py_UCS4,
328        start: Py_ssize_t,
329        end: Py_ssize_t,
330        direction: c_int,
331    ) -> Py_ssize_t;
332    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Count")]
333    pub fn PyUnicode_Count(
334        str: *mut PyObject,
335        substr: *mut PyObject,
336        start: Py_ssize_t,
337        end: Py_ssize_t,
338    ) -> Py_ssize_t;
339    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Replace")]
340    pub fn PyUnicode_Replace(
341        str: *mut PyObject,
342        substr: *mut PyObject,
343        replstr: *mut PyObject,
344        maxcount: Py_ssize_t,
345    ) -> *mut PyObject;
    // Comparisons returning a `c_int`; the second one compares against a C string.
346    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Compare")]
347    pub fn PyUnicode_Compare(left: *mut PyObject, right: *mut PyObject) -> c_int;
348    #[cfg_attr(PyPy, link_name = "PyPyUnicode_CompareWithASCIIString")]
349    pub fn PyUnicode_CompareWithASCIIString(left: *mut PyObject, right: *const c_char) -> c_int;
    // The two `EqualToUTF8*` functions are only declared when `Py_3_13` is set.
350    #[cfg(Py_3_13)]
351    pub fn PyUnicode_EqualToUTF8(unicode: *mut PyObject, string: *const c_char) -> c_int;
352    #[cfg(Py_3_13)]
353    pub fn PyUnicode_EqualToUTF8AndSize(
354        unicode: *mut PyObject,
355        string: *const c_char,
356        size: Py_ssize_t,
357    ) -> c_int;
    // Not declared in this file.
358    // skipped PyUnicode_Equal
    // Unlike the comparisons above, this one returns a `*mut PyObject` and
    // takes the comparison operator as a `c_int`.
359    pub fn PyUnicode_RichCompare(
360        left: *mut PyObject,
361        right: *mut PyObject,
362        op: c_int,
363    ) -> *mut PyObject;
364    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Format")]
365    pub fn PyUnicode_Format(format: *mut PyObject, args: *mut PyObject) -> *mut PyObject;
366    pub fn PyUnicode_Contains(container: *mut PyObject, element: *mut PyObject) -> c_int;
367    pub fn PyUnicode_IsIdentifier(s: *mut PyObject) -> c_int;
368}