pyo3_introspection/
introspection.rs

1use crate::model::{Class, Function, Module};
2use anyhow::{bail, ensure, Context, Result};
3use goblin::elf::Elf;
4use goblin::mach::load_command::CommandVariant;
5use goblin::mach::symbols::{NO_SECT, N_SECT};
6use goblin::mach::{Mach, MachO, SingleArch};
7use goblin::pe::PE;
8use goblin::Object;
9use serde::Deserialize;
10use std::collections::HashMap;
11use std::fs;
12use std::path::Path;
13
14/// Introspect a cdylib built with PyO3 and returns the definition of a Python module.
15///
16/// This function currently supports the ELF (most *nix including Linux), Match-O (macOS) and PE (Windows) formats.
17pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
18    let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
19    parse_chunks(&chunks, main_module_name)
20}
21
22/// Parses the introspection chunks found in the binary
23fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
24    let chunks_by_id = chunks
25        .iter()
26        .map(|c| {
27            (
28                match c {
29                    Chunk::Module { id, .. } => id,
30                    Chunk::Class { id, .. } => id,
31                    Chunk::Function { id, .. } => id,
32                },
33                c,
34            )
35        })
36        .collect::<HashMap<_, _>>();
37    // We look for the root chunk
38    for chunk in chunks {
39        if let Chunk::Module {
40            name,
41            members,
42            id: _,
43        } = chunk
44        {
45            if name == main_module_name {
46                return parse_module(name, members, &chunks_by_id);
47            }
48        }
49    }
50    bail!("No module named {main_module_name} found")
51}
52
53fn parse_module(
54    name: &str,
55    members: &[String],
56    chunks_by_id: &HashMap<&String, &Chunk>,
57) -> Result<Module> {
58    let mut modules = Vec::new();
59    let mut classes = Vec::new();
60    let mut functions = Vec::new();
61    for member in members {
62        if let Some(chunk) = chunks_by_id.get(member) {
63            match chunk {
64                Chunk::Module {
65                    name,
66                    members,
67                    id: _,
68                } => {
69                    modules.push(parse_module(name, members, chunks_by_id)?);
70                }
71                Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
72                Chunk::Function { name, id: _ } => functions.push(Function { name: name.into() }),
73            }
74        }
75    }
76    Ok(Module {
77        name: name.into(),
78        modules,
79        classes,
80        functions,
81    })
82}
83
84fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
85    let library_content =
86        fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
87    match Object::parse(&library_content)
88        .context("The built library is not valid or not supported by our binary parser")?
89    {
90        Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
91        Object::Mach(Mach::Binary(macho)) => {
92            find_introspection_chunks_in_macho(&macho, &library_content)
93        }
94        Object::Mach(Mach::Fat(multi_arch)) => {
95            for arch in &multi_arch {
96                match arch? {
97                    SingleArch::MachO(macho) => {
98                        return find_introspection_chunks_in_macho(&macho, &library_content)
99                    }
100                    SingleArch::Archive(_) => (),
101                }
102            }
103            bail!("No Mach-o chunk found in the multi-arch Mach-o container")
104        }
105        Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
106        _ => {
107            bail!("Only ELF, Mach-o and PE containers can be introspected")
108        }
109    }
110}
111
112fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
113    let mut chunks = Vec::new();
114    for sym in &elf.syms {
115        if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
116            let section_header = &elf.section_headers[sym.st_shndx];
117            let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
118            chunks.push(read_symbol_value_with_ptr_and_len(
119                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
120                0,
121                library_content,
122                elf.is_64,
123            )?);
124        }
125    }
126    Ok(chunks)
127}
128
129fn find_introspection_chunks_in_macho(
130    macho: &MachO<'_>,
131    library_content: &[u8],
132) -> Result<Vec<Chunk>> {
133    if !macho.little_endian {
134        bail!("Only little endian Mach-o binaries are supported");
135    }
136    ensure!(
137        !macho.load_commands.iter().any(|command| {
138            matches!(command.command, CommandVariant::DyldChainedFixups(_))
139        }),
140        "Mach-O binaries with fixup chains are not supported yet, to avoid using fixup chains, use `--codegen=link-arg=-no_fixup_chains` option."
141    );
142
143    let sections = macho
144        .segments
145        .sections()
146        .flatten()
147        .map(|t| t.map(|s| s.0))
148        .collect::<Result<Vec<_>, _>>()?;
149    let mut chunks = Vec::new();
150    for symbol in macho.symbols() {
151        let (name, nlist) = symbol?;
152        if nlist.is_global()
153            && nlist.get_type() == N_SECT
154            && nlist.n_sect != NO_SECT as usize
155            && is_introspection_symbol(name)
156        {
157            let section = &sections[nlist.n_sect - 1]; // Sections are counted from 1
158            let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
159            chunks.push(read_symbol_value_with_ptr_and_len(
160                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
161                0,
162                library_content,
163                macho.is_64,
164            )?);
165        }
166    }
167    Ok(chunks)
168}
169
170fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
171    let rdata_data_section = pe
172        .sections
173        .iter()
174        .find(|section| section.name().unwrap_or_default() == ".rdata")
175        .context("No .rdata section found")?;
176    let rdata_shift = pe.image_base
177        + usize::try_from(rdata_data_section.virtual_address)
178            .context(".rdata virtual_address overflow")?
179        - usize::try_from(rdata_data_section.pointer_to_raw_data)
180            .context(".rdata pointer_to_raw_data overflow")?;
181
182    let mut chunks = Vec::new();
183    for export in &pe.exports {
184        if is_introspection_symbol(export.name.unwrap_or_default()) {
185            chunks.push(read_symbol_value_with_ptr_and_len(
186                &library_content[export.offset.context("No symbol offset")?..],
187                rdata_shift,
188                library_content,
189                pe.is_64,
190            )?);
191        }
192    }
193    Ok(chunks)
194}
195
196fn read_symbol_value_with_ptr_and_len(
197    value_slice: &[u8],
198    shift: usize,
199    full_library_content: &[u8],
200    is_64: bool,
201) -> Result<Chunk> {
202    let (ptr, len) = if is_64 {
203        let (ptr, len) = value_slice[..16].split_at(8);
204        let ptr = usize::try_from(u64::from_le_bytes(
205            ptr.try_into().context("Too short symbol value")?,
206        ))
207        .context("Pointer overflow")?;
208        let len = usize::try_from(u64::from_le_bytes(
209            len.try_into().context("Too short symbol value")?,
210        ))
211        .context("Length overflow")?;
212        (ptr, len)
213    } else {
214        let (ptr, len) = value_slice[..8].split_at(4);
215        let ptr = usize::try_from(u32::from_le_bytes(
216            ptr.try_into().context("Too short symbol value")?,
217        ))
218        .context("Pointer overflow")?;
219        let len = usize::try_from(u32::from_le_bytes(
220            len.try_into().context("Too short symbol value")?,
221        ))
222        .context("Length overflow")?;
223        (ptr, len)
224    };
225    let chunk = &full_library_content[ptr - shift..ptr - shift + len];
226    serde_json::from_slice(chunk).with_context(|| {
227        format!(
228            "Failed to parse introspection chunk: '{}'",
229            String::from_utf8_lossy(chunk)
230        )
231    })
232}
233
/// Tells whether `name` is the name of a symbol holding an introspection chunk.
///
/// A single leading underscore, if present, is ignored; the rest of the name must start with
/// `PYO3_INTROSPECTION_0_`.
fn is_introspection_symbol(name: &str) -> bool {
    const PREFIX: &str = "PYO3_INTROSPECTION_0_";
    let unprefixed = match name.strip_prefix('_') {
        Some(rest) => rest,
        None => name,
    };
    unprefixed.starts_with(PREFIX)
}
239
/// A piece of introspection data stored in the binary.
///
/// Chunks are deserialized from JSON objects; the `type` field of the object selects the variant
/// using its lowercase name.
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum Chunk {
    /// A module and the ids of the chunks it contains.
    Module {
        /// Id used by other modules to list this chunk in their `members`.
        id: String,
        /// Name of the module.
        name: String,
        /// Ids of the chunks that are members of this module.
        members: Vec<String>,
    },
    /// A class.
    Class {
        /// Id used by modules to list this chunk in their `members`.
        id: String,
        /// Name of the class.
        name: String,
    },
    /// A function.
    Function {
        /// Id used by modules to list this chunk in their `members`.
        id: String,
        /// Name of the function.
        name: String,
    },
}
256}