1use crate::model::{Class, Function, Module};
2use anyhow::{bail, ensure, Context, Result};
3use goblin::elf::Elf;
4use goblin::mach::load_command::CommandVariant;
5use goblin::mach::symbols::{NO_SECT, N_SECT};
6use goblin::mach::{Mach, MachO, SingleArch};
7use goblin::pe::PE;
8use goblin::Object;
9use serde::Deserialize;
10use std::collections::HashMap;
11use std::fs;
12use std::path::Path;
13
14pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
18 let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
19 parse_chunks(&chunks, main_module_name)
20}
21
22fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
24 let chunks_by_id = chunks
25 .iter()
26 .map(|c| {
27 (
28 match c {
29 Chunk::Module { id, .. } => id,
30 Chunk::Class { id, .. } => id,
31 Chunk::Function { id, .. } => id,
32 },
33 c,
34 )
35 })
36 .collect::<HashMap<_, _>>();
37 for chunk in chunks {
39 if let Chunk::Module {
40 name,
41 members,
42 id: _,
43 } = chunk
44 {
45 if name == main_module_name {
46 return parse_module(name, members, &chunks_by_id);
47 }
48 }
49 }
50 bail!("No module named {main_module_name} found")
51}
52
53fn parse_module(
54 name: &str,
55 members: &[String],
56 chunks_by_id: &HashMap<&String, &Chunk>,
57) -> Result<Module> {
58 let mut modules = Vec::new();
59 let mut classes = Vec::new();
60 let mut functions = Vec::new();
61 for member in members {
62 if let Some(chunk) = chunks_by_id.get(member) {
63 match chunk {
64 Chunk::Module {
65 name,
66 members,
67 id: _,
68 } => {
69 modules.push(parse_module(name, members, chunks_by_id)?);
70 }
71 Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
72 Chunk::Function { name, id: _ } => functions.push(Function { name: name.into() }),
73 }
74 }
75 }
76 Ok(Module {
77 name: name.into(),
78 modules,
79 classes,
80 functions,
81 })
82}
83
84fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
85 let library_content =
86 fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
87 match Object::parse(&library_content)
88 .context("The built library is not valid or not supported by our binary parser")?
89 {
90 Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
91 Object::Mach(Mach::Binary(macho)) => {
92 find_introspection_chunks_in_macho(&macho, &library_content)
93 }
94 Object::Mach(Mach::Fat(multi_arch)) => {
95 for arch in &multi_arch {
96 match arch? {
97 SingleArch::MachO(macho) => {
98 return find_introspection_chunks_in_macho(&macho, &library_content)
99 }
100 SingleArch::Archive(_) => (),
101 }
102 }
103 bail!("No Mach-o chunk found in the multi-arch Mach-o container")
104 }
105 Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
106 _ => {
107 bail!("Only ELF, Mach-o and PE containers can be introspected")
108 }
109 }
110}
111
112fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
113 let mut chunks = Vec::new();
114 for sym in &elf.syms {
115 if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
116 let section_header = &elf.section_headers[sym.st_shndx];
117 let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
118 chunks.push(read_symbol_value_with_ptr_and_len(
119 &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
120 0,
121 library_content,
122 elf.is_64,
123 )?);
124 }
125 }
126 Ok(chunks)
127}
128
129fn find_introspection_chunks_in_macho(
130 macho: &MachO<'_>,
131 library_content: &[u8],
132) -> Result<Vec<Chunk>> {
133 if !macho.little_endian {
134 bail!("Only little endian Mach-o binaries are supported");
135 }
136 ensure!(
137 !macho.load_commands.iter().any(|command| {
138 matches!(command.command, CommandVariant::DyldChainedFixups(_))
139 }),
140 "Mach-O binaries with fixup chains are not supported yet, to avoid using fixup chains, use `--codegen=link-arg=-no_fixup_chains` option."
141 );
142
143 let sections = macho
144 .segments
145 .sections()
146 .flatten()
147 .map(|t| t.map(|s| s.0))
148 .collect::<Result<Vec<_>, _>>()?;
149 let mut chunks = Vec::new();
150 for symbol in macho.symbols() {
151 let (name, nlist) = symbol?;
152 if nlist.is_global()
153 && nlist.get_type() == N_SECT
154 && nlist.n_sect != NO_SECT as usize
155 && is_introspection_symbol(name)
156 {
157 let section = §ions[nlist.n_sect - 1]; let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
159 chunks.push(read_symbol_value_with_ptr_and_len(
160 &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
161 0,
162 library_content,
163 macho.is_64,
164 )?);
165 }
166 }
167 Ok(chunks)
168}
169
170fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
171 let rdata_data_section = pe
172 .sections
173 .iter()
174 .find(|section| section.name().unwrap_or_default() == ".rdata")
175 .context("No .rdata section found")?;
176 let rdata_shift = pe.image_base
177 + usize::try_from(rdata_data_section.virtual_address)
178 .context(".rdata virtual_address overflow")?
179 - usize::try_from(rdata_data_section.pointer_to_raw_data)
180 .context(".rdata pointer_to_raw_data overflow")?;
181
182 let mut chunks = Vec::new();
183 for export in &pe.exports {
184 if is_introspection_symbol(export.name.unwrap_or_default()) {
185 chunks.push(read_symbol_value_with_ptr_and_len(
186 &library_content[export.offset.context("No symbol offset")?..],
187 rdata_shift,
188 library_content,
189 pe.is_64,
190 )?);
191 }
192 }
193 Ok(chunks)
194}
195
196fn read_symbol_value_with_ptr_and_len(
197 value_slice: &[u8],
198 shift: usize,
199 full_library_content: &[u8],
200 is_64: bool,
201) -> Result<Chunk> {
202 let (ptr, len) = if is_64 {
203 let (ptr, len) = value_slice[..16].split_at(8);
204 let ptr = usize::try_from(u64::from_le_bytes(
205 ptr.try_into().context("Too short symbol value")?,
206 ))
207 .context("Pointer overflow")?;
208 let len = usize::try_from(u64::from_le_bytes(
209 len.try_into().context("Too short symbol value")?,
210 ))
211 .context("Length overflow")?;
212 (ptr, len)
213 } else {
214 let (ptr, len) = value_slice[..8].split_at(4);
215 let ptr = usize::try_from(u32::from_le_bytes(
216 ptr.try_into().context("Too short symbol value")?,
217 ))
218 .context("Pointer overflow")?;
219 let len = usize::try_from(u32::from_le_bytes(
220 len.try_into().context("Too short symbol value")?,
221 ))
222 .context("Length overflow")?;
223 (ptr, len)
224 };
225 let chunk = &full_library_content[ptr - shift..ptr - shift + len];
226 serde_json::from_slice(chunk).with_context(|| {
227 format!(
228 "Failed to parse introspection chunk: '{}'",
229 String::from_utf8_lossy(chunk)
230 )
231 })
232}
233
/// Tells whether `name` is an introspection symbol: after dropping at most one leading
/// underscore, it must start with `PYO3_INTROSPECTION_0_`.
fn is_introspection_symbol(name: &str) -> bool {
    const MARKER: &str = "PYO3_INTROSPECTION_0_";
    let unprefixed = match name.strip_prefix('_') {
        Some(rest) => rest,
        None => name,
    };
    unprefixed.starts_with(MARKER)
}
239
240#[derive(Deserialize)]
241#[serde(tag = "type", rename_all = "lowercase")]
242enum Chunk {
243 Module {
244 id: String,
245 name: String,
246 members: Vec<String>,
247 },
248 Class {
249 id: String,
250 name: String,
251 },
252 Function {
253 id: String,
254 name: String,
255 },
256}