blob: 3ace6b20e1041253961666d2a6993d5654a65cc5 [file] [log] [blame]
//! Disassembly calling function for most targets.
use crate::Function;
use std::{collections::HashSet, env, process::Command, str};
// Extracts the "shim" name from the `symbol`.
fn normalize(mut symbol: &str) -> String {
// Remove trailing colon:
if symbol.ends_with(':') {
symbol = &symbol[..symbol.len() - 1];
}
if symbol.ends_with('>') {
symbol = &symbol[..symbol.len() - 1];
}
if let Some(idx) = symbol.find('<') {
symbol = &symbol[idx + 1..];
}
let mut symbol = rustc_demangle::demangle(symbol).to_string();
symbol = match symbol.rfind("::h") {
Some(i) => symbol[..i].to_string(),
None => symbol.to_string(),
};
// Remove Rust paths
if let Some(last_colon) = symbol.rfind(':') {
symbol = (&symbol[last_colon + 1..]).to_string();
}
// Normalize to no leading underscore to handle platforms that may
// inject extra ones in symbol names.
while symbol.starts_with('_') {
symbol.remove(0);
}
// Windows/x86 has a suffix such as @@4.
if let Some(idx) = symbol.find("@@") {
symbol = (&symbol[..idx]).to_string();
}
symbol
}
pub(crate) fn disassemble_myself() -> HashSet<Function> {
let me = env::current_exe().expect("failed to get current exe");
let disassembly = if cfg!(target_os = "windows") && cfg!(target_env = "msvc") {
let target = if cfg!(target_arch = "x86_64") {
"x86_64-pc-windows-msvc"
} else if cfg!(target_arch = "x86") {
"i686-pc-windows-msvc"
} else {
panic!("disassembly unimplemented")
};
let mut cmd = cc::windows_registry::find(target, "dumpbin.exe")
.expect("failed to find `dumpbin` tool");
let output = cmd
.arg("/DISASM")
.arg(&me)
.output()
.expect("failed to execute dumpbin");
println!(
"{}\n{}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success());
// Windows does not return valid UTF-8 output:
String::from_utf8_lossy(Vec::leak(output.stdout))
} else if cfg!(target_os = "windows") {
panic!("disassembly unimplemented")
} else {
let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string());
let add_args = if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") {
// Target features need to be enabled for LLVM objdump on Macos ARM64
vec!["--mattr=+v8.6a,+crypto,+tme"]
} else {
vec![]
};
let output = Command::new(objdump.clone())
.arg("--disassemble")
.arg("--no-show-raw-insn")
.args(add_args)
.arg(&me)
.output()
.unwrap_or_else(|_| panic!("failed to execute objdump. OBJDUMP={}", objdump));
println!(
"{}\n{}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success());
String::from_utf8_lossy(Vec::leak(output.stdout))
};
parse(&disassembly)
}
fn parse(output: &str) -> HashSet<Function> {
let mut lines = output.lines();
println!(
"First 100 lines of the disassembly input containing {} lines:",
lines.clone().count()
);
for line in output.lines().take(100) {
println!("{}", line);
}
let mut functions = HashSet::new();
let mut cached_header = None;
while let Some(header) = cached_header.take().or_else(|| lines.next()) {
if !header.ends_with(':') || !header.contains("stdarch_test_shim") {
continue;
}
eprintln!("header: {}", header);
let symbol = normalize(header);
eprintln!("normalized symbol: {}", symbol);
let mut instructions = Vec::new();
while let Some(instruction) = lines.next() {
if instruction.ends_with(':') {
cached_header = Some(instruction);
break;
}
if instruction.is_empty() {
cached_header = None;
break;
}
let mut parts = if cfg!(target_env = "msvc") {
// Each line looks like:
//
// > $addr: ab cd ef $instr..
// > 00 12 # this line os optional
if instruction.starts_with(" ") {
continue;
}
instruction
.split_whitespace()
.skip(1)
.skip_while(|s| s.len() == 2 && usize::from_str_radix(s, 16).is_ok())
.map(std::string::ToString::to_string)
.skip_while(|s| *s == "lock") // skip x86-specific prefix
.collect::<Vec<String>>()
} else {
// objdump with --no-show-raw-insn
// Each line of instructions should look like:
//
// $rel_offset: $instruction...
instruction
.split_whitespace()
.skip(1)
.skip_while(|s| *s == "lock") // skip x86-specific prefix
.map(std::string::ToString::to_string)
.collect::<Vec<String>>()
};
if cfg!(target_arch = "aarch64") {
// Normalize [us]shll.* ..., #0 instructions to the preferred form: [us]xtl.* ...
// as LLVM objdump does not do that.
// See https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/UXTL--UXTL2--Unsigned-extend-Long--an-alias-of-USHLL--USHLL2-
// and https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/SXTL--SXTL2--Signed-extend-Long--an-alias-of-SSHLL--SSHLL2-
// for details.
match (parts.first(), parts.last()) {
(Some(instr), Some(last_arg))
if (instr.starts_with("ushll.") || instr.starts_with("sshll."))
&& last_arg == "#0" =>
{
assert_eq!(parts.len(), 4);
let mut new_parts = Vec::with_capacity(3);
let new_instr = format!("{}{}{}", &instr[..1], "xtl", &instr[5..]);
new_parts.push(new_instr);
new_parts.push(parts[1].clone());
new_parts.push(parts[2][0..parts[2].len() - 1].to_owned()); // strip trailing comma
parts = new_parts;
}
_ => {}
};
}
instructions.push(parts.join(" "));
}
let function = Function {
name: symbol,
instrs: instructions,
};
assert!(functions.insert(function));
}
eprintln!("all found functions dump:");
for k in &functions {
eprintln!(" f: {}", k.name);
}
functions
}