[dynarmic] allow better dtrace diagnostics for code - do not clobber %rbp and save frame pointer #2653
9 changed files with 194 additions and 77 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -30,6 +30,8 @@ CMakeLists.txt.user*
|
||||||
# *nix related
|
# *nix related
|
||||||
# Common convention for backup or temporary files
|
# Common convention for backup or temporary files
|
||||||
*~
|
*~
|
||||||
|
*.core
|
||||||
|
dtrace-out/
|
||||||
|
|
||||||
# Visual Studio CMake settings
|
# Visual Studio CMake settings
|
||||||
CMakeSettings.json
|
CMakeSettings.json
|
||||||
|
|
|
@ -217,13 +217,13 @@ void A32EmitX64::ClearFastDispatchTable() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void A32EmitX64::GenTerminalHandlers() {
|
void A32EmitX64::GenTerminalHandlers() {
|
||||||
// PC ends up in ebp, location_descriptor ends up in rbx
|
// PC ends up in edi, location_descriptor ends up in rbx
|
||||||
const auto calculate_location_descriptor = [this] {
|
const auto calculate_location_descriptor = [this] {
|
||||||
// This calculation has to match up with IREmitter::PushRSB
|
// This calculation has to match up with IREmitter::PushRSB
|
||||||
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
|
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
|
||||||
code.shl(rbx, 32);
|
code.shl(rbx, 32);
|
||||||
code.mov(ecx, MJitStateReg(A32::Reg::PC));
|
code.mov(ecx, MJitStateReg(A32::Reg::PC));
|
||||||
code.mov(ebp, ecx);
|
code.mov(edi, ecx);
|
||||||
code.or_(rbx, rcx);
|
code.or_(rbx, rcx);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -238,7 +238,7 @@ void A32EmitX64::GenTerminalHandlers() {
|
||||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
|
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
|
||||||
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
|
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
|
||||||
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
|
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
|
||||||
code.jne(rsb_cache_miss);
|
code.jne(rsb_cache_miss, code.T_NEAR);
|
||||||
} else {
|
} else {
|
||||||
code.jne(code.GetReturnFromRunCodeAddress());
|
code.jne(code.GetReturnFromRunCodeAddress());
|
||||||
}
|
}
|
||||||
|
@ -251,20 +251,21 @@ void A32EmitX64::GenTerminalHandlers() {
|
||||||
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
|
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
|
||||||
calculate_location_descriptor();
|
calculate_location_descriptor();
|
||||||
code.L(rsb_cache_miss);
|
code.L(rsb_cache_miss);
|
||||||
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
code.mov(r8, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||||
code.mov(rbp, rbx);
|
//code.mov(r12d, MJitStateReg(A32::Reg::PC));
|
||||||
|
code.mov(r12, rbx);
|
||||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||||
code.crc32(rbp, r12);
|
code.crc32(r12, r8);
|
||||||
}
|
}
|
||||||
code.and_(ebp, fast_dispatch_table_mask);
|
code.and_(r12d, fast_dispatch_table_mask);
|
||||||
code.lea(rbp, ptr[r12 + rbp]);
|
code.lea(r12, ptr[r8 + r12]);
|
||||||
code.cmp(rbx, qword[rbp + offsetof(FastDispatchEntry, location_descriptor)]);
|
code.cmp(rbx, qword[r12 + offsetof(FastDispatchEntry, location_descriptor)]);
|
||||||
code.jne(fast_dispatch_cache_miss);
|
code.jne(fast_dispatch_cache_miss, code.T_NEAR);
|
||||||
code.jmp(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)]);
|
code.jmp(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)]);
|
||||||
code.L(fast_dispatch_cache_miss);
|
code.L(fast_dispatch_cache_miss);
|
||||||
code.mov(qword[rbp + offsetof(FastDispatchEntry, location_descriptor)], rbx);
|
code.mov(qword[r12 + offsetof(FastDispatchEntry, location_descriptor)], rbx);
|
||||||
code.LookupBlock();
|
code.LookupBlock();
|
||||||
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
|
code.mov(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)], rax);
|
||||||
code.jmp(rax);
|
code.jmp(rax);
|
||||||
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
|
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
|
||||||
|
|
||||||
|
|
|
@ -331,4 +331,8 @@ void Jit::DumpDisassembly() const {
|
||||||
impl->DumpDisassembly();
|
impl->DumpDisassembly();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> Jit::Disassemble() const {
|
||||||
|
return impl->Disassemble();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Dynarmic::A32
|
} // namespace Dynarmic::A32
|
||||||
|
|
|
@ -188,13 +188,14 @@ void A64EmitX64::ClearFastDispatchTable() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void A64EmitX64::GenTerminalHandlers() {
|
void A64EmitX64::GenTerminalHandlers() {
|
||||||
// PC ends up in rbp, location_descriptor ends up in rbx
|
// PC ends up in rdi (masked PC in rcx), location_descriptor ends up in rbx
|
||||||
|
static_assert(std::find(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLoc::R12));
|
||||||
const auto calculate_location_descriptor = [this] {
|
const auto calculate_location_descriptor = [this] {
|
||||||
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
|
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
|
||||||
// TODO: Optimization is available here based on known state of fpcr.
|
// TODO: Optimization is available here based on known state of fpcr.
|
||||||
code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
|
code.mov(rdi, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
|
||||||
code.mov(rcx, A64::LocationDescriptor::pc_mask);
|
code.mov(rcx, A64::LocationDescriptor::pc_mask);
|
||||||
code.and_(rcx, rbp);
|
code.and_(rcx, rdi);
|
||||||
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
|
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
|
||||||
code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
|
code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
|
||||||
code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
|
code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
|
||||||
|
@ -226,20 +227,21 @@ void A64EmitX64::GenTerminalHandlers() {
|
||||||
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
|
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
|
||||||
calculate_location_descriptor();
|
calculate_location_descriptor();
|
||||||
code.L(rsb_cache_miss);
|
code.L(rsb_cache_miss);
|
||||||
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
code.mov(r8, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||||
code.mov(rbp, rbx);
|
//code.mov(r12, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
|
||||||
|
code.mov(r12, rbx);
|
||||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||||
code.crc32(rbp, r12);
|
code.crc32(r12, r8);
|
||||||
}
|
}
|
||||||
code.and_(ebp, fast_dispatch_table_mask);
|
code.and_(r12d, fast_dispatch_table_mask);
|
||||||
code.lea(rbp, ptr[r12 + rbp]);
|
code.lea(r12, ptr[r8 + r12]);
|
||||||
code.cmp(rbx, qword[rbp + offsetof(FastDispatchEntry, location_descriptor)]);
|
code.cmp(rbx, qword[r12 + offsetof(FastDispatchEntry, location_descriptor)]);
|
||||||
code.jne(fast_dispatch_cache_miss);
|
code.jne(fast_dispatch_cache_miss, code.T_NEAR);
|
||||||
code.jmp(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)]);
|
code.jmp(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)]);
|
||||||
code.L(fast_dispatch_cache_miss);
|
code.L(fast_dispatch_cache_miss);
|
||||||
code.mov(qword[rbp + offsetof(FastDispatchEntry, location_descriptor)], rbx);
|
code.mov(qword[r12 + offsetof(FastDispatchEntry, location_descriptor)], rbx);
|
||||||
code.LookupBlock();
|
code.LookupBlock();
|
||||||
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
|
code.mov(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)], rax);
|
||||||
code.jmp(rax);
|
code.jmp(rax);
|
||||||
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
|
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,8 @@ static_assert(ABI_SHADOW_SPACE <= 32);
|
||||||
|
|
||||||
static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms, size_t frame_size) {
|
static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms, size_t frame_size) {
|
||||||
// We are initially 8 byte aligned because the return value is pushed onto an aligned stack after a call.
|
// We are initially 8 byte aligned because the return value is pushed onto an aligned stack after a call.
|
||||||
const size_t rsp_alignment = (num_gprs % 2 == 0) ? 8 : 0;
|
// The prologue also pushes %rbp (frame pointer), so count one extra GPR save when computing alignment.
|
||||||
|
const size_t rsp_alignment = ((num_gprs + 1) % 2 == 0) ? 8 : 0;
|
||||||
const size_t total_xmm_size = num_xmms * XMM_SIZE;
|
const size_t total_xmm_size = num_xmms * XMM_SIZE;
|
||||||
if (frame_size & 0xF) {
|
if (frame_size & 0xF) {
|
||||||
frame_size += 0x10 - (frame_size & 0xF);
|
frame_size += 0x10 - (frame_size & 0xF);
|
||||||
|
@ -49,6 +50,10 @@ void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
|
||||||
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
|
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
|
||||||
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
||||||
|
|
||||||
|
if (true) {
|
||||||
|
code.push(rbp);
|
||||||
|
code.mov(rbp, rsp);
|
||||||
|
}
|
||||||
for (auto const gpr : regs)
|
for (auto const gpr : regs)
|
||||||
if (HostLocIsGPR(gpr))
|
if (HostLocIsGPR(gpr))
|
||||||
code.push(HostLocToReg64(gpr));
|
code.push(HostLocToReg64(gpr));
|
||||||
|
@ -91,6 +96,9 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
|
||||||
for (auto const gpr : mcl::iterator::reverse(regs))
|
for (auto const gpr : mcl::iterator::reverse(regs))
|
||||||
if (HostLocIsGPR(gpr))
|
if (HostLocIsGPR(gpr))
|
||||||
code.pop(HostLocToReg64(gpr));
|
code.pop(HostLocToReg64(gpr));
|
||||||
|
if (true) {
|
||||||
|
code.pop(rbp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {
|
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {
|
||||||
|
|
|
@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
|
||||||
|
|
||||||
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
|
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
|
||||||
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
|
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
|
||||||
lock();
|
lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], u32(HaltReason::Step));
|
||||||
or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
|
|
||||||
|
|
||||||
SwitchMxcsrOnEntry();
|
SwitchMxcsrOnEntry();
|
||||||
jmp(ABI_PARAM2);
|
jmp(ABI_PARAM2);
|
||||||
|
@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
xor_(eax, eax);
|
xor_(eax, eax);
|
||||||
lock();
|
|
||||||
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
|
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
|
||||||
|
|
||||||
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
|
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
|
||||||
|
|
|
@ -37,6 +37,9 @@
|
||||||
#include "dynarmic/ir/basic_block.h"
|
#include "dynarmic/ir/basic_block.h"
|
||||||
#include "dynarmic/ir/opt_passes.h"
|
#include "dynarmic/ir/opt_passes.h"
|
||||||
|
|
||||||
|
#include "./A32/testenv.h"
|
||||||
|
#include "./A64/testenv.h"
|
||||||
|
|
||||||
using namespace Dynarmic;
|
using namespace Dynarmic;
|
||||||
|
|
||||||
std::string_view GetNameOfA32Instruction(u32 instruction) {
|
std::string_view GetNameOfA32Instruction(u32 instruction) {
|
||||||
|
@ -65,7 +68,10 @@ void PrintA32Instruction(u32 instruction) {
|
||||||
fmt::print("should_continue: {}\n\n", should_continue);
|
fmt::print("should_continue: {}\n\n", should_continue);
|
||||||
fmt::print("IR:\n");
|
fmt::print("IR:\n");
|
||||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||||
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
|
ArmTestEnv jit_env{};
|
||||||
|
Dynarmic::A32::UserConfig jit_user_config{};
|
||||||
|
jit_user_config.callbacks = &jit_env;
|
||||||
|
Optimization::Optimize(ir_block, jit_user_config, {});
|
||||||
fmt::print("Optimized IR:\n");
|
fmt::print("Optimized IR:\n");
|
||||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||||
}
|
}
|
||||||
|
@ -80,7 +86,10 @@ void PrintA64Instruction(u32 instruction) {
|
||||||
fmt::print("should_continue: {}\n\n", should_continue);
|
fmt::print("should_continue: {}\n\n", should_continue);
|
||||||
fmt::print("IR:\n");
|
fmt::print("IR:\n");
|
||||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||||
Optimization::Optimize(ir_block, A64::UserConfig{}, {});
|
A64TestEnv jit_env{};
|
||||||
|
Dynarmic::A64::UserConfig jit_user_config{};
|
||||||
|
jit_user_config.callbacks = &jit_env;
|
||||||
|
Optimization::Optimize(ir_block, jit_user_config, {});
|
||||||
fmt::print("Optimized IR:\n");
|
fmt::print("Optimized IR:\n");
|
||||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||||
}
|
}
|
||||||
|
@ -98,7 +107,10 @@ void PrintThumbInstruction(u32 instruction) {
|
||||||
fmt::print("should_continue: {}\n\n", should_continue);
|
fmt::print("should_continue: {}\n\n", should_continue);
|
||||||
fmt::print("IR:\n");
|
fmt::print("IR:\n");
|
||||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||||
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
|
ThumbTestEnv jit_env{};
|
||||||
|
Dynarmic::A32::UserConfig jit_user_config{};
|
||||||
|
jit_user_config.callbacks = &jit_env;
|
||||||
|
Optimization::Optimize(ir_block, jit_user_config, {});
|
||||||
fmt::print("Optimized IR:\n");
|
fmt::print("Optimized IR:\n");
|
||||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||||
}
|
}
|
||||||
|
@ -219,7 +231,7 @@ void ExecuteA32Instruction(u32 instruction) {
|
||||||
*(iter->second) = *value;
|
*(iter->second) = *value;
|
||||||
fmt::print("> {} = 0x{:08x}\n", reg_name, *value);
|
fmt::print("> {} = 0x{:08x}\n", reg_name, *value);
|
||||||
}
|
}
|
||||||
} else if (reg_name == "mem" || reg_name == "memory") {
|
} else if (reg_name.starts_with("m")) {
|
||||||
fmt::print("address: ");
|
fmt::print("address: ");
|
||||||
if (const auto address = get_value()) {
|
if (const auto address = get_value()) {
|
||||||
fmt::print("value: ");
|
fmt::print("value: ");
|
||||||
|
@ -228,7 +240,7 @@ void ExecuteA32Instruction(u32 instruction) {
|
||||||
fmt::print("> mem[0x{:08x}] = 0x{:08x}\n", *address, *value);
|
fmt::print("> mem[0x{:08x}] = 0x{:08x}\n", *address, *value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (reg_name == "end") {
|
} else if (reg_name == "exit" || reg_name == "end" || reg_name.starts_with("q")) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -244,6 +256,7 @@ void ExecuteA32Instruction(u32 instruction) {
|
||||||
env.MemoryWrite32(initial_pc + 4, 0xEAFFFFFE); // B +0
|
env.MemoryWrite32(initial_pc + 4, 0xEAFFFFFE); // B +0
|
||||||
|
|
||||||
cpu.Run();
|
cpu.Run();
|
||||||
|
fmt::print("{}", fmt::join(cpu.Disassemble(), "\n"));
|
||||||
|
|
||||||
fmt::print("Registers modified:\n");
|
fmt::print("Registers modified:\n");
|
||||||
for (size_t i = 0; i < regs.size(); ++i) {
|
for (size_t i = 0; i < regs.size(); ++i) {
|
||||||
|
|
131
tools/dtrace-tool.pl
Executable file
131
tools/dtrace-tool.pl
Executable file
|
@ -0,0 +1,131 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
# Basic script to run dtrace sampling over the program (requires Flamegraph)
|
||||||
|
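# Usage: ./dtrace-tool.pl [-p] [-<n>] -g   (options are processed in order; -p prompts for Hz, time and pid, -<n> selects the probe type)
|
||||||
|
# Or sample repeatedly until Ctrl-C with: ./dtrace-tool.pl [-p] [-<n>] -s   (run with -h for the option list)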
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use POSIX qw(strftime);
|
||||||
|
|
||||||
|
my $input;
|
||||||
|
my $sampling_hz = '4000';
|
||||||
|
my $sampling_time = '5';
|
||||||
|
my $sampling_pid = `pgrep eden`;
|
||||||
|
my $sampling_program = 'eden';
|
||||||
|
my $sampling_type = 3;
|
||||||
|
|
||||||
|
sub dtrace_ask_params {
|
||||||
|
my $is_ok = 'Y';
|
||||||
|
do {
|
||||||
|
print "Sampling HZ [" . $sampling_hz . "]: ";
|
||||||
|
chomp($input = <STDIN>);
|
||||||
|
$sampling_hz = $input || $sampling_hz;
|
||||||
|
|
||||||
|
print "Sampling time [" . $sampling_time . "]: ";
|
||||||
|
chomp($input = <STDIN>);
|
||||||
|
$sampling_time = $input || $sampling_time;
|
||||||
|
|
||||||
|
print "Sampling pid [" . $sampling_pid . "]: ";
|
||||||
|
chomp($input = <STDIN>);
|
||||||
|
$sampling_pid = $input || $sampling_pid;
|
||||||
|
|
||||||
|
print "Are these settings correct?: [" . $is_ok . "]\n";
|
||||||
|
print "HZ = " . $sampling_hz . "\nTime = " . $sampling_time . "\nPID = " . $sampling_pid . "\n";
|
||||||
|
chomp($input = <STDIN>);
|
||||||
|
$is_ok = $input || $is_ok;
|
||||||
|
} while ($is_ok eq 'n');
|
||||||
|
}
|
||||||
|
|
||||||
|
sub dtrace_probe_profiling {
|
||||||
|
if ($sampling_type eq 0) {
|
||||||
|
return "
|
||||||
|
profile-".$sampling_hz." /pid == ".$sampling_pid." && arg0/ {
|
||||||
|
@[stack(100)] = count();
|
||||||
|
}
|
||||||
|
profile-".$sampling_hz." /pid == ".$sampling_pid." && arg1/ {
|
||||||
|
@[ustack(100)] = count();
|
||||||
|
}
|
||||||
|
tick-".$sampling_time."s {
|
||||||
|
exit(0);
|
||||||
|
}";
|
||||||
|
} elsif ($sampling_type eq 1) {
|
||||||
|
return "
|
||||||
|
syscall:::entry /pid == ".$sampling_pid."/ {
|
||||||
|
\@traces[ustack(100)] = count();
|
||||||
|
}
|
||||||
|
tick-".$sampling_time."s {
|
||||||
|
exit(0);
|
||||||
|
}";
|
||||||
|
} elsif ($sampling_type eq 2) {
|
||||||
|
return "
|
||||||
|
profile-".$sampling_hz." /pid == ".$sampling_pid." && arg0/ {
|
||||||
|
@[stringof(curthread->td_name), stack(100)] = count();
|
||||||
|
}
|
||||||
|
profile-".$sampling_hz." /pid == ".$sampling_pid." && arg1/ {
|
||||||
|
@[stringof(curthread->td_name), ustack(100)] = count();
|
||||||
|
}
|
||||||
|
tick-".$sampling_time."s {
|
||||||
|
exit(0);
|
||||||
|
}";
|
||||||
|
} elsif ($sampling_type eq 3) {
|
||||||
|
return "
|
||||||
|
io::start /pid == ".$sampling_pid."/ {
|
||||||
|
@[ustack(100)] = count();
|
||||||
|
}
|
||||||
|
tick-".$sampling_time."s {
|
||||||
|
exit(0);
|
||||||
|
}";
|
||||||
|
} else {
|
||||||
|
die "idk";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub dtrace_generate {
|
||||||
|
my @date = (localtime(time))[5, 4, 3, 2, 1, 0];
|
||||||
|
$date[0] += 1900;
|
||||||
|
$date[1]++;
|
||||||
|
my $fmt_date = sprintf "%4d-%02d-%02d_%02d-%02d-%02d", @date;
|
||||||
|
my $trace_dir = "dtrace-out";
|
||||||
|
my $trace_file = $trace_dir . "/" . $fmt_date . ".user_stacks";
|
||||||
|
my $trace_fold = $trace_dir . "/" . $fmt_date . ".fold";
|
||||||
|
my $trace_svg = $trace_dir . "/" . $fmt_date . ".svg";
|
||||||
|
my $trace_probe = dtrace_probe_profiling;
|
||||||
|
|
||||||
|
print $trace_probe . "\n";
|
||||||
|
system "sudo", "dtrace", "-Z", "-n", $trace_probe, "-o", $trace_file;
|
||||||
|
die "$!" if $?;
|
||||||
|
|
||||||
|
open (my $trace_fold_handle, ">", $trace_fold) or die "$!";
|
||||||
|
#run ["perl", "../FlameGraph/stackcollapse.pl", $trace_file], ">", \my $fold_output;
|
||||||
|
my $fold_output = `perl ../FlameGraph/stackcollapse.pl $trace_file`;
|
||||||
|
print $trace_fold_handle $fold_output;
|
||||||
|
|
||||||
|
open (my $trace_svg_handle, ">", $trace_svg) or die "$!";
|
||||||
|
#run ["perl", "../FlameGraph/flamegraph.pl", $trace_fold], ">", \my $svg_output;
|
||||||
|
my $svg_output = `perl ../FlameGraph/flamegraph.pl $trace_fold`;
|
||||||
|
print $trace_svg_handle $svg_output;
|
||||||
|
|
||||||
|
system "sudo", "chmod", "0666", $trace_file;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach my $i (0 .. $#ARGV) {
|
||||||
|
if ($ARGV[$i] eq '-h') {
|
||||||
|
print "Usage: $0\n";
|
||||||
|
printf "%-20s%s\n", "-p", "Prompt for parameters";
|
||||||
|
printf "%-20s%s\n", "-g", "Generate dtrace output";
|
||||||
|
printf "%-20s%s\n", "-s", "Continously generate output until Ctrl^C";
|
||||||
|
printf "%-20s%s\n", "-<n>", "Select dtrace type";
|
||||||
|
} elsif ($ARGV[$i] eq '-g') {
|
||||||
|
dtrace_generate;
|
||||||
|
} elsif ($ARGV[$i] eq '-s') {
|
||||||
|
while (1) {
|
||||||
|
dtrace_generate;
|
||||||
|
}
|
||||||
|
} elsif ($ARGV[$i] eq '-p') {
|
||||||
|
dtrace_ask_params;
|
||||||
|
} else {
|
||||||
|
$sampling_type = substr $ARGV[$i], 1;
|
||||||
|
print "Select: ".$sampling_type."\n";
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,42 +0,0 @@
|
||||||
#!/usr/local/bin/bash -ex
|
|
||||||
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|
||||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
||||||
# Basic script to run dtrace sampling over the program (requires Flamegraph)
|
|
||||||
# Usage is either running as: ./dtrace-tool.sh pid (then input the pid of the process)
|
|
||||||
# Or just run directly with: ./dtrace-tool.sh <command>
|
|
||||||
FLAMEGRAPH_DIR=".."
|
|
||||||
function fail {
|
|
||||||
printf '%s\n' "$1" >&2
|
|
||||||
exit "${2-1}"
|
|
||||||
}
|
|
||||||
[ -f $FLAMEGRAPH_DIR/FlameGraph/stackcollapse.pl ] || fail 'Where is flamegraph?'
|
|
||||||
#[ which dtrace ] || fail 'Needs DTrace installed'
|
|
||||||
read -p "Sampling Hz [800]: " TRACE_CFG_HZ
|
|
||||||
if [ -z "${TRACE_CFG_HZ}" ]; then
|
|
||||||
TRACE_CFG_HZ=800
|
|
||||||
fi
|
|
||||||
read -p "Sampling time [5] sec: " TRACE_CFG_TIME
|
|
||||||
if [ -z "${TRACE_CFG_TIME}" ]; then
|
|
||||||
TRACE_CFG_TIME=5
|
|
||||||
fi
|
|
||||||
TRACE_FILE=dtrace-out.user_stacks
|
|
||||||
TRACE_FOLD=dtrace-out.fold
|
|
||||||
TRACE_SVG=dtrace-out.svg
|
|
||||||
ps
|
|
||||||
if [[ $1 = 'pid' ]]; then
|
|
||||||
read -p "PID: " TRACE_CFG_PID
|
|
||||||
sudo echo 'Sudo!'
|
|
||||||
else
|
|
||||||
[[ -f $1 && $1 ]] || fail 'Usage: ./tools/dtrace-profile.sh <path to program>'
|
|
||||||
echo "Executing: '$@'"
|
|
||||||
sudo echo 'Sudo!'
|
|
||||||
"$@" &
|
|
||||||
TRACE_CFG_PID=$!
|
|
||||||
fi
|
|
||||||
TRACE_PROBE="profile-${TRACE_CFG_HZ} /pid == ${TRACE_CFG_PID} && arg1/ { @[ustack()] = count(); } tick-${TRACE_CFG_TIME}s { exit(0); }"
|
|
||||||
rm -- $TRACE_SVG || echo 'Skip'
|
|
||||||
sudo dtrace -x ustackframes=100 -Z -n "$TRACE_PROBE" -o $TRACE_FILE 2>/dev/null || exit
|
|
||||||
perl $FLAMEGRAPH_DIR/FlameGraph/stackcollapse.pl $TRACE_FILE > $TRACE_FOLD || exit
|
|
||||||
perl $FLAMEGRAPH_DIR/FlameGraph/flamegraph.pl $TRACE_FOLD > $TRACE_SVG || exit
|
|
||||||
sudo chmod 0666 $TRACE_FILE
|
|
||||||
rm -- $TRACE_FILE $TRACE_FOLD
|
|
Loading…
Add table
Add a link
Reference in a new issue