Compare commits
31 commits
daa2a06d76
...
6c448cb1c8
Author | SHA1 | Date | |
---|---|---|---|
6c448cb1c8 | |||
a31ba9555d | |||
ab869b9665 | |||
1e6b253b18 | |||
5db0a75611 | |||
70cb54197a | |||
9beda23abf | |||
2689ff9968 | |||
e9f2b025fc | |||
1858b9e452 | |||
6d91740e11 | |||
5db5a625be | |||
7a4ad4c941 | |||
02136d9347 | |||
9c15e7ffc1 | |||
cc1c2e5d54 | |||
5f6ea24db3 | |||
afac19c011 | |||
33b8d02c20 | |||
3ea30a4b34 | |||
308477a732 | |||
be8a316389 | |||
be93123808 | |||
a69b746edb | |||
cae7aa760c | |||
a31042a94c | |||
0dc61683f0 | |||
e69906032e | |||
6b413719ac | |||
d4427d90ea | |||
aec7f19b7e |
81 changed files with 2306 additions and 3122 deletions
|
@ -210,12 +210,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
|
|||
config.wall_clock_cntpct = m_uses_wall_clock;
|
||||
config.enable_cycle_counting = !m_uses_wall_clock;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
|
||||
// Solaris doesn't support kPageSize >= 512MiB
|
||||
config.code_cache_size = std::uint32_t(128_MiB);
|
||||
#else
|
||||
config.code_cache_size = std::uint32_t(512_MiB);
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (m_system.DebuggerEnabled()) {
|
||||
|
|
|
@ -269,12 +269,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
|
|||
config.wall_clock_cntpct = m_uses_wall_clock;
|
||||
config.enable_cycle_counting = !m_uses_wall_clock;
|
||||
|
||||
// Code cache size
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
|
||||
// Solaris doesn't support kPageSize >= 512MiB
|
||||
config.code_cache_size = std::uint32_t(128_MiB);
|
||||
#else
|
||||
config.code_cache_size = std::uint32_t(512_MiB);
|
||||
#endif
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (m_system.DebuggerEnabled()) {
|
||||
|
|
|
@ -45,13 +45,6 @@ constexpr u64 CURRENT_CRYPTO_REVISION = 0x5;
|
|||
|
||||
using Common::AsArray;
|
||||
|
||||
// clang-format off
|
||||
constexpr std::array eticket_source_hashes{
|
||||
AsArray("B71DB271DC338DF380AA2C4335EF8873B1AFD408E80B3582D8719FC81C5E511C"), // eticket_rsa_kek_source
|
||||
AsArray("E8965A187D30E57869F562D04383C996DE487BBA5761363D2D4D32391866A85C"), // eticket_rsa_kekek_source
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
constexpr std::array<std::pair<std::string_view, KeyIndex<S128KeyType>>, 30> s128_file_id{{
|
||||
{"eticket_rsa_kek", {S128KeyType::ETicketRSAKek, 0, 0}},
|
||||
{"eticket_rsa_kek_source",
|
||||
|
@ -1117,81 +1110,25 @@ void KeyManager::DeriveBase() {
|
|||
|
||||
void KeyManager::DeriveETicket(PartitionDataManager& data,
|
||||
const FileSys::ContentProvider& provider) {
|
||||
// ETicket keys
|
||||
const auto es = provider.GetEntry(0x0100000000000033, FileSys::ContentRecordType::Program);
|
||||
|
||||
if (es == nullptr) {
|
||||
// The emulator no longer derives the ETicket RSA Kek.
|
||||
// It is now required for the user to provide this key in their keys file.
|
||||
if (!HasKey(S128KeyType::ETicketRSAKek)) {
|
||||
LOG_WARNING(Crypto, "ETicket RSA Kek not found, skipping eTicket parsing.");
|
||||
return;
|
||||
}
|
||||
|
||||
const auto exefs = es->GetExeFS();
|
||||
if (exefs == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto main = exefs->GetFile("main");
|
||||
if (main == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto bytes = main->ReadAllBytes();
|
||||
|
||||
const auto eticket_kek = FindKeyFromHex16(bytes, eticket_source_hashes[0]);
|
||||
const auto eticket_kekek = FindKeyFromHex16(bytes, eticket_source_hashes[1]);
|
||||
|
||||
const auto seed3 = data.GetRSAKekSeed3();
|
||||
const auto mask0 = data.GetRSAKekMask0();
|
||||
|
||||
if (eticket_kek != Key128{}) {
|
||||
SetKey(S128KeyType::Source, eticket_kek, static_cast<size_t>(SourceKeyType::ETicketKek));
|
||||
}
|
||||
if (eticket_kekek != Key128{}) {
|
||||
SetKey(S128KeyType::Source, eticket_kekek,
|
||||
static_cast<size_t>(SourceKeyType::ETicketKekek));
|
||||
}
|
||||
if (seed3 != Key128{}) {
|
||||
SetKey(S128KeyType::RSAKek, seed3, static_cast<size_t>(RSAKekType::Seed3));
|
||||
}
|
||||
if (mask0 != Key128{}) {
|
||||
SetKey(S128KeyType::RSAKek, mask0, static_cast<size_t>(RSAKekType::Mask0));
|
||||
}
|
||||
if (eticket_kek == Key128{} || eticket_kekek == Key128{} || seed3 == Key128{} ||
|
||||
mask0 == Key128{}) {
|
||||
return;
|
||||
}
|
||||
|
||||
const Key128 rsa_oaep_kek = seed3 ^ mask0;
|
||||
if (rsa_oaep_kek == Key128{}) {
|
||||
return;
|
||||
}
|
||||
|
||||
SetKey(S128KeyType::Source, rsa_oaep_kek,
|
||||
static_cast<u64>(SourceKeyType::RSAOaepKekGeneration));
|
||||
|
||||
Key128 temp_kek{};
|
||||
Key128 temp_kekek{};
|
||||
Key128 eticket_final{};
|
||||
|
||||
// Derive ETicket RSA Kek
|
||||
AESCipher<Key128> es_master(GetKey(S128KeyType::Master), Mode::ECB);
|
||||
es_master.Transcode(rsa_oaep_kek.data(), rsa_oaep_kek.size(), temp_kek.data(), Op::Decrypt);
|
||||
AESCipher<Key128> es_kekek(temp_kek, Mode::ECB);
|
||||
es_kekek.Transcode(eticket_kekek.data(), eticket_kekek.size(), temp_kekek.data(), Op::Decrypt);
|
||||
AESCipher<Key128> es_kek(temp_kekek, Mode::ECB);
|
||||
es_kek.Transcode(eticket_kek.data(), eticket_kek.size(), eticket_final.data(), Op::Decrypt);
|
||||
|
||||
if (eticket_final == Key128{}) {
|
||||
return;
|
||||
}
|
||||
|
||||
SetKey(S128KeyType::ETicketRSAKek, eticket_final);
|
||||
|
||||
// Titlekeys
|
||||
// Decrypt PRODINFO to get the extended kek needed for the RSA keypair.
|
||||
data.DecryptProdInfo(GetBISKey(0));
|
||||
|
||||
// The extended kek is read from the decrypted PRODINFO.
|
||||
eticket_extended_kek = data.GetETicketExtendedKek();
|
||||
WriteKeyToFile(KeyCategory::Console, "eticket_extended_kek", eticket_extended_kek);
|
||||
|
||||
// Derive the final RSA keypair using the user-provided ETicketRSAKek
|
||||
// and the extended kek from PRODINFO.
|
||||
DeriveETicketRSAKey();
|
||||
|
||||
// Load personalized tickets from the NAND.
|
||||
PopulateTickets();
|
||||
}
|
||||
|
||||
|
@ -1272,22 +1209,6 @@ void KeyManager::PopulateFromPartitionData(PartitionDataManager& data) {
|
|||
encrypted_keyblobs[i]);
|
||||
}
|
||||
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetPackage2KeySource(),
|
||||
static_cast<u64>(SourceKeyType::Package2));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetAESKekGenerationSource(),
|
||||
static_cast<u64>(SourceKeyType::AESKekGeneration));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetTitlekekSource(),
|
||||
static_cast<u64>(SourceKeyType::Titlekek));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetMasterKeySource(),
|
||||
static_cast<u64>(SourceKeyType::Master));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetKeyblobMACKeySource(),
|
||||
static_cast<u64>(SourceKeyType::KeyblobMAC));
|
||||
|
||||
for (size_t i = 0; i < PartitionDataManager::MAX_KEYBLOB_SOURCE_HASH; ++i) {
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetKeyblobKeySource(i),
|
||||
static_cast<u64>(SourceKeyType::Keyblob), i);
|
||||
}
|
||||
|
||||
if (data.HasFuses()) {
|
||||
SetKeyWrapped(S128KeyType::SecureBoot, data.GetSecureBootKey());
|
||||
}
|
||||
|
@ -1302,13 +1223,6 @@ void KeyManager::PopulateFromPartitionData(PartitionDataManager& data) {
|
|||
}
|
||||
}
|
||||
|
||||
const auto masters = data.GetTZMasterKeys(latest_master);
|
||||
for (size_t i = 0; i < masters.size(); ++i) {
|
||||
if (masters[i] != Key128{} && !HasKey(S128KeyType::Master, i)) {
|
||||
SetKey(S128KeyType::Master, masters[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
DeriveBase();
|
||||
|
||||
if (!data.HasPackage2())
|
||||
|
@ -1322,27 +1236,6 @@ void KeyManager::PopulateFromPartitionData(PartitionDataManager& data) {
|
|||
}
|
||||
data.DecryptPackage2(package2_keys, Package2Type::NormalMain);
|
||||
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetKeyAreaKeyApplicationSource(),
|
||||
static_cast<u64>(SourceKeyType::KeyAreaKey),
|
||||
static_cast<u64>(KeyAreaKeyType::Application));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetKeyAreaKeyOceanSource(),
|
||||
static_cast<u64>(SourceKeyType::KeyAreaKey),
|
||||
static_cast<u64>(KeyAreaKeyType::Ocean));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetKeyAreaKeySystemSource(),
|
||||
static_cast<u64>(SourceKeyType::KeyAreaKey),
|
||||
static_cast<u64>(KeyAreaKeyType::System));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetSDKekSource(),
|
||||
static_cast<u64>(SourceKeyType::SDKek));
|
||||
SetKeyWrapped(S256KeyType::SDKeySource, data.GetSDSaveKeySource(),
|
||||
static_cast<u64>(SDKeyType::Save));
|
||||
SetKeyWrapped(S256KeyType::SDKeySource, data.GetSDNCAKeySource(),
|
||||
static_cast<u64>(SDKeyType::NCA));
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetHeaderKekSource(),
|
||||
static_cast<u64>(SourceKeyType::HeaderKek));
|
||||
SetKeyWrapped(S256KeyType::HeaderSource, data.GetHeaderKeySource());
|
||||
SetKeyWrapped(S128KeyType::Source, data.GetAESKeyGenerationSource(),
|
||||
static_cast<u64>(SourceKeyType::AESKeyGeneration));
|
||||
|
||||
DeriveBase();
|
||||
}
|
||||
|
||||
|
|
|
@ -4,11 +4,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
// NOTE TO FUTURE MAINTAINERS:
|
||||
// When a new version of switch cryptography is released,
|
||||
// hash the new keyblob source and master key and add the hashes to
|
||||
// the arrays below.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cctype>
|
||||
|
@ -49,178 +44,7 @@ struct Package2Header {
|
|||
};
|
||||
static_assert(sizeof(Package2Header) == 0x200, "Package2Header has incorrect size.");
|
||||
|
||||
// clang-format off
|
||||
constexpr std::array source_hashes{
|
||||
AsArray("B24BD293259DBC7AC5D63F88E60C59792498E6FC5443402C7FFE87EE8B61A3F0"), // keyblob_mac_key_source
|
||||
AsArray("7944862A3A5C31C6720595EFD302245ABD1B54CCDCF33000557681E65C5664A4"), // master_key_source
|
||||
AsArray("21E2DF100FC9E094DB51B47B9B1D6E94ED379DB8B547955BEF8FE08D8DD35603"), // package2_key_source
|
||||
AsArray("FC02B9D37B42D7A1452E71444F1F700311D1132E301A83B16062E72A78175085"), // aes_kek_generation_source
|
||||
AsArray("FBD10056999EDC7ACDB96098E47E2C3606230270D23281E671F0F389FC5BC585"), // aes_key_generation_source
|
||||
AsArray("C48B619827986C7F4E3081D59DB2B460C84312650E9A8E6B458E53E8CBCA4E87"), // titlekek_source
|
||||
AsArray("04AD66143C726B2A139FB6B21128B46F56C553B2B3887110304298D8D0092D9E"), // key_area_key_application_source
|
||||
AsArray("FD434000C8FF2B26F8E9A9D2D2C12F6BE5773CBB9DC86300E1BD99F8EA33A417"), // key_area_key_ocean_source
|
||||
AsArray("1F17B1FD51AD1C2379B58F152CA4912EC2106441E51722F38700D5937A1162F7"), // key_area_key_system_source
|
||||
AsArray("6B2ED877C2C52334AC51E59ABFA7EC457F4A7D01E46291E9F2EAA45F011D24B7"), // sd_card_kek_source
|
||||
AsArray("D482743563D3EA5DCDC3B74E97C9AC8A342164FA041A1DC80F17F6D31E4BC01C"), // sd_card_save_key_source
|
||||
AsArray("2E751CECF7D93A2B957BD5FFCB082FD038CC2853219DD3092C6DAB9838F5A7CC"), // sd_card_nca_key_source
|
||||
AsArray("1888CAED5551B3EDE01499E87CE0D86827F80820EFB275921055AA4E2ABDFFC2"), // header_kek_source
|
||||
AsArray("8F783E46852DF6BE0BA4E19273C4ADBAEE16380043E1B8C418C4089A8BD64AA6"), // header_key_source
|
||||
AsArray("D1757E52F1AE55FA882EC690BC6F954AC46A83DC22F277F8806BD55577C6EED7"), // rsa_kek_seed3
|
||||
AsArray("FC02B9D37B42D7A1452E71444F1F700311D1132E301A83B16062E72A78175085"), // rsa_kek_mask0
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// clang-format off
|
||||
constexpr std::array keyblob_source_hashes{
|
||||
AsArray("8A06FE274AC491436791FDB388BCDD3AB9943BD4DEF8094418CDAC150FD73786"), // keyblob_key_source_00
|
||||
AsArray("2D5CAEB2521FEF70B47E17D6D0F11F8CE2C1E442A979AD8035832C4E9FBCCC4B"), // keyblob_key_source_01
|
||||
AsArray("61C5005E713BAE780641683AF43E5F5C0E03671117F702F401282847D2FC6064"), // keyblob_key_source_02
|
||||
AsArray("8E9795928E1C4428E1B78F0BE724D7294D6934689C11B190943923B9D5B85903"), // keyblob_key_source_03
|
||||
AsArray("95FA33AF95AFF9D9B61D164655B32710ED8D615D46C7D6CC3CC70481B686B402"), // keyblob_key_source_04
|
||||
AsArray("3F5BE7B3C8B1ABD8C10B4B703D44766BA08730562C172A4FE0D6B866B3E2DB3E"), // keyblob_key_source_05
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_06
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_07
|
||||
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_08
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_09
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_0A
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_0B
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_0C
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_0D
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_0E
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_0F
|
||||
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_10
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_11
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_12
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_13
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_14
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_15
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_16
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_17
|
||||
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_18
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_19
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_1A
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_1B
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_1C
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_1D
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_1E
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // keyblob_key_source_1F
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// clang-format off
|
||||
constexpr std::array master_key_hashes{
|
||||
AsArray("0EE359BE3C864BB0782E1D70A718A0342C551EED28C369754F9C4F691BECF7CA"), // master_key_00
|
||||
AsArray("4FE707B7E4ABDAF727C894AAF13B1351BFE2AC90D875F73B2E20FA94B9CC661E"), // master_key_01
|
||||
AsArray("79277C0237A2252EC3DFAC1F7C359C2B3D121E9DB15BB9AB4C2B4408D2F3AE09"), // master_key_02
|
||||
AsArray("4F36C565D13325F65EE134073C6A578FFCB0008E02D69400836844EAB7432754"), // master_key_03
|
||||
AsArray("75FF1D95D26113550EE6FCC20ACB58E97EDEB3A2FF52543ED5AEC63BDCC3DA50"), // master_key_04
|
||||
AsArray("EBE2BCD6704673EC0F88A187BB2AD9F1CC82B718C389425941BDC194DC46B0DD"), // master_key_05
|
||||
AsArray("9497E6779F5D840F2BBA1DE4E95BA1D6F21EFC94717D5AE5CA37D7EC5BD37A19"), // master_key_06
|
||||
AsArray("4EC96B8CB01B8DCE382149443430B2B6EBCB2983348AFA04A25E53609DABEDF6"), // master_key_07
|
||||
|
||||
AsArray("2998E2E23609BC2675FF062A2D64AF5B1B78DFF463B24119D64A1B64F01B2D51"), // master_key_08
|
||||
AsArray("9D486A98067C44B37CF173D3BF577891EB6081FF6B4A166347D9DBBF7025076B"), // master_key_09
|
||||
AsArray("4EC5A237A75A083A9C5F6CF615601522A7F822D06BD4BA32612C9CEBBB29BD45"), // master_key_0A
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_0B
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_0C
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_0D
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_0E
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_0F
|
||||
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_10
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_11
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_12
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_13
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_14
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_15
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_16
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_17
|
||||
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_18
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_19
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_1A
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_1B
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_1C
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_1D
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_1E
|
||||
AsArray("0000000000000000000000000000000000000000000000000000000000000000"), // master_key_1F
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
static constexpr u8 CalculateMaxKeyblobSourceHash() {
|
||||
const auto is_zero = [](const auto& data) {
|
||||
// TODO: Replace with std::all_of whenever mingw decides to update their
|
||||
// libraries to include the constexpr variant of it.
|
||||
for (const auto element : data) {
|
||||
if (element != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
for (s8 i = 0x1F; i >= 0; --i) {
|
||||
if (!is_zero(keyblob_source_hashes[i])) {
|
||||
return static_cast<u8>(i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const u8 PartitionDataManager::MAX_KEYBLOB_SOURCE_HASH = CalculateMaxKeyblobSourceHash();
|
||||
|
||||
template <size_t key_size = 0x10>
|
||||
std::array<u8, key_size> FindKeyFromHex(const std::vector<u8>& binary,
|
||||
const std::array<u8, 0x20>& hash) {
|
||||
if (binary.size() < key_size)
|
||||
return {};
|
||||
|
||||
std::array<u8, 0x20> temp{};
|
||||
for (size_t i = 0; i < binary.size() - key_size; ++i) {
|
||||
mbedtls_sha256(binary.data() + i, key_size, temp.data(), 0);
|
||||
|
||||
if (temp != hash)
|
||||
continue;
|
||||
|
||||
std::array<u8, key_size> out{};
|
||||
std::memcpy(out.data(), binary.data() + i, key_size);
|
||||
return out;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::array<u8, 16> FindKeyFromHex16(const std::vector<u8>& binary, std::array<u8, 32> hash) {
|
||||
return FindKeyFromHex<0x10>(binary, hash);
|
||||
}
|
||||
|
||||
static std::array<Key128, 0x20> FindEncryptedMasterKeyFromHex(const std::vector<u8>& binary,
|
||||
const Key128& key) {
|
||||
if (binary.size() < 0x10)
|
||||
return {};
|
||||
|
||||
SHA256Hash temp{};
|
||||
Key128 dec_temp{};
|
||||
std::array<Key128, 0x20> out{};
|
||||
AESCipher<Key128> cipher(key, Mode::ECB);
|
||||
for (size_t i = 0; i < binary.size() - 0x10; ++i) {
|
||||
cipher.Transcode(binary.data() + i, dec_temp.size(), dec_temp.data(), Op::Decrypt);
|
||||
mbedtls_sha256(dec_temp.data(), dec_temp.size(), temp.data(), 0);
|
||||
|
||||
for (size_t k = 0; k < out.size(); ++k) {
|
||||
if (temp == master_key_hashes[k]) {
|
||||
out[k] = dec_temp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
const u8 PartitionDataManager::MAX_KEYBLOB_SOURCE_HASH = 32;
|
||||
|
||||
static FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
|
||||
const std::string& name) {
|
||||
|
@ -287,52 +111,10 @@ std::vector<u8> PartitionDataManager::GetSecureMonitor() const {
|
|||
return secure_monitor_bytes;
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetPackage2KeySource() const {
|
||||
return FindKeyFromHex(secure_monitor_bytes, source_hashes[2]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetAESKekGenerationSource() const {
|
||||
return FindKeyFromHex(secure_monitor_bytes, source_hashes[3]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetTitlekekSource() const {
|
||||
return FindKeyFromHex(secure_monitor_bytes, source_hashes[5]);
|
||||
}
|
||||
|
||||
std::array<std::array<u8, 16>, 32> PartitionDataManager::GetTZMasterKeys(
|
||||
std::array<u8, 0x10> master_key) const {
|
||||
return FindEncryptedMasterKeyFromHex(secure_monitor_bytes, master_key);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetRSAKekSeed3() const {
|
||||
return FindKeyFromHex(secure_monitor_bytes, source_hashes[14]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetRSAKekMask0() const {
|
||||
return FindKeyFromHex(secure_monitor_bytes, source_hashes[15]);
|
||||
}
|
||||
|
||||
std::vector<u8> PartitionDataManager::GetPackage1Decrypted() const {
|
||||
return package1_decrypted_bytes;
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetMasterKeySource() const {
|
||||
return FindKeyFromHex(package1_decrypted_bytes, source_hashes[1]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetKeyblobMACKeySource() const {
|
||||
return FindKeyFromHex(package1_decrypted_bytes, source_hashes[0]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetKeyblobKeySource(std::size_t revision) const {
|
||||
if (keyblob_source_hashes[revision] == SHA256Hash{}) {
|
||||
LOG_WARNING(Crypto,
|
||||
"No keyblob source hash for crypto revision {:02X}! Cannot derive keys...",
|
||||
revision);
|
||||
}
|
||||
return FindKeyFromHex(package1_decrypted_bytes, keyblob_source_hashes[revision]);
|
||||
}
|
||||
|
||||
bool PartitionDataManager::HasFuses() const {
|
||||
return fuses != nullptr;
|
||||
}
|
||||
|
@ -444,46 +226,10 @@ const std::vector<u8>& PartitionDataManager::GetPackage2FSDecompressed(Package2T
|
|||
return package2_fs.at(static_cast<size_t>(type));
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetKeyAreaKeyApplicationSource(Package2Type type) const {
|
||||
return FindKeyFromHex(package2_fs.at(static_cast<size_t>(type)), source_hashes[6]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetKeyAreaKeyOceanSource(Package2Type type) const {
|
||||
return FindKeyFromHex(package2_fs.at(static_cast<size_t>(type)), source_hashes[7]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetKeyAreaKeySystemSource(Package2Type type) const {
|
||||
return FindKeyFromHex(package2_fs.at(static_cast<size_t>(type)), source_hashes[8]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetSDKekSource(Package2Type type) const {
|
||||
return FindKeyFromHex(package2_fs.at(static_cast<size_t>(type)), source_hashes[9]);
|
||||
}
|
||||
|
||||
std::array<u8, 32> PartitionDataManager::GetSDSaveKeySource(Package2Type type) const {
|
||||
return FindKeyFromHex<0x20>(package2_fs.at(static_cast<size_t>(type)), source_hashes[10]);
|
||||
}
|
||||
|
||||
std::array<u8, 32> PartitionDataManager::GetSDNCAKeySource(Package2Type type) const {
|
||||
return FindKeyFromHex<0x20>(package2_fs.at(static_cast<size_t>(type)), source_hashes[11]);
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetHeaderKekSource(Package2Type type) const {
|
||||
return FindKeyFromHex(package2_fs.at(static_cast<size_t>(type)), source_hashes[12]);
|
||||
}
|
||||
|
||||
std::array<u8, 32> PartitionDataManager::GetHeaderKeySource(Package2Type type) const {
|
||||
return FindKeyFromHex<0x20>(package2_fs.at(static_cast<size_t>(type)), source_hashes[13]);
|
||||
}
|
||||
|
||||
const std::vector<u8>& PartitionDataManager::GetPackage2SPLDecompressed(Package2Type type) const {
|
||||
return package2_spl.at(static_cast<size_t>(type));
|
||||
}
|
||||
|
||||
std::array<u8, 16> PartitionDataManager::GetAESKeyGenerationSource(Package2Type type) const {
|
||||
return FindKeyFromHex(package2_spl.at(static_cast<size_t>(type)), source_hashes[4]);
|
||||
}
|
||||
|
||||
bool PartitionDataManager::HasProdInfo() const {
|
||||
return prodinfo != nullptr;
|
||||
}
|
||||
|
@ -509,4 +255,4 @@ std::array<u8, 576> PartitionDataManager::GetETicketExtendedKek() const {
|
|||
prodinfo_decrypted->Read(out.data(), out.size(), 0x3890);
|
||||
return out;
|
||||
}
|
||||
} // namespace Core::Crypto
|
||||
} // namespace Core::Crypto
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
@ -36,16 +39,7 @@ public:
|
|||
EncryptedKeyBlob GetEncryptedKeyblob(std::size_t index) const;
|
||||
EncryptedKeyBlobs GetEncryptedKeyblobs() const;
|
||||
std::vector<u8> GetSecureMonitor() const;
|
||||
std::array<u8, 0x10> GetPackage2KeySource() const;
|
||||
std::array<u8, 0x10> GetAESKekGenerationSource() const;
|
||||
std::array<u8, 0x10> GetTitlekekSource() const;
|
||||
std::array<std::array<u8, 0x10>, 0x20> GetTZMasterKeys(std::array<u8, 0x10> master_key) const;
|
||||
std::array<u8, 0x10> GetRSAKekSeed3() const;
|
||||
std::array<u8, 0x10> GetRSAKekMask0() const;
|
||||
std::vector<u8> GetPackage1Decrypted() const;
|
||||
std::array<u8, 0x10> GetMasterKeySource() const;
|
||||
std::array<u8, 0x10> GetKeyblobMACKeySource() const;
|
||||
std::array<u8, 0x10> GetKeyblobKeySource(std::size_t revision) const;
|
||||
|
||||
// Fuses
|
||||
bool HasFuses() const;
|
||||
|
@ -63,21 +57,8 @@ public:
|
|||
Package2Type type);
|
||||
const std::vector<u8>& GetPackage2FSDecompressed(
|
||||
Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x10> GetKeyAreaKeyApplicationSource(
|
||||
Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x10> GetKeyAreaKeyOceanSource(
|
||||
Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x10> GetKeyAreaKeySystemSource(
|
||||
Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x10> GetSDKekSource(Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x20> GetSDSaveKeySource(Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x20> GetSDNCAKeySource(Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x10> GetHeaderKekSource(Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x20> GetHeaderKeySource(Package2Type type = Package2Type::NormalMain) const;
|
||||
const std::vector<u8>& GetPackage2SPLDecompressed(
|
||||
Package2Type type = Package2Type::NormalMain) const;
|
||||
std::array<u8, 0x10> GetAESKeyGenerationSource(
|
||||
Package2Type type = Package2Type::NormalMain) const;
|
||||
|
||||
// PRODINFO
|
||||
bool HasProdInfo() const;
|
||||
|
@ -104,6 +85,4 @@ private:
|
|||
std::array<std::vector<u8>, 6> package2_spl;
|
||||
};
|
||||
|
||||
std::array<u8, 0x10> FindKeyFromHex16(const std::vector<u8>& binary, std::array<u8, 0x20> hash);
|
||||
|
||||
} // namespace Core::Crypto
|
||||
} // namespace Core::Crypto
|
|
@ -39,9 +39,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF)
|
|||
option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF)
|
||||
option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
|
||||
option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF)
|
||||
if (NOT DEFINED DYNARMIC_FRONTENDS)
|
||||
set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
|
||||
endif()
|
||||
|
||||
# Default to a Release build
|
||||
if (NOT CMAKE_BUILD_TYPE)
|
||||
|
|
|
@ -59,14 +59,11 @@ add_library(dynarmic
|
|||
common/lut_from_list.h
|
||||
common/math_util.cpp
|
||||
common/math_util.h
|
||||
common/memory_pool.cpp
|
||||
common/memory_pool.h
|
||||
common/safe_ops.h
|
||||
common/spin_lock.h
|
||||
common/string_util.h
|
||||
common/u128.cpp
|
||||
common/u128.h
|
||||
common/variant_util.h
|
||||
frontend/A32/a32_types.cpp
|
||||
frontend/A32/a32_types.h
|
||||
frontend/A64/a64_types.cpp
|
||||
|
@ -81,7 +78,6 @@ add_library(dynarmic
|
|||
ir/basic_block.cpp
|
||||
ir/basic_block.h
|
||||
ir/cond.h
|
||||
ir/ir_emitter.cpp
|
||||
ir/ir_emitter.h
|
||||
ir/location_descriptor.cpp
|
||||
ir/location_descriptor.h
|
||||
|
@ -90,78 +86,59 @@ add_library(dynarmic
|
|||
ir/opcodes.cpp
|
||||
ir/opcodes.h
|
||||
ir/opcodes.inc
|
||||
ir/opt/constant_propagation_pass.cpp
|
||||
ir/opt/dead_code_elimination_pass.cpp
|
||||
ir/opt/identity_removal_pass.cpp
|
||||
ir/opt/ir_matcher.h
|
||||
ir/opt/naming_pass.cpp
|
||||
ir/opt/passes.h
|
||||
ir/opt/polyfill_pass.cpp
|
||||
ir/opt/verification_pass.cpp
|
||||
ir/opt_passes.cpp
|
||||
ir/opt_passes.h
|
||||
ir/terminal.h
|
||||
ir/type.cpp
|
||||
ir/type.h
|
||||
ir/value.cpp
|
||||
ir/value.h
|
||||
# A32
|
||||
frontend/A32/a32_ir_emitter.cpp
|
||||
frontend/A32/a32_ir_emitter.h
|
||||
frontend/A32/a32_location_descriptor.cpp
|
||||
frontend/A32/a32_location_descriptor.h
|
||||
frontend/A32/decoder/arm.h
|
||||
frontend/A32/decoder/arm.inc
|
||||
frontend/A32/decoder/asimd.h
|
||||
frontend/A32/decoder/asimd.inc
|
||||
frontend/A32/decoder/thumb16.h
|
||||
frontend/A32/decoder/thumb16.inc
|
||||
frontend/A32/decoder/thumb32.h
|
||||
frontend/A32/decoder/thumb32.inc
|
||||
frontend/A32/decoder/vfp.h
|
||||
frontend/A32/decoder/vfp.inc
|
||||
frontend/A32/disassembler/disassembler.h
|
||||
frontend/A32/disassembler/disassembler_arm.cpp
|
||||
frontend/A32/disassembler/disassembler_thumb.cpp
|
||||
frontend/A32/FPSCR.h
|
||||
frontend/A32/ITState.h
|
||||
frontend/A32/PSR.h
|
||||
frontend/A32/translate/a32_translate.cpp
|
||||
frontend/A32/translate/a32_translate.h
|
||||
frontend/A32/translate/conditional_state.cpp
|
||||
frontend/A32/translate/conditional_state.h
|
||||
frontend/A32/translate/translate_arm.cpp
|
||||
frontend/A32/translate/translate_thumb.cpp
|
||||
interface/A32/a32.h
|
||||
interface/A32/arch_version.h
|
||||
interface/A32/config.h
|
||||
interface/A32/coprocessor.h
|
||||
interface/A32/coprocessor_util.h
|
||||
interface/A32/disassembler.h
|
||||
# A64
|
||||
frontend/A64/a64_ir_emitter.cpp
|
||||
frontend/A64/a64_ir_emitter.h
|
||||
frontend/A64/a64_location_descriptor.cpp
|
||||
frontend/A64/a64_location_descriptor.h
|
||||
frontend/A64/decoder/a64.h
|
||||
frontend/A64/decoder/a64.inc
|
||||
frontend/A64/translate/a64_translate.cpp
|
||||
frontend/A64/translate/a64_translate.h
|
||||
interface/A64/a64.h
|
||||
interface/A64/config.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic PRIVATE
|
||||
frontend/A32/a32_ir_emitter.cpp
|
||||
frontend/A32/a32_ir_emitter.h
|
||||
frontend/A32/a32_location_descriptor.cpp
|
||||
frontend/A32/a32_location_descriptor.h
|
||||
frontend/A32/decoder/arm.h
|
||||
frontend/A32/decoder/arm.inc
|
||||
frontend/A32/decoder/asimd.h
|
||||
frontend/A32/decoder/asimd.inc
|
||||
frontend/A32/decoder/thumb16.h
|
||||
frontend/A32/decoder/thumb16.inc
|
||||
frontend/A32/decoder/thumb32.h
|
||||
frontend/A32/decoder/thumb32.inc
|
||||
frontend/A32/decoder/vfp.h
|
||||
frontend/A32/decoder/vfp.inc
|
||||
frontend/A32/disassembler/disassembler.h
|
||||
frontend/A32/disassembler/disassembler_arm.cpp
|
||||
frontend/A32/disassembler/disassembler_thumb.cpp
|
||||
frontend/A32/FPSCR.h
|
||||
frontend/A32/ITState.h
|
||||
frontend/A32/PSR.h
|
||||
frontend/A32/translate/a32_translate.cpp
|
||||
frontend/A32/translate/a32_translate.h
|
||||
frontend/A32/translate/conditional_state.cpp
|
||||
frontend/A32/translate/conditional_state.h
|
||||
frontend/A32/translate/translate_arm.cpp
|
||||
frontend/A32/translate/translate_thumb.cpp
|
||||
interface/A32/a32.h
|
||||
interface/A32/arch_version.h
|
||||
interface/A32/config.h
|
||||
interface/A32/coprocessor.h
|
||||
interface/A32/coprocessor_util.h
|
||||
interface/A32/disassembler.h
|
||||
ir/opt/a32_constant_memory_reads_pass.cpp
|
||||
ir/opt/a32_get_set_elimination_pass.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic PRIVATE
|
||||
frontend/A64/a64_ir_emitter.cpp
|
||||
frontend/A64/a64_ir_emitter.h
|
||||
frontend/A64/a64_location_descriptor.cpp
|
||||
frontend/A64/a64_location_descriptor.h
|
||||
frontend/A64/decoder/a64.h
|
||||
frontend/A64/decoder/a64.inc
|
||||
frontend/A64/translate/a64_translate.cpp
|
||||
frontend/A64/translate/a64_translate.h
|
||||
interface/A64/a64.h
|
||||
interface/A64/config.h
|
||||
ir/opt/a64_callback_config_pass.cpp
|
||||
ir/opt/a64_get_set_elimination_pass.cpp
|
||||
ir/opt/a64_merge_interpret_blocks.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
# Newer versions of xbyak (>= 7.25.0) have stricter checks that currently
|
||||
# fail in dynarmic
|
||||
|
@ -218,29 +195,21 @@ if ("x86_64" IN_LIST ARCHITECTURE)
|
|||
common/spin_lock_x64.h
|
||||
common/x64_disassemble.cpp
|
||||
common/x64_disassemble.h
|
||||
# A32
|
||||
backend/x64/a32_emit_x64.cpp
|
||||
backend/x64/a32_emit_x64.h
|
||||
backend/x64/a32_emit_x64_memory.cpp
|
||||
backend/x64/a32_interface.cpp
|
||||
backend/x64/a32_jitstate.cpp
|
||||
backend/x64/a32_jitstate.h
|
||||
# A64
|
||||
backend/x64/a64_emit_x64.cpp
|
||||
backend/x64/a64_emit_x64.h
|
||||
backend/x64/a64_emit_x64_memory.cpp
|
||||
backend/x64/a64_interface.cpp
|
||||
backend/x64/a64_jitstate.cpp
|
||||
backend/x64/a64_jitstate.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "x86_64"
|
||||
backend/x64/a32_emit_x64.cpp
|
||||
backend/x64/a32_emit_x64.h
|
||||
backend/x64/a32_emit_x64_memory.cpp
|
||||
backend/x64/a32_interface.cpp
|
||||
backend/x64/a32_jitstate.cpp
|
||||
backend/x64/a32_jitstate.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "x86_64"
|
||||
backend/x64/a64_emit_x64.cpp
|
||||
backend/x64/a64_emit_x64.h
|
||||
backend/x64/a64_emit_x64_memory.cpp
|
||||
backend/x64/a64_interface.cpp
|
||||
backend/x64/a64_jitstate.cpp
|
||||
backend/x64/a64_jitstate.h
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ("arm64" IN_LIST ARCHITECTURE)
|
||||
|
@ -284,25 +253,17 @@ if ("arm64" IN_LIST ARCHITECTURE)
|
|||
backend/arm64/verbose_debugging_output.h
|
||||
common/spin_lock_arm64.cpp
|
||||
common/spin_lock_arm64.h
|
||||
# A32
|
||||
backend/arm64/a32_address_space.cpp
|
||||
backend/arm64/a32_address_space.h
|
||||
backend/arm64/a32_core.h
|
||||
backend/arm64/a32_interface.cpp
|
||||
# A64
|
||||
backend/arm64/a64_address_space.cpp
|
||||
backend/arm64/a64_address_space.h
|
||||
backend/arm64/a64_core.h
|
||||
backend/arm64/a64_interface.cpp
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "arm64"
|
||||
backend/arm64/a32_address_space.cpp
|
||||
backend/arm64/a32_address_space.h
|
||||
backend/arm64/a32_core.h
|
||||
backend/arm64/a32_interface.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_architecture_specific_sources(dynarmic "arm64"
|
||||
backend/arm64/a64_address_space.cpp
|
||||
backend/arm64/a64_address_space.h
|
||||
backend/arm64/a64_core.h
|
||||
backend/arm64/a64_interface.cpp
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ("riscv" IN_LIST ARCHITECTURE)
|
||||
|
@ -331,21 +292,14 @@ if ("riscv" IN_LIST ARCHITECTURE)
|
|||
backend/riscv64/reg_alloc.cpp
|
||||
backend/riscv64/reg_alloc.h
|
||||
backend/riscv64/stack_layout.h
|
||||
# A32
|
||||
backend/riscv64/a32_address_space.cpp
|
||||
backend/riscv64/a32_address_space.h
|
||||
backend/riscv64/a32_core.h
|
||||
backend/riscv64/a32_interface.cpp
|
||||
backend/riscv64/code_block.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic PRIVATE
|
||||
backend/riscv64/a32_address_space.cpp
|
||||
backend/riscv64/a32_address_space.h
|
||||
backend/riscv64/a32_core.h
|
||||
backend/riscv64/a32_interface.cpp
|
||||
backend/riscv64/code_block.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
|
||||
endif()
|
||||
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
|
@ -423,7 +377,7 @@ target_link_libraries(dynarmic
|
|||
)
|
||||
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic PRIVATE Boost::headers)
|
||||
endif()
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -16,7 +19,7 @@
|
|||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
|
@ -163,21 +166,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
|
|||
|
||||
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
||||
|
||||
Optimization::PolyfillPass(ir_block, {});
|
||||
Optimization::NamingPass(ir_block);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
|
||||
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
Optimization::VerificationPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, conf, {});
|
||||
return ir_block;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -15,7 +18,7 @@
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::Arm64 {
|
||||
|
||||
|
@ -331,22 +334,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
|
|||
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
|
||||
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
|
||||
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
|
||||
|
||||
Optimization::A64CallbackConfigPass(ir_block, conf);
|
||||
Optimization::NamingPass(ir_block);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
|
||||
Optimization::A64GetSetElimination(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
|
||||
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
|
||||
}
|
||||
Optimization::VerificationPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, conf, {});
|
||||
return ir_block;
|
||||
}
|
||||
|
||||
|
|
|
@ -15,15 +15,15 @@
|
|||
#include <mcl/macro/architecture.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
class BlockOfCode;
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
namespace oaknut {
|
||||
class CodeBlock;
|
||||
} // namespace oaknut
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
#elif defined(ARCHITECTURE_riscv64)
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
class CodeBlock;
|
||||
} // namespace Dynarmic::Backend::RV64
|
||||
|
@ -33,16 +33,16 @@ class CodeBlock;
|
|||
|
||||
namespace Dynarmic::Backend {
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
struct FakeCall {
|
||||
u64 call_rip;
|
||||
u64 ret_rip;
|
||||
};
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
struct FakeCall {
|
||||
u64 call_pc;
|
||||
};
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
#elif defined(ARCHITECTURE_riscv64)
|
||||
struct FakeCall {
|
||||
};
|
||||
#else
|
||||
|
@ -54,11 +54,11 @@ public:
|
|||
ExceptionHandler();
|
||||
~ExceptionHandler();
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
void Register(X64::BlockOfCode& code);
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
#elif defined(ARCHITECTURE_riscv64)
|
||||
void Register(RV64::CodeBlock& mem, std::size_t mem_size);
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -13,15 +16,15 @@ struct ExceptionHandler::Impl final {
|
|||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
void ExceptionHandler::Register(X64::BlockOfCode&) {
|
||||
// Do nothing
|
||||
}
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) {
|
||||
// Do nothing
|
||||
}
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
#elif defined(ARCHITECTURE_riscv64)
|
||||
void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) {
|
||||
// Do nothing
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
|
||||
# include "dynarmic/backend/x64/block_of_code.h"
|
||||
# define mig_external extern "C"
|
||||
|
@ -36,7 +36,7 @@
|
|||
|
||||
using dynarmic_thread_state_t = x86_thread_state64_t;
|
||||
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
|
||||
# include <oaknut/code_block.hpp>
|
||||
# define mig_external extern "C"
|
||||
|
@ -133,7 +133,7 @@ void MachHandler::MessagePump() {
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
|
||||
|
@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {
|
|||
|
||||
return KERN_SUCCESS;
|
||||
}
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) {
|
||||
std::lock_guard<std::mutex> guard(code_block_infos_mutex);
|
||||
|
||||
|
@ -269,13 +269,13 @@ private:
|
|||
ExceptionHandler::ExceptionHandler() = default;
|
||||
ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
void ExceptionHandler::Register(X64::BlockOfCode& code) {
|
||||
const u64 code_begin = mcl::bit_cast<u64>(code.getCode());
|
||||
const u64 code_end = code_begin + code.GetTotalCodeSize();
|
||||
impl = std::make_unique<Impl>(code_begin, code_end);
|
||||
}
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
|
||||
const u64 code_begin = mcl::bit_cast<u64>(mem.ptr());
|
||||
const u64 code_end = code_begin + size;
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
#include <mcl/macro/architecture.hpp>
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
# include "dynarmic/backend/x64/mig/mach_exc_server.c"
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
# include "dynarmic/backend/arm64/mig/mach_exc_server.c"
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
|
|
|
@ -12,31 +12,17 @@
|
|||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
#include <optional>
|
||||
#include <sys/mman.h>
|
||||
#ifdef __APPLE__
|
||||
# include <signal.h>
|
||||
# include <sys/ucontext.h>
|
||||
#else
|
||||
# include <signal.h>
|
||||
# ifndef __OpenBSD__
|
||||
# include <ucontext.h>
|
||||
# endif
|
||||
# ifdef __sun__
|
||||
# include <sys/regset.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/context.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
# include "dynarmic/backend/x64/block_of_code.h"
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
# include <oaknut/code_block.hpp>
|
||||
|
||||
# include "dynarmic/backend/arm64/abi.h"
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
#elif defined(ARCHITECTURE_riscv64)
|
||||
# include "dynarmic/backend/riscv64/code_block.h"
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
|
@ -129,35 +115,8 @@ void RegisterHandler() {
|
|||
|
||||
void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
||||
DEBUG_ASSERT(sig == SIGSEGV || sig == SIGBUS);
|
||||
#ifndef MCL_ARCHITECTURE_RISCV
|
||||
ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
|
||||
#ifndef __OpenBSD__
|
||||
auto& mctx = ucontext->uc_mcontext;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
# if defined(__APPLE__)
|
||||
# define CTX_RIP (mctx->__ss.__rip)
|
||||
# define CTX_RSP (mctx->__ss.__rsp)
|
||||
# elif defined(__linux__)
|
||||
# define CTX_RIP (mctx.gregs[REG_RIP])
|
||||
# define CTX_RSP (mctx.gregs[REG_RSP])
|
||||
# elif defined(__FreeBSD__)
|
||||
# define CTX_RIP (mctx.mc_rip)
|
||||
# define CTX_RSP (mctx.mc_rsp)
|
||||
# elif defined(__NetBSD__)
|
||||
# define CTX_RIP (mctx.__gregs[_REG_RIP])
|
||||
# define CTX_RSP (mctx.__gregs[_REG_RSP])
|
||||
# elif defined(__OpenBSD__)
|
||||
# define CTX_RIP (ucontext->sc_rip)
|
||||
# define CTX_RSP (ucontext->sc_rsp)
|
||||
# elif defined(__sun__)
|
||||
# define CTX_RIP (mctx.gregs[REG_RIP])
|
||||
# define CTX_RSP (mctx.gregs[REG_RSP])
|
||||
# else
|
||||
# error "Unknown platform"
|
||||
# endif
|
||||
CTX_DECLARE(raw_context);
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
{
|
||||
std::shared_lock guard(sig_handler->code_block_infos_mutex);
|
||||
if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) {
|
||||
|
@ -169,48 +128,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
|||
}
|
||||
}
|
||||
fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
# if defined(__APPLE__)
|
||||
# define CTX_PC (mctx->__ss.__pc)
|
||||
# define CTX_SP (mctx->__ss.__sp)
|
||||
# define CTX_LR (mctx->__ss.__lr)
|
||||
# define CTX_X(i) (mctx->__ss.__x[i])
|
||||
# define CTX_Q(i) (mctx->__ns.__v[i])
|
||||
# elif defined(__linux__)
|
||||
# define CTX_PC (mctx.pc)
|
||||
# define CTX_SP (mctx.sp)
|
||||
# define CTX_LR (mctx.regs[30])
|
||||
# define CTX_X(i) (mctx.regs[i])
|
||||
# define CTX_Q(i) (fpctx->vregs[i])
|
||||
[[maybe_unused]] const auto fpctx = [&mctx] {
|
||||
_aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved;
|
||||
while (header->magic != FPSIMD_MAGIC) {
|
||||
ASSERT(header->magic && header->size);
|
||||
header = (_aarch64_ctx*)((char*)header + header->size);
|
||||
}
|
||||
return (fpsimd_context*)header;
|
||||
}();
|
||||
# elif defined(__FreeBSD__)
|
||||
# define CTX_PC (mctx.mc_gpregs.gp_elr)
|
||||
# define CTX_SP (mctx.mc_gpregs.gp_sp)
|
||||
# define CTX_LR (mctx.mc_gpregs.gp_lr)
|
||||
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
|
||||
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
|
||||
# elif defined(__NetBSD__)
|
||||
# define CTX_PC (mctx.mc_gpregs.gp_elr)
|
||||
# define CTX_SP (mctx.mc_gpregs.gp_sp)
|
||||
# define CTX_LR (mctx.mc_gpregs.gp_lr)
|
||||
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
|
||||
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
|
||||
# elif defined(__OpenBSD__)
|
||||
# define CTX_PC (ucontext->sc_elr)
|
||||
# define CTX_SP (ucontext->sc_sp)
|
||||
# define CTX_LR (ucontext->sc_lr)
|
||||
# define CTX_X(i) (ucontext->sc_x[i])
|
||||
# define CTX_Q(i) (ucontext->sc_q[i])
|
||||
# else
|
||||
# error "Unknown platform"
|
||||
# endif
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
{
|
||||
std::shared_lock guard(sig_handler->code_block_infos_mutex);
|
||||
if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) {
|
||||
|
@ -220,7 +138,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
|||
}
|
||||
}
|
||||
fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_PC);
|
||||
#elif defined(MCL_ARCHITECTURE_RISCV)
|
||||
#elif defined(ARCHITECTURE_riscv64)
|
||||
ASSERT_FALSE("Unimplemented");
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2023 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -5,9 +8,9 @@
|
|||
|
||||
#include <mcl/macro/architecture.hpp>
|
||||
|
||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
# include "dynarmic/backend/x64/exception_handler_windows.cpp"
|
||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
# include "dynarmic/backend/exception_handler_generic.cpp"
|
||||
#else
|
||||
# error "Invalid architecture"
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include "dynarmic/backend/riscv64/stack_layout.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
namespace Dynarmic::Backend::RV64 {
|
||||
|
||||
|
@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
|
|||
|
||||
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
||||
|
||||
Optimization::PolyfillPass(ir_block, {});
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
|
||||
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
Optimization::VerificationPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, conf, {});
|
||||
return ir_block;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
#include "dynarmic/backend/x64/nzcv_util.h"
|
||||
#include "dynarmic/backend/x64/perf_map.h"
|
||||
#include "dynarmic/backend/x64/stack_layout.h"
|
||||
#include "dynarmic/common/variant_util.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
#include "dynarmic/interface/A32/coprocessor.h"
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/location_descriptor.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
namespace Dynarmic::A32 {
|
||||
|
||||
|
@ -217,19 +217,7 @@ private:
|
|||
block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE);
|
||||
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
||||
Optimization::PolyfillPass(ir_block, polyfill_options);
|
||||
Optimization::NamingPass(ir_block);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
|
||||
Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
|
||||
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
Optimization::VerificationPass(ir_block);
|
||||
Optimization::Optimize(ir_block, conf, polyfill_options);
|
||||
return emitter.Emit(ir_block);
|
||||
}
|
||||
|
||||
|
|
|
@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
|
|||
auto const opcode = inst.GetOpcode();
|
||||
// Call the relevant Emit* member function.
|
||||
switch (opcode) {
|
||||
#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
|
||||
#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
|
||||
#define A32OPC(name, type, ...)
|
||||
#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
|
||||
#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
|
||||
#include "dynarmic/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
|
@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
|
|||
target_code_ptr = code.GetReturnFromRunCodeAddress();
|
||||
}
|
||||
const CodePtr patch_location = code.getCurr();
|
||||
code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
|
||||
code.mov(code.rcx, u64(target_code_ptr));
|
||||
code.EnsurePatchLocationSize(patch_location, 10);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
|
||||
|
@ -80,16 +80,16 @@ public:
|
|||
};
|
||||
|
||||
// TODO: Check code alignment
|
||||
|
||||
const CodePtr current_code_ptr = [this] {
|
||||
const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15));
|
||||
const CodePtr current_code_ptr = [this, aligned_code_ptr] {
|
||||
// RSB optimization
|
||||
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
|
||||
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
|
||||
jit_state.rsb_ptr = new_rsb_ptr;
|
||||
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
|
||||
return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]);
|
||||
}
|
||||
|
||||
return GetCurrentBlock();
|
||||
return aligned_code_ptr;
|
||||
//return GetCurrentBlock();
|
||||
}();
|
||||
|
||||
const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);
|
||||
|
@ -275,21 +275,7 @@ private:
|
|||
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
|
||||
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
|
||||
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
|
||||
Optimization::PolyfillPass(ir_block, polyfill_options);
|
||||
Optimization::A64CallbackConfigPass(ir_block, conf);
|
||||
Optimization::NamingPass(ir_block);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
|
||||
Optimization::A64GetSetElimination(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
}
|
||||
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
|
||||
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
|
||||
}
|
||||
Optimization::VerificationPass(ir_block);
|
||||
Optimization::Optimize(ir_block, conf, polyfill_options);
|
||||
return emitter.Emit(ir_block).entrypoint;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
|
||||
#include <algorithm>
|
||||
|
||||
#include <mcl/iterator/reverse.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
|
@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
|
|||
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
||||
|
||||
size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
|
||||
for (auto const xmm : mcl::iterator::reverse(regs)) {
|
||||
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
|
||||
auto const xmm = *it;
|
||||
if (HostLocIsXMM(xmm)) {
|
||||
xmm_offset -= XMM_SIZE;
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
|
@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
|
|||
}
|
||||
if (frame_info.stack_subtraction != 0)
|
||||
code.add(rsp, u32(frame_info.stack_subtraction));
|
||||
for (auto const gpr : mcl::iterator::reverse(regs))
|
||||
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
|
||||
auto const gpr = *it;
|
||||
if (HostLocIsGPR(gpr))
|
||||
code.pop(HostLocToReg64(gpr));
|
||||
}
|
||||
}
|
||||
|
||||
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {
|
||||
|
|
|
@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
|
|||
|
||||
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
|
||||
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
|
||||
lock();
|
||||
or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
|
||||
lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
|
||||
|
||||
SwitchMxcsrOnEntry();
|
||||
jmp(ABI_PARAM2);
|
||||
|
@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
|
|||
}
|
||||
|
||||
xor_(eax, eax);
|
||||
lock();
|
||||
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
|
||||
|
||||
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <iterator>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <boost/variant/detail/apply_visitor_binary.hpp>
|
||||
#include <mcl/bit/bit_field.hpp>
|
||||
#include <mcl/scope_exit.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
@ -21,7 +22,6 @@
|
|||
#include "dynarmic/backend/x64/perf_map.h"
|
||||
#include "dynarmic/backend/x64/stack_layout.h"
|
||||
#include "dynarmic/backend/x64/verbose_debugging_output.h"
|
||||
#include "dynarmic/common/variant_util.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
|
|||
}
|
||||
|
||||
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
|
||||
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
|
||||
boost::apply_visitor([this, initial_location, is_single_step](auto x) {
|
||||
using T = std::decay_t<decltype(x)>;
|
||||
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
|
||||
this->EmitTerminalImpl(x, initial_location, is_single_step);
|
||||
} else {
|
||||
ASSERT_MSG(false, "Invalid terminal");
|
||||
}
|
||||
});
|
||||
}, terminal);
|
||||
}
|
||||
|
||||
void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
|
||||
|
|
|
@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
|
|||
FpFixup::Norm_Src,
|
||||
FpFixup::Norm_Src,
|
||||
FpFixup::Norm_Src);
|
||||
|
||||
const Xbyak::Xmm tmp = xmm16;
|
||||
const Xbyak::Xmm tmp = xmm0;
|
||||
FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
|
||||
|
||||
for (const Xbyak::Xmm& xmm : to_daz) {
|
||||
for (const Xbyak::Xmm& xmm : to_daz)
|
||||
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -273,34 +273,31 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
|
||||
|
||||
if constexpr (bitsize != 128) {
|
||||
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
|
||||
} else {
|
||||
if constexpr (bitsize == 128) {
|
||||
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
|
||||
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
|
||||
ctx.reg_alloc.EndOfAllocScope();
|
||||
ctx.reg_alloc.HostCall(inst);
|
||||
} else {
|
||||
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
|
||||
}
|
||||
|
||||
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
|
||||
Xbyak::Label end;
|
||||
|
||||
code.mov(code.ABI_RETURN, u32(1));
|
||||
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
|
||||
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
|
||||
code.test(tmp.cvt8(), tmp.cvt8());
|
||||
code.je(end);
|
||||
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
|
||||
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
|
||||
code.xor_(tmp.cvt32(), tmp.cvt32());
|
||||
code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
|
||||
code.mov(code.ABI_PARAM1, u64(&conf));
|
||||
if constexpr (bitsize != 128) {
|
||||
using T = mcl::unsigned_integer_of_size<bitsize>;
|
||||
|
||||
code.CallLambda(
|
||||
[](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
|
||||
[&](T expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
})
|
||||
? 0
|
||||
: 1;
|
||||
});
|
||||
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
}) ? 0 : 1;
|
||||
});
|
||||
if (ordered) {
|
||||
code.mfence();
|
||||
}
|
||||
|
@ -308,15 +305,11 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
|
||||
code.movaps(xword[code.ABI_PARAM3], xmm1);
|
||||
code.CallLambda(
|
||||
[](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
|
||||
[&](Vector expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
})
|
||||
? 0
|
||||
: 1;
|
||||
});
|
||||
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
|
||||
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
|
||||
return (conf.callbacks->*callback)(vaddr, value, expected);
|
||||
}) ? 0 : 1;
|
||||
});
|
||||
if (ordered) {
|
||||
code.mfence();
|
||||
}
|
||||
|
@ -437,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
|||
|
||||
SharedLabel end = GenSharedLabel();
|
||||
|
||||
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
|
||||
code.mov(status, u32(1));
|
||||
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
|
||||
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
|
||||
code.test(tmp.cvt8(), tmp.cvt8());
|
||||
code.je(*end, code.T_NEAR);
|
||||
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
|
||||
code.cmp(qword[tmp], vaddr);
|
||||
code.jne(*end, code.T_NEAR);
|
||||
|
||||
|
@ -474,30 +468,29 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
|||
|
||||
const auto location = code.getCurr();
|
||||
|
||||
if constexpr (bitsize == 128) {
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
code.lock();
|
||||
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
|
||||
break;
|
||||
case 16:
|
||||
code.lock();
|
||||
code.cmpxchg(word[dest_ptr], value.cvt16());
|
||||
break;
|
||||
case 32:
|
||||
code.lock();
|
||||
code.cmpxchg(dword[dest_ptr], value.cvt32());
|
||||
break;
|
||||
case 64:
|
||||
code.lock();
|
||||
code.cmpxchg(qword[dest_ptr], value.cvt64());
|
||||
break;
|
||||
case 128:
|
||||
code.lock();
|
||||
code.cmpxchg16b(ptr[dest_ptr]);
|
||||
} else {
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
code.lock();
|
||||
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
|
||||
break;
|
||||
case 16:
|
||||
code.lock();
|
||||
code.cmpxchg(word[dest_ptr], value.cvt16());
|
||||
break;
|
||||
case 32:
|
||||
code.lock();
|
||||
code.cmpxchg(dword[dest_ptr], value.cvt32());
|
||||
break;
|
||||
case 64:
|
||||
code.lock();
|
||||
code.cmpxchg(qword[dest_ptr], value.cvt64());
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
code.setnz(status.cvt8());
|
||||
|
|
|
@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
const Xbyak::Xmm right_shift = xmm16;
|
||||
const Xbyak::Xmm tmp = xmm17;
|
||||
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
||||
code.vpxord(right_shift, right_shift, right_shift);
|
||||
|
@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
const Xbyak::Xmm right_shift = xmm16;
|
||||
const Xbyak::Xmm tmp = xmm17;
|
||||
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF));
|
||||
code.vpxorq(right_shift, right_shift, right_shift);
|
||||
|
@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
const Xbyak::Xmm right_shift = xmm16;
|
||||
const Xbyak::Xmm tmp = xmm17;
|
||||
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
||||
code.vpxord(right_shift, right_shift, right_shift);
|
||||
|
@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
|
|||
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
|
||||
const Xbyak::Xmm c = xmm16;
|
||||
const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
|
||||
code.vpsraq(c, a, 32);
|
||||
code.vpsllq(a, a, 32);
|
||||
code.vpsraq(a, a, 32);
|
||||
|
@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
|
|||
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
|
||||
const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm masked = xmm16;
|
||||
const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "dynarmic/backend/x64/reg_alloc.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
|
||||
|
@ -118,7 +119,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
|
|||
values.push_back(inst);
|
||||
ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits<uint16_t>::max)());
|
||||
total_uses += inst->UseCount();
|
||||
max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
|
||||
max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
|
||||
}
|
||||
|
||||
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
|
||||
|
@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept {
|
|||
|
||||
u8 Argument::GetImmediateU8() const noexcept {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x100);
|
||||
ASSERT(imm <= u64(std::numeric_limits<u8>::max()));
|
||||
return u8(imm);
|
||||
}
|
||||
|
||||
u16 Argument::GetImmediateU16() const noexcept {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x10000);
|
||||
ASSERT(imm <= u64(std::numeric_limits<u16>::max()));
|
||||
return u16(imm);
|
||||
}
|
||||
|
||||
u32 Argument::GetImmediateU32() const noexcept {
|
||||
const u64 imm = value.GetImmediateAsU64();
|
||||
ASSERT(imm < 0x100000000);
|
||||
ASSERT(imm <= u64(std::numeric_limits<u32>::max()));
|
||||
return u32(imm);
|
||||
}
|
||||
|
||||
|
@ -366,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
|
|||
if (result_def) {
|
||||
DefineValueImpl(result_def, ABI_RETURN);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < args.size(); i++) {
|
||||
if (args[i]) {
|
||||
UseScratch(*args[i], args_hostloc[i]);
|
||||
} else {
|
||||
ScratchGpr(args_hostloc[i]); // TODO: Force spill
|
||||
}
|
||||
}
|
||||
// Must match with with ScratchImpl
|
||||
for (auto const gpr : other_caller_save) {
|
||||
MoveOutOfTheWay(gpr);
|
||||
LocInfo(gpr).WriteLock();
|
||||
}
|
||||
for (size_t i = 0; i < args.size(); i++) {
|
||||
if (args[i] && !args[i]->get().IsVoid()) {
|
||||
UseScratch(*args[i], args_hostloc[i]);
|
||||
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
|
||||
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
|
||||
switch (args[i]->get().GetType()) {
|
||||
|
@ -389,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < args.size(); i++)
|
||||
if (!args[i]) {
|
||||
// TODO: Force spill
|
||||
ScratchGpr(args_hostloc[i]);
|
||||
}
|
||||
for (auto const caller_saved : other_caller_save)
|
||||
ScratchImpl({caller_saved});
|
||||
}
|
||||
|
||||
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
|
||||
|
@ -559,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
|
|||
}
|
||||
|
||||
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
||||
#if 0
|
||||
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
|
||||
// but it's fine anyways. We can find other ways to cheat it later - but which?!?!
|
||||
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
|
||||
// TODO(lizzie): This needs to be investigated further later.
|
||||
// Do not spill XMM into other XMM silly
|
||||
if (!is_xmm) {
|
||||
/*if (!is_xmm) {
|
||||
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
|
||||
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE
|
||||
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
|
||||
|
@ -573,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
|||
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
|
||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||
return loc;
|
||||
}
|
||||
#endif
|
||||
}*/
|
||||
// TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
|
||||
// of spilling on XMM versus the potential cost of using XMM registers.....
|
||||
// Otherwise go to stack spilling
|
||||
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
|
||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <functional>
|
||||
#include <optional>
|
||||
|
||||
#include "boost/container/small_vector.hpp"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
@ -77,13 +78,13 @@ public:
|
|||
return std::find(values.begin(), values.end(), inst) != values.end();
|
||||
}
|
||||
inline size_t GetMaxBitWidth() const noexcept {
|
||||
return max_bit_width;
|
||||
return 1 << max_bit_width;
|
||||
}
|
||||
void AddValue(IR::Inst* inst) noexcept;
|
||||
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
|
||||
private:
|
||||
//non trivial
|
||||
std::vector<IR::Inst*> values; //24
|
||||
boost::container::small_vector<IR::Inst*, 3> values; //24
|
||||
// Block state
|
||||
uint16_t total_uses = 0; //8
|
||||
//sometimes zeroed
|
||||
|
@ -93,10 +94,10 @@ private:
|
|||
uint16_t is_being_used_count = 0; //8
|
||||
uint16_t current_references = 0; //8
|
||||
// Value state
|
||||
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
|
||||
uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6)
|
||||
uint8_t lru_counter : 2 = 0; //1
|
||||
bool is_scratch : 1 = false; //1
|
||||
bool is_set_last_use : 1 = false; //1
|
||||
alignas(16) uint8_t lru_counter = 0; //1
|
||||
friend class RegAlloc;
|
||||
};
|
||||
static_assert(sizeof(HostLocInfo) == 64);
|
||||
|
|
120
src/dynarmic/src/dynarmic/common/context.h
Normal file
120
src/dynarmic/src/dynarmic/common/context.h
Normal file
|
@ -0,0 +1,120 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __APPLE__
|
||||
# include <signal.h>
|
||||
# include <sys/ucontext.h>
|
||||
#else
|
||||
# include <signal.h>
|
||||
# ifndef __OpenBSD__
|
||||
# include <ucontext.h>
|
||||
# endif
|
||||
# ifdef __sun__
|
||||
# include <sys/regset.h>
|
||||
# endif
|
||||
# ifdef __linux__
|
||||
# include <sys/syscall.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
# ifdef __OpenBSD__
|
||||
# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
|
||||
# else
|
||||
# define CTX_DECLARE(raw_context) \
|
||||
ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context); \
|
||||
[[maybe_unused]] auto& mctx = ucontext->uc_mcontext;
|
||||
# endif
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
# ifdef __OpenBSD__
|
||||
# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
|
||||
# else
|
||||
# define CTX_DECLARE(raw_context) \
|
||||
ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context); \
|
||||
[[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \
|
||||
[[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ARCHITECTURE_x86_64)
|
||||
# if defined(__APPLE__)
|
||||
# define CTX_RIP (mctx->__ss.__rip)
|
||||
# define CTX_RSP (mctx->__ss.__rsp)
|
||||
# elif defined(__linux__)
|
||||
# define CTX_RIP (mctx.gregs[REG_RIP])
|
||||
# define CTX_RSP (mctx.gregs[REG_RSP])
|
||||
# elif defined(__FreeBSD__)
|
||||
# define CTX_RIP (mctx.mc_rip)
|
||||
# define CTX_RSP (mctx.mc_rsp)
|
||||
# elif defined(__NetBSD__)
|
||||
# define CTX_RIP (mctx.__gregs[_REG_RIP])
|
||||
# define CTX_RSP (mctx.__gregs[_REG_RSP])
|
||||
# elif defined(__OpenBSD__)
|
||||
# define CTX_RIP (ucontext->sc_rip)
|
||||
# define CTX_RSP (ucontext->sc_rsp)
|
||||
# elif defined(__sun__)
|
||||
# define CTX_RIP (mctx.gregs[REG_RIP])
|
||||
# define CTX_RSP (mctx.gregs[REG_RSP])
|
||||
# else
|
||||
# error "Unknown platform"
|
||||
# endif
|
||||
#elif defined(ARCHITECTURE_arm64)
|
||||
# if defined(__APPLE__)
|
||||
# define CTX_PC (mctx->__ss.__pc)
|
||||
# define CTX_SP (mctx->__ss.__sp)
|
||||
# define CTX_LR (mctx->__ss.__lr)
|
||||
# define CTX_PSTATE (mctx->__ss.__cpsr)
|
||||
# define CTX_X(i) (mctx->__ss.__x[i])
|
||||
# define CTX_Q(i) (mctx->__ns.__v[i])
|
||||
# define CTX_FPSR (mctx->__ns.__fpsr)
|
||||
# define CTX_FPCR (mctx->__ns.__fpcr)
|
||||
# elif defined(__linux__)
|
||||
# define CTX_PC (mctx.pc)
|
||||
# define CTX_SP (mctx.sp)
|
||||
# define CTX_LR (mctx.regs[30])
|
||||
# define CTX_PSTATE (mctx.pstate)
|
||||
# define CTX_X(i) (mctx.regs[i])
|
||||
# define CTX_Q(i) (fpctx->vregs[i])
|
||||
# define CTX_FPSR (fpctx->fpsr)
|
||||
# define CTX_FPCR (fpctx->fpcr)
|
||||
# elif defined(__FreeBSD__)
|
||||
# define CTX_PC (mctx.mc_gpregs.gp_elr)
|
||||
# define CTX_SP (mctx.mc_gpregs.gp_sp)
|
||||
# define CTX_LR (mctx.mc_gpregs.gp_lr)
|
||||
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
|
||||
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
|
||||
# elif defined(__NetBSD__)
|
||||
# define CTX_PC (mctx.mc_gpregs.gp_elr)
|
||||
# define CTX_SP (mctx.mc_gpregs.gp_sp)
|
||||
# define CTX_LR (mctx.mc_gpregs.gp_lr)
|
||||
# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
|
||||
# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
|
||||
# elif defined(__OpenBSD__)
|
||||
# define CTX_PC (ucontext->sc_elr)
|
||||
# define CTX_SP (ucontext->sc_sp)
|
||||
# define CTX_LR (ucontext->sc_lr)
|
||||
# define CTX_X(i) (ucontext->sc_x[i])
|
||||
# define CTX_Q(i) (ucontext->sc_q[i])
|
||||
# else
|
||||
# error "Unknown platform"
|
||||
# endif
|
||||
#else
|
||||
# error "unimplemented"
|
||||
#endif
|
||||
|
||||
#ifdef ARCHITECTURE_arm64
|
||||
#ifdef __APPLE__
|
||||
inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {
|
||||
return &(host_ctx->__ns);
|
||||
}
|
||||
#elif defined(__linux__)
|
||||
inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
|
||||
_aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
|
||||
while (header->magic != FPSIMD_MAGIC)
|
||||
header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
|
||||
return reinterpret_cast<fpsimd_context*>(header);
|
||||
}
|
||||
#endif
|
||||
#endif
|
|
@ -1,13 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/common/memory_pool.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
|
||||
|
||||
} // namespace Dynarmic::Common
|
|
@ -1,61 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
|
||||
/// @tparam object_size Byte-size of objects to construct
|
||||
/// @tparam slab_size Number of objects to have per slab
|
||||
template<size_t object_size, size_t slab_size>
|
||||
class Pool {
|
||||
public:
|
||||
inline Pool() noexcept {
|
||||
AllocateNewSlab();
|
||||
}
|
||||
inline ~Pool() noexcept {
|
||||
std::free(current_slab);
|
||||
for (char* slab : slabs) {
|
||||
std::free(slab);
|
||||
}
|
||||
}
|
||||
|
||||
Pool(const Pool&) = delete;
|
||||
Pool(Pool&&) = delete;
|
||||
|
||||
Pool& operator=(const Pool&) = delete;
|
||||
Pool& operator=(Pool&&) = delete;
|
||||
|
||||
/// @brief Returns a pointer to an `object_size`-bytes block of memory.
|
||||
[[nodiscard]] void* Alloc() noexcept {
|
||||
if (remaining == 0) {
|
||||
slabs.push_back(current_slab);
|
||||
AllocateNewSlab();
|
||||
}
|
||||
void* ret = static_cast<void*>(current_ptr);
|
||||
current_ptr += object_size;
|
||||
remaining--;
|
||||
return ret;
|
||||
}
|
||||
private:
|
||||
/// @brief Allocates a completely new memory slab.
|
||||
/// Used when an entirely new slab is needed
|
||||
/// due the current one running out of usable space.
|
||||
void AllocateNewSlab() noexcept {
|
||||
current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
|
||||
current_ptr = current_slab;
|
||||
remaining = slab_size;
|
||||
}
|
||||
|
||||
std::vector<char*> slabs;
|
||||
char* current_slab = nullptr;
|
||||
char* current_ptr = nullptr;
|
||||
size_t remaining = 0;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::Common
|
|
@ -1,29 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/variant.hpp>
|
||||
|
||||
namespace Dynarmic::Common {
|
||||
namespace detail {
|
||||
|
||||
template<typename ReturnT, typename Lambda>
|
||||
struct VariantVisitor : boost::static_visitor<ReturnT>
|
||||
, Lambda {
|
||||
VariantVisitor(Lambda&& lambda)
|
||||
: Lambda(std::move(lambda)) {}
|
||||
|
||||
using Lambda::operator();
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template<typename ReturnT, typename Variant, typename Lambda>
|
||||
inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) {
|
||||
return boost::apply_visitor(detail::VariantVisitor<ReturnT, Lambda>(std::move(lambda)), variant);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Common
|
|
@ -9,12 +9,9 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/interface/A32/coprocessor_util.h"
|
||||
#include "dynarmic/ir/cond.h"
|
||||
|
||||
|
@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
|
|||
|
||||
inline size_t RegNumber(Reg reg) {
|
||||
ASSERT(reg != Reg::INVALID_REG);
|
||||
return static_cast<size_t>(reg);
|
||||
return size_t(reg);
|
||||
}
|
||||
|
||||
inline size_t RegNumber(ExtReg reg) {
|
||||
if (IsSingleExtReg(reg)) {
|
||||
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
|
||||
return size_t(reg) - size_t(ExtReg::S0);
|
||||
} else if (IsDoubleExtReg(reg)) {
|
||||
return size_t(reg) - size_t(ExtReg::D0);
|
||||
}
|
||||
|
||||
if (IsDoubleExtReg(reg)) {
|
||||
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
|
||||
}
|
||||
|
||||
if (IsQuadExtReg(reg)) {
|
||||
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
|
||||
}
|
||||
|
||||
ASSERT_MSG(false, "Invalid extended register");
|
||||
return 0;
|
||||
ASSERT(IsQuadExtReg(reg));
|
||||
return size_t(reg) - size_t(ExtReg::Q0);
|
||||
}
|
||||
|
||||
inline Reg operator+(Reg reg, size_t number) {
|
||||
|
|
|
@ -30,13 +30,13 @@ template<typename Visitor>
|
|||
using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>;
|
||||
|
||||
namespace detail {
|
||||
inline size_t ToFastLookupIndexArm(u32 instruction) {
|
||||
inline size_t ToFastLookupIndexArm(u32 instruction) noexcept {
|
||||
return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
template<typename V>
|
||||
constexpr ArmDecodeTable<V> GetArmDecodeTable() {
|
||||
constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
|
||||
std::vector<ArmMatcher<V>> list = {
|
||||
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
|
||||
#include "./arm.inc"
|
||||
|
@ -62,15 +62,27 @@ constexpr ArmDecodeTable<V> GetArmDecodeTable() {
|
|||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
|
||||
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) noexcept {
|
||||
alignas(64) static const auto table = GetArmDecodeTable<V>();
|
||||
const auto matches_instruction = [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
};
|
||||
|
||||
const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
|
||||
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
|
||||
return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::string_view> GetNameARM(u32 inst) noexcept {
|
||||
std::vector<std::pair<std::string_view, ArmMatcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./arm.inc"
|
||||
#undef INST
|
||||
};
|
||||
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
|
||||
return m.second.Matches(inst);
|
||||
});
|
||||
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -26,15 +26,12 @@ template<typename Visitor>
|
|||
using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
|
||||
|
||||
template<typename V>
|
||||
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
|
||||
std::vector<ASIMDMatcher<V>> table = {
|
||||
|
||||
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
|
||||
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
|
||||
std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./asimd.inc"
|
||||
#undef INST
|
||||
|
||||
};
|
||||
|
||||
// Exceptions to the rule of thumb.
|
||||
const std::set<std::string> comes_first{
|
||||
"VBIC, VMOV, VMVN, VORR (immediate)",
|
||||
|
@ -53,29 +50,43 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
|
|||
"VQDMULH (scalar)",
|
||||
"VQRDMULH (scalar)",
|
||||
};
|
||||
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
|
||||
return comes_first.count(matcher.GetName()) > 0;
|
||||
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
|
||||
return comes_first.count(e.first) > 0;
|
||||
});
|
||||
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
|
||||
return comes_last.count(matcher.GetName()) == 0;
|
||||
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
|
||||
return comes_last.count(e.first) == 0;
|
||||
});
|
||||
|
||||
// If a matcher has more bits in its mask it is more specific, so it should come first.
|
||||
std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
|
||||
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
|
||||
std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
|
||||
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
|
||||
});
|
||||
|
||||
return table;
|
||||
std::vector<ASIMDMatcher<V>> final_table;
|
||||
std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
|
||||
return e.second;
|
||||
});
|
||||
return final_table;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) {
|
||||
static const auto table = GetASIMDDecodeTable<V>();
|
||||
|
||||
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
|
||||
|
||||
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
|
||||
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) noexcept {
|
||||
alignas(64) static const auto table = GetASIMDDecodeTable<V>();
|
||||
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
});
|
||||
return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::string_view> GetNameASIMD(u32 inst) noexcept {
|
||||
std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./asimd.inc"
|
||||
#undef INST
|
||||
};
|
||||
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
|
||||
return m.second.Matches(inst);
|
||||
});
|
||||
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;
|
|||
|
||||
template<typename V>
|
||||
std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) {
|
||||
static const std::vector<Thumb16Matcher<V>> table = {
|
||||
|
||||
alignas(64) static const std::vector<Thumb16Matcher<V>> table = {
|
||||
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)),
|
||||
#include "./thumb16.inc"
|
||||
#undef INST
|
||||
|
||||
};
|
||||
|
||||
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
|
||||
|
||||
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
|
||||
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
});
|
||||
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::string_view> GetNameThumb16(u32 inst) noexcept {
|
||||
std::vector<std::pair<std::string_view, Thumb16Matcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) },
|
||||
#include "./thumb16.inc"
|
||||
#undef INST
|
||||
};
|
||||
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
|
||||
return m.second.Matches(inst);
|
||||
});
|
||||
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;
|
|||
|
||||
template<typename V>
|
||||
std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) {
|
||||
static const std::vector<Thumb32Matcher<V>> table = {
|
||||
|
||||
alignas(64) static const std::vector<Thumb32Matcher<V>> table = {
|
||||
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
|
||||
#include "./thumb32.inc"
|
||||
#undef INST
|
||||
|
||||
};
|
||||
|
||||
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
|
||||
|
||||
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
|
||||
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
});
|
||||
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::string_view> GetNameThumb32(u32 inst) noexcept {
|
||||
std::vector<std::pair<std::string_view, Thumb32Matcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./thumb32.inc"
|
||||
#undef INST
|
||||
};
|
||||
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
|
||||
return m.second.Matches(inst);
|
||||
});
|
||||
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -26,36 +26,42 @@ using VFPMatcher = Decoder::Matcher<Visitor, u32>;
|
|||
template<typename V>
|
||||
std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) {
|
||||
using Table = std::vector<VFPMatcher<V>>;
|
||||
|
||||
static const struct Tables {
|
||||
alignas(64) static const struct Tables {
|
||||
Table unconditional;
|
||||
Table conditional;
|
||||
} tables = [] {
|
||||
} tables = []() {
|
||||
Table list = {
|
||||
|
||||
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
|
||||
#include "./vfp.inc"
|
||||
#undef INST
|
||||
|
||||
};
|
||||
|
||||
const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
|
||||
auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
|
||||
return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
|
||||
});
|
||||
|
||||
return Tables{
|
||||
Table{list.begin(), division},
|
||||
Table{division, list.end()},
|
||||
Table{list.begin(), it},
|
||||
Table{it, list.end()},
|
||||
};
|
||||
}();
|
||||
|
||||
const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
|
||||
const Table& table = is_unconditional ? tables.unconditional : tables.conditional;
|
||||
|
||||
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
|
||||
|
||||
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
|
||||
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
});
|
||||
return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::string_view> GetNameVFP(u32 inst) noexcept {
|
||||
std::vector<std::pair<std::string_view, VFPMatcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./vfp.inc"
|
||||
#undef INST
|
||||
};
|
||||
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
|
||||
return m.second.Matches(inst);
|
||||
});
|
||||
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) {
|
|||
return 0xF7F0A000; // UDF
|
||||
}
|
||||
|
||||
bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) {
|
||||
inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept {
|
||||
return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000;
|
||||
}
|
||||
|
||||
|
|
|
@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {
|
|||
|
||||
template<typename V>
|
||||
constexpr DecodeTable<V> GetDecodeTable() {
|
||||
std::vector<Matcher<V>> list = {
|
||||
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
|
||||
std::vector<std::pair<const char*, Matcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./a64.inc"
|
||||
#undef INST
|
||||
};
|
||||
|
||||
// If a matcher has more bits in its mask it is more specific, so it should come first.
|
||||
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
|
||||
std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
|
||||
// If a matcher has more bits in its mask it is more specific, so it should come first.
|
||||
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
|
||||
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
|
||||
});
|
||||
|
||||
// Exceptions to the above rule of thumb.
|
||||
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
|
||||
std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
|
||||
return std::set<std::string>{
|
||||
"MOVI, MVNI, ORR, BIC (vector, immediate)",
|
||||
"FMOV (vector, immediate)",
|
||||
"Unallocated SIMD modified immediate",
|
||||
}.count(matcher.GetName()) > 0;
|
||||
}.count(e.first) > 0;
|
||||
});
|
||||
|
||||
DecodeTable<V> table{};
|
||||
for (size_t i = 0; i < table.size(); ++i) {
|
||||
for (auto matcher : list) {
|
||||
const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
|
||||
const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
|
||||
for (auto const& e : list) {
|
||||
const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
|
||||
const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
|
||||
if ((i & mask) == expect) {
|
||||
table[i].push_back(matcher);
|
||||
table[i].push_back(e.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -74,12 +71,24 @@ constexpr DecodeTable<V> GetDecodeTable() {
|
|||
template<typename V>
|
||||
std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
|
||||
alignas(64) static const auto table = GetDecodeTable<V>();
|
||||
const auto matches_instruction = [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
};
|
||||
const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
|
||||
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
|
||||
auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) {
|
||||
return matcher.Matches(instruction);
|
||||
});
|
||||
return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
std::optional<std::string_view> GetName(u32 inst) noexcept {
|
||||
std::vector<std::pair<std::string_view, Matcher<V>>> list = {
|
||||
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
|
||||
#include "./a64.inc"
|
||||
#undef INST
|
||||
};
|
||||
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
|
||||
return m.second.Matches(inst);
|
||||
});
|
||||
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A64
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -20,9 +23,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) {
|
|||
bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
|
||||
const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
|
||||
const u64 target = ir.PC() + offset;
|
||||
|
||||
//ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
|
||||
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
|
||||
// Pattern to halt execution (B .)
|
||||
if (target == ir.PC()) {
|
||||
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
|
||||
return false;
|
||||
}
|
||||
ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -70,11 +70,9 @@ struct detail {
|
|||
return std::make_tuple(mask, expect);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the masks and shifts for each argument.
|
||||
* A '-' in a bitstring indicates that we don't care about that value.
|
||||
* An argument is specified by a continuous string of the same character.
|
||||
*/
|
||||
/// @brief Generates the masks and shifts for each argument.
|
||||
/// A '-' in a bitstring indicates that we don't care about that value.
|
||||
/// An argument is specified by a continuous string of the same character.
|
||||
template<size_t N>
|
||||
static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
|
||||
std::array<opcode_type, N> masks = {};
|
||||
|
@ -98,7 +96,6 @@ struct detail {
|
|||
|
||||
if constexpr (N > 0) {
|
||||
const size_t bit_position = opcode_bitsize - i - 1;
|
||||
|
||||
if (arg_index >= N)
|
||||
throw std::out_of_range("Unexpected field");
|
||||
|
||||
|
@ -109,20 +106,16 @@ struct detail {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
|
||||
// Avoids a MSVC ICE, and avoids Android NDK issue.
|
||||
ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
|
||||
#endif
|
||||
|
||||
return std::make_tuple(masks, shifts);
|
||||
}
|
||||
|
||||
/**
|
||||
* This struct's Make member function generates a lambda which decodes an instruction based on
|
||||
* the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
|
||||
* template argument.
|
||||
*/
|
||||
/// @brief This struct's Make member function generates a lambda which decodes an instruction
|
||||
/// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
|
||||
/// provided as a template argument.
|
||||
template<typename FnT>
|
||||
struct VisitorCaller;
|
||||
|
||||
|
@ -130,36 +123,36 @@ struct detail {
|
|||
# pragma warning(push)
|
||||
# pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning)
|
||||
#endif
|
||||
template<typename Visitor, typename... Args, typename CallRetT>
|
||||
struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
|
||||
template<typename V, typename... Args, typename ReturnType>
|
||||
struct VisitorCaller<ReturnType (V::*)(Args...)> {
|
||||
template<size_t... iota>
|
||||
static auto Make(std::integer_sequence<size_t, iota...>,
|
||||
CallRetT (Visitor::*const fn)(Args...),
|
||||
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
|
||||
ReturnType (V::*const fn)(Args...),
|
||||
const std::array<opcode_type, sizeof...(iota)> arg_masks,
|
||||
const std::array<size_t, sizeof...(iota)> arg_shifts) {
|
||||
static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor");
|
||||
return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
|
||||
static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
|
||||
return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
|
||||
(void)instruction;
|
||||
(void)arg_masks;
|
||||
(void)arg_shifts;
|
||||
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
|
||||
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Visitor, typename... Args, typename CallRetT>
|
||||
struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
|
||||
template<typename V, typename... Args, typename ReturnType>
|
||||
struct VisitorCaller<ReturnType (V::*)(Args...) const> {
|
||||
template<size_t... iota>
|
||||
static auto Make(std::integer_sequence<size_t, iota...>,
|
||||
CallRetT (Visitor::*const fn)(Args...) const,
|
||||
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
|
||||
ReturnType (V::*const fn)(Args...) const,
|
||||
const std::array<opcode_type, sizeof...(iota)> arg_masks,
|
||||
const std::array<size_t, sizeof...(iota)> arg_shifts) {
|
||||
static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor");
|
||||
return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
|
||||
static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
|
||||
return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
|
||||
(void)instruction;
|
||||
(void)arg_masks;
|
||||
(void)arg_shifts;
|
||||
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
|
||||
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
@ -167,27 +160,21 @@ struct detail {
|
|||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Creates a matcher that can match and parse instructions based on bitstring.
|
||||
* See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
|
||||
*/
|
||||
template<auto bitstring, typename FnT>
|
||||
static auto GetMatcher(FnT fn, const char* const name) {
|
||||
constexpr size_t args_count = mcl::parameter_count_v<FnT>;
|
||||
|
||||
/// @brief Creates a matcher that can match and parse instructions based on bitstring.
|
||||
/// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
|
||||
template<auto bitstring, typename F>
|
||||
static constexpr auto GetMatcher(F fn) {
|
||||
constexpr size_t args_count = mcl::parameter_count_v<F>;
|
||||
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
|
||||
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
|
||||
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
|
||||
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
|
||||
|
||||
using Iota = std::make_index_sequence<args_count>;
|
||||
|
||||
const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
|
||||
return MatcherT(name, mask, expect, proxy_fn);
|
||||
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
|
||||
return MatcherT(mask, expect, proxy_fn);
|
||||
}
|
||||
};
|
||||
|
||||
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
|
||||
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)
|
||||
|
||||
} // namespace detail
|
||||
} // namespace Dynarmic::Decoder
|
||||
|
|
|
@ -14,16 +14,12 @@
|
|||
|
||||
namespace Dynarmic::Decoder {
|
||||
|
||||
/**
|
||||
* Generic instruction handling construct.
|
||||
*
|
||||
* @tparam Visitor An arbitrary visitor type that will be passed through
|
||||
* to the function being handled. This type must be the
|
||||
* type of the first parameter in a handler function.
|
||||
*
|
||||
* @tparam OpcodeType Type representing an opcode. This must be the
|
||||
* type of the second parameter in a handler function.
|
||||
*/
|
||||
/// Generic instruction handling construct.
|
||||
/// @tparam Visitor An arbitrary visitor type that will be passed through
|
||||
/// to the function being handled. This type must be the
|
||||
/// type of the first parameter in a handler function.
|
||||
/// @tparam OpcodeType Type representing an opcode. This must be the
|
||||
/// type of the second parameter in a handler function.
|
||||
template<typename Visitor, typename OpcodeType>
|
||||
class Matcher {
|
||||
public:
|
||||
|
@ -31,46 +27,35 @@ public:
|
|||
using visitor_type = Visitor;
|
||||
using handler_return_type = typename Visitor::instruction_return_type;
|
||||
using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;
|
||||
|
||||
Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
|
||||
: name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
|
||||
|
||||
/// Gets the name of this type of instruction.
|
||||
const char* GetName() const {
|
||||
return name;
|
||||
}
|
||||
Matcher(opcode_type mask, opcode_type expected, handler_function func)
|
||||
: mask{mask}, expected{expected}, fn{std::move(func)} {}
|
||||
|
||||
/// Gets the mask for this instruction.
|
||||
opcode_type GetMask() const {
|
||||
inline opcode_type GetMask() const noexcept {
|
||||
return mask;
|
||||
}
|
||||
|
||||
/// Gets the expected value after masking for this instruction.
|
||||
opcode_type GetExpected() const {
|
||||
inline opcode_type GetExpected() const noexcept {
|
||||
return expected;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests to see if the given instruction is the instruction this matcher represents.
|
||||
* @param instruction The instruction to test
|
||||
* @returns true if the given instruction matches.
|
||||
*/
|
||||
bool Matches(opcode_type instruction) const {
|
||||
/// Tests to see if the given instruction is the instruction this matcher represents.
|
||||
/// @param instruction The instruction to test
|
||||
/// @returns true if the given instruction matches.
|
||||
inline bool Matches(opcode_type instruction) const noexcept {
|
||||
return (instruction & mask) == expected;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls the corresponding instruction handler on visitor for this type of instruction.
|
||||
* @param v The visitor to use
|
||||
* @param instruction The instruction to decode.
|
||||
*/
|
||||
handler_return_type call(Visitor& v, opcode_type instruction) const {
|
||||
/// Calls the corresponding instruction handler on visitor for this type of instruction.
|
||||
/// @param v The visitor to use
|
||||
/// @param instruction The instruction to decode.
|
||||
inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept {
|
||||
ASSERT(Matches(instruction));
|
||||
return fn(v, instruction);
|
||||
}
|
||||
|
||||
private:
|
||||
const char* name;
|
||||
opcode_type mask;
|
||||
opcode_type expected;
|
||||
handler_function fn;
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -34,6 +37,8 @@ enum class OptimizationFlag : std::uint32_t {
|
|||
MiscIROpt = 0x00000020,
|
||||
/// Optimize for code speed rather than for code size (this serves well for tight loops)
|
||||
CodeSpeed = 0x00000040,
|
||||
/// Disable verification passes
|
||||
DisableVerification = 0x00000080,
|
||||
|
||||
/// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
|
||||
/// This unfuses fused instructions to improve performance on host CPUs without FMA support.
|
||||
|
|
|
@ -15,8 +15,6 @@
|
|||
|
||||
#include <fmt/format.h>
|
||||
#include "dynarmic/common/assert.h"
|
||||
|
||||
#include "dynarmic/common/memory_pool.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
#include "dynarmic/frontend/A64/a64_types.h"
|
||||
#include "dynarmic/ir/cond.h"
|
||||
|
@ -27,8 +25,7 @@ namespace Dynarmic::IR {
|
|||
Block::Block(const LocationDescriptor& location)
|
||||
: location{location},
|
||||
end_location{location},
|
||||
cond{Cond::AL},
|
||||
instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
|
||||
cond{Cond::AL}
|
||||
{
|
||||
|
||||
}
|
||||
|
@ -40,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
|
|||
/// @param args A sequence of Value instances used as arguments for the instruction.
|
||||
/// @returns Iterator to the newly created instruction.
|
||||
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
|
||||
IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
|
||||
// First try using the "inline" buffer, otherwise fallback to a slower slab-like allocation scheme
|
||||
// purpouse is to avoid many calls to new/delete which invoke malloc which invokes mmap
|
||||
// just pool it!!! - reason why there is an inline buffer is because many small blocks are created
|
||||
// with few instructions due to subpar optimisations on other passes... plus branch-heavy code will
|
||||
// hugely benefit from the coherency of faster allocations...
|
||||
IR::Inst* inst;
|
||||
if (inlined_inst.size() < inlined_inst.max_size()) {
|
||||
inst = &inlined_inst[inlined_inst.size()];
|
||||
inlined_inst.emplace_back(opcode);
|
||||
} else {
|
||||
if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
|
||||
pooled_inst.emplace_back();
|
||||
inst = &pooled_inst.back()[pooled_inst.back().size()];
|
||||
pooled_inst.back().emplace_back(opcode);
|
||||
}
|
||||
DEBUG_ASSERT(args.size() == inst->NumArgs());
|
||||
std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
|
||||
inst->SetArg(index, arg);
|
||||
|
|
|
@ -13,6 +13,9 @@
|
|||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
#include <boost/container/container_fwd.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <boost/container/stable_vector.hpp>
|
||||
#include <mcl/container/intrusive_list.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
|
@ -21,7 +24,6 @@
|
|||
#include "dynarmic/ir/terminal.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
#include "dynarmic/ir/dense_list.h"
|
||||
#include "dynarmic/common/memory_pool.h"
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
|
||||
|
@ -164,8 +166,12 @@ public:
|
|||
return cycle_count;
|
||||
}
|
||||
private:
|
||||
/// "Hot cache" for small blocks so we don't call global allocator
|
||||
boost::container::static_vector<Inst, 14> inlined_inst;
|
||||
/// List of instructions in this block.
|
||||
instruction_list_type instructions;
|
||||
/// "Long/far" memory pool
|
||||
boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled_inst;
|
||||
/// Block to execute next if `cond` did not pass.
|
||||
std::optional<LocationDescriptor> cond_failed = {};
|
||||
/// Description of the starting location of this block
|
||||
|
@ -174,8 +180,6 @@ private:
|
|||
LocationDescriptor end_location;
|
||||
/// Conditional to pass in order to execute this block
|
||||
Cond cond;
|
||||
/// Memory pool for instruction list
|
||||
std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
|
||||
/// Terminal instruction of this block.
|
||||
Terminal terminal = Term::Invalid{};
|
||||
/// Number of cycles this block takes to execute if the conditional fails.
|
||||
|
@ -183,6 +187,7 @@ private:
|
|||
/// Number of cycles this block takes to execute.
|
||||
size_t cycle_count = 0;
|
||||
};
|
||||
static_assert(sizeof(Block) == 2048);
|
||||
|
||||
/// Returns a string representation of the contents of block. Intended for debugging.
|
||||
std::string DumpBlock(const IR::Block& block) noexcept;
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/ir/ir_emitter.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/bit_cast.hpp>
|
||||
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
|
||||
|
||||
} // namespace Dynarmic::IR
|
|
@ -1,70 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
|
||||
for (auto& inst : block) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::A32ReadMemory8: {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 vaddr = inst.GetArg(1).GetU32();
|
||||
if (cb->IsReadOnlyMemory(vaddr)) {
|
||||
const u8 value_from_memory = cb->MemoryRead8(vaddr);
|
||||
inst.ReplaceUsesWith(IR::Value{value_from_memory});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32ReadMemory16: {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 vaddr = inst.GetArg(1).GetU32();
|
||||
if (cb->IsReadOnlyMemory(vaddr)) {
|
||||
const u16 value_from_memory = cb->MemoryRead16(vaddr);
|
||||
inst.ReplaceUsesWith(IR::Value{value_from_memory});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32ReadMemory32: {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 vaddr = inst.GetArg(1).GetU32();
|
||||
if (cb->IsReadOnlyMemory(vaddr)) {
|
||||
const u32 value_from_memory = cb->MemoryRead32(vaddr);
|
||||
inst.ReplaceUsesWith(IR::Value{value_from_memory});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32ReadMemory64: {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 vaddr = inst.GetArg(1).GetU32();
|
||||
if (cb->IsReadOnlyMemory(vaddr)) {
|
||||
const u64 value_from_memory = cb->MemoryRead64(vaddr);
|
||||
inst.ReplaceUsesWith(IR::Value{value_from_memory});
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,382 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <functional>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/frontend/A32/a32_ir_emitter.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
namespace {
|
||||
|
||||
void FlagsPass(IR::Block& block) {
|
||||
using Iterator = std::reverse_iterator<IR::Block::iterator>;
|
||||
|
||||
struct FlagInfo {
|
||||
bool set_not_required = false;
|
||||
bool has_value_request = false;
|
||||
Iterator value_request = {};
|
||||
};
|
||||
struct ValuelessFlagInfo {
|
||||
bool set_not_required = false;
|
||||
};
|
||||
ValuelessFlagInfo nzcvq;
|
||||
ValuelessFlagInfo nzcv;
|
||||
ValuelessFlagInfo nz;
|
||||
FlagInfo c_flag;
|
||||
FlagInfo ge;
|
||||
|
||||
auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) {
|
||||
if (info.has_value_request) {
|
||||
info.value_request->ReplaceUsesWith(value);
|
||||
}
|
||||
info.has_value_request = false;
|
||||
|
||||
if (info.set_not_required) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
info.set_not_required = true;
|
||||
};
|
||||
|
||||
auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) {
|
||||
if (info.set_not_required) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
info.set_not_required = true;
|
||||
};
|
||||
|
||||
auto do_get = [](FlagInfo& info, Iterator inst) {
|
||||
if (info.has_value_request) {
|
||||
info.value_request->ReplaceUsesWith(IR::Value{&*inst});
|
||||
}
|
||||
info.has_value_request = true;
|
||||
info.value_request = inst;
|
||||
};
|
||||
|
||||
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
|
||||
|
||||
for (auto inst = block.rbegin(); inst != block.rend(); ++inst) {
|
||||
auto const opcode = inst->GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::A32GetCFlag: {
|
||||
do_get(c_flag, inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetCpsrNZCV: {
|
||||
if (c_flag.has_value_request) {
|
||||
ir.SetInsertionPointBefore(inst.base()); // base is one ahead
|
||||
IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)});
|
||||
c_flag.value_request->ReplaceUsesWith(c);
|
||||
c_flag.has_value_request = false;
|
||||
break; // This case will be executed again because of the above
|
||||
}
|
||||
|
||||
do_set_valueless(nzcv, inst);
|
||||
|
||||
nz = {.set_not_required = true};
|
||||
c_flag = {.set_not_required = true};
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetCpsrNZCVRaw: {
|
||||
if (c_flag.has_value_request) {
|
||||
nzcv.set_not_required = false;
|
||||
}
|
||||
|
||||
do_set_valueless(nzcv, inst);
|
||||
|
||||
nzcvq = {};
|
||||
nz = {.set_not_required = true};
|
||||
c_flag = {.set_not_required = true};
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetCpsrNZCVQ: {
|
||||
if (c_flag.has_value_request) {
|
||||
nzcvq.set_not_required = false;
|
||||
}
|
||||
|
||||
do_set_valueless(nzcvq, inst);
|
||||
|
||||
nzcv = {.set_not_required = true};
|
||||
nz = {.set_not_required = true};
|
||||
c_flag = {.set_not_required = true};
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetCpsrNZ: {
|
||||
do_set_valueless(nz, inst);
|
||||
|
||||
nzcvq = {};
|
||||
nzcv = {};
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetCpsrNZC: {
|
||||
if (c_flag.has_value_request) {
|
||||
c_flag.value_request->ReplaceUsesWith(inst->GetArg(1));
|
||||
c_flag.has_value_request = false;
|
||||
}
|
||||
|
||||
if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) {
|
||||
const auto nz_value = inst->GetArg(0);
|
||||
|
||||
inst->Invalidate();
|
||||
|
||||
ir.SetInsertionPointBefore(inst.base());
|
||||
ir.SetCpsrNZ(IR::NZCV{nz_value});
|
||||
|
||||
nzcvq = {};
|
||||
nzcv = {};
|
||||
nz = {.set_not_required = true};
|
||||
break;
|
||||
}
|
||||
|
||||
if (nz.set_not_required && c_flag.set_not_required) {
|
||||
inst->Invalidate();
|
||||
} else if (nz.set_not_required) {
|
||||
inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker());
|
||||
}
|
||||
nz.set_not_required = true;
|
||||
c_flag.set_not_required = true;
|
||||
|
||||
nzcv = {};
|
||||
nzcvq = {};
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetGEFlags: {
|
||||
do_set(ge, inst->GetArg(0), inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32GetGEFlags: {
|
||||
do_get(ge, inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetGEFlagsCompressed: {
|
||||
ge = {.set_not_required = true};
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32OrQFlag: {
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
|
||||
nzcvq = {};
|
||||
nzcv = {};
|
||||
nz = {};
|
||||
c_flag = {};
|
||||
ge = {};
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterPass(IR::Block& block) {
|
||||
using Iterator = IR::Block::iterator;
|
||||
|
||||
struct RegInfo {
|
||||
IR::Value register_value;
|
||||
std::optional<Iterator> last_set_instruction;
|
||||
};
|
||||
std::array<RegInfo, 15> reg_info;
|
||||
|
||||
const auto do_get = [](RegInfo& info, Iterator get_inst) {
|
||||
if (info.register_value.IsEmpty()) {
|
||||
info.register_value = IR::Value(&*get_inst);
|
||||
return;
|
||||
}
|
||||
get_inst->ReplaceUsesWith(info.register_value);
|
||||
};
|
||||
|
||||
const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) {
|
||||
if (info.last_set_instruction) {
|
||||
(*info.last_set_instruction)->Invalidate();
|
||||
}
|
||||
info = {
|
||||
.register_value = value,
|
||||
.last_set_instruction = set_inst,
|
||||
};
|
||||
};
|
||||
|
||||
enum class ExtValueType {
|
||||
Empty,
|
||||
Single,
|
||||
Double,
|
||||
VectorDouble,
|
||||
VectorQuad,
|
||||
};
|
||||
struct ExtRegInfo {
|
||||
ExtValueType value_type = {};
|
||||
IR::Value register_value;
|
||||
std::optional<Iterator> last_set_instruction;
|
||||
};
|
||||
std::array<ExtRegInfo, 64> ext_reg_info;
|
||||
|
||||
const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
|
||||
if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
|
||||
for (auto& info : infos) {
|
||||
info.get() = {
|
||||
.value_type = type,
|
||||
.register_value = IR::Value(&*get_inst),
|
||||
.last_set_instruction = std::nullopt,
|
||||
};
|
||||
}
|
||||
return;
|
||||
}
|
||||
get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value);
|
||||
};
|
||||
|
||||
const auto do_ext_set = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, IR::Value value, Iterator set_inst) {
|
||||
if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
|
||||
if (std::data(infos)[0].get().last_set_instruction) {
|
||||
(*std::data(infos)[0].get().last_set_instruction)->Invalidate();
|
||||
}
|
||||
}
|
||||
for (auto& info : infos) {
|
||||
info.get() = {
|
||||
.value_type = type,
|
||||
.register_value = value,
|
||||
.last_set_instruction = set_inst,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Location and version don't matter here.
|
||||
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
|
||||
|
||||
for (auto inst = block.begin(); inst != block.end(); ++inst) {
|
||||
auto const opcode = inst->GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::A32GetRegister: {
|
||||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
ASSERT(reg != A32::Reg::PC);
|
||||
const size_t reg_index = static_cast<size_t>(reg);
|
||||
do_get(reg_info[reg_index], inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetRegister: {
|
||||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
|
||||
if (reg == A32::Reg::PC) {
|
||||
break;
|
||||
}
|
||||
const auto reg_index = static_cast<size_t>(reg);
|
||||
do_set(reg_info[reg_index], inst->GetArg(1), inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32GetExtendedRegister32: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetExtendedRegister32: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32GetExtendedRegister64: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
do_ext_get(ExtValueType::Double,
|
||||
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetExtendedRegister64: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
do_ext_set(ExtValueType::Double,
|
||||
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
inst->GetArg(1),
|
||||
inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32GetVector: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
if (A32::IsDoubleExtReg(reg)) {
|
||||
do_ext_get(ExtValueType::VectorDouble,
|
||||
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
inst);
|
||||
} else {
|
||||
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
|
||||
do_ext_get(ExtValueType::VectorQuad,
|
||||
{
|
||||
ext_reg_info[reg_index * 4 + 0],
|
||||
ext_reg_info[reg_index * 4 + 1],
|
||||
ext_reg_info[reg_index * 4 + 2],
|
||||
ext_reg_info[reg_index * 4 + 3],
|
||||
},
|
||||
inst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetVector: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
if (A32::IsDoubleExtReg(reg)) {
|
||||
ir.SetInsertionPointAfter(inst);
|
||||
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)});
|
||||
do_ext_set(ExtValueType::VectorDouble,
|
||||
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
stored_value,
|
||||
inst);
|
||||
} else {
|
||||
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
|
||||
do_ext_set(ExtValueType::VectorQuad,
|
||||
{
|
||||
ext_reg_info[reg_index * 4 + 0],
|
||||
ext_reg_info[reg_index * 4 + 1],
|
||||
ext_reg_info[reg_index * 4 + 2],
|
||||
ext_reg_info[reg_index * 4 + 3],
|
||||
},
|
||||
inst->GetArg(1),
|
||||
inst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
|
||||
reg_info = {};
|
||||
ext_reg_info = {};
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
|
||||
FlagsPass(block);
|
||||
RegisterPass(block);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,57 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/frontend/A64/a64_ir_emitter.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
|
||||
if (conf.hook_data_cache_operations) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto& inst : block) {
|
||||
if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto op = static_cast<A64::DataCacheOperation>(inst.GetArg(1).GetU64());
|
||||
if (op == A64::DataCacheOperation::ZeroByVA) {
|
||||
A64::IREmitter ir{block};
|
||||
ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
|
||||
ir.SetInsertionPointBefore(&inst);
|
||||
|
||||
size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
|
||||
IR::U64 addr{inst.GetArg(2)};
|
||||
|
||||
const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0));
|
||||
while (bytes >= 16) {
|
||||
ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA);
|
||||
addr = ir.Add(addr, ir.Imm64(16));
|
||||
bytes -= 16;
|
||||
}
|
||||
|
||||
while (bytes >= 8) {
|
||||
ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA);
|
||||
addr = ir.Add(addr, ir.Imm64(8));
|
||||
bytes -= 8;
|
||||
}
|
||||
|
||||
while (bytes >= 4) {
|
||||
ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA);
|
||||
addr = ir.Add(addr, ir.Imm64(4));
|
||||
bytes -= 4;
|
||||
}
|
||||
}
|
||||
inst.Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,165 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/frontend/A64/a64_types.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void A64GetSetElimination(IR::Block& block) {
|
||||
using Iterator = IR::Block::iterator;
|
||||
|
||||
enum class TrackingType {
|
||||
W,
|
||||
X,
|
||||
S,
|
||||
D,
|
||||
Q,
|
||||
SP,
|
||||
NZCV,
|
||||
NZCVRaw,
|
||||
};
|
||||
struct RegisterInfo {
|
||||
IR::Value register_value;
|
||||
TrackingType tracking_type;
|
||||
bool set_instruction_present = false;
|
||||
Iterator last_set_instruction;
|
||||
};
|
||||
std::array<RegisterInfo, 31> reg_info;
|
||||
std::array<RegisterInfo, 32> vec_info;
|
||||
RegisterInfo sp_info;
|
||||
RegisterInfo nzcv_info;
|
||||
|
||||
const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) {
|
||||
if (info.set_instruction_present) {
|
||||
info.last_set_instruction->Invalidate();
|
||||
block.Instructions().erase(info.last_set_instruction);
|
||||
}
|
||||
|
||||
info.register_value = value;
|
||||
info.tracking_type = tracking_type;
|
||||
info.set_instruction_present = true;
|
||||
info.last_set_instruction = set_inst;
|
||||
};
|
||||
|
||||
const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
|
||||
const auto do_nothing = [&] {
|
||||
info = {};
|
||||
info.register_value = IR::Value(&*get_inst);
|
||||
info.tracking_type = tracking_type;
|
||||
};
|
||||
|
||||
if (info.register_value.IsEmpty()) {
|
||||
do_nothing();
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.tracking_type == tracking_type) {
|
||||
get_inst->ReplaceUsesWith(info.register_value);
|
||||
return;
|
||||
}
|
||||
|
||||
do_nothing();
|
||||
};
|
||||
|
||||
for (auto inst = block.begin(); inst != block.end(); ++inst) {
|
||||
auto const opcode = inst->GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::A64GetW: {
|
||||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
|
||||
do_get(reg_info.at(index), inst, TrackingType::W);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64GetX: {
|
||||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
|
||||
do_get(reg_info.at(index), inst, TrackingType::X);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64GetS: {
|
||||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
|
||||
do_get(vec_info.at(index), inst, TrackingType::S);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64GetD: {
|
||||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
|
||||
do_get(vec_info.at(index), inst, TrackingType::D);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64GetQ: {
|
||||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
|
||||
do_get(vec_info.at(index), inst, TrackingType::Q);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64GetSP: {
|
||||
do_get(sp_info, inst, TrackingType::SP);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64GetNZCVRaw: {
|
||||
do_get(nzcv_info, inst, TrackingType::NZCVRaw);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetW: {
|
||||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
|
||||
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetX: {
|
||||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
|
||||
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetS: {
|
||||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
|
||||
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetD: {
|
||||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
|
||||
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetQ: {
|
||||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
|
||||
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetSP: {
|
||||
do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetNZCV: {
|
||||
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A64SetNZCVRaw: {
|
||||
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
|
||||
nzcv_info = {};
|
||||
}
|
||||
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
|
||||
reg_info = {};
|
||||
vec_info = {};
|
||||
sp_info = {};
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,57 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <boost/variant/get.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/interface/A64/config.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) {
|
||||
const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) {
|
||||
const auto instruction = cb->MemoryReadCode(location.PC());
|
||||
if (!instruction)
|
||||
return false;
|
||||
|
||||
IR::Block new_block{location};
|
||||
A64::TranslateSingleInstruction(new_block, location, *instruction);
|
||||
|
||||
if (!new_block.Instructions().empty())
|
||||
return false;
|
||||
|
||||
const IR::Terminal terminal = new_block.GetTerminal();
|
||||
if (auto term = boost::get<IR::Term::Interpret>(&terminal)) {
|
||||
return term->next == location;
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
IR::Terminal terminal = block.GetTerminal();
|
||||
auto term = boost::get<IR::Term::Interpret>(&terminal);
|
||||
if (!term)
|
||||
return;
|
||||
|
||||
A64::LocationDescriptor location{term->next};
|
||||
size_t num_instructions = 1;
|
||||
|
||||
while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) {
|
||||
num_instructions++;
|
||||
}
|
||||
|
||||
term->num_instructions = num_instructions;
|
||||
block.ReplaceTerminal(terminal);
|
||||
block.CycleCount() += num_instructions - 1;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,559 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <mcl/bit/rotate.hpp>
|
||||
#include <mcl/bit/swap.hpp>
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/common/safe_ops.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/ir_emitter.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
using Op = Dynarmic::IR::Opcode;
|
||||
|
||||
namespace {
|
||||
|
||||
// Tiny helper to avoid the need to store based off the opcode
|
||||
// bit size all over the place within folding functions.
|
||||
void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
|
||||
if (is_32_bit) {
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
|
||||
} else {
|
||||
inst.ReplaceUsesWith(IR::Value{value});
|
||||
}
|
||||
}
|
||||
|
||||
IR::Value Value(bool is_32_bit, u64 value) {
|
||||
return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
|
||||
}
|
||||
|
||||
template<typename ImmFn>
|
||||
bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
|
||||
const auto lhs = inst.GetArg(0);
|
||||
const auto rhs = inst.GetArg(1);
|
||||
|
||||
const bool is_lhs_immediate = lhs.IsImmediate();
|
||||
const bool is_rhs_immediate = rhs.IsImmediate();
|
||||
|
||||
if (is_lhs_immediate && is_rhs_immediate) {
|
||||
const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64());
|
||||
ReplaceUsesWith(inst, is_32_bit, result);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_lhs_immediate && !is_rhs_immediate) {
|
||||
const IR::Inst* rhs_inst = rhs.GetInstRecursive();
|
||||
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) {
|
||||
const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64());
|
||||
inst.SetArg(0, rhs_inst->GetArg(0));
|
||||
inst.SetArg(1, Value(is_32_bit, combined));
|
||||
} else {
|
||||
// Normalize
|
||||
inst.SetArg(0, rhs);
|
||||
inst.SetArg(1, lhs);
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_lhs_immediate && is_rhs_immediate) {
|
||||
const IR::Inst* lhs_inst = lhs.GetInstRecursive();
|
||||
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) {
|
||||
const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64());
|
||||
inst.SetArg(0, lhs_inst->GetArg(0));
|
||||
inst.SetArg(1, Value(is_32_bit, combined));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void FoldAdd(IR::Inst& inst, bool is_32_bit) {
|
||||
const auto lhs = inst.GetArg(0);
|
||||
const auto rhs = inst.GetArg(1);
|
||||
const auto carry = inst.GetArg(2);
|
||||
|
||||
if (lhs.IsImmediate() && !rhs.IsImmediate()) {
|
||||
// Normalize
|
||||
inst.SetArg(0, rhs);
|
||||
inst.SetArg(1, lhs);
|
||||
FoldAdd(inst, is_32_bit);
|
||||
return;
|
||||
}
|
||||
|
||||
if (inst.HasAssociatedPseudoOperation()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!lhs.IsImmediate() && rhs.IsImmediate()) {
|
||||
const IR::Inst* lhs_inst = lhs.GetInstRecursive();
|
||||
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) {
|
||||
const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1();
|
||||
if (combined == 0) {
|
||||
inst.ReplaceUsesWith(lhs_inst->GetArg(0));
|
||||
return;
|
||||
}
|
||||
inst.SetArg(0, lhs_inst->GetArg(0));
|
||||
inst.SetArg(1, Value(is_32_bit, combined));
|
||||
return;
|
||||
}
|
||||
if (rhs.IsZero() && carry.IsZero()) {
|
||||
inst.ReplaceUsesWith(lhs);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1();
|
||||
ReplaceUsesWith(inst, is_32_bit, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// Folds AND operations based on the following:
|
||||
///
|
||||
/// 1. imm_x & imm_y -> result
|
||||
/// 2. x & 0 -> 0
|
||||
/// 3. 0 & y -> 0
|
||||
/// 4. x & y -> y (where x has all bits set to 1)
|
||||
/// 5. x & y -> x (where y has all bits set to 1)
|
||||
///
|
||||
void FoldAND(IR::Inst& inst, bool is_32_bit) {
|
||||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
|
||||
const auto rhs = inst.GetArg(1);
|
||||
if (rhs.IsZero()) {
|
||||
ReplaceUsesWith(inst, is_32_bit, 0);
|
||||
} else if (rhs.HasAllBitsSet()) {
|
||||
inst.ReplaceUsesWith(inst.GetArg(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Folds byte reversal opcodes based on the following:
|
||||
///
|
||||
/// 1. imm -> swap(imm)
|
||||
///
|
||||
void FoldByteReverse(IR::Inst& inst, Op op) {
|
||||
const auto operand = inst.GetArg(0);
|
||||
|
||||
if (!operand.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (op == Op::ByteReverseWord) {
|
||||
const u32 result = mcl::bit::swap_bytes_32(static_cast<u32>(operand.GetImmediateAsU64()));
|
||||
inst.ReplaceUsesWith(IR::Value{result});
|
||||
} else if (op == Op::ByteReverseHalf) {
|
||||
const u16 result = mcl::bit::swap_bytes_16(static_cast<u16>(operand.GetImmediateAsU64()));
|
||||
inst.ReplaceUsesWith(IR::Value{result});
|
||||
} else {
|
||||
const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64());
|
||||
inst.ReplaceUsesWith(IR::Value{result});
|
||||
}
|
||||
}
|
||||
|
||||
/// Folds division operations based on the following:
|
||||
///
|
||||
/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual)
|
||||
/// 2. imm_x / imm_y -> result
|
||||
/// 3. x / 1 -> x
|
||||
///
|
||||
void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
|
||||
const auto rhs = inst.GetArg(1);
|
||||
|
||||
if (rhs.IsZero()) {
|
||||
ReplaceUsesWith(inst, is_32_bit, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto lhs = inst.GetArg(0);
|
||||
if (lhs.IsImmediate() && rhs.IsImmediate()) {
|
||||
if (is_signed) {
|
||||
const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64();
|
||||
ReplaceUsesWith(inst, is_32_bit, static_cast<u64>(result));
|
||||
} else {
|
||||
const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64();
|
||||
ReplaceUsesWith(inst, is_32_bit, result);
|
||||
}
|
||||
} else if (rhs.IsUnsignedImmediate(1)) {
|
||||
inst.ReplaceUsesWith(IR::Value{lhs});
|
||||
}
|
||||
}
|
||||
|
||||
// Folds EOR operations based on the following:
|
||||
//
|
||||
// 1. imm_x ^ imm_y -> result
|
||||
// 2. x ^ 0 -> x
|
||||
// 3. 0 ^ y -> y
|
||||
//
|
||||
void FoldEOR(IR::Inst& inst, bool is_32_bit) {
|
||||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
|
||||
const auto rhs = inst.GetArg(1);
|
||||
if (rhs.IsZero()) {
|
||||
inst.ReplaceUsesWith(inst.GetArg(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FoldLeastSignificantByte(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto operand = inst.GetArg(0);
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
|
||||
}
|
||||
|
||||
void FoldLeastSignificantHalf(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto operand = inst.GetArg(0);
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
|
||||
}
|
||||
|
||||
void FoldLeastSignificantWord(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto operand = inst.GetArg(0);
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
|
||||
}
|
||||
|
||||
void FoldMostSignificantBit(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto operand = inst.GetArg(0);
|
||||
inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
|
||||
}
|
||||
|
||||
void FoldMostSignificantWord(IR::Inst& inst) {
|
||||
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
|
||||
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto operand = inst.GetArg(0);
|
||||
if (carry_inst) {
|
||||
carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())});
|
||||
}
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64() >> 32)});
|
||||
}
|
||||
|
||||
// Folds multiplication operations based on the following:
|
||||
//
|
||||
// 1. imm_x * imm_y -> result
|
||||
// 2. x * 0 -> 0
|
||||
// 3. 0 * y -> 0
|
||||
// 4. x * 1 -> x
|
||||
// 5. 1 * y -> y
|
||||
//
|
||||
void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
|
||||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
|
||||
const auto rhs = inst.GetArg(1);
|
||||
if (rhs.IsZero()) {
|
||||
ReplaceUsesWith(inst, is_32_bit, 0);
|
||||
} else if (rhs.IsUnsignedImmediate(1)) {
|
||||
inst.ReplaceUsesWith(inst.GetArg(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Folds NOT operations if the contained value is an immediate.
|
||||
void FoldNOT(IR::Inst& inst, bool is_32_bit) {
|
||||
const auto operand = inst.GetArg(0);
|
||||
|
||||
if (!operand.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 result = ~operand.GetImmediateAsU64();
|
||||
ReplaceUsesWith(inst, is_32_bit, result);
|
||||
}
|
||||
|
||||
// Folds OR operations based on the following:
|
||||
//
|
||||
// 1. imm_x | imm_y -> result
|
||||
// 2. x | 0 -> x
|
||||
// 3. 0 | y -> y
|
||||
//
|
||||
void FoldOR(IR::Inst& inst, bool is_32_bit) {
|
||||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
|
||||
const auto rhs = inst.GetArg(1);
|
||||
if (rhs.IsZero()) {
|
||||
inst.ReplaceUsesWith(inst.GetArg(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool FoldShifts(IR::Inst& inst) {
|
||||
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
|
||||
|
||||
// The 32-bit variants can contain 3 arguments, while the
|
||||
// 64-bit variants only contain 2.
|
||||
if (inst.NumArgs() == 3 && !carry_inst) {
|
||||
inst.SetArg(2, IR::Value(false));
|
||||
}
|
||||
|
||||
const auto shift_amount = inst.GetArg(1);
|
||||
|
||||
if (shift_amount.IsZero()) {
|
||||
if (carry_inst) {
|
||||
carry_inst->ReplaceUsesWith(inst.GetArg(2));
|
||||
}
|
||||
inst.ReplaceUsesWith(inst.GetArg(0));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) {
|
||||
inst.SetArg(2, IR::Value(false));
|
||||
}
|
||||
|
||||
if (!inst.AreAllArgsImmediates() || carry_inst) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void FoldSignExtendXToWord(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const s64 value = inst.GetArg(0).GetImmediateAsS64();
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
|
||||
}
|
||||
|
||||
void FoldSignExtendXToLong(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const s64 value = inst.GetArg(0).GetImmediateAsS64();
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
|
||||
}
|
||||
|
||||
void FoldSub(IR::Inst& inst, bool is_32_bit) {
|
||||
if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto lhs = inst.GetArg(0);
|
||||
const auto rhs = inst.GetArg(1);
|
||||
const auto carry = inst.GetArg(2);
|
||||
|
||||
const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1();
|
||||
ReplaceUsesWith(inst, is_32_bit, result);
|
||||
}
|
||||
|
||||
void FoldZeroExtendXToWord(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 value = inst.GetArg(0).GetImmediateAsU64();
|
||||
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
|
||||
}
|
||||
|
||||
void FoldZeroExtendXToLong(IR::Inst& inst) {
|
||||
if (!inst.AreAllArgsImmediates()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 value = inst.GetArg(0).GetImmediateAsU64();
|
||||
inst.ReplaceUsesWith(IR::Value{value});
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void ConstantPropagation(IR::Block& block) {
|
||||
for (auto& inst : block) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
|
||||
switch (opcode) {
|
||||
case Op::LeastSignificantWord:
|
||||
FoldLeastSignificantWord(inst);
|
||||
break;
|
||||
case Op::MostSignificantWord:
|
||||
FoldMostSignificantWord(inst);
|
||||
break;
|
||||
case Op::LeastSignificantHalf:
|
||||
FoldLeastSignificantHalf(inst);
|
||||
break;
|
||||
case Op::LeastSignificantByte:
|
||||
FoldLeastSignificantByte(inst);
|
||||
break;
|
||||
case Op::MostSignificantBit:
|
||||
FoldMostSignificantBit(inst);
|
||||
break;
|
||||
case Op::IsZero32:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0});
|
||||
}
|
||||
break;
|
||||
case Op::IsZero64:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0});
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftLeft32:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftLeft64:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftRight32:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, true, Safe::LogicalShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftRight64:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, false, Safe::LogicalShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::ArithmeticShiftRight32:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::ArithmeticShiftRight64:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::RotateRight32:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::RotateRight64:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftLeftMasked32:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftLeftMasked64:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftRightMasked32:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f));
|
||||
}
|
||||
break;
|
||||
case Op::LogicalShiftRightMasked64:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f));
|
||||
}
|
||||
break;
|
||||
case Op::ArithmeticShiftRightMasked32:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, true, static_cast<s32>(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
|
||||
}
|
||||
break;
|
||||
case Op::ArithmeticShiftRightMasked64:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, false, static_cast<s64>(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
|
||||
}
|
||||
break;
|
||||
case Op::RotateRightMasked32:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32()));
|
||||
}
|
||||
break;
|
||||
case Op::RotateRightMasked64:
|
||||
if (inst.AreAllArgsImmediates()) {
|
||||
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64()));
|
||||
}
|
||||
break;
|
||||
case Op::Add32:
|
||||
case Op::Add64:
|
||||
FoldAdd(inst, opcode == Op::Add32);
|
||||
break;
|
||||
case Op::Sub32:
|
||||
case Op::Sub64:
|
||||
FoldSub(inst, opcode == Op::Sub32);
|
||||
break;
|
||||
case Op::Mul32:
|
||||
case Op::Mul64:
|
||||
FoldMultiply(inst, opcode == Op::Mul32);
|
||||
break;
|
||||
case Op::SignedDiv32:
|
||||
case Op::SignedDiv64:
|
||||
FoldDivide(inst, opcode == Op::SignedDiv32, true);
|
||||
break;
|
||||
case Op::UnsignedDiv32:
|
||||
case Op::UnsignedDiv64:
|
||||
FoldDivide(inst, opcode == Op::UnsignedDiv32, false);
|
||||
break;
|
||||
case Op::And32:
|
||||
case Op::And64:
|
||||
FoldAND(inst, opcode == Op::And32);
|
||||
break;
|
||||
case Op::Eor32:
|
||||
case Op::Eor64:
|
||||
FoldEOR(inst, opcode == Op::Eor32);
|
||||
break;
|
||||
case Op::Or32:
|
||||
case Op::Or64:
|
||||
FoldOR(inst, opcode == Op::Or32);
|
||||
break;
|
||||
case Op::Not32:
|
||||
case Op::Not64:
|
||||
FoldNOT(inst, opcode == Op::Not32);
|
||||
break;
|
||||
case Op::SignExtendByteToWord:
|
||||
case Op::SignExtendHalfToWord:
|
||||
FoldSignExtendXToWord(inst);
|
||||
break;
|
||||
case Op::SignExtendByteToLong:
|
||||
case Op::SignExtendHalfToLong:
|
||||
case Op::SignExtendWordToLong:
|
||||
FoldSignExtendXToLong(inst);
|
||||
break;
|
||||
case Op::ZeroExtendByteToWord:
|
||||
case Op::ZeroExtendHalfToWord:
|
||||
FoldZeroExtendXToWord(inst);
|
||||
break;
|
||||
case Op::ZeroExtendByteToLong:
|
||||
case Op::ZeroExtendHalfToLong:
|
||||
case Op::ZeroExtendWordToLong:
|
||||
FoldZeroExtendXToLong(inst);
|
||||
break;
|
||||
case Op::ByteReverseWord:
|
||||
case Op::ByteReverseHalf:
|
||||
case Op::ByteReverseDual:
|
||||
FoldByteReverse(inst, opcode);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,23 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <mcl/iterator/reverse.hpp>
|
||||
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void DeadCodeElimination(IR::Block& block) {
|
||||
// We iterate over the instructions in reverse order.
|
||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||
for (auto& inst : mcl::iterator::reverse(block)) {
|
||||
if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) {
|
||||
inst.Invalidate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,44 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void IdentityRemovalPass(IR::Block& block) {
|
||||
std::vector<IR::Inst*> to_invalidate;
|
||||
|
||||
auto iter = block.begin();
|
||||
while (iter != block.end()) {
|
||||
IR::Inst& inst = *iter;
|
||||
|
||||
const size_t num_args = inst.NumArgs();
|
||||
for (size_t i = 0; i < num_args; i++) {
|
||||
while (true) {
|
||||
IR::Value arg = inst.GetArg(i);
|
||||
if (!arg.IsIdentity())
|
||||
break;
|
||||
inst.SetArg(i, arg.GetInst()->GetArg(0));
|
||||
}
|
||||
}
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
|
||||
iter = block.Instructions().erase(inst);
|
||||
to_invalidate.push_back(&inst);
|
||||
} else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
for (IR::Inst* inst : to_invalidate) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,127 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
|
||||
#include <mp/metafunction/apply.h>
|
||||
#include <mp/typelist/concat.h>
|
||||
#include <mp/typelist/drop.h>
|
||||
#include <mp/typelist/get.h>
|
||||
#include <mp/typelist/head.h>
|
||||
#include <mp/typelist/list.h>
|
||||
#include <mp/typelist/prepend.h>
|
||||
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
|
||||
namespace Dynarmic::Optimization::IRMatcher {
|
||||
|
||||
struct CaptureValue {
|
||||
using ReturnType = std::tuple<IR::Value>;
|
||||
|
||||
static std::optional<ReturnType> Match(IR::Value value) {
|
||||
return std::tuple(value);
|
||||
}
|
||||
};
|
||||
|
||||
struct CaptureInst {
|
||||
using ReturnType = std::tuple<IR::Inst*>;
|
||||
|
||||
static std::optional<ReturnType> Match(IR::Value value) {
|
||||
if (value.IsImmediate())
|
||||
return std::nullopt;
|
||||
return std::tuple(value.GetInstRecursive());
|
||||
}
|
||||
};
|
||||
|
||||
struct CaptureUImm {
|
||||
using ReturnType = std::tuple<u64>;
|
||||
|
||||
static std::optional<ReturnType> Match(IR::Value value) {
|
||||
return std::tuple(value.GetImmediateAsU64());
|
||||
}
|
||||
};
|
||||
|
||||
struct CaptureSImm {
|
||||
using ReturnType = std::tuple<s64>;
|
||||
|
||||
static std::optional<ReturnType> Match(IR::Value value) {
|
||||
return std::tuple(value.GetImmediateAsS64());
|
||||
}
|
||||
};
|
||||
|
||||
template<u64 Value>
|
||||
struct UImm {
|
||||
using ReturnType = std::tuple<>;
|
||||
|
||||
static std::optional<std::tuple<>> Match(IR::Value value) {
|
||||
if (value.GetImmediateAsU64() == Value)
|
||||
return std::tuple();
|
||||
return std::nullopt;
|
||||
}
|
||||
};
|
||||
|
||||
template<s64 Value>
|
||||
struct SImm {
|
||||
using ReturnType = std::tuple<>;
|
||||
|
||||
static std::optional<std::tuple<>> Match(IR::Value value) {
|
||||
if (value.GetImmediateAsS64() == Value)
|
||||
return std::tuple();
|
||||
return std::nullopt;
|
||||
}
|
||||
};
|
||||
|
||||
template<IR::Opcode Opcode, typename... Args>
|
||||
struct Inst {
|
||||
public:
|
||||
using ReturnType = mp::concat<std::tuple<>, typename Args::ReturnType...>;
|
||||
|
||||
static std::optional<ReturnType> Match(const IR::Inst& inst) {
|
||||
if (inst.GetOpcode() != Opcode)
|
||||
return std::nullopt;
|
||||
if (inst.HasAssociatedPseudoOperation())
|
||||
return std::nullopt;
|
||||
return MatchArgs<0>(inst);
|
||||
}
|
||||
|
||||
static std::optional<ReturnType> Match(IR::Value value) {
|
||||
if (value.IsImmediate())
|
||||
return std::nullopt;
|
||||
return Match(*value.GetInstRecursive());
|
||||
}
|
||||
|
||||
private:
|
||||
template<size_t I>
|
||||
static auto MatchArgs(const IR::Inst& inst) -> std::optional<mp::apply<mp::concat, mp::prepend<mp::drop<I, mp::list<typename Args::ReturnType...>>, std::tuple<>>>> {
|
||||
if constexpr (I >= sizeof...(Args)) {
|
||||
return std::tuple();
|
||||
} else {
|
||||
using Arg = mp::get<I, mp::list<Args...>>;
|
||||
|
||||
if (const auto arg = Arg::Match(inst.GetArg(I))) {
|
||||
if (const auto rest = MatchArgs<I + 1>(inst)) {
|
||||
return std::tuple_cat(*arg, *rest);
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*> t) {
|
||||
return std::get<0>(t) == std::get<1>(t);
|
||||
}
|
||||
|
||||
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*, IR::Inst*> t) {
|
||||
return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization::IRMatcher
|
|
@ -1,18 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2023 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void NamingPass(IR::Block& block) {
|
||||
unsigned name = 1;
|
||||
for (auto& inst : block) {
|
||||
inst.SetName(name++);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,47 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Dynarmic::A32 {
|
||||
struct UserCallbacks;
|
||||
}
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
struct UserCallbacks;
|
||||
struct UserConfig;
|
||||
} // namespace Dynarmic::A64
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
}
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
struct PolyfillOptions {
|
||||
bool sha256 = false;
|
||||
bool vector_multiply_widen = false;
|
||||
|
||||
bool operator==(const PolyfillOptions&) const = default;
|
||||
};
|
||||
|
||||
struct A32GetSetEliminationOptions {
|
||||
bool convert_nzc_to_nz = false;
|
||||
bool convert_nz_to_nzc = false;
|
||||
};
|
||||
|
||||
void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
|
||||
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
|
||||
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt);
|
||||
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
|
||||
void A64GetSetElimination(IR::Block& block);
|
||||
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);
|
||||
void ConstantPropagation(IR::Block& block);
|
||||
void DeadCodeElimination(IR::Block& block);
|
||||
void IdentityRemovalPass(IR::Block& block);
|
||||
void VerificationPass(const IR::Block& block);
|
||||
void NamingPass(IR::Block& block);
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,218 +0,0 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/ir_emitter.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
namespace {
|
||||
|
||||
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const IR::U128 x = (IR::U128)inst.GetArg(0);
|
||||
const IR::U128 y = (IR::U128)inst.GetArg(1);
|
||||
|
||||
const IR::U128 t = ir.VectorExtract(x, y, 32);
|
||||
|
||||
IR::U128 result = ir.ZeroVector();
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
const IR::U32 modified_element = [&] {
|
||||
const IR::U32 element = ir.VectorGetElement(32, t, i);
|
||||
const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7));
|
||||
const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18));
|
||||
const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3));
|
||||
|
||||
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
|
||||
}();
|
||||
|
||||
result = ir.VectorSetElement(32, result, i, modified_element);
|
||||
}
|
||||
result = ir.VectorAdd(32, result, x);
|
||||
|
||||
inst.ReplaceUsesWith(result);
|
||||
}
|
||||
|
||||
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const IR::U128 x = (IR::U128)inst.GetArg(0);
|
||||
const IR::U128 y = (IR::U128)inst.GetArg(1);
|
||||
const IR::U128 z = (IR::U128)inst.GetArg(2);
|
||||
|
||||
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
|
||||
|
||||
const IR::U128 lower_half = [&] {
|
||||
const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
|
||||
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
|
||||
const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
|
||||
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
|
||||
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
|
||||
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0));
|
||||
return ir.VectorZeroUpper(tmp5);
|
||||
}();
|
||||
|
||||
const IR::U64 upper_half = [&] {
|
||||
const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17);
|
||||
const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19);
|
||||
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10);
|
||||
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
|
||||
|
||||
// Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
|
||||
const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
|
||||
const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);
|
||||
|
||||
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
|
||||
return ir.VectorGetElement(64, tmp5, 0);
|
||||
}();
|
||||
|
||||
const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half);
|
||||
|
||||
inst.ReplaceUsesWith(result);
|
||||
}
|
||||
|
||||
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
|
||||
return ir.Eor(ir.And(ir.Eor(y, z), x), z);
|
||||
}
|
||||
|
||||
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
|
||||
return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
|
||||
}
|
||||
|
||||
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
|
||||
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
|
||||
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
|
||||
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));
|
||||
|
||||
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
|
||||
}
|
||||
|
||||
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
|
||||
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
|
||||
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
|
||||
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));
|
||||
|
||||
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
|
||||
}
|
||||
|
||||
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
|
||||
IR::U128 x = (IR::U128)inst.GetArg(0);
|
||||
IR::U128 y = (IR::U128)inst.GetArg(1);
|
||||
const IR::U128 w = (IR::U128)inst.GetArg(2);
|
||||
const bool part1 = inst.GetArg(3).GetU1();
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
const IR::U32 low_x = ir.VectorGetElement(32, x, 0);
|
||||
const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1);
|
||||
const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2);
|
||||
const IR::U32 high_x = ir.VectorGetElement(32, x, 3);
|
||||
|
||||
const IR::U32 low_y = ir.VectorGetElement(32, y, 0);
|
||||
const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1);
|
||||
const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2);
|
||||
const IR::U32 high_y = ir.VectorGetElement(32, y, 3);
|
||||
|
||||
const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y);
|
||||
const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x);
|
||||
|
||||
const IR::U32 t = [&] {
|
||||
const IR::U32 w_element = ir.VectorGetElement(32, w, i);
|
||||
const IR::U32 sig = SHAhashSIGMA1(ir, low_y);
|
||||
|
||||
return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element)));
|
||||
}();
|
||||
|
||||
const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority));
|
||||
const IR::U32 new_low_y = ir.Add(t, high_x);
|
||||
|
||||
// Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
|
||||
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
|
||||
const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);
|
||||
|
||||
x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
|
||||
y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
|
||||
}
|
||||
|
||||
inst.ReplaceUsesWith(part1 ? x : y);
|
||||
}
|
||||
|
||||
template<size_t esize, bool is_signed>
|
||||
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
|
||||
IR::U128 n = (IR::U128)inst.GetArg(0);
|
||||
IR::U128 m = (IR::U128)inst.GetArg(1);
|
||||
|
||||
const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
|
||||
const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);
|
||||
|
||||
const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);
|
||||
|
||||
inst.ReplaceUsesWith(result);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
|
||||
if (polyfill == PolyfillOptions{}) {
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block};
|
||||
|
||||
for (auto& inst : block) {
|
||||
ir.SetInsertionPointBefore(&inst);
|
||||
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::SHA256MessageSchedule0:
|
||||
if (polyfill.sha256) {
|
||||
PolyfillSHA256MessageSchedule0(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::SHA256MessageSchedule1:
|
||||
if (polyfill.sha256) {
|
||||
PolyfillSHA256MessageSchedule1(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::SHA256Hash:
|
||||
if (polyfill.sha256) {
|
||||
PolyfillSHA256Hash(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::VectorMultiplySignedWiden8:
|
||||
if (polyfill.vector_multiply_widen) {
|
||||
PolyfillVectorMultiplyWiden<8, true>(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::VectorMultiplySignedWiden16:
|
||||
if (polyfill.vector_multiply_widen) {
|
||||
PolyfillVectorMultiplyWiden<16, true>(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::VectorMultiplySignedWiden32:
|
||||
if (polyfill.vector_multiply_widen) {
|
||||
PolyfillVectorMultiplyWiden<32, true>(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::VectorMultiplyUnsignedWiden8:
|
||||
if (polyfill.vector_multiply_widen) {
|
||||
PolyfillVectorMultiplyWiden<8, false>(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::VectorMultiplyUnsignedWiden16:
|
||||
if (polyfill.vector_multiply_widen) {
|
||||
PolyfillVectorMultiplyWiden<16, false>(ir, inst);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::VectorMultiplyUnsignedWiden32:
|
||||
if (polyfill.vector_multiply_widen) {
|
||||
PolyfillVectorMultiplyWiden<32, false>(ir, inst);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -1,51 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <ankerl/unordered_dense.h>
|
||||
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/type.h"
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
void VerificationPass(const IR::Block& block) {
|
||||
for (const auto& inst : block) {
|
||||
for (size_t i = 0; i < inst.NumArgs(); i++) {
|
||||
const IR::Type t1 = inst.GetArg(i).GetType();
|
||||
const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i);
|
||||
if (!IR::AreTypesCompatible(t1, t2)) {
|
||||
std::puts(IR::DumpBlock(block).c_str());
|
||||
ASSERT_FALSE("above block failed validation");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ankerl::unordered_dense::map<IR::Inst*, size_t> actual_uses;
|
||||
for (const auto& inst : block) {
|
||||
for (size_t i = 0; i < inst.NumArgs(); i++) {
|
||||
const auto arg = inst.GetArg(i);
|
||||
if (!arg.IsImmediate()) {
|
||||
actual_uses[arg.GetInst()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& pair : actual_uses) {
|
||||
ASSERT(pair.first->UseCount() == pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
1519
src/dynarmic/src/dynarmic/ir/opt_passes.cpp
Normal file
1519
src/dynarmic/src/dynarmic/ir/opt_passes.cpp
Normal file
File diff suppressed because it is too large
Load diff
37
src/dynarmic/src/dynarmic/ir/opt_passes.h
Normal file
37
src/dynarmic/src/dynarmic/ir/opt_passes.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Dynarmic::A32 {
|
||||
struct UserCallbacks;
|
||||
struct UserConfig;
|
||||
}
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
struct UserCallbacks;
|
||||
struct UserConfig;
|
||||
}
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
}
|
||||
|
||||
namespace Dynarmic::Optimization {
|
||||
|
||||
struct PolyfillOptions {
|
||||
bool sha256 = false;
|
||||
bool vector_multiply_widen = false;
|
||||
|
||||
bool operator==(const PolyfillOptions&) const = default;
|
||||
};
|
||||
|
||||
void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
|
||||
void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
@ -24,6 +24,7 @@
|
|||
#include "../rand_int.h"
|
||||
#include "../unicorn_emu/a32_unicorn.h"
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/common/llvm_disassemble.h"
|
||||
|
@ -46,7 +47,7 @@ using namespace Dynarmic;
|
|||
|
||||
template<typename Fn>
|
||||
bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) {
|
||||
return Common::VisitVariant<bool>(terminal, [&](auto t) -> bool {
|
||||
return boost::apply_visitor([&](auto t) -> bool {
|
||||
using T = std::decay_t<decltype(t)>;
|
||||
if constexpr (std::is_same_v<T, IR::Term::Invalid>) {
|
||||
return false;
|
||||
|
@ -72,7 +73,7 @@ bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) {
|
|||
ASSERT_MSG(false, "Invalid terminal type");
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}, terminal);
|
||||
}
|
||||
|
||||
bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "../rand_int.h"
|
||||
#include "../unicorn_emu/a32_unicorn.h"
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/frontend/A32/FPSCR.h"
|
||||
#include "dynarmic/frontend/A32/PSR.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
|
@ -29,7 +30,7 @@
|
|||
#include "dynarmic/frontend/A32/translate/a32_translate.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
||||
|
@ -179,13 +180,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn<Th
|
|||
while (num_insts < instructions_to_execute_count) {
|
||||
A32::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, A32::FPSCR{}};
|
||||
IR::Block ir_block = A32::Translate(descriptor, &test_env, {});
|
||||
Optimization::NamingPass(ir_block);
|
||||
Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::A32ConstantMemoryReads(ir_block, &test_env);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::VerificationPass(ir_block);
|
||||
Optimization::Optimize(ir_block, &test_env, {});
|
||||
printf("\n\nIR:\n%s", IR::DumpBlock(ir_block).c_str());
|
||||
printf("\n\nx86_64:\n");
|
||||
jit.DumpDisassembly();
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -6,6 +9,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -8,6 +11,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/interface/A32/coprocessor.h"
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2022 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -8,6 +11,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
|
||||
static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) {
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
template<typename InstructionType_, u32 infinite_loop_u32>
|
||||
class A32TestEnv : public Dynarmic::A32::UserCallbacks {
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -7,6 +10,7 @@
|
|||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "../rand_int.h"
|
||||
#include "../unicorn_emu/a64_unicorn.h"
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/common/llvm_disassemble.h"
|
||||
|
@ -28,7 +29,7 @@
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
// Must be declared last for all necessary operator<< to be declared prior to this.
|
||||
#include <fmt/format.h>
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -6,6 +9,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
|
||||
TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") {
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
@ -6,6 +9,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
using Vector = Dynarmic::A64::Vector;
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
include(TargetArchitectureSpecificSources)
|
||||
|
||||
add_executable(dynarmic_tests
|
||||
|
@ -6,33 +8,24 @@ add_executable(dynarmic_tests
|
|||
fp/mantissa_util_tests.cpp
|
||||
fp/unpacked_tests.cpp
|
||||
rand_int.h
|
||||
# A32
|
||||
A32/test_arm_disassembler.cpp
|
||||
A32/test_arm_instructions.cpp
|
||||
A32/test_coprocessor.cpp
|
||||
A32/test_svc.cpp
|
||||
A32/test_thumb_instructions.cpp
|
||||
A32/testenv.h
|
||||
decoder_tests.cpp
|
||||
# A64
|
||||
A64/a64.cpp
|
||||
A64/fibonacci.cpp
|
||||
A64/fp_min_max.cpp
|
||||
A64/misaligned_page_table.cpp
|
||||
A64/test_invalidation.cpp
|
||||
A64/real_world.cpp
|
||||
A64/testenv.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A32/test_arm_disassembler.cpp
|
||||
A32/test_arm_instructions.cpp
|
||||
A32/test_coprocessor.cpp
|
||||
A32/test_svc.cpp
|
||||
A32/test_thumb_instructions.cpp
|
||||
A32/testenv.h
|
||||
decoder_tests.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
|
||||
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A64/a64.cpp
|
||||
A64/fibonacci.cpp
|
||||
A64/fp_min_max.cpp
|
||||
A64/misaligned_page_table.cpp
|
||||
A64/test_invalidation.cpp
|
||||
A64/real_world.cpp
|
||||
A64/testenv.h
|
||||
)
|
||||
endif()
|
||||
target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
|
||||
|
||||
if (DYNARMIC_TESTS_USE_UNICORN)
|
||||
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
|
||||
|
@ -40,25 +33,17 @@ if (DYNARMIC_TESTS_USE_UNICORN)
|
|||
target_sources(dynarmic_tests PRIVATE
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
# A32
|
||||
A32/fuzz_arm.cpp
|
||||
A32/fuzz_thumb.cpp
|
||||
unicorn_emu/a32_unicorn.cpp
|
||||
unicorn_emu/a32_unicorn.h
|
||||
# A64
|
||||
A64/fuzz_with_unicorn.cpp
|
||||
A64/verify_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A32/fuzz_arm.cpp
|
||||
A32/fuzz_thumb.cpp
|
||||
unicorn_emu/a32_unicorn.cpp
|
||||
unicorn_emu/a32_unicorn.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A64/fuzz_with_unicorn.cpp
|
||||
A64/verify_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.h
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ("riscv" IN_LIST ARCHITECTURE)
|
||||
|
@ -69,9 +54,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
|
|||
target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak)
|
||||
target_architecture_specific_sources(dynarmic_tests "x86_64"
|
||||
x64_cpu_info.cpp
|
||||
)
|
||||
|
||||
target_architecture_specific_sources(dynarmic_tests "x86_64"
|
||||
native/preserve_xmm.cpp
|
||||
)
|
||||
|
||||
|
@ -85,50 +67,70 @@ endif()
|
|||
|
||||
include(CreateDirectoryGroups)
|
||||
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_print_info
|
||||
print_info.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_print_info)
|
||||
|
||||
target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
|
||||
target_include_directories(dynarmic_print_info PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
#
|
||||
# dynarmic_print_info
|
||||
#
|
||||
add_executable(dynarmic_print_info
|
||||
print_info.cpp
|
||||
)
|
||||
create_target_directory_groups(dynarmic_print_info)
|
||||
target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_print_info PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_print_info PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_test_generator
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
test_generator.cpp
|
||||
)
|
||||
#
|
||||
# dynarmic_test_generator
|
||||
#
|
||||
add_executable(dynarmic_test_generator
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
test_generator.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_test_generator)
|
||||
create_target_directory_groups(dynarmic_test_generator)
|
||||
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
|
||||
target_include_directories(dynarmic_test_generator PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_test_generator PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_test_reader
|
||||
test_reader.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_test_reader)
|
||||
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
|
||||
target_include_directories(dynarmic_test_reader PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
#
|
||||
# dynarmic_test_reader
|
||||
#
|
||||
add_executable(dynarmic_test_reader
|
||||
test_reader.cpp
|
||||
)
|
||||
create_target_directory_groups(dynarmic_test_reader)
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_test_reader PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
#
|
||||
create_target_directory_groups(dynarmic_tests)
|
||||
|
||||
target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl)
|
||||
target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_tests PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_tests PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
|
|
@ -36,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
|
|||
|
||||
const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher<A32::TranslatorVisitor>& matcher, u32 instruction) {
|
||||
const auto block = get_ir(matcher, instruction);
|
||||
|
||||
for (const auto& ir_inst : block) {
|
||||
if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) {
|
||||
if (static_cast<A32::Exception>(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return std::find_if(block.cbegin(), block.cend(), [](auto const& e) {
|
||||
return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError;
|
||||
}) != block.cend();
|
||||
};
|
||||
|
||||
for (auto iter = table.cbegin(); iter != table.cend(); ++iter) {
|
||||
if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 expect = iter->GetExpected();
|
||||
const u32 mask = iter->GetMask();
|
||||
u32 x = 0;
|
||||
|
@ -59,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
|
|||
const u32 instruction = expect | x;
|
||||
|
||||
const bool iserr = is_decode_error(*iter, instruction);
|
||||
const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); });
|
||||
const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) {
|
||||
return m.Matches(instruction);
|
||||
});
|
||||
const bool altiserr = is_decode_error(*alternative, instruction);
|
||||
|
||||
INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction);
|
||||
INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect);
|
||||
INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x);
|
||||
INFO("Name: " << iter->GetName());
|
||||
INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
|
||||
INFO("iserr: " << iserr);
|
||||
INFO("alternative: " << alternative->GetName());
|
||||
//INFO("alternative: " << alternative->GetName());
|
||||
INFO("altiserr: " << altiserr);
|
||||
|
||||
REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr)));
|
||||
|
@ -75,4 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
|
|||
x = ((x | mask) + 1) & ~mask;
|
||||
} while (x != 0);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -6,6 +6,7 @@
|
|||
#include <immintrin.h>
|
||||
|
||||
#include "../A64/testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
|
||||
|
|
|
@ -32,27 +32,26 @@
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/frontend/A64/translate/impl/impl.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/A32/disassembler.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
||||
const char* GetNameOfA32Instruction(u32 instruction) {
|
||||
if (auto vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction)) {
|
||||
return vfp_decoder->get().GetName();
|
||||
} else if (auto asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction)) {
|
||||
return asimd_decoder->get().GetName();
|
||||
} else if (auto decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction)) {
|
||||
return decoder->get().GetName();
|
||||
}
|
||||
std::string_view GetNameOfA32Instruction(u32 instruction) {
|
||||
if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction))
|
||||
return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction);
|
||||
else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction))
|
||||
return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction);
|
||||
else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction))
|
||||
return *A32::GetNameARM<A32::TranslatorVisitor>(instruction);
|
||||
return "<null>";
|
||||
}
|
||||
|
||||
const char* GetNameOfA64Instruction(u32 instruction) {
|
||||
if (auto decoder = A64::Decode<A64::TranslatorVisitor>(instruction)) {
|
||||
return decoder->get().GetName();
|
||||
}
|
||||
std::string_view GetNameOfA64Instruction(u32 instruction) {
|
||||
if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction))
|
||||
return *A64::GetName<A64::TranslatorVisitor>(instruction);
|
||||
return "<null>";
|
||||
}
|
||||
|
||||
|
@ -64,18 +63,9 @@ void PrintA32Instruction(u32 instruction) {
|
|||
IR::Block ir_block{location};
|
||||
const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction);
|
||||
fmt::print("should_continue: {}\n\n", should_continue);
|
||||
|
||||
Optimization::NamingPass(ir_block);
|
||||
|
||||
fmt::print("IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
|
||||
Optimization::A32GetSetElimination(ir_block, {});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
|
||||
fmt::print("Optimized IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
}
|
||||
|
@ -88,18 +78,9 @@ void PrintA64Instruction(u32 instruction) {
|
|||
IR::Block ir_block{location};
|
||||
const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction);
|
||||
fmt::print("should_continue: {}\n\n", should_continue);
|
||||
|
||||
Optimization::NamingPass(ir_block);
|
||||
|
||||
fmt::print("IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
|
||||
Optimization::A64GetSetElimination(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, A64::UserConfig{}, {});
|
||||
fmt::print("Optimized IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
}
|
||||
|
@ -115,18 +96,9 @@ void PrintThumbInstruction(u32 instruction) {
|
|||
IR::Block ir_block{location};
|
||||
const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction);
|
||||
fmt::print("should_continue: {}\n\n", should_continue);
|
||||
|
||||
Optimization::NamingPass(ir_block);
|
||||
|
||||
fmt::print("IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
|
||||
Optimization::A32GetSetElimination(ir_block, {});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
|
||||
fmt::print("Optimized IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
}
|
||||
|
|
|
@ -1377,17 +1377,13 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
|
|||
// As per the size-compatible formats section of vulkan, copy manually via ReinterpretImage
|
||||
// these images that aren't size-compatible
|
||||
if (BytesPerBlock(src.info.format) != BytesPerBlock(dst.info.format)) {
|
||||
|
||||
if (src.info.type == ImageType::Linear || dst.info.type == ImageType::Linear) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto oneCopy = VideoCommon::ImageCopy{.src_offset = VideoCommon::Offset3D(0, 0, 0),
|
||||
.dst_offset = VideoCommon::Offset3D(0, 0, 0),
|
||||
.extent = dst.info.size};
|
||||
auto oneCopy = VideoCommon::ImageCopy{
|
||||
.src_offset = VideoCommon::Offset3D(0, 0, 0),
|
||||
.dst_offset = VideoCommon::Offset3D(0, 0, 0),
|
||||
.extent = dst.info.size
|
||||
};
|
||||
return ReinterpretImage(dst, src, std::span{&oneCopy, 1});
|
||||
}
|
||||
|
||||
boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
|
||||
const VkImageAspectFlags aspect_mask = dst.AspectMask();
|
||||
ASSERT(aspect_mask == src.AspectMask());
|
||||
|
@ -1565,10 +1561,10 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
since tropic didn't want to touch it for a long time, so it needs a rewrite from someone
|
||||
better than me at vulkan. */
|
||||
// CHANGE: Gate the MSAA path more strictly and only use it for color, when the pass and device
|
||||
// support are available. Avoid running the MSAA path when prerequisites aren't met, preventing
|
||||
// validation and runtime issues.
|
||||
// support are available. Avoid running the MSAA path when prerequisites aren't met,
|
||||
// preventing validation and runtime issues.
|
||||
const bool wants_msaa_upload = info.num_samples > 1 &&
|
||||
aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT &&
|
||||
(aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 &&
|
||||
runtime->CanUploadMSAA() && runtime->msaa_copy_pass != nullptr &&
|
||||
runtime->device.IsStorageImageMultisampleSupported();
|
||||
|
||||
|
@ -1578,7 +1574,8 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
temp_info.num_samples = 1;
|
||||
|
||||
// CHANGE: Build a fresh VkImageCreateInfo with robust usage flags for the temp image.
|
||||
// Using the target image's usage as-is could miss STORAGE/TRANSFER bits and trigger validation errors.
|
||||
// Using the target image's usage as-is could miss STORAGE/TRANSFER bits and trigger
|
||||
// validation errors.
|
||||
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
|
||||
image_ci.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
|
@ -1588,7 +1585,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
auto temp_wrapper = std::make_shared<Image>(*runtime, temp_info, 0, 0);
|
||||
temp_wrapper->original_image = runtime->memory_allocator.CreateImage(image_ci);
|
||||
temp_wrapper->current_image = &Image::original_image;
|
||||
temp_wrapper->aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
temp_wrapper->aspect_mask = aspect_mask;
|
||||
temp_wrapper->initialized = true;
|
||||
|
||||
// Upload to the temporary non-MSAA image
|
||||
|
@ -1597,18 +1594,17 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
const VkBuffer src_buffer = buffer;
|
||||
const VkImage temp_vk_image = *temp_wrapper->original_image;
|
||||
const VkImageAspectFlags vk_aspect_mask = temp_wrapper->aspect_mask;
|
||||
|
||||
scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies,
|
||||
// CHANGE: capture shared_ptr to pin lifetime through submission
|
||||
keep = temp_wrapper](vk::CommandBuffer cmdbuf) {
|
||||
CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies);
|
||||
});
|
||||
|
||||
// Use MSAACopyPass to convert from non-MSAA to MSAA
|
||||
// CHANGE: only lifetime and usage were fixed.
|
||||
std::vector<VideoCommon::ImageCopy> image_copies;
|
||||
image_copies.reserve(copies.size());
|
||||
for (const auto& copy : copies) {
|
||||
VideoCommon::ImageCopy image_copy;
|
||||
VideoCommon::ImageCopy image_copy{};
|
||||
image_copy.src_offset = {0, 0, 0}; // Use zero offset for source
|
||||
image_copy.dst_offset = copy.image_offset;
|
||||
image_copy.src_subresource = copy.image_subresource;
|
||||
|
@ -1621,11 +1617,9 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
/*msaa_to_non_msaa=*/false);
|
||||
std::exchange(initialized, true);
|
||||
|
||||
// CHANGE: Add a no-op recording that captures temp_wrapper to ensure it stays alive
|
||||
// at least until commands are submitted/recorded.
|
||||
scheduler->Record([keep = std::move(temp_wrapper)](vk::CommandBuffer) {});
|
||||
const u64 tick = scheduler->Flush();
|
||||
scheduler->Wait(tick);
|
||||
|
||||
// CHANGE: Restore scaling before returning from the MSAA path.
|
||||
if (is_rescaled) {
|
||||
ScaleUp();
|
||||
}
|
||||
|
@ -1638,10 +1632,11 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
const VkBuffer src_buffer = buffer;
|
||||
const VkImage vk_image = *original_image;
|
||||
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
||||
const bool is_initialized = std::exchange(initialized, true);
|
||||
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
|
||||
const bool was_initialized = std::exchange(initialized, true);
|
||||
|
||||
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, was_initialized,
|
||||
vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
|
||||
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, was_initialized, vk_copies);
|
||||
});
|
||||
|
||||
if (is_rescaled) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue