[nca] Use better tight loop allocation schemes (none at all) for AES decrypt/encrypt and force MbedTLS to use AES x86_64 instructions (#2750)
Uses the stack instead of allocating haphazardly (16-byte and 512-byte buffers respectively), removing malloc() pollution from tight loops. Original work by Ribbit, edited by me. This will NOT bring a massive speedup, since the main bottleneck is mbedtls itself, but it may still bring a nice improvement to STARTUP TIMES. Forcing the AES instructions won't affect CPUs without them, since there is always a runtime check.
Signed-off-by: lizzie <lizzie@eden-emu.dev>
Co-authored-by: Ribbit <ribbit@placeholder.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2750
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
parent 551f244dfd
commit 440ee4916d
9 changed files with 200 additions and 130 deletions
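The core pattern in this commit is replacing per-call heap buffers with fixed-size stack storage inside the AES transcode loops. A minimal sketch of that idea (the function name, callback type and sizes here are illustrative assumptions, not the actual Eden code; the real changes are in the diffs below):

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>

// Stand-in for the real cipher call; illustrative only.
using TranscodeFn = void (*)(const unsigned char* src, std::size_t len, unsigned char* dst);

// Before: a std::vector allocated on every call drags malloc()/free() into the hot loop.
// After: a fixed std::array lives on the stack; the 16-byte AES block size bounds it.
void TranscodePartialBlock(const unsigned char* src, std::size_t len, unsigned char* dst,
                           TranscodeFn transcode) {
    constexpr std::size_t BlockSize = 0x10;        // one AES block
    std::array<unsigned char, BlockSize> block{};  // stack storage, no allocation
    const std::size_t n = std::min(len, BlockSize);
    std::memcpy(block.data(), src, n);
    transcode(block.data(), block.size(), block.data());
    std::memcpy(dst, block.data(), n);
}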
.patch/mbedtls/0002-aesni-fix.patch (new file, 13 lines):

diff --git a/library/aesni.h b/library/aesni.h
index 754c984c79..59e27afd3e 100644
--- a/library/aesni.h
+++ b/library/aesni.h
@@ -35,7 +35,7 @@
 /* GCC-like compilers: currently, we only support intrinsics if the requisite
  * target flag is enabled when building the library (e.g. `gcc -mpclmul -msse2`
  * or `clang -maes -mpclmul`). */
-#if (defined(__GNUC__) || defined(__clang__)) && defined(__AES__) && defined(__PCLMUL__)
+#if defined(__GNUC__) || defined(__clang__)
 #define MBEDTLS_AESNI_HAVE_INTRINSICS
 #endif
 /* For 32-bit, we only support intrinsics */
.patch/mbedtls/0003-aesni-fix.patch (new file, 22 lines):

diff --git a/library/aesni.c b/library/aesni.c
index 2857068..3e104ab 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -31,16 +31,14 @@
 #include <immintrin.h>
 #endif
 
-#if defined(MBEDTLS_ARCH_IS_X86)
 #if defined(MBEDTLS_COMPILER_IS_GCC)
 #pragma GCC push_options
 #pragma GCC target ("pclmul,sse2,aes")
 #define MBEDTLS_POP_TARGET_PRAGMA
-#elif defined(__clang__) && (__clang_major__ >= 5)
+#elif defined(__clang__)
 #pragma clang attribute push (__attribute__((target("pclmul,sse2,aes"))), apply_to=function)
 #define MBEDTLS_POP_TARGET_PRAGMA
 #endif
-#endif
 
 #if !defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
 /*
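The two patches above make the AES-NI intrinsics compile unconditionally on x86_64 GCC/Clang builds; as the commit message notes, this is safe because support is still detected at run time before the accelerated path is used (mbedtls performs a CPUID-based check internally). A minimal sketch of such a runtime gate, not mbedtls's actual detection code:

#include <cstdio>

// Compile-time: the intrinsics are always built (the patches force the target pragmas).
// Run-time: only dispatch to them when the CPU actually reports AES support.
static bool HasAesNi() {
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
    return __builtin_cpu_supports("aes");
#else
    return false;
#endif
}

int main() {
    std::printf("AES-NI available: %s\n", HasAesNi() ? "yes" : "no");
    return 0;
}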
@@ -52,6 +52,10 @@ if (PLATFORM_SUN)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
   endif()
+  if (CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
+  endif()
 endif()
 
 # Needed for FFmpeg w/ VAAPI and DRM
externals/cpmfile.json (vendored, 6 lines changed):

@@ -97,7 +97,11 @@
     "version": "3",
     "git_version": "3.6.4",
     "artifact": "%TAG%.tar.bz2",
-    "skip_updates": true
+    "skip_updates": true,
+    "patches": [
+      "0002-aesni-fix.patch",
+      "0003-aesni-fix.patch"
+    ]
   },
   "enet": {
     "repo": "lsalzman/enet",
@@ -1,7 +1,11 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <array>
+#include <vector>
 #include <mbedtls/cipher.h>
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -71,14 +75,16 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, std::size_t size, u8* des
 
     mbedtls_cipher_reset(context);
 
+    // Only ECB strictly requires block sized chunks.
     std::size_t written = 0;
-    if (mbedtls_cipher_get_cipher_mode(context) == MBEDTLS_MODE_XTS) {
+    if (mbedtls_cipher_get_cipher_mode(context) != MBEDTLS_MODE_ECB) {
         mbedtls_cipher_update(context, src, size, dest, &written);
-        if (written != size) {
-            LOG_WARNING(Crypto, "Not all data was decrypted requested={:016X}, actual={:016X}.",
-                        size, written);
-        }
-    } else {
+        if (written != size)
+            LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", size, written);
+        return;
+    }
+
+    // ECB path: operate in block sized chunks and mirror previous behavior.
     const auto block_size = mbedtls_cipher_get_block_size(context);
     if (size < block_size) {
         std::vector<u8> block(block_size);
@@ -89,7 +95,7 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, std::size_t size, u8* des
     }
 
     for (std::size_t offset = 0; offset < size; offset += block_size) {
-        auto length = std::min<std::size_t>(block_size, size - offset);
+        const auto length = std::min<std::size_t>(block_size, size - offset);
         mbedtls_cipher_update(context, src + offset, length, dest + offset, &written);
         if (written != length) {
             if (length < block_size) {
@@ -99,9 +105,7 @@ void AESCipher<Key, KeySize>::Transcode(const u8* src, std::size_t size, u8* des
                 std::memcpy(dest + offset, block.data(), length);
                 return;
             }
-            LOG_WARNING(Crypto, "Not all data was decrypted requested={:016X}, actual={:016X}.",
-                        length, written);
+            LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", length, written);
         }
     }
-    }
 }
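The Transcode change above leans on the fact that, through mbedtls's generic cipher layer, only ECB has to be fed in block-sized chunks; CTR and XTS can take the whole buffer in one update call. A rough standalone sketch of that dispatch (assumes an already-initialized mbedtls_cipher_context_t and omits the warning logging and sub-block handling of the real code):

#include <algorithm>
#include <cstddef>
#include <mbedtls/cipher.h>

// Feed `size` bytes through an initialized cipher context. Non-ECB modes accept the
// whole buffer at once; ECB is driven one block at a time.
static void TranscodeSketch(mbedtls_cipher_context_t* ctx, const unsigned char* src,
                            std::size_t size, unsigned char* dst) {
    mbedtls_cipher_reset(ctx);
    std::size_t written = 0;
    if (mbedtls_cipher_get_cipher_mode(ctx) != MBEDTLS_MODE_ECB) {
        mbedtls_cipher_update(ctx, src, size, dst, &written);
        return;
    }
    const std::size_t block_size = mbedtls_cipher_get_block_size(ctx);
    for (std::size_t offset = 0; offset < size; offset += block_size) {
        const std::size_t length = std::min(block_size, size - offset);
        mbedtls_cipher_update(ctx, src + offset, length, dst + offset, &written);
    }
}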
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -15,26 +18,36 @@ std::size_t CTREncryptionLayer::Read(u8* data, std::size_t length, std::size_t o
     if (length == 0)
         return 0;
 
-    const auto sector_offset = offset & 0xF;
-    if (sector_offset == 0) {
-        UpdateIV(base_offset + offset);
-        std::vector<u8> raw = base->ReadBytes(length, offset);
-        cipher.Transcode(raw.data(), raw.size(), data, Op::Decrypt);
-        return length;
-    }
-
-    // offset does not fall on block boundary (0x10)
-    std::vector<u8> block = base->ReadBytes(0x10, offset - sector_offset);
-    UpdateIV(base_offset + offset - sector_offset);
-    cipher.Transcode(block.data(), block.size(), block.data(), Op::Decrypt);
-    std::size_t read = 0x10 - sector_offset;
-
-    if (length + sector_offset < 0x10) {
-        std::memcpy(data, block.data() + sector_offset, std::min<u64>(length, read));
-        return std::min<u64>(length, read);
-    }
-    std::memcpy(data, block.data() + sector_offset, read);
-    return read + Read(data + read, length - read, offset + read);
+    std::size_t total_read = 0;
+    // Handle an initial misaligned portion if needed.
+    if (auto const sector_offset = offset & 0xF; sector_offset != 0) {
+        const std::size_t aligned_off = offset - sector_offset;
+        std::array<u8, 0x10> block{};
+        if (auto const got = base->Read(block.data(), block.size(), aligned_off); got != 0) {
+            UpdateIV(base_offset + aligned_off);
+            cipher.Transcode(block.data(), got, block.data(), Op::Decrypt);
+            auto const to_copy = std::min<std::size_t>(length, got > sector_offset ? got - sector_offset : 0);
+            if (to_copy > 0) {
+                std::memcpy(data, block.data() + sector_offset, to_copy);
+                data += to_copy;
+                offset += to_copy;
+                length -= to_copy;
+                total_read += to_copy;
+            }
+        } else {
+            return 0;
+        }
+    }
+
+    if (length > 0) {
+        // Now aligned to 0x10
+        UpdateIV(base_offset + offset);
+        const std::size_t got = base->Read(data, length, offset);
+        if (got > 0) {
+            cipher.Transcode(data, got, data, Op::Decrypt);
+            total_read += got;
+        }
+    }
+    return total_read;
 }
 
 void CTREncryptionLayer::SetIV(const IVData& iv_) {
@@ -5,12 +5,13 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <algorithm>
+#include <array>
 #include <cstring>
 #include "core/crypto/xts_encryption_layer.h"
 
 namespace Core::Crypto {
 
-constexpr u64 XTS_SECTOR_SIZE = 0x4000;
+constexpr std::size_t XTS_SECTOR_SIZE = 0x4000;
 
 XTSEncryptionLayer::XTSEncryptionLayer(FileSys::VirtualFile base_, Key256 key_)
     : EncryptionLayer(std::move(base_)), cipher(key_, Mode::XTS) {}
@@ -19,41 +20,67 @@ std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t o
     if (length == 0)
         return 0;
 
-    const auto sector_offset = offset & 0x3FFF;
-    if (sector_offset == 0) {
-        if (length % XTS_SECTOR_SIZE == 0) {
-            std::vector<u8> raw = base->ReadBytes(length, offset);
-            cipher.XTSTranscode(raw.data(), raw.size(), data, offset / XTS_SECTOR_SIZE,
-                                XTS_SECTOR_SIZE, Op::Decrypt);
-            return raw.size();
-        }
-        if (length > XTS_SECTOR_SIZE) {
-            const auto rem = length % XTS_SECTOR_SIZE;
-            const auto read = length - rem;
-            return Read(data, read, offset) + Read(data + read, rem, offset + read);
-        }
-        std::vector<u8> buffer = base->ReadBytes(XTS_SECTOR_SIZE, offset);
-        if (buffer.size() < XTS_SECTOR_SIZE)
-            buffer.resize(XTS_SECTOR_SIZE);
-        cipher.XTSTranscode(buffer.data(), buffer.size(), buffer.data(), offset / XTS_SECTOR_SIZE,
-                            XTS_SECTOR_SIZE, Op::Decrypt);
-        std::memcpy(data, buffer.data(), (std::min)(buffer.size(), length));
-        return (std::min)(buffer.size(), length);
-    }
-
-    // offset does not fall on block boundary (0x4000)
-    std::vector<u8> block = base->ReadBytes(0x4000, offset - sector_offset);
-    if (block.size() < XTS_SECTOR_SIZE)
-        block.resize(XTS_SECTOR_SIZE);
-    cipher.XTSTranscode(block.data(), block.size(), block.data(),
-                        (offset - sector_offset) / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
-    const std::size_t read = XTS_SECTOR_SIZE - sector_offset;
-
-    if (length + sector_offset < XTS_SECTOR_SIZE) {
-        std::memcpy(data, block.data() + sector_offset, std::min<u64>(length, read));
-        return std::min<u64>(length, read);
-    }
-    std::memcpy(data, block.data() + sector_offset, read);
-    return read + Read(data + read, length - read, offset + read);
+    std::size_t total_read = 0;
+    // Handle initial unaligned part within a sector.
+    if (auto const sector_offset = offset % XTS_SECTOR_SIZE; sector_offset != 0) {
+        const std::size_t aligned_off = offset - sector_offset;
+        std::array<u8, XTS_SECTOR_SIZE> block{};
+        if (auto const got = base->Read(block.data(), XTS_SECTOR_SIZE, aligned_off); got > 0) {
+            if (got < XTS_SECTOR_SIZE)
+                std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got);
+            cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(), aligned_off / XTS_SECTOR_SIZE,
+                                XTS_SECTOR_SIZE, Op::Decrypt);
+            auto const to_copy = std::min<std::size_t>(length, got > sector_offset ? got - sector_offset : 0);
+            if (to_copy > 0) {
+                std::memcpy(data, block.data() + sector_offset, to_copy);
+                data += to_copy;
+                offset += to_copy;
+                length -= to_copy;
+                total_read += to_copy;
+            }
+        } else {
+            return 0;
+        }
+    }
+
+    if (length > 0) {
+        // Process aligned middle inplace, in sector sized multiples.
+        while (length >= XTS_SECTOR_SIZE) {
+            const std::size_t req = (length / XTS_SECTOR_SIZE) * XTS_SECTOR_SIZE;
+            const std::size_t got = base->Read(data, req, offset);
+            if (got == 0) {
+                return total_read;
+            }
+            const std::size_t got_rounded = got - (got % XTS_SECTOR_SIZE);
+            if (got_rounded > 0) {
+                cipher.XTSTranscode(data, got_rounded, data, offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
+                data += got_rounded;
+                offset += got_rounded;
+                length -= got_rounded;
+                total_read += got_rounded;
+            }
+            // If we didn't get a full sector next, break to handle tail.
+            if (got_rounded != got) {
+                break;
+            }
+        }
+        // Handle tail within a sector, if any.
+        if (length > 0) {
+            std::array<u8, XTS_SECTOR_SIZE> block{};
+            const std::size_t got = base->Read(block.data(), XTS_SECTOR_SIZE, offset);
+            if (got > 0) {
+                if (got < XTS_SECTOR_SIZE) {
+                    std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got);
+                }
+                cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(),
+                                    offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt);
+                const std::size_t to_copy = std::min<std::size_t>(length, got);
+                std::memcpy(data, block.data(), to_copy);
+                total_read += to_copy;
+            }
+        }
+    }
+    return total_read;
 }
 
 } // namespace Core::Crypto
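The rewritten XTSEncryptionLayer::Read always decrypts whole 0x4000-byte sectors and derives the XTS tweak from the sector index, offset / XTS_SECTOR_SIZE. A tiny worked example of that mapping (hypothetical offset, not Eden code):

#include <cstddef>
#include <cstdio>

int main() {
    constexpr std::size_t XTS_SECTOR_SIZE = 0x4000;
    const std::size_t offset = 0x9100;                              // requested read position
    const std::size_t sector_offset = offset % XTS_SECTOR_SIZE;     // 0x1100 bytes into the sector
    const std::size_t aligned_off = offset - sector_offset;         // 0x8000, sector-aligned start
    const std::size_t sector_index = aligned_off / XTS_SECTOR_SIZE; // 2, used as the XTS tweak
    std::printf("sector=%zu start=0x%zx skip=0x%zx\n", sector_index, aligned_off, sector_offset);
    return 0;
}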
@@ -4,6 +4,7 @@
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <boost/container/static_vector.hpp>
 #include "common/alignment.h"
 #include "common/swap.h"
 #include "core/file_sys/fssystem/fssystem_aes_ctr_storage.h"
@@ -83,32 +84,24 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) {
     std::memcpy(ctr.data(), m_iv.data(), IvSize);
     AddCounter(ctr.data(), IvSize, offset / BlockSize);
 
-    // Loop until all data is written.
-    size_t remaining = size;
-    s64 cur_offset = 0;
-
-    // Get a pooled buffer.
-    std::vector<char> pooled_buffer(BlockSize);
-    while (remaining > 0) {
+    // Loop until all data is written using a pooled buffer residing on the stack (blocksize = 0x10)
+    boost::container::static_vector<u8, BlockSize> pooled_buffer;
+    for (size_t remaining = size; remaining > 0; ) {
         // Determine data we're writing and where.
-        const size_t write_size = std::min(pooled_buffer.size(), remaining);
-        u8* write_buf = reinterpret_cast<u8*>(pooled_buffer.data());
+        auto const write_size = (std::min)(pooled_buffer.size(), remaining);
+        u8* write_buf = pooled_buffer.data();
 
-        // Encrypt the data.
+        // Encrypt the data and then write it.
         m_cipher->SetIV(ctr);
         m_cipher->Transcode(buffer, write_size, write_buf, Core::Crypto::Op::Encrypt);
+        m_base_storage->Write(write_buf, write_size, offset);
 
-        // Write the encrypted data.
-        m_base_storage->Write(write_buf, write_size, offset + cur_offset);
-
-        // Advance.
-        cur_offset += write_size;
+        // Advance next write chunk
+        offset += write_size;
         remaining -= write_size;
-        if (remaining > 0) {
+        if (remaining > 0)
             AddCounter(ctr.data(), IvSize, write_size / BlockSize);
-        }
     }
 
     return size;
 }
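For reference, boost::container::static_vector stores its elements inline with a compile-time capacity, which is what keeps the write loop above off the heap. A minimal usage sketch (names and sizes are mine, not Eden code); note that a default-constructed static_vector has size() == 0, so this sketch resizes it to the block size before chunking on size():

#include <algorithm>
#include <cstddef>
#include <boost/container/static_vector.hpp>

int main() {
    constexpr std::size_t BlockSize = 0x10;
    // Capacity is fixed at compile time; the storage lives inside the object (no allocation).
    boost::container::static_vector<unsigned char, BlockSize> pooled_buffer;
    pooled_buffer.resize(BlockSize);

    std::size_t remaining = 0x45; // pretend payload size
    while (remaining > 0) {
        const std::size_t chunk = std::min<std::size_t>(pooled_buffer.size(), remaining);
        // ... encrypt `chunk` bytes through pooled_buffer.data() and write them out ...
        remaining -= chunk;
    }
    return 0;
}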
@@ -4,9 +4,13 @@
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <algorithm>
+#include <array>
+#include <boost/container/static_vector.hpp>
 #include "common/alignment.h"
 #include "common/swap.h"
 #include "core/file_sys/fssystem/fssystem_aes_xts_storage.h"
+#include "core/file_sys/fssystem/fssystem_nca_header.h"
 #include "core/file_sys/fssystem/fssystem_utility.h"
 
 namespace FileSys {
@@ -41,18 +45,12 @@ AesXtsStorage::AesXtsStorage(VirtualFile base, const void* key1, const void* key
 
 size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const {
     // Allow zero-size reads.
-    if (size == 0) {
+    if (size == 0)
         return size;
-    }
 
-    // Ensure buffer is valid.
+    // Ensure buffer is valid and we can only read at block aligned offsets.
     ASSERT(buffer != nullptr);
-
-    // We can only read at block aligned offsets.
-    ASSERT(Common::IsAligned(offset, AesBlockSize));
-    ASSERT(Common::IsAligned(size, AesBlockSize));
-
-    // Read the data.
+    ASSERT(Common::IsAligned(offset, AesBlockSize) && Common::IsAligned(size, AesBlockSize));
     m_base_storage->Read(buffer, size, offset);
 
     // Setup the counter.
@@ -60,25 +58,21 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const {
     std::memcpy(ctr.data(), m_iv.data(), IvSize);
     AddCounter(ctr.data(), IvSize, offset / m_block_size);
 
-    // Handle any unaligned data before the start.
+    // Handle any unaligned data before the start; then read said data into a local pooled
+    // buffer that resides on the stack, do not use the global memory allocator this is a
+    // very tiny (512 bytes) buffer so should be fine to keep on the stack (Nca::XtsBlockSize wide buffer)
     size_t processed_size = 0;
     if ((offset % m_block_size) != 0) {
+        // Decrypt into our pooled stack buffer (max bound = NCA::XtsBlockSize)
+        boost::container::static_vector<u8, NcaHeader::XtsBlockSize> tmp_buf;
         // Determine the size of the pre-data read.
-        const size_t skip_size =
-            static_cast<size_t>(offset - Common::AlignDown(offset, m_block_size));
-        const size_t data_size = (std::min)(size, m_block_size - skip_size);
-
-        // Decrypt into a pooled buffer.
-        {
-            std::vector<char> tmp_buf(m_block_size, 0);
-            std::memcpy(tmp_buf.data() + skip_size, buffer, data_size);
-
-            m_cipher->SetIV(ctr);
-            m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(),
-                                Core::Crypto::Op::Decrypt);
-
-            std::memcpy(buffer, tmp_buf.data() + skip_size, data_size);
-        }
+        auto const skip_size = size_t(offset - Common::AlignDown(offset, m_block_size));
+        auto const data_size = (std::min)(size, m_block_size - skip_size);
+        std::fill_n(tmp_buf.begin(), skip_size, u8{0});
+        std::memcpy(tmp_buf.data() + skip_size, buffer, data_size);
+
+        m_cipher->SetIV(ctr);
+        m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(), Core::Crypto::Op::Decrypt);
+        std::memcpy(buffer, tmp_buf.data() + skip_size, data_size);
 
         AddCounter(ctr.data(), IvSize, 1);
         processed_size += data_size;
@@ -86,20 +80,16 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const {
     }
 
     // Decrypt aligned chunks.
-    char* cur = reinterpret_cast<char*>(buffer) + processed_size;
-    size_t remaining = size - processed_size;
-    while (remaining > 0) {
-        const size_t cur_size = (std::min)(m_block_size, remaining);
+    auto* cur = buffer + processed_size;
+    for (size_t remaining = size - processed_size; remaining > 0; ) {
+        auto const cur_size = (std::min)(m_block_size, remaining);
 
         m_cipher->SetIV(ctr);
-        m_cipher->Transcode(cur, cur_size, cur, Core::Crypto::Op::Decrypt);
+        auto* char_cur = reinterpret_cast<char*>(cur); //same repr cur - diff signedness
+        m_cipher->Transcode(char_cur, cur_size, char_cur, Core::Crypto::Op::Decrypt);
         remaining -= cur_size;
         cur += cur_size;
 
         AddCounter(ctr.data(), IvSize, 1);
     }
 
     return size;
 }