diff --git a/.patch/mbedtls/0004-arm64-aes-fix.patch b/.patch/mbedtls/0004-arm64-aes-fix.patch new file mode 100644 index 0000000000..285cb75139 --- /dev/null +++ b/.patch/mbedtls/0004-arm64-aes-fix.patch @@ -0,0 +1,20 @@ +diff --git a/library/common.h b/library/common.h +index 50f2a29..c60d9dc 100644 +--- a/library/common.h ++++ b/library/common.h +@@ -19,11 +19,11 @@ + #include + #include + +-#if defined(__ARM_NEON) +-#include ++#if defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) ++#include + #define MBEDTLS_HAVE_NEON_INTRINSICS +-#elif defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) +-#include ++#elif defined(__ANDROID__) || defined(__ARM_NEON) ++#include + #define MBEDTLS_HAVE_NEON_INTRINSICS + #endif + diff --git a/externals/cpmfile.json b/externals/cpmfile.json index 6056a5be24..b00523c37f 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -101,7 +101,8 @@ "skip_updates": true, "patches": [ "0002-aesni-fix.patch", - "0003-aesni-fix.patch" + "0003-aesni-fix.patch", + "0004-arm64-aes-fix.patch" ] }, "enet": { diff --git a/src/core/crypto/aes_util.cpp b/src/core/crypto/aes_util.cpp index 5c31eff3e1..0820ca30ea 100644 --- a/src/core/crypto/aes_util.cpp +++ b/src/core/crypto/aes_util.cpp @@ -4,8 +4,9 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include -#include +#include #include #include "common/assert.h" #include "common/logging/log.h" @@ -15,6 +16,7 @@ namespace Core::Crypto { namespace { using NintendoTweak = std::array; +constexpr std::size_t AesBlockBytes = 16; NintendoTweak CalculateNintendoTweak(std::size_t sector_id) { NintendoTweak out{}; @@ -75,39 +77,51 @@ void AESCipher::Transcode(const u8* src, std::size_t size, u8* des mbedtls_cipher_reset(context); - // Only ECB strictly requires block sized chunks. + if (size == 0) + return; + + const auto mode = mbedtls_cipher_get_cipher_mode(context); std::size_t written = 0; - if (mbedtls_cipher_get_cipher_mode(context) != MBEDTLS_MODE_ECB) { - mbedtls_cipher_update(context, src, size, dest, &written); - if (written != size) + + if (mode != MBEDTLS_MODE_ECB) { + const int ret = mbedtls_cipher_update(context, src, size, dest, &written); + ASSERT(ret == 0); + if (written != size) { LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", size, written); + } return; } - // ECB path: operate in block sized chunks and mirror previous behavior. const auto block_size = mbedtls_cipher_get_block_size(context); - if (size < block_size) { - std::vector block(block_size); - std::memcpy(block.data(), src, size); - Transcode(block.data(), block.size(), block.data(), op); - std::memcpy(dest, block.data(), size); - return; - } + ASSERT(block_size <= AesBlockBytes); - for (std::size_t offset = 0; offset < size; offset += block_size) { - const auto length = std::min(block_size, size - offset); - mbedtls_cipher_update(context, src + offset, length, dest + offset, &written); - if (written != length) { - if (length < block_size) { - std::vector block(block_size); - std::memcpy(block.data(), src + offset, length); - Transcode(block.data(), block.size(), block.data(), op); - std::memcpy(dest + offset, block.data(), length); - return; - } - LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", length, written); + const std::size_t whole_block_bytes = size - (size % block_size); + if (whole_block_bytes != 0) { + const int ret = mbedtls_cipher_update(context, src, whole_block_bytes, dest, &written); + ASSERT(ret == 0); + if (written != whole_block_bytes) { + LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", + whole_block_bytes, written); } } + + const std::size_t tail = size - whole_block_bytes; + if (tail == 0) + return; + + std::array tail_buffer{}; + std::memcpy(tail_buffer.data(), src + whole_block_bytes, tail); + + std::size_t tail_written = 0; + const int ret = mbedtls_cipher_update(context, tail_buffer.data(), block_size, tail_buffer.data(), + &tail_written); + ASSERT(ret == 0); + if (tail_written != block_size) { + LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", block_size, + tail_written); + } + + std::memcpy(dest + whole_block_bytes, tail_buffer.data(), tail); } template diff --git a/src/core/crypto/ctr_encryption_layer.cpp b/src/core/crypto/ctr_encryption_layer.cpp index 6769754413..82473f57d7 100644 --- a/src/core/crypto/ctr_encryption_layer.cpp +++ b/src/core/crypto/ctr_encryption_layer.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "core/crypto/ctr_encryption_layer.h" @@ -18,35 +19,84 @@ std::size_t CTREncryptionLayer::Read(u8* data, std::size_t length, std::size_t o if (length == 0) return 0; + constexpr std::size_t BlockSize = 0x10; + constexpr std::size_t MaxChunkSize = 0x10000; + std::size_t total_read = 0; - // Handle an initial misaligned portion if needed. - if (auto const sector_offset = offset & 0xF; sector_offset != 0) { - const std::size_t aligned_off = offset - sector_offset; - std::array block{}; - if (auto const got = base->Read(block.data(), block.size(), aligned_off); got != 0) { - UpdateIV(base_offset + aligned_off); - cipher.Transcode(block.data(), got, block.data(), Op::Decrypt); - auto const to_copy = std::min(length, got > sector_offset ? got - sector_offset : 0); - if (to_copy > 0) { - std::memcpy(data, block.data() + sector_offset, to_copy); - data += to_copy; - offset += to_copy; - length -= to_copy; - total_read += to_copy; - } - } else { - return 0; + auto* out = data; + std::size_t remaining = length; + std::size_t current_offset = offset; + + const auto read_exact = [this](u8* dst, std::size_t bytes, std::size_t src_offset) { + std::size_t filled = 0; + while (filled < bytes) { + const std::size_t got = base->Read(dst + filled, bytes - filled, src_offset + filled); + if (got == 0) + break; + filled += got; } + return filled; + }; + + if (const std::size_t intra_block = current_offset & (BlockSize - 1); intra_block != 0) { + std::array block{}; + const std::size_t aligned_offset = current_offset - intra_block; + const std::size_t got = read_exact(block.data(), BlockSize, aligned_offset); + if (got <= intra_block) + return total_read; + + UpdateIV(base_offset + aligned_offset); + cipher.Transcode(block.data(), got, block.data(), Op::Decrypt); + + const std::size_t available = got - intra_block; + const std::size_t to_copy = std::min(remaining, available); + std::memcpy(out, block.data() + intra_block, to_copy); + + out += to_copy; + current_offset += to_copy; + remaining -= to_copy; + total_read += to_copy; + + if (to_copy != available) + return total_read; } - if (length > 0) { - // Now aligned to 0x10 - UpdateIV(base_offset + offset); - const std::size_t got = base->Read(data, length, offset); - if (got > 0) { - cipher.Transcode(data, got, data, Op::Decrypt); - total_read += got; - } + + while (remaining >= BlockSize) { + const std::size_t chunk_request = std::min(remaining, MaxChunkSize); + const std::size_t aligned_request = chunk_request - (chunk_request % BlockSize); + if (aligned_request == 0) + break; + + const std::size_t got = read_exact(out, aligned_request, current_offset); + if (got == 0) + break; + + UpdateIV(base_offset + current_offset); + cipher.Transcode(out, got, out, Op::Decrypt); + + out += got; + current_offset += got; + remaining -= got; + total_read += got; + + if (got < aligned_request) + return total_read; } + + if (remaining > 0) { + std::array block{}; + const std::size_t got = read_exact(block.data(), BlockSize, current_offset); + if (got == 0) + return total_read; + + UpdateIV(base_offset + current_offset); + cipher.Transcode(block.data(), got, block.data(), Op::Decrypt); + + const std::size_t to_copy = std::min(remaining, got); + std::memcpy(out, block.data(), to_copy); + total_read += to_copy; + } + return total_read; } diff --git a/src/core/crypto/xts_encryption_layer.cpp b/src/core/crypto/xts_encryption_layer.cpp index 240f930c57..5cfc67847d 100644 --- a/src/core/crypto/xts_encryption_layer.cpp +++ b/src/core/crypto/xts_encryption_layer.cpp @@ -20,67 +20,49 @@ std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t o if (length == 0) return 0; + constexpr std::size_t PrefetchSectors = 4; + + auto* out = data; + std::size_t remaining = length; + std::size_t current_offset = offset; std::size_t total_read = 0; - // Handle initial unaligned part within a sector. - if (auto const sector_offset = offset % XTS_SECTOR_SIZE; sector_offset != 0) { - const std::size_t aligned_off = offset - sector_offset; - std::array block{}; - if (auto const got = base->Read(block.data(), XTS_SECTOR_SIZE, aligned_off); got > 0) { - if (got < XTS_SECTOR_SIZE) - std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got); - cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(), aligned_off / XTS_SECTOR_SIZE, - XTS_SECTOR_SIZE, Op::Decrypt); - auto const to_copy = std::min(length, got > sector_offset ? got - sector_offset : 0); - if (to_copy > 0) { - std::memcpy(data, block.data() + sector_offset, to_copy); - data += to_copy; - offset += to_copy; - length -= to_copy; - total_read += to_copy; - } - } else { - return 0; - } - } + std::array sector{}; - if (length > 0) { - // Process aligned middle inplace, in sector sized multiples. - while (length >= XTS_SECTOR_SIZE) { - const std::size_t req = (length / XTS_SECTOR_SIZE) * XTS_SECTOR_SIZE; - const std::size_t got = base->Read(data, req, offset); - if (got == 0) { + while (remaining > 0) { + const std::size_t sector_index = current_offset / XTS_SECTOR_SIZE; + const std::size_t sector_offset = current_offset % XTS_SECTOR_SIZE; + + const std::size_t sectors_to_read = std::min(PrefetchSectors, + (remaining + sector_offset + + XTS_SECTOR_SIZE - 1) / + XTS_SECTOR_SIZE); + + for (std::size_t s = 0; s < sectors_to_read && remaining > 0; ++s) { + const std::size_t index = sector_index + s; + const std::size_t read_offset = index * XTS_SECTOR_SIZE; + const std::size_t got = base->Read(sector.data(), XTS_SECTOR_SIZE, read_offset); + if (got == 0) return total_read; - } - const std::size_t got_rounded = got - (got % XTS_SECTOR_SIZE); - if (got_rounded > 0) { - cipher.XTSTranscode(data, got_rounded, data, offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); - data += got_rounded; - offset += got_rounded; - length -= got_rounded; - total_read += got_rounded; - } - // If we didn't get a full sector next, break to handle tail. - if (got_rounded != got) { - break; - } - } - // Handle tail within a sector, if any. - if (length > 0) { - std::array block{}; - const std::size_t got = base->Read(block.data(), XTS_SECTOR_SIZE, offset); - if (got > 0) { - if (got < XTS_SECTOR_SIZE) { - std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got); - } - cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(), - offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); - const std::size_t to_copy = std::min(length, got); - std::memcpy(data, block.data(), to_copy); - total_read += to_copy; - } + + if (got < XTS_SECTOR_SIZE) + std::memset(sector.data() + got, 0, XTS_SECTOR_SIZE - got); + + cipher.XTSTranscode(sector.data(), XTS_SECTOR_SIZE, sector.data(), index, XTS_SECTOR_SIZE, + Op::Decrypt); + + const std::size_t local_offset = (s == 0) ? sector_offset : 0; + const std::size_t available = XTS_SECTOR_SIZE - local_offset; + const std::size_t to_copy = std::min(available, remaining); + std::memcpy(out, sector.data() + local_offset, to_copy); + + out += to_copy; + current_offset += to_copy; + remaining -= to_copy; + total_read += to_copy; } } + return total_read; } } // namespace Core::Crypto diff --git a/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp index 1e11d70d8d..6eb78c8255 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp @@ -86,18 +86,21 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) { // Loop until all data is written using a pooled buffer residing on the stack (blocksize = 0x10) boost::container::static_vector pooled_buffer; - for (size_t remaining = size; remaining > 0; ) { - // Determine data we're writing and where. - auto const write_size = (std::min)(pooled_buffer.size(), remaining); - u8* write_buf = pooled_buffer.data(); + pooled_buffer.resize(BlockSize); + + const u8* cur = buffer; + size_t remaining = size; + size_t current_offset = offset; + + while (remaining > 0) { + const size_t write_size = std::min(pooled_buffer.size(), remaining); - // Encrypt the data and then write it. m_cipher->SetIV(ctr); - m_cipher->Transcode(buffer, write_size, write_buf, Core::Crypto::Op::Encrypt); - m_base_storage->Write(write_buf, write_size, offset); + m_cipher->Transcode(cur, write_size, pooled_buffer.data(), Core::Crypto::Op::Encrypt); + m_base_storage->Write(pooled_buffer.data(), write_size, current_offset); - // Advance next write chunk - offset += write_size; + cur += write_size; + current_offset += write_size; remaining -= write_size; if (remaining > 0) AddCounter(ctr.data(), IvSize, write_size / BlockSize); diff --git a/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp index 6e07d44cde..e51068da4b 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp @@ -65,10 +65,13 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const { if ((offset % m_block_size) != 0) { // Decrypt into our pooled stack buffer (max bound = NCA::XtsBlockSize) boost::container::static_vector tmp_buf; + ASSERT(m_block_size <= tmp_buf.max_size()); + tmp_buf.resize(m_block_size); // Determine the size of the pre-data read. auto const skip_size = size_t(offset - Common::AlignDown(offset, m_block_size)); auto const data_size = (std::min)(size, m_block_size - skip_size); - std::fill_n(tmp_buf.begin(), skip_size, u8{0}); + if (skip_size > 0) + std::fill_n(tmp_buf.begin(), skip_size, u8{0}); std::memcpy(tmp_buf.data() + skip_size, buffer, data_size); m_cipher->SetIV(ctr); m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(), Core::Crypto::Op::Decrypt);