diff --git a/CMakeLists.txt b/CMakeLists.txt index 41387c350..339c3ddfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,6 +178,7 @@ set(ODR_SOURCE_FILES "src/odr/internal/text/text_util.cpp" "src/odr/internal/util/byte_util.cpp" + "src/odr/internal/util/byte_stream_util.cpp" "src/odr/internal/util/document_util.cpp" "src/odr/internal/util/file_util.cpp" "src/odr/internal/util/hash_util.cpp" diff --git a/src/odr/internal/cfb/cfb_archive.cpp b/src/odr/internal/cfb/cfb_archive.cpp index 42848a861..04a64b81e 100644 --- a/src/odr/internal/cfb/cfb_archive.cpp +++ b/src/odr/internal/cfb/cfb_archive.cpp @@ -18,13 +18,16 @@ CfbArchive::CfbArchive(std::shared_ptr archive) : m_cfb{std::move(archive)} {} std::shared_ptr CfbArchive::as_filesystem() const { + // TODO return an actual filesystem view auto filesystem = std::make_shared(); for (const auto &e : *m_cfb) { + const AbsPath path = Path(e.path()).make_absolute(); + if (e.is_directory()) { - filesystem->create_directory(e.path()); + filesystem->create_directory(path); } else if (e.is_file()) { - filesystem->copy(e.file(), e.path()); + filesystem->copy(e.file(), path); } } diff --git a/src/odr/internal/cfb/cfb_file.cpp b/src/odr/internal/cfb/cfb_file.cpp index 49759f431..0b8ed17ea 100644 --- a/src/odr/internal/cfb/cfb_file.cpp +++ b/src/odr/internal/cfb/cfb_file.cpp @@ -5,8 +5,8 @@ namespace odr::internal::cfb { -CfbFile::CfbFile(const std::shared_ptr &file) - : m_cfb{std::make_shared(file)} {} +CfbFile::CfbFile(std::shared_ptr file) + : m_cfb{std::make_shared(std::move(file))} {} std::shared_ptr CfbFile::file() const noexcept { return m_cfb->file(); diff --git a/src/odr/internal/cfb/cfb_file.hpp b/src/odr/internal/cfb/cfb_file.hpp index ac2520b25..24bbc0dd5 100644 --- a/src/odr/internal/cfb/cfb_file.hpp +++ b/src/odr/internal/cfb/cfb_file.hpp @@ -7,10 +7,6 @@ enum class FileType; struct FileMeta; } // namespace odr -namespace odr::internal { -class MemoryFile; -} // namespace odr::internal - namespace odr::internal::cfb::util { class Archive; } @@ -19,7 +15,7 @@ namespace odr::internal::cfb { class CfbFile final : public abstract::ArchiveFile { public: - explicit CfbFile(const std::shared_ptr &file); + explicit CfbFile(std::shared_ptr file); [[nodiscard]] std::shared_ptr file() const noexcept override; diff --git a/src/odr/internal/cfb/cfb_impl.cpp b/src/odr/internal/cfb/cfb_impl.cpp index baa1148bf..66b7bc5e8 100644 --- a/src/odr/internal/cfb/cfb_impl.cpp +++ b/src/odr/internal/cfb/cfb_impl.cpp @@ -1,332 +1,303 @@ #include #include +#include +#include #include #include #include -namespace odr::internal::cfb::impl { +namespace odr::internal::cfb { namespace { -constexpr auto MAGIC = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"; -constexpr std::size_t MAX_REG_SECT = 0xFFFFFFFA; -std::uint32_t parse_uint32(const void *buffer) { - return *static_cast(buffer); +std::uint32_t parse_uint32(std::istream &in) { + return util::byte_stream::read(in); } + } // namespace -bool CompoundFileEntry::is_property_stream() const { - // defined in [MS-OLEPS] 2.23 "Property Set Stream and Storage Names" - return name[0] == 5; +void impl::parse_header(std::istream &in, CompoundFileHeader &hdr) { + util::byte_stream::read(in, hdr); + // TODO deal with endianess } -bool CompoundFileEntry::is_stream() const { return type == 2; } +void impl::parse_entry(std::istream &in, CompoundFileEntry &entry) { + util::byte_stream::read(in, entry); + // TODO deal with endianess +} -CompoundFileReader::CompoundFileReader(const void *buffer, - const std::size_t len) - : m_buffer{static_cast(buffer)}, m_buffer_len{len}, - m_hdr{static_cast(buffer)}, - m_sector_size{512}, m_mini_sector_size{64}, - m_mini_stream_start_sector{0} { - if (buffer == nullptr || len == 0) { - throw std::invalid_argument(""); - } +impl::CompoundFileEntry impl::parse_entry(std::istream &in) { + CompoundFileEntry entry{}; + parse_entry(in, entry); + return entry; +} + +} // namespace odr::internal::cfb - if (m_buffer_len < sizeof(*m_hdr) || - std::memcmp(m_hdr->signature, MAGIC, 8) != 0) { +namespace odr::internal::cfb::impl { + +std::string CompoundFileEntry::get_name() const { + return internal::util::string::c16str_to_string(name, name_len - 2); +} + +CompoundFileReader::CompoundFileReader(std::istream &in, + const std::uint64_t file_size) + : m_file_size{file_size} { + parse_header(in, m_header); + + if (std::memcmp(m_header.signature, MAGIC, 8) != 0) { throw NoCfbFile(); } - m_sector_size = m_hdr->major_version == 3 ? 512 : 4096; + m_sector_size = m_header.major_version == 3 ? 512 : 4096; // The file must contain at least 3 sectors - if (m_buffer_len < m_sector_size * 3) { + if (m_file_size < m_sector_size * 3) { throw CfbFileCorrupted(); } - const CompoundFileEntry *root = get_entry(0); - if (root == nullptr) { - throw CfbFileCorrupted(); - } + parse_entry(in, RootId, m_root); - m_mini_stream_start_sector = root->start_sector_location; + m_mini_stream_start_sector = m_root.start_sector_location; } -const CompoundFileEntry * -CompoundFileReader::get_entry(const std::size_t entry_id) const { - if (entry_id == 0xFFFFFFFF) { - return nullptr; - } +void CompoundFileReader::parse_entry(std::istream &in, + const std::uint32_t entry_id, + CompoundFileEntry &entry) const { + const std::uint64_t offset = entry_id * sizeof(CompoundFileEntry); - if (m_buffer_len / sizeof(CompoundFileEntry) <= entry_id) { + if (offset >= m_file_size) { throw std::invalid_argument(""); } - std::size_t sector = 0; - std::size_t offset = 0; - locate_final_sector(m_hdr->first_directory_sector_location, - entry_id * sizeof(CompoundFileEntry), §or, &offset); - return reinterpret_cast( - sector_offset_to_address(sector, offset)); -} - -const CompoundFileEntry *CompoundFileReader::get_root_entry() const { - return get_entry(0); + const SectorOffset sector_offset = normalize_sector_offset( + in, {m_header.first_directory_sector_location, offset}); + const std::uint64_t address = sector_offset_to_address(sector_offset); + in.seekg(static_cast(address)); + impl::parse_entry(in, entry); } -const CompoundFileHeader *CompoundFileReader::get_file_info() const { - return m_hdr; +CompoundFileEntry +CompoundFileReader::parse_entry(std::istream &in, + const std::uint32_t entry_id) const { + CompoundFileEntry entry{}; + parse_entry(in, entry_id, entry); + return entry; } -void CompoundFileReader::read_file(const CompoundFileEntry *entry, - const std::size_t offset, char *buffer, - const std::size_t len) const { - if (entry->size < offset || entry->size - offset < len) { - throw std::invalid_argument(""); +void CompoundFileReader::read_file(std::istream &in, + const CompoundFileEntry &entry, + const std::uint64_t offset, char *buffer, + const std::uint64_t len) const { + if (offset > entry.size) { + throw std::invalid_argument( + "offset bigger than entry size: " + std::to_string(offset) + " > " + + std::to_string(entry.size)); + } + if (len > entry.size - offset) { + throw std::invalid_argument( + "length bigger than remaining entry size: " + std::to_string(len) + + " > " + std::to_string(entry.size - offset)); } - if (entry->size < m_hdr->mini_stream_cutoff_size) { - read_mini_stream(entry->start_sector_location, offset, buffer, len); + if (entry.size < m_header.mini_stream_cutoff_size) { + read_mini_stream(in, {entry.start_sector_location, offset}, buffer, len); } else { - read_stream(entry->start_sector_location, offset, buffer, len); + read_stream(in, {entry.start_sector_location, offset}, buffer, len); } } -void CompoundFileReader::enum_files(const CompoundFileEntry *entry, - const int max_level, - const EnumFilesCallback &callback) const { - const std::u16string dir; - enum_nodes(get_entry(entry->child_id), 0, max_level, dir, callback); +void CompoundFileReader::visit_descendants( + std::istream &in, const CompoundFileEntry &entry, + const std::int32_t max_level, const EnumFilesCallback &callback) const { + const CompoundFileEntry child_entry = parse_entry(in, entry.child_id); + visit_descendants(in, child_entry, 0, max_level, std::u16string(), callback); } -void CompoundFileReader::enum_nodes(const CompoundFileEntry *entry, - const std::int32_t current_level, - const std::int32_t max_level, - const std::u16string &dir, - const EnumFilesCallback &callback) const { +void CompoundFileReader::visit_descendants( + std::istream &in, const CompoundFileEntry &entry, + const std::int32_t current_level, const std::int32_t max_level, + const std::u16string &dir, const EnumFilesCallback &callback) const { if (max_level > 0 && current_level >= max_level) { return; } - if (entry == nullptr) { - return; - } callback(entry, dir, current_level + 1); - if (const CompoundFileEntry *child = get_entry(entry->child_id); - child != nullptr) { + if (entry.child_id != NullId) { + const CompoundFileEntry child = parse_entry(in, entry.child_id); + std::u16string new_dir = dir; - if (!dir.empty()) { - new_dir.push_back('/'); - } - new_dir.append(reinterpret_cast(entry->name), - entry->name_len / 2); - enum_nodes(get_entry(entry->child_id), current_level + 1, max_level, - new_dir, callback); + new_dir.append(entry.name, entry.name_len / 2); + visit_descendants(in, child, current_level + 1, max_level, new_dir, + callback); + } + + if (entry.left_sibling_id != NullId) { + const CompoundFileEntry left_sibling = + parse_entry(in, entry.left_sibling_id); + visit_descendants(in, left_sibling, current_level, max_level, dir, + callback); } - enum_nodes(get_entry(entry->left_sibling_id), current_level, max_level, dir, - callback); - enum_nodes(get_entry(entry->right_sibling_id), current_level, max_level, dir, - callback); + if (entry.right_sibling_id != NullId) { + const CompoundFileEntry right_sibling = + parse_entry(in, entry.right_sibling_id); + visit_descendants(in, right_sibling, current_level, max_level, dir, + callback); + } } -void CompoundFileReader::read_stream(std::size_t sector, std::size_t offset, - char *buffer, std::size_t len) const { - locate_final_sector(sector, offset, §or, &offset); +void CompoundFileReader::read_stream(std::istream &in, + const SectorOffset §or_offset, + char *buffer, std::uint64_t length) const { + SectorOffset current_sector_offset = + normalize_sector_offset(in, sector_offset); // copy as many as possible in each step - // copylen typically iterate as: m_sectorSize - offset --> m_sectorSize + // copy_length typically iterate as: m_sectorSize - offset --> m_sectorSize // --> m_sectorSize --> ... --> remaining - while (len > 0) { - const std::uint8_t *src = sector_offset_to_address(sector, offset); - const std::size_t copylen = std::min(len, m_sector_size - offset); - if (m_buffer + m_buffer_len < src + copylen) { + while (length > 0) { + const std::uint64_t address = + sector_offset_to_address(current_sector_offset); + const std::size_t copy_length = + std::min(length, m_sector_size - current_sector_offset.offset); + if (address + copy_length > m_file_size) { throw CfbFileCorrupted(); } - std::memcpy(buffer, src, copylen); - buffer += copylen; - len -= copylen; - sector = get_next_sector(sector); - offset = 0; + in.seekg(static_cast(address)); + in.read(buffer, static_cast(copy_length)); + buffer += copy_length; + length -= copy_length; + + current_sector_offset.sector = + resolve_next_sector(in, current_sector_offset.sector); + current_sector_offset.offset = 0; } } -void CompoundFileReader::read_mini_stream(std::size_t sector, - std::size_t offset, char *buffer, - std::size_t len) const { - locate_final_mini_sector(sector, offset, §or, &offset); +void CompoundFileReader::read_mini_stream(std::istream &in, + const SectorOffset §or_offset, + char *buffer, + std::uint64_t length) const { + SectorOffset current_sector_offset = + normalize_mini_sector_offset(in, sector_offset); // copy as many as possible in each step - // copylen typically iterate as: m_sectorSize - offset --> m_sectorSize + // copy_length typically iterate as: m_sectorSize - offset --> m_sectorSize // --> m_sectorSize --> ... --> remaining - while (len > 0) { - const std::uint8_t *src = mini_sector_offset_to_address(sector, offset); - const std::size_t copylen = std::min(len, m_mini_sector_size - offset); - if (m_buffer + m_buffer_len < src + copylen) { + while (length > 0) { + const std::uint64_t address = + mini_sector_offset_to_address(in, current_sector_offset); + const std::size_t copy_length = + std::min(length, m_mini_sector_size - current_sector_offset.offset); + if (address + copy_length > m_file_size) { throw CfbFileCorrupted(); } - std::memcpy(buffer, src, copylen); - buffer += copylen; - len -= copylen; - sector = get_next_mini_sector(sector); - offset = 0; + in.seekg(static_cast(address)); + in.read(buffer, static_cast(copy_length)); + buffer += copy_length; + length -= copy_length; + + current_sector_offset.sector = + resolve_next_mini_sector(in, current_sector_offset.sector); + current_sector_offset.offset = 0; } } -std::size_t -CompoundFileReader::get_next_sector(const std::size_t sector) const { +Sector CompoundFileReader::resolve_next_sector(std::istream &in, + const Sector sector) const { // lookup FAT - const std::size_t entriesPerSector = m_sector_size / 4; - const std::size_t fatSectorNumber = sector / entriesPerSector; - const std::size_t fatSectorLocation = - get_fat_sector_location(fatSectorNumber); - return parse_uint32(sector_offset_to_address(fatSectorLocation, - sector % entriesPerSector * 4)); + const std::uint32_t entriesPerSector = m_sector_size / 4; + const std::uint32_t fatSectorNumber = sector / entriesPerSector; + const std::uint32_t fatSectorLocation = + resolve_fat_sector_location(in, fatSectorNumber); + const std::uint64_t address = sector_offset_to_address( + {fatSectorLocation, sector % entriesPerSector * 4}); + in.seekg(static_cast(address)); + return parse_uint32(in); } -std::size_t -CompoundFileReader::get_next_mini_sector(const std::size_t mini_sector) const { - std::size_t sector, offset; - locate_final_sector(m_hdr->first_mini_fat_sector_location, mini_sector * 4, - §or, &offset); - return parse_uint32(sector_offset_to_address(sector, offset)); +Sector CompoundFileReader::resolve_next_mini_sector( + std::istream &in, const std::uint32_t mini_sector) const { + const SectorOffset sector_offset = normalize_sector_offset( + in, {m_header.first_mini_fat_sector_location, mini_sector * 4}); + const std::uint64_t address = sector_offset_to_address(sector_offset); + in.seekg(static_cast(address)); + return parse_uint32(in); } -const std::uint8_t * -CompoundFileReader::sector_offset_to_address(const std::size_t sector, - const std::size_t offset) const { - if (sector >= MAX_REG_SECT || offset >= m_sector_size || - m_buffer_len <= static_cast(m_sector_size) * sector + - m_sector_size + offset) { +std::uint64_t CompoundFileReader::sector_offset_to_address( + const SectorOffset §or_offset) const { + const std::uint64_t address = sector_offset.offset + m_sector_size + + sector_offset.sector * m_sector_size; + + if (sector_offset.sector >= MaxSector || + sector_offset.offset >= m_sector_size || address >= m_file_size) { throw CfbFileCorrupted(); } - return m_buffer + m_sector_size + m_sector_size * sector + offset; + return address; } -const std::uint8_t * -CompoundFileReader::mini_sector_offset_to_address(std::size_t sector, - std::size_t offset) const { - if (sector >= MAX_REG_SECT || offset >= m_mini_sector_size || - m_buffer_len <= - static_cast(m_mini_sector_size) * sector + offset) { +std::uint64_t CompoundFileReader::mini_sector_offset_to_address( + std::istream &in, const SectorOffset §or_offset) const { + const std::uint64_t address = + sector_offset.offset + sector_offset.sector * m_mini_sector_size; + + if (sector_offset.sector >= MaxSector || + sector_offset.offset >= m_mini_sector_size || address >= m_file_size) { throw CfbFileCorrupted(); } - locate_final_sector(m_mini_stream_start_sector, - sector * m_mini_sector_size + offset, §or, &offset); - return sector_offset_to_address(sector, offset); + return sector_offset_to_address( + normalize_sector_offset(in, {m_mini_stream_start_sector, address})); } -void CompoundFileReader::locate_final_sector(std::size_t sector, - std::size_t offset, - std::size_t *final_sector, - std::size_t *final_offset) const { - while (offset >= m_sector_size) { - offset -= m_sector_size; - sector = get_next_sector(sector); +CompoundFileReader::SectorOffset +CompoundFileReader::normalize_sector_offset(std::istream &in, + SectorOffset sector_offset) const { + while (sector_offset.offset >= m_sector_size) { + sector_offset.offset -= m_sector_size; + sector_offset.sector = resolve_next_sector(in, sector_offset.sector); } - *final_sector = sector; - *final_offset = offset; + return sector_offset; } -void CompoundFileReader::locate_final_mini_sector( - std::size_t sector, std::size_t offset, std::size_t *final_sector, - std::size_t *final_offset) const { - while (offset >= m_mini_sector_size) { - offset -= m_mini_sector_size; - sector = get_next_mini_sector(sector); +CompoundFileReader::SectorOffset +CompoundFileReader::normalize_mini_sector_offset( + std::istream &in, SectorOffset sector_offset) const { + while (sector_offset.offset >= m_mini_sector_size) { + sector_offset.offset -= m_mini_sector_size; + sector_offset.sector = resolve_next_mini_sector(in, sector_offset.sector); } - *final_sector = sector; - *final_offset = offset; + return sector_offset; } -std::size_t CompoundFileReader::get_fat_sector_location( - std::size_t fat_sector_number) const { +std::uint32_t CompoundFileReader::resolve_fat_sector_location( + std::istream &in, std::uint32_t fat_sector_number) const { if (fat_sector_number < 109) { - return m_hdr->header_difat[fat_sector_number]; + return m_header.header_difat[fat_sector_number]; } fat_sector_number -= 109; - const std::size_t entriesPerSector = m_sector_size / 4 - 1; - std::size_t difatSectorLocation = m_hdr->first_difat_sector_location; + const std::uint32_t entriesPerSector = m_sector_size / 4 - 1; + std::uint32_t difatSectorLocation = m_header.first_difat_sector_location; while (fat_sector_number >= entriesPerSector) { fat_sector_number -= entriesPerSector; - const std::uint8_t *addr = - sector_offset_to_address(difatSectorLocation, m_sector_size - 4); - difatSectorLocation = parse_uint32(addr); - } - return parse_uint32( - sector_offset_to_address(difatSectorLocation, fat_sector_number * 4)); -} - -PropertySet::PropertySet(const void *buffer, const std::size_t len, - const char *fmt_id) - : m_buffer{static_cast(buffer)}, m_buffer_len{len}, - m_hdr{static_cast(buffer)}, m_fmtid{fmt_id} { - if (m_buffer_len < sizeof(*m_hdr) || - m_buffer_len < sizeof(*m_hdr) + - (m_hdr->num_properties - 1) * - sizeof(m_hdr->property_identifier_and_offset[0])) { - throw CfbFileCorrupted(); - } -} - -const std::uint16_t * -PropertySet::get_string_property(const std::uint32_t property_id) { - for (std::uint32_t i = 0; i < m_hdr->num_properties; i++) { - if (m_hdr->property_identifier_and_offset[i].id == property_id) { - const std::uint32_t offset = - m_hdr->property_identifier_and_offset[i].offset; - if (m_buffer_len < offset + 8) { - throw CfbFileCorrupted(); - } - if (const std::uint32_t stringLengthInChar = - parse_uint32(m_buffer + offset + 4); - m_buffer_len < offset + 8 + stringLengthInChar * 2) { - throw CfbFileCorrupted(); - } - return reinterpret_cast(m_buffer + offset + 8); - } - } - - return nullptr; -} - -const char *PropertySet::get_fmt_id() { return m_fmtid; } - -PropertySetStream::PropertySetStream(const void *buffer, const std::size_t len) - : m_buffer{static_cast(buffer)}, m_buffer_len{len}, - m_hdr{static_cast(buffer)} { - if (m_buffer_len < sizeof(*m_hdr) || - m_buffer_len < sizeof(*m_hdr) + (m_hdr->num_property_sets - 1) * - sizeof(m_hdr->property_set_info[0])) { - throw CfbFileCorrupted(); - } -} - -std::size_t PropertySetStream::get_property_set_count() { - return m_hdr->num_property_sets; -} - -PropertySet PropertySetStream::get_property_set(const std::size_t index) { - if (index >= get_property_set_count()) { - throw CfbFileCorrupted(); - } - const std::uint32_t offset = m_hdr->property_set_info[index].offset; - if (m_buffer_len < offset + 4) { - throw CfbFileCorrupted(); - } - const std::uint32_t size = parse_uint32(m_buffer + offset); - if (m_buffer_len < offset + size) { - throw CfbFileCorrupted(); + const std::uint64_t address = + sector_offset_to_address({difatSectorLocation, m_sector_size - 4}); + in.seekg(static_cast(address)); + difatSectorLocation = parse_uint32(in); } - return {m_buffer + offset, size, m_hdr->property_set_info[index].fmtid}; + const std::uint64_t address = + sector_offset_to_address({difatSectorLocation, fat_sector_number * 4}); + in.seekg(static_cast(address)); + return parse_uint32(in); } } // namespace odr::internal::cfb::impl diff --git a/src/odr/internal/cfb/cfb_impl.hpp b/src/odr/internal/cfb/cfb_impl.hpp index 545f75e77..9ce8b17c3 100644 --- a/src/odr/internal/cfb/cfb_impl.hpp +++ b/src/odr/internal/cfb/cfb_impl.hpp @@ -1,11 +1,18 @@ #pragma once +#include + #include #include #include namespace odr::internal::cfb::impl { +using Sector = std::uint32_t; + +static constexpr std::uint32_t NullId = 0xFFFFFFFF; +static constexpr std::uint32_t RootId = 0; + #pragma pack(push, 1) struct CompoundFileHeader { @@ -30,7 +37,7 @@ struct CompoundFileHeader { }; struct CompoundFileEntry { - std::uint16_t name[32]; + char16_t name[32]; std::uint16_t name_len; std::uint8_t type; std::uint8_t color_flag; @@ -44,138 +51,113 @@ struct CompoundFileEntry { std::uint32_t start_sector_location; std::uint64_t size; - [[nodiscard]] bool is_property_stream() const; - [[nodiscard]] bool is_stream() const; -}; - -struct PropertySetStreamHeader { - std::uint8_t byte_order[2]; - std::uint16_t version; - std::uint32_t system_identifier; - std::uint8_t clsid[16]; - std::uint32_t num_property_sets; - struct { - char fmtid[16]; - std::uint32_t offset; - } property_set_info[1]; -}; - -struct PropertySetHeader { - std::uint32_t size; - std::uint32_t num_properties; - struct { - std::uint32_t id; - std::uint32_t offset; - } property_identifier_and_offset[1]; + [[nodiscard]] bool is_property_stream() const { + // defined in [MS-OLEPS] 2.23 "Property Set Stream and Storage Names" + return name[0] == 5; + } + [[nodiscard]] bool is_file() const { return type == 2; } + [[nodiscard]] bool is_directory() const { return !is_file(); } + [[nodiscard]] std::string get_name() const; }; #pragma pack(pop) -using EnumFilesCallback = - std::function; +void parse_header(std::istream &in, CompoundFileHeader &hdr); +void parse_entry(std::istream &in, CompoundFileEntry &entry); +CompoundFileEntry parse_entry(std::istream &in); class CompoundFileReader final { public: - CompoundFileReader(const void *buffer, std::size_t len); + using EnumFilesCallback = + std::function; + + static constexpr auto MAGIC = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"; + + explicit CompoundFileReader(std::istream &in, std::uint64_t file_size); + + [[nodiscard]] const CompoundFileHeader &get_file_header() const { + return m_header; + } + + [[nodiscard]] const CompoundFileEntry &get_root_entry() const { + return m_root; + } /// Get entry (directory or file) by its ID. /// Pass "0" to get the root directory entry. -- This is the start point to /// navigate the compound file. Use the returned object to access child /// entries. - [[nodiscard]] const CompoundFileEntry *get_entry(std::size_t entry_id) const; - - [[nodiscard]] const CompoundFileEntry *get_root_entry() const; + [[nodiscard]] CompoundFileEntry parse_entry(std::istream &in, + std::uint32_t entry_id) const; - [[nodiscard]] const CompoundFileHeader *get_file_info() const; + void parse_entry(std::istream &in, std::uint32_t entry_id, + CompoundFileEntry &entry) const; /// Get file(stream) data start with "offset". - /// The buffer must have enough space to store "len" bytes. Typically "len" is - /// derived by the steam length. - void read_file(const CompoundFileEntry *entry, std::size_t offset, - char *buffer, std::size_t len) const; + /// The buffer must have enough space to store "len" bytes. Typically, "len" + /// is derived by the steam length. + void read_file(std::istream &in, const CompoundFileEntry &entry, + std::uint64_t offset, char *buffer, std::uint64_t len) const; - void enum_files(const CompoundFileEntry *entry, int max_level, - const EnumFilesCallback &callback) const; + void visit_descendants(std::istream &in, const CompoundFileEntry &entry, + int max_level, + const EnumFilesCallback &callback) const; private: + struct SectorOffset final { + Sector sector; + std::uint64_t offset; + }; + + static constexpr Sector MaxSector = 0xFFFFFFFA; + // Enum entries with same level, including 'entry' itself - void enum_nodes(const CompoundFileEntry *entry, std::int32_t current_level, - std::int32_t max_level, const std::u16string &dir, - const EnumFilesCallback &callback) const; + void visit_descendants(std::istream &in, const CompoundFileEntry &entry, + std::int32_t current_level, std::int32_t max_level, + const std::u16string &dir, + const EnumFilesCallback &callback) const; - void read_stream(std::size_t sector, std::size_t offset, char *buffer, - std::size_t len) const; + void read_stream(std::istream &in, const SectorOffset §or_offset, + char *buffer, std::uint64_t length) const; // Same logic as "ReadStream" except that use MiniStream functions instead - void read_mini_stream(std::size_t sector, std::size_t offset, char *buffer, - std::size_t len) const; + void read_mini_stream(std::istream &in, const SectorOffset §or_offset, + char *buffer, std::uint64_t length) const; - [[nodiscard]] std::size_t get_next_sector(std::size_t sector) const; + [[nodiscard]] Sector resolve_next_sector(std::istream &in, + Sector sector) const; - [[nodiscard]] std::size_t get_next_mini_sector(std::size_t mini_sector) const; + [[nodiscard]] Sector resolve_next_mini_sector(std::istream &in, + Sector mini_sector) const; /// Get absolute address from sector and offset. - [[nodiscard]] const std::uint8_t * - sector_offset_to_address(std::size_t sector, std::size_t offset) const; + [[nodiscard]] std::uint64_t + sector_offset_to_address(const SectorOffset §or_offset) const; - [[nodiscard]] const std::uint8_t * - mini_sector_offset_to_address(std::size_t sector, std::size_t offset) const; + [[nodiscard]] std::uint64_t + mini_sector_offset_to_address(std::istream &in, + const SectorOffset §or_offset) const; /// Locate the final sector/offset when original offset expands multiple /// sectors - void locate_final_sector(std::size_t sector, std::size_t offset, - std::size_t *final_sector, - std::size_t *final_offset) const; - - void locate_final_mini_sector(std::size_t sector, std::size_t offset, - std::size_t *final_sector, - std::size_t *final_offset) const; - - [[nodiscard]] std::size_t - get_fat_sector_location(std::size_t fat_sector_number) const; - - const std::uint8_t *m_buffer; - std::size_t m_buffer_len; - - const CompoundFileHeader *m_hdr; - std::size_t m_sector_size; - std::size_t m_mini_sector_size; - std::size_t m_mini_stream_start_sector; -}; - -class PropertySet final { -public: - PropertySet(const void *buffer, std::size_t len, const char *fmt_id); - - /// return the string property in UTF-16 format - const std::uint16_t *get_string_property(std::uint32_t property_id); - - /// Note: Getting property of types other than "string" is not implemented - /// yet. - /// However most other types are simpler than string so can be easily - /// added. see [MS-OLEPS] - const char *get_fmt_id(); - -private: - const std::uint8_t *m_buffer; - std::size_t m_buffer_len; - const PropertySetHeader *m_hdr; - const char *m_fmtid; // 16 bytes -}; - -class PropertySetStream final { -public: - PropertySetStream(const void *buffer, std::size_t len); - - std::size_t get_property_set_count(); - - PropertySet get_property_set(std::size_t index); - -private: - const std::uint8_t *m_buffer; - std::size_t m_buffer_len; - const PropertySetStreamHeader *m_hdr; + [[nodiscard]] SectorOffset + normalize_sector_offset(std::istream &in, SectorOffset sector_offset) const; + + [[nodiscard]] SectorOffset + normalize_mini_sector_offset(std::istream &in, + SectorOffset sector_offset) const; + + [[nodiscard]] std::uint32_t + resolve_fat_sector_location(std::istream &in, + std::uint32_t fat_sector_number) const; + + std::uint64_t m_file_size{}; + CompoundFileHeader m_header{}; + CompoundFileEntry m_root{}; + std::uint64_t m_sector_size{512}; + std::uint64_t m_mini_sector_size{64}; + std::uint32_t m_mini_stream_start_sector{0}; }; } // namespace odr::internal::cfb::impl diff --git a/src/odr/internal/cfb/cfb_util.cpp b/src/odr/internal/cfb/cfb_util.cpp index 745d67910..b8ddd73f7 100644 --- a/src/odr/internal/cfb/cfb_util.cpp +++ b/src/odr/internal/cfb/cfb_util.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -14,35 +15,33 @@ namespace { class ReaderBuffer final : public std::streambuf { public: ReaderBuffer(const impl::CompoundFileReader &reader, - const impl::CompoundFileEntry &entry) - : m_reader{reader}, m_entry{entry}, m_buffer{new char[m_buffer_size]} {} - ReaderBuffer(const ReaderBuffer &) = delete; - ReaderBuffer(ReaderBuffer &&other) noexcept = delete; - ~ReaderBuffer() override { delete[] m_buffer; } - - ReaderBuffer &operator=(const ReaderBuffer &) = delete; - ReaderBuffer &operator=(ReaderBuffer &&other) noexcept = delete; + const impl::CompoundFileEntry &entry, + std::unique_ptr stream, + const std::uint32_t buffer_size = 4098) + : m_reader{&reader}, m_entry{&entry}, m_stream{std::move(stream)}, + m_buffer(buffer_size, '\0') {} int underflow() override { - const std::uint64_t remaining = m_entry.size - m_offset; - if (remaining <= 0) { - return std::char_traits::eof(); + if (m_offset >= m_entry->size) { + return traits_type::eof(); } - const std::uint64_t amount = std::min(remaining, m_buffer_size); - m_reader.read_file(&m_entry, m_offset, m_buffer, amount); + const std::uint64_t remaining = m_entry->size - m_offset; + const std::uint64_t amount = + std::min(remaining, m_buffer.size()); + m_reader->read_file(*m_stream, *m_entry, m_offset, m_buffer.data(), amount); m_offset += amount; - setg(m_buffer, m_buffer, m_buffer + amount); + setg(m_buffer.data(), m_buffer.data(), m_buffer.data() + amount); - return std::char_traits::to_int_type(*gptr()); + return traits_type::to_int_type(*gptr()); } private: - const impl::CompoundFileReader &m_reader; - const impl::CompoundFileEntry &m_entry; + const impl::CompoundFileReader *m_reader{}; + const impl::CompoundFileEntry *m_entry{}; + std::unique_ptr m_stream; std::uint64_t m_offset{0}; - std::uint64_t m_buffer_size{4098}; - char *m_buffer; + std::vector m_buffer; }; class FileInCfbIstream final : public std::istream { @@ -51,11 +50,12 @@ class FileInCfbIstream final : public std::istream { std::unique_ptr sbuf) : std::istream(sbuf.get()), m_archive{std::move(archive)}, m_sbuf{std::move(sbuf)} {} - FileInCfbIstream(std::shared_ptr archive, + FileInCfbIstream(const std::shared_ptr &archive, const impl::CompoundFileReader &reader, const impl::CompoundFileEntry &entry) - : FileInCfbIstream(std::move(archive), - std::make_unique(reader, entry)) {} + : FileInCfbIstream(archive, + std::make_unique( + reader, entry, archive->file()->stream())) {} private: std::shared_ptr m_archive; @@ -85,51 +85,49 @@ class FileInCfb final : public abstract::File { private: std::shared_ptr m_archive; - const impl::CompoundFileEntry &m_entry; + impl::CompoundFileEntry m_entry; }; } // namespace -bool Archive::Entry::is_file() const { return m_entry->is_stream(); } - -bool Archive::Entry::is_directory() const { return !m_entry->is_stream(); } - -AbsPath Archive::Entry::path() const { return m_path; } +Archive::Archive(std::shared_ptr file) + : m_file{std::move(file)}, m_cfb{*m_file->stream(), m_file->size()} {} std::unique_ptr Archive::Entry::file() const { if (!is_file()) { return {}; } - return std::make_unique(m_parent->shared_from_this(), *m_entry); -} - -std::string Archive::Entry::name() const { - return internal::util::string::c16str_to_string( - reinterpret_cast(m_entry->name), m_entry->name_len - 2); + return std::make_unique(m_archive->shared_from_this(), m_entry); } std::optional Archive::Entry::left() const { - const auto *left = m_parent->cfb().get_entry(m_entry->left_sibling_id); - if (left == nullptr) { + if (m_entry.left_sibling_id == impl::NullId) { return {}; } - return Entry(*m_parent, *left, m_path.parent()); + const impl::CompoundFileEntry left = m_archive->m_cfb.parse_entry( + *m_archive->m_file->stream(), m_entry.left_sibling_id); + return Entry(*m_archive, m_entry.left_sibling_id, left, + m_path.parent().join(RelPath(left.get_name()))); } std::optional Archive::Entry::right() const { - const auto *right = m_parent->cfb().get_entry(m_entry->right_sibling_id); - if (right == nullptr) { + if (m_entry.right_sibling_id == impl::NullId) { return {}; } - return Entry(*m_parent, *right, m_path.parent()); + const impl::CompoundFileEntry right = m_archive->m_cfb.parse_entry( + *m_archive->m_file->stream(), m_entry.right_sibling_id); + return Entry(*m_archive, m_entry.right_sibling_id, right, + m_path.parent().join(RelPath(right.get_name()))); } std::optional Archive::Entry::child() const { - const auto *child = m_parent->cfb().get_entry(m_entry->child_id); - if (child == nullptr) { + if (m_entry.child_id == impl::NullId) { return {}; } - return Entry(*m_parent, *child, m_path); + const impl::CompoundFileEntry child = m_archive->m_cfb.parse_entry( + *m_archive->m_file->stream(), m_entry.child_id); + return Entry(*m_archive, m_entry.child_id, child, + m_path.join(RelPath(child.get_name()))); } void Archive::Iterator::dig_left_() { @@ -189,20 +187,19 @@ void Archive::Iterator::next_flat_() { m_entry = {}; } -Archive::Archive(const std::shared_ptr &file) - : m_file{file}, m_cfb{file->content().data(), file->content().size()} {} - const impl::CompoundFileReader &Archive::cfb() const { return m_cfb; } std::shared_ptr Archive::file() const { return m_file; } -Archive::Iterator Archive::begin() const { - return {*this, *m_cfb.get_root_entry()}; +Archive::Entry Archive::root() const { + return {*this, impl::RootId, m_cfb.get_root_entry(), RelPath("")}; } -Archive::Iterator Archive::end() const { return {}; } +Archive::Iterator Archive::begin() const { return Iterator::begin(root()); } + +Archive::Iterator Archive::end() const { return Iterator::end(); } -Archive::Iterator Archive::find(const AbsPath &path) const { +Archive::Iterator Archive::find(const RelPath &path) const { return std::find_if(begin(), end(), [&path](const Entry &entry) { return entry.path() == path; }); diff --git a/src/odr/internal/cfb/cfb_util.hpp b/src/odr/internal/cfb/cfb_util.hpp index 3eaf8a770..d7f314691 100644 --- a/src/odr/internal/cfb/cfb_util.hpp +++ b/src/odr/internal/cfb/cfb_util.hpp @@ -10,9 +10,9 @@ #include #include -namespace odr::internal { -class MemoryFile; -} // namespace odr::internal +namespace odr::abstract { +class File; +} // namespace odr::abstract namespace odr::internal::cfb::impl { class CompoundFileReader; @@ -23,54 +23,53 @@ namespace odr::internal::cfb::util { class Archive final : public std::enable_shared_from_this { public: - explicit Archive(const std::shared_ptr &file); + explicit Archive(std::shared_ptr file); [[nodiscard]] const impl::CompoundFileReader &cfb() const; - [[nodiscard]] std::shared_ptr file() const; + class Entry; + + Entry root() const; + class Iterator; [[nodiscard]] Iterator begin() const; [[nodiscard]] Iterator end() const; - [[nodiscard]] Iterator find(const AbsPath &path) const; + [[nodiscard]] Iterator find(const RelPath &path) const; class Entry { public: Entry(const Entry &) = default; Entry(Entry &&) noexcept = default; - Entry(const Archive &parent, const impl::CompoundFileEntry &entry) - : m_parent{&parent}, m_entry{&entry}, m_path{"/"} {} - Entry(const Archive &parent, const impl::CompoundFileEntry &entry, - const AbsPath &parent_path) - : m_parent{&parent}, m_entry{&entry}, - m_path{parent_path.join(RelPath(name()))} {} + Entry(const Archive &archive, const std::uint32_t entry_id, + const impl::CompoundFileEntry &entry, RelPath path) + : m_archive{&archive}, m_entry_id{entry_id}, m_entry{entry}, + m_path{std::move(path)} {} ~Entry() = default; Entry &operator=(const Entry &) = default; Entry &operator=(Entry &&) noexcept = default; bool operator==(const Entry &other) const { - return m_entry == other.m_entry; - } - bool operator!=(const Entry &other) const { - return m_entry != other.m_entry; + return m_entry_id == other.m_entry_id; } - [[nodiscard]] bool is_file() const; - [[nodiscard]] bool is_directory() const; - [[nodiscard]] AbsPath path() const; + [[nodiscard]] bool is_file() const { return m_entry.is_file(); } + [[nodiscard]] bool is_directory() const { return m_entry.is_directory(); } + [[nodiscard]] RelPath path() const { return m_path; } [[nodiscard]] std::unique_ptr file() const; - [[nodiscard]] std::string name() const; + [[nodiscard]] std::string name() const { return m_entry.get_name(); } [[nodiscard]] std::optional left() const; [[nodiscard]] std::optional right() const; [[nodiscard]] std::optional child() const; private: - const Archive *m_parent; - const impl::CompoundFileEntry *m_entry; - AbsPath m_path; + const Archive *m_archive{}; + std::uint32_t m_entry_id{impl::NullId}; + impl::CompoundFileEntry m_entry{}; + RelPath m_path; friend Iterator; }; @@ -83,16 +82,8 @@ class Archive final : public std::enable_shared_from_this { using pointer = const Entry *; using reference = const Entry &; - Iterator() = default; - Iterator(const Archive &parent, const impl::CompoundFileEntry &entry) - : m_entry{Entry(parent, entry)} { - dig_left_(); - } - Iterator(const Archive &parent, const impl::CompoundFileEntry &entry, - const AbsPath &parent_path) - : m_entry{Entry(parent, entry, parent_path)} { - dig_left_(); - } + static Iterator begin(const Entry &entry) { return Iterator(entry); } + static Iterator end() { return {}; } [[nodiscard]] reference operator*() const { return *m_entry; } [[nodiscard]] pointer operator->() const { return &*m_entry; } @@ -116,6 +107,11 @@ class Archive final : public std::enable_shared_from_this { std::vector m_ancestors; std::vector m_directories; + Iterator() = default; + explicit Iterator(const Entry &root_entry) : m_entry{root_entry} { + dig_left_(); + } + void dig_left_(); void next_(); void next_flat_(); diff --git a/src/odr/internal/common/path.cpp b/src/odr/internal/common/path.cpp index 0f106c3b2..cf2475e65 100644 --- a/src/odr/internal/common/path.cpp +++ b/src/odr/internal/common/path.cpp @@ -108,8 +108,10 @@ std::size_t Path::hash() const noexcept { return std::hash{}(m_path); } +bool Path::empty() const noexcept { return m_path.empty(); } + bool Path::root() const noexcept { - return (m_upwards == 0) && (m_downwards == 0); + return m_absolute && (m_upwards == 0) && (m_downwards == 0); } bool Path::absolute() const noexcept { return m_absolute; } @@ -189,6 +191,9 @@ Path Path::parent() const { } Path Path::join(const RelPath &b) const { + if (empty()) { + return b; + } if (root()) { return Path("/" + b.m_path); } diff --git a/src/odr/internal/common/path.hpp b/src/odr/internal/common/path.hpp index 466c89fc2..906aafdb1 100644 --- a/src/odr/internal/common/path.hpp +++ b/src/odr/internal/common/path.hpp @@ -33,6 +33,7 @@ class Path { [[nodiscard]] std::filesystem::path path() const noexcept; [[nodiscard]] std::size_t hash() const noexcept; + [[nodiscard]] bool empty() const noexcept; [[nodiscard]] bool root() const noexcept; [[nodiscard]] bool absolute() const noexcept; [[nodiscard]] bool relative() const noexcept; diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp index 34e1f6818..255d57d27 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp @@ -20,14 +20,20 @@ struct WvWareLegacyMicrosoftFile::ParserState { }; WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( - std::shared_ptr file) + std::shared_ptr file) : m_file{std::move(file)} { GError *error = nullptr; m_parser_state = std::make_shared(); - m_parser_state->gsf_input = - gsf_input_stdio_new(m_file->disk_path()->string().c_str(), &error); + if (m_file->disk_path().has_value()) { + m_parser_state->gsf_input = + gsf_input_stdio_new(m_file->disk_path()->string().c_str(), &error); + } else if (m_file->memory_data() != nullptr) { + m_parser_state->gsf_input = gsf_input_memory_new( + reinterpret_cast(m_file->memory_data()), + static_cast(m_file->size()), false); + } if (m_parser_state->gsf_input == nullptr) { throw std::runtime_error("gsf_input_stdio_new failed"); @@ -36,18 +42,6 @@ WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( open(); } -WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( - std::shared_ptr file) - : m_file{std::move(file)} { - m_parser_state = std::make_shared(); - - m_parser_state->gsf_input = gsf_input_memory_new( - reinterpret_cast(m_file->memory_data()), - static_cast(m_file->size()), false); - - open(); -} - void WvWareLegacyMicrosoftFile::open() { wvInit(); diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp index b7b4d6150..b090bedcf 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp @@ -9,17 +9,11 @@ struct _wvParseStruct; using wvParseStruct = struct _wvParseStruct; -namespace odr::internal { -class DiskFile; -class MemoryFile; -} // namespace odr::internal - namespace odr::internal { class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile { public: - explicit WvWareLegacyMicrosoftFile(std::shared_ptr file); - explicit WvWareLegacyMicrosoftFile(std::shared_ptr file); + explicit WvWareLegacyMicrosoftFile(std::shared_ptr file); [[nodiscard]] std::shared_ptr file() const noexcept override; diff --git a/src/odr/internal/ooxml/ooxml_crypto.cpp b/src/odr/internal/ooxml/ooxml_crypto.cpp index 007ca2f51..243d3e3bc 100644 --- a/src/odr/internal/ooxml/ooxml_crypto.cpp +++ b/src/odr/internal/ooxml/ooxml_crypto.cpp @@ -3,41 +3,14 @@ #include #include +#include #include #include #include #include -namespace { -template void to_little_endian(I in, O &out) { - for (unsigned int i = 0; i < sizeof(in); ++i) { - out[i] = in & 0xff; - in >>= 8; - } -} - -template void to_big_endian(I in, O &out) { - for (int i = sizeof(in) - 1; i >= 0; --i) { - out[i] = in & 0xff; - in >>= 8; - } -} - -std::string xor_bytes(const std::string &a, const std::string &b) { - if (a.size() != b.size()) { - throw std::invalid_argument("a.size() != b.size()"); - } - - std::string result(a.size(), ' '); - - for (std::size_t i = 0; i < result.size(); ++i) { - result[i] = static_cast(a[i] ^ b[i]); - } - - return result; -} -} // namespace +namespace {} // namespace namespace odr::internal::ooxml::crypto { @@ -86,10 +59,10 @@ ECMA376Standard::derive_key(const std::string &password) const noexcept { password_u16_bytes); std::string ibytes(4, ' '); for (std::uint32_t i = 0; i < ITER_COUNT; ++i) { - to_little_endian(i, ibytes); + util::byte::to_little_endian(i, ibytes); hash = internal::crypto::util::sha1(ibytes + hash); } - to_little_endian(static_cast(0), ibytes); + util::byte::to_little_endian(static_cast(0), ibytes); hash = internal::crypto::util::sha1(hash + ibytes); } @@ -100,10 +73,12 @@ ECMA376Standard::derive_key(const std::string &password) const noexcept { constexpr std::uint32_t cb_hash = 20; std::string buf1(64, '\x36'); - buf1 = xor_bytes(hash, buf1.substr(0, cb_hash)) + buf1.substr(cb_hash); + buf1 = util::byte::xor_bytes(hash, buf1.substr(0, cb_hash)) + + buf1.substr(cb_hash); const auto x1 = internal::crypto::util::sha1(buf1); std::string buf2(64, '\x5c'); - buf2 = xor_bytes(hash, buf2.substr(0, cb_hash)) + buf2.substr(cb_hash); + buf2 = util::byte::xor_bytes(hash, buf2.substr(0, cb_hash)) + + buf2.substr(cb_hash); const auto x2 = internal::crypto::util::sha1(buf2); const auto x3 = x1 + x2; result = x3.substr(0, cb_required_key_length); diff --git a/src/odr/internal/open_strategy.cpp b/src/odr/internal/open_strategy.cpp index 7fdef14aa..58a8ca4e5 100644 --- a/src/odr/internal/open_strategy.cpp +++ b/src/odr/internal/open_strategy.cpp @@ -59,13 +59,10 @@ open_strategy::list_file_types(const std::shared_ptr &file, ODR_VERBOSE(logger, "magic determined file type " << file_type_to_string(file_type)); - // TODO if `file` is in memory we would copy it unnecessarily - auto memory_file = std::make_shared(*file); - if (file_type == FileType::zip) { ODR_VERBOSE(logger, "open as zip"); - zip::ZipFile zip_file(memory_file); + zip::ZipFile zip_file(file); result.push_back(FileType::zip); auto filesystem = zip_file.archive()->as_filesystem(); @@ -86,7 +83,7 @@ open_strategy::list_file_types(const std::shared_ptr &file, } else if (file_type == FileType::compound_file_binary_format) { ODR_VERBOSE(logger, "open as cbf"); - cfb::CfbFile cfb_file(memory_file); + cfb::CfbFile cfb_file(file); result.push_back(FileType::compound_file_binary_format); auto filesystem = cfb_file.archive()->as_filesystem(); @@ -107,7 +104,7 @@ open_strategy::list_file_types(const std::shared_ptr &file, } else if (file_type == FileType::starview_metafile) { try { ODR_VERBOSE(logger, "try open as svm"); - result.push_back(svm::SvmFile(memory_file).file_type()); + result.push_back(svm::SvmFile(file).file_type()); } catch (...) { ODR_VERBOSE(logger, "failed to open as svm"); } @@ -139,7 +136,7 @@ open_strategy::list_file_types(const std::shared_ptr &file, #ifdef ODR_WITH_PDF2HTMLEX try { ODR_VERBOSE(logger, "try open as pdf with poppler"); - result.push_back(PopplerPdfFile(memory_file).file_type()); + result.push_back(PopplerPdfFile(file).file_type()); } catch (...) { ODR_VERBOSE(logger, "failed to open as pdf with poppler"); } @@ -149,7 +146,7 @@ open_strategy::list_file_types(const std::shared_ptr &file, #ifdef ODR_WITH_WVWARE try { ODR_VERBOSE(logger, "try open as legacy ms with wvware"); - result.push_back(WvWareLegacyMicrosoftFile(memory_file).file_type()); + result.push_back(WvWareLegacyMicrosoftFile(file).file_type()); } catch (...) { ODR_VERBOSE(logger, "failed to open as legacy ms with wvware"); } @@ -180,18 +177,16 @@ open_strategy::list_decoder_engines(const FileType as) { } std::unique_ptr -open_strategy::open_file(std::shared_ptr file, Logger &logger) { +open_strategy::open_file(const std::shared_ptr &file, + Logger &logger) { auto file_type = magic::file_type(*file); ODR_VERBOSE(logger, "magic determined file type " << file_type_to_string(file_type)); - // TODO if `file` is in memory we would copy it unnecessarily - auto memory_file = std::make_shared(*file); - if (file_type == FileType::zip) { ODR_VERBOSE(logger, "open as zip"); - auto zip_file = std::make_unique(std::move(memory_file)); + auto zip_file = std::make_unique(file); auto filesystem = zip_file->archive()->as_filesystem(); @@ -214,7 +209,7 @@ open_strategy::open_file(std::shared_ptr file, Logger &logger) { if (file_type == FileType::compound_file_binary_format) { ODR_VERBOSE(logger, "open as cbf"); - auto cfb_file = std::make_unique(std::move(memory_file)); + auto cfb_file = std::make_unique(file); auto filesystem = cfb_file->archive()->as_filesystem(); @@ -246,7 +241,7 @@ open_strategy::open_file(std::shared_ptr file, Logger &logger) { } if (file_type == FileType::starview_metafile) { ODR_VERBOSE(logger, "open as svm"); - return std::make_unique(memory_file); + return std::make_unique(file); } if (file_type == FileType::unknown) { ODR_VERBOSE(logger, "handle unknown file type"); @@ -281,7 +276,7 @@ open_strategy::open_file(std::shared_ptr file, Logger &logger) { #ifdef ODR_WITH_PDF2HTMLEX try { ODR_VERBOSE(logger, "try open as pdf with poppler"); - return std::make_unique(memory_file); + return std::make_unique(file); } catch (...) { ODR_VERBOSE(logger, "failed to open as pdf with poppler"); } @@ -296,16 +291,16 @@ open_strategy::open_file(std::shared_ptr file, Logger &logger) { } std::unique_ptr -open_strategy::open_file(std::shared_ptr file, FileType as, - Logger &logger) { +open_strategy::open_file(const std::shared_ptr &file, + FileType as, Logger &logger) { DecodePreference preference; preference.as_file_type = as; - return open_file(std::move(file), preference, logger); + return open_file(file, preference, logger); } std::unique_ptr -open_strategy::open_file(std::shared_ptr file, FileType as, - DecoderEngine with, Logger &logger) { +open_strategy::open_file(const std::shared_ptr &file, + FileType as, DecoderEngine with, Logger &logger) { if (as == FileType::opendocument_text || as == FileType::opendocument_presentation || as == FileType::opendocument_spreadsheet || @@ -314,8 +309,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::odr) { ODR_VERBOSE(logger, "using odr engine"); try { - auto memory_file = std::make_shared(*file); - auto zip_file = std::make_unique(std::move(memory_file)); + auto zip_file = std::make_unique(file); auto filesystem = zip_file->archive()->as_filesystem(); return std::make_unique(filesystem); } catch (...) { @@ -336,16 +330,14 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::odr) { ODR_VERBOSE(logger, "using odr engine"); try { - auto memory_file = std::make_shared(*file); - auto zip_file = std::make_unique(std::move(memory_file)); + auto zip_file = std::make_unique(file); auto filesystem = zip_file->archive()->as_filesystem(); return std::make_unique(filesystem); } catch (...) { ODR_VERBOSE(logger, "failed to open as ooxml zip with odr engine"); } try { - auto memory_file = std::make_shared(*file); - auto cfb_file = std::make_unique(std::move(memory_file)); + auto cfb_file = std::make_unique(file); auto filesystem = cfb_file->archive()->as_filesystem(); return std::make_unique(filesystem); } catch (...) { @@ -365,8 +357,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::odr) { ODR_VERBOSE(logger, "using odr engine"); try { - auto memory_file = std::make_shared(*file); - auto cfb_file = std::make_unique(std::move(memory_file)); + auto cfb_file = std::make_unique(file); auto filesystem = cfb_file->archive()->as_filesystem(); return std::make_unique(filesystem); } catch (...) { @@ -378,9 +369,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::wvware) { ODR_VERBOSE(logger, "using wvware engine"); try { - auto memory_file = std::make_shared(*file); - return std::make_unique( - std::move(memory_file)); + return std::make_unique(file); } catch (...) { ODR_VERBOSE(logger, "failed to open as legacy ms with wvware engine"); } @@ -407,8 +396,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::poppler) { ODR_VERBOSE(logger, "using poppler engine"); try { - auto memory_file = std::make_shared(*file); - return std::make_unique(memory_file); + return std::make_unique(file); } catch (...) { ODR_VERBOSE(logger, "failed to open as pdf with poppler engine"); } @@ -443,8 +431,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::odr) { ODR_VERBOSE(logger, "using odr engine"); try { - auto memory_file = std::make_shared(*file); - return std::make_unique(memory_file); + return std::make_unique(file); } catch (...) { ODR_VERBOSE(logger, "failed to open as svm with odr engine"); } @@ -510,8 +497,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::odr) { ODR_VERBOSE(logger, "using odr engine"); try { - auto memory_file = std::make_shared(*file); - return std::make_unique(memory_file); + return std::make_unique(file); } catch (...) { ODR_VERBOSE(logger, "failed to open as zip with odr engine"); } @@ -527,8 +513,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, if (with == DecoderEngine::odr) { ODR_VERBOSE(logger, "using odr engine"); try { - auto memory_file = std::make_shared(*file); - return std::make_unique(memory_file); + return std::make_unique(file); } catch (...) { ODR_VERBOSE(logger, "failed to open as cfb with odr engine"); } @@ -546,7 +531,7 @@ open_strategy::open_file(std::shared_ptr file, FileType as, } std::unique_ptr -open_strategy::open_file(std::shared_ptr file, +open_strategy::open_file(const std::shared_ptr &file, const DecodePreference &preference, Logger &logger) { std::vector probe_types; if (preference.as_file_type.has_value()) { @@ -607,7 +592,7 @@ open_strategy::open_file(std::shared_ptr file, } std::unique_ptr -open_strategy::open_document_file(std::shared_ptr file, +open_strategy::open_document_file(const std::shared_ptr &file, Logger &logger) { auto file_type = magic::file_type(*file); ODR_VERBOSE(logger, @@ -616,10 +601,7 @@ open_strategy::open_document_file(std::shared_ptr file, if (file_type == FileType::zip) { ODR_VERBOSE(logger, "open as zip"); - // TODO if `file` is in memory we would copy it unnecessarily - auto memory_file = std::make_shared(*file); - - auto zip_file = std::make_unique(std::move(memory_file)); + auto zip_file = std::make_unique(file); auto filesystem = zip_file->archive()->as_filesystem(); @@ -639,10 +621,7 @@ open_strategy::open_document_file(std::shared_ptr file, } else if (file_type == FileType::compound_file_binary_format) { ODR_VERBOSE(logger, "open as cbf"); - // TODO if `file` is in memory we would copy it unnecessarily - auto memory_file = std::make_unique(*file); - - auto cfb_file = std::make_unique(std::move(memory_file)); + auto cfb_file = std::make_unique(file); auto filesystem = cfb_file->archive()->as_filesystem(); diff --git a/src/odr/internal/open_strategy.hpp b/src/odr/internal/open_strategy.hpp index 6668f6fc4..24ef93ef8 100644 --- a/src/odr/internal/open_strategy.hpp +++ b/src/odr/internal/open_strategy.hpp @@ -21,22 +21,25 @@ class Path; } // namespace odr::internal::common namespace odr::internal::open_strategy { + std::vector list_file_types(const std::shared_ptr &file, Logger &logger); std::vector list_decoder_engines(FileType as); std::unique_ptr -open_file(std::shared_ptr file, Logger &logger); +open_file(const std::shared_ptr &file, Logger &logger); std::unique_ptr -open_file(std::shared_ptr file, FileType as, Logger &logger); +open_file(const std::shared_ptr &file, FileType as, + Logger &logger); std::unique_ptr -open_file(std::shared_ptr file, FileType as, DecoderEngine with, - Logger &logger); +open_file(const std::shared_ptr &file, FileType as, + DecoderEngine with, Logger &logger); std::unique_ptr -open_file(std::shared_ptr file, +open_file(const std::shared_ptr &file, const DecodePreference &preference, Logger &logger); std::unique_ptr -open_document_file(std::shared_ptr file, Logger &logger); +open_document_file(const std::shared_ptr &file, Logger &logger); + } // namespace odr::internal::open_strategy diff --git a/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp b/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp index 82ade1c8f..c5e21c3ee 100644 --- a/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp +++ b/src/odr/internal/pdf_poppler/poppler_pdf_file.cpp @@ -8,12 +8,7 @@ namespace odr::internal { -PopplerPdfFile::PopplerPdfFile(std::shared_ptr file) - : m_file{std::move(file)} { - open(std::nullopt); -} - -PopplerPdfFile::PopplerPdfFile(std::shared_ptr file) +PopplerPdfFile::PopplerPdfFile(std::shared_ptr file) : m_file{std::move(file)} { open(std::nullopt); } diff --git a/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp b/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp index 40367fcf4..39c79ffec 100644 --- a/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp +++ b/src/odr/internal/pdf_poppler/poppler_pdf_file.hpp @@ -10,8 +10,7 @@ namespace odr::internal { class PopplerPdfFile final : public abstract::PdfFile { public: - explicit PopplerPdfFile(std::shared_ptr file); - explicit PopplerPdfFile(std::shared_ptr file); + explicit PopplerPdfFile(std::shared_ptr file); [[nodiscard]] std::shared_ptr file() const noexcept override; diff --git a/src/odr/internal/svm/svm_format.cpp b/src/odr/internal/svm/svm_format.cpp index 06e50793a..2d825793b 100644 --- a/src/odr/internal/svm/svm_format.cpp +++ b/src/odr/internal/svm/svm_format.cpp @@ -23,19 +23,19 @@ std::string svm::read_utf16_string(std::istream &in, } std::string svm::read_uint16_prefixed_ascii_string(std::istream &in) { - uint16_t length; + std::uint16_t length; read_primitive(in, length); return read_ascii_string(in, length); } std::string svm::read_uint32_prefixed_utf16_string(std::istream &in) { - uint32_t length; + std::uint32_t length; read_primitive(in, length); return read_utf16_string(in, length); } std::string svm::read_uint16_prefixed_utf16_string(std::istream &in) { - uint16_t length; + std::uint16_t length; read_primitive(in, length); return read_utf16_string(in, length); } diff --git a/src/odr/internal/svm/svm_to_svg.cpp b/src/odr/internal/svm/svm_to_svg.cpp index 62a85a13f..e2eba563a 100644 --- a/src/odr/internal/svm/svm_to_svg.cpp +++ b/src/odr/internal/svm/svm_to_svg.cpp @@ -43,9 +43,9 @@ double transform_y(const std::int32_t y, const Context &context) { } std::string get_svg_color_string(const std::uint32_t color) { - const uint8_t blue = color >> 0 & 0xff; - const uint8_t green = color >> 8 & 0xff; - const uint8_t red = color >> 16 & 0xff; + const std::uint8_t blue = color >> 0 & 0xff; + const std::uint8_t green = color >> 8 & 0xff; + const std::uint8_t red = color >> 16 & 0xff; return "rgb(" + std::to_string(red) + "," + std::to_string(green) + "," + std::to_string(blue) + ")"; } diff --git a/src/odr/internal/util/byte_stream_util.cpp b/src/odr/internal/util/byte_stream_util.cpp new file mode 100644 index 000000000..caaf44fac --- /dev/null +++ b/src/odr/internal/util/byte_stream_util.cpp @@ -0,0 +1,3 @@ +#include + +namespace odr::internal::util {} // namespace odr::internal::util diff --git a/src/odr/internal/util/byte_stream_util.hpp b/src/odr/internal/util/byte_stream_util.hpp new file mode 100644 index 000000000..542c04167 --- /dev/null +++ b/src/odr/internal/util/byte_stream_util.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace odr::internal::util::byte_stream { + +template void read(std::istream &in, T &out) { + in.read(reinterpret_cast(&out), sizeof(T)); +} + +template T read(std::istream &in) { + T out; + read(in, out); + return out; +} + +} // namespace odr::internal::util::byte_stream diff --git a/src/odr/internal/util/byte_util.cpp b/src/odr/internal/util/byte_util.cpp index 388d42c7f..dfb12adcd 100644 --- a/src/odr/internal/util/byte_util.cpp +++ b/src/odr/internal/util/byte_util.cpp @@ -1,5 +1,7 @@ #include +#include + namespace odr::internal::util { void byte::reverse_bytes(char16_t *string, const std::size_t length) { @@ -26,4 +28,18 @@ void byte::reverse_bytes(std::u32string &string) { } } +std::string byte::xor_bytes(const std::string &a, const std::string &b) { + if (a.size() != b.size()) { + throw std::invalid_argument("a.size() != b.size()"); + } + + std::string result(a.size(), ' '); + + for (std::size_t i = 0; i < result.size(); ++i) { + result[i] = static_cast(a[i] ^ b[i]); + } + + return result; +} + } // namespace odr::internal::util diff --git a/src/odr/internal/util/byte_util.hpp b/src/odr/internal/util/byte_util.hpp index 85299cb7d..98bdfbe28 100644 --- a/src/odr/internal/util/byte_util.hpp +++ b/src/odr/internal/util/byte_util.hpp @@ -18,4 +18,21 @@ void reverse_bytes(char32_t *string, std::size_t length); void reverse_bytes(std::u16string &string); void reverse_bytes(std::u32string &string); +template +void to_little_endian(I in, O &out) { + for (unsigned int i = 0; i < sizeof(in); ++i) { + out[i] = in & 0xff; + in >>= 8; + } +} + +template void to_big_endian(I in, O &out) { + for (int i = sizeof(in) - 1; i >= 0; --i) { + out[i] = in & 0xff; + in >>= 8; + } +} + +std::string xor_bytes(const std::string &a, const std::string &b); + } // namespace odr::internal::util::byte diff --git a/src/odr/internal/zip/zip_file.cpp b/src/odr/internal/zip/zip_file.cpp index 16bc64123..30c6a5474 100644 --- a/src/odr/internal/zip/zip_file.cpp +++ b/src/odr/internal/zip/zip_file.cpp @@ -5,11 +5,8 @@ namespace odr::internal::zip { -ZipFile::ZipFile(const std::shared_ptr &file) - : m_zip{std::make_shared(file)} {} - -ZipFile::ZipFile(const std::shared_ptr &file) - : m_zip{std::make_shared(file)} {} +ZipFile::ZipFile(std::shared_ptr file) + : m_zip{std::make_shared(std::move(file))} {} std::shared_ptr ZipFile::file() const noexcept { return m_zip->file(); diff --git a/src/odr/internal/zip/zip_file.hpp b/src/odr/internal/zip/zip_file.hpp index ba3c28ee5..4ba1d6bbd 100644 --- a/src/odr/internal/zip/zip_file.hpp +++ b/src/odr/internal/zip/zip_file.hpp @@ -7,11 +7,6 @@ enum class FileType; struct FileMeta; } // namespace odr -namespace odr::internal { -class MemoryFile; -class DiskFile; -} // namespace odr::internal - namespace odr::internal::zip { namespace util { class Archive; @@ -19,8 +14,7 @@ class Archive; class ZipFile final : public abstract::ArchiveFile { public: - explicit ZipFile(const std::shared_ptr &file); - explicit ZipFile(const std::shared_ptr &file); + explicit ZipFile(std::shared_ptr file); [[nodiscard]] std::shared_ptr file() const noexcept override; diff --git a/src/odr/internal/zip/zip_util.cpp b/src/odr/internal/zip/zip_util.cpp index 66000a1ad..422240a93 100644 --- a/src/odr/internal/zip/zip_util.cpp +++ b/src/odr/internal/zip/zip_util.cpp @@ -54,7 +54,7 @@ class ReaderBuffer final : public std::streambuf { int underflow() override { if (m_remaining <= 0) { - return std::char_traits::eof(); + return traits_type::eof(); } std::lock_guard lock(m_archive->mutex()); @@ -65,7 +65,7 @@ class ReaderBuffer final : public std::streambuf { m_remaining -= result; setg(m_buffer, m_buffer, m_buffer + result); - return std::char_traits::to_int_type(*gptr()); + return traits_type::to_int_type(*gptr()); } private: @@ -175,12 +175,6 @@ std::shared_ptr Archive::Entry::file() const { return std::make_shared(m_archive->shared_from_this(), m_index); } -Archive::Archive(const std::shared_ptr &file) - : Archive(std::dynamic_pointer_cast(file)) {} - -Archive::Archive(const std::shared_ptr &file) - : Archive(std::dynamic_pointer_cast(file)) {} - Archive::Archive(std::shared_ptr file) : m_file{std::move(file)} { if (m_file == nullptr) { diff --git a/src/odr/internal/zip/zip_util.hpp b/src/odr/internal/zip/zip_util.hpp index fec5347b4..866cd5663 100644 --- a/src/odr/internal/zip/zip_util.hpp +++ b/src/odr/internal/zip/zip_util.hpp @@ -27,8 +27,7 @@ enum class Method { class Archive final : public std::enable_shared_from_this { public: - explicit Archive(const std::shared_ptr &file); - explicit Archive(const std::shared_ptr &file); + explicit Archive(std::shared_ptr file); ~Archive(); [[nodiscard]] std::mutex &mutex() const; @@ -108,8 +107,6 @@ class Archive final : public std::enable_shared_from_this { mutable std::mutex m_mutex; mutable mz_zip_archive m_zip{}; - - explicit Archive(std::shared_ptr file); }; void open_from_file(mz_zip_archive &archive, const abstract::File &file, diff --git a/test/src/internal/cfb/cfb_archive_test.cpp b/test/src/internal/cfb/cfb_archive_test.cpp index 38e5170fa..d22a073b0 100644 --- a/test/src/internal/cfb/cfb_archive_test.cpp +++ b/test/src/internal/cfb/cfb_archive_test.cpp @@ -17,20 +17,19 @@ using namespace odr::internal::cfb; using namespace odr::test; TEST(CfbArchive, open_directory) { - EXPECT_ANY_THROW(CfbFile(std::make_shared(DiskFile("/")))); + EXPECT_ANY_THROW(CfbFile(std::make_shared("/"))); } TEST(CfbArchive, open_odt) { - EXPECT_THROW( - CfbFile(std::make_shared(DiskFile( - TestData::test_file_path("odr-public/odt/style-various-1.odt")))), - odr::NoCfbFile); + EXPECT_THROW(CfbFile(std::make_shared(TestData::test_file_path( + "odr-public/odt/style-various-1.odt"))), + odr::NoCfbFile); } TEST(CfbArchive, open_encrypted_docx) { - util::Archive cfb(std::make_shared( - DiskFile(TestData::test_file_path("odr-public/docx/encrypted.docx")))); + cfb::util::Archive cfb(std::make_shared( + TestData::test_file_path("odr-public/docx/encrypted.docx"))); - EXPECT_TRUE(cfb.find(AbsPath("/Encryption")) == std::end(cfb)); - EXPECT_TRUE(cfb.find(AbsPath("/EncryptionInfo")) != std::end(cfb)); + EXPECT_TRUE(cfb.find(RelPath("Encryption")) == std::end(cfb)); + EXPECT_TRUE(cfb.find(RelPath("EncryptionInfo")) != std::end(cfb)); }