diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml
index c450330..edb085a 100644
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -59,7 +59,7 @@ jobs:
         -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
         -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
         -DCMAKE_VERBOSE_MAKEFILE=ON
-        -S ${{ github.workspace }}/portable
+        -S ${{ github.workspace }}/src
 
     - name: Build
       # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
diff --git a/portable/BUILDING.txt b/portable/BUILDING.txt
deleted file mode 100644
index b16b2db..0000000
--- a/portable/BUILDING.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Building using CMake on Linux:
-
-% cmake -D CMAKE_BUILD_TYPE=Release -B build
-% make -C build
-
-CMAKE_BUILD_TYPE is one of: Debug, Release, RelWithDebInfo, MinSizeRel.
-
-
-To make a debug build:
-
-% cmake -D CMAKE_BUILD_TYPE=Debug -B build
-% make -C build
-
-
-To debug the build itself:
-
-% make -C build VERBOSE=1
-
-...will show individual commands being executed. Alternatively:
-
-% cmake -D CMAKE_VERBOSE_MAKEFILE=ON ...
-
-...will enable verbose builds by default.
-
-
-To build with Ninja instead of Make (which is faster):
-
-% cmake -D CMAKE_BUILD_TYPE=Release -G Ninja -B ninja-build
-% ninja -C ninja-build
-
-
-To cross-compile from Linux to Windows:
-
-% cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_TOOLCHAIN_FILE=mingw-w64-x86_64.cmake -B build-windows
-% make -C build-windows
diff --git a/windows/Acknowledgement.txt b/src/Acknowledgement.txt
similarity index 93%
rename from windows/Acknowledgement.txt
rename to src/Acknowledgement.txt
index 2e0ee08..a9cb501 100644
--- a/windows/Acknowledgement.txt
+++ b/src/Acknowledgement.txt
@@ -3,7 +3,7 @@
 
 https://github.com/Parchive/par3cmdline
 
-Copyright (C) 2022 Yutaka Sawada.
+Copyright (C) 2022-2025 Yutaka Sawada.
 
 par3cmdline comes with ABSOLUTELY NO WARRANTY.
 
@@ -13,6 +13,12 @@ by the Free Software Foundation; either version 2.1 of the License,
 or (at your option) any later version.
 
 
+[ Additional authors ]
+
+File format specification by Michael D. Nahas.
+
+Linux platform support by Maks Verver.
+
 
 [ BLAKE3 cryptographic hash function ]
 
diff --git a/windows/Appendix.txt b/src/Appendix.txt
similarity index 100%
rename from windows/Appendix.txt
rename to src/Appendix.txt
diff --git a/src/Building.txt b/src/Building.txt
new file mode 100644
index 0000000..b052cfb
--- /dev/null
+++ b/src/Building.txt
@@ -0,0 +1,44 @@
+BUILDING ON WINDOWS
+===================
+
+With Microsoft Visual Studio: use the solution file (par3cmdline.sln).
+
+Alternatively, use CMake as described below.
+
+
+BUILDING ON LINUX
+=================
+
+To build using CMake (run these commands from this directory):
+
+% cmake -D CMAKE_BUILD_TYPE=Release -B build
+% cmake --build build     # or:  make -C build
+
+CMAKE_BUILD_TYPE is one of: Debug, Release, RelWithDebInfo, MinSizeRel.
+
+To install:
+
+% cmake --install build   # installs under /usr/local by default
+
+
+To debug the build itself:
+
+% cmake --build build --verbose   # or:  make -C build VERBOSE=1
+
+...will show individual commands being executed. Alternatively:
+
+% cmake -D CMAKE_VERBOSE_MAKEFILE=ON ...
+
+...will enable verbose builds by default.
+
+
+To build with Ninja instead of Make, which can be faster:
+
+% cmake -D CMAKE_BUILD_TYPE=Release -G Ninja -B build-ninja
+% cmake --build build-ninja     # or:  ninja -C build-ninja
+
+
+To cross-compile from Linux to Windows, creating a Windows executable:
+
+% cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_TOOLCHAIN_FILE=mingw-w64-x86_64.cmake -B build-windows
+% cmake --build build-windows
diff --git a/portable/CMakeLists.txt b/src/CMakeLists.txt
similarity index 100%
rename from portable/CMakeLists.txt
rename to src/CMakeLists.txt
diff --git a/windows/ReadMe.txt b/src/ReadMe.txt
similarity index 100%
rename from windows/ReadMe.txt
rename to src/ReadMe.txt
diff --git a/portable/blake3/CMakeLists.txt b/src/blake3/CMakeLists.txt
similarity index 100%
rename from portable/blake3/CMakeLists.txt
rename to src/blake3/CMakeLists.txt
diff --git a/portable/blake3/blake3.c b/src/blake3/blake3.c
similarity index 100%
rename from portable/blake3/blake3.c
rename to src/blake3/blake3.c
diff --git a/portable/blake3/blake3.h b/src/blake3/blake3.h
similarity index 100%
rename from portable/blake3/blake3.h
rename to src/blake3/blake3.h
diff --git a/portable/blake3/blake3_avx2.c b/src/blake3/blake3_avx2.c
similarity index 100%
rename from portable/blake3/blake3_avx2.c
rename to src/blake3/blake3_avx2.c
diff --git a/portable/blake3/blake3_avx512.c b/src/blake3/blake3_avx512.c
similarity index 100%
rename from portable/blake3/blake3_avx512.c
rename to src/blake3/blake3_avx512.c
diff --git a/portable/blake3/blake3_dispatch.c b/src/blake3/blake3_dispatch.c
similarity index 100%
rename from portable/blake3/blake3_dispatch.c
rename to src/blake3/blake3_dispatch.c
diff --git a/portable/blake3/blake3_impl.h b/src/blake3/blake3_impl.h
similarity index 100%
rename from portable/blake3/blake3_impl.h
rename to src/blake3/blake3_impl.h
diff --git a/portable/blake3/blake3_portable.c b/src/blake3/blake3_portable.c
similarity index 100%
rename from portable/blake3/blake3_portable.c
rename to src/blake3/blake3_portable.c
diff --git a/portable/blake3/blake3_sse2.c b/src/blake3/blake3_sse2.c
similarity index 100%
rename from portable/blake3/blake3_sse2.c
rename to src/blake3/blake3_sse2.c
diff --git a/portable/blake3/blake3_sse41.c b/src/blake3/blake3_sse41.c
similarity index 100%
rename from portable/blake3/blake3_sse41.c
rename to src/blake3/blake3_sse41.c
diff --git a/portable/leopard/CMakeLists.txt b/src/leopard/CMakeLists.txt
similarity index 100%
rename from portable/leopard/CMakeLists.txt
rename to src/leopard/CMakeLists.txt
diff --git a/portable/leopard/LeopardCommon.cpp b/src/leopard/LeopardCommon.cpp
similarity index 100%
rename from portable/leopard/LeopardCommon.cpp
rename to src/leopard/LeopardCommon.cpp
diff --git a/portable/leopard/LeopardCommon.h b/src/leopard/LeopardCommon.h
similarity index 100%
rename from portable/leopard/LeopardCommon.h
rename to src/leopard/LeopardCommon.h
diff --git a/portable/leopard/LeopardFF16.cpp b/src/leopard/LeopardFF16.cpp
similarity index 100%
rename from portable/leopard/LeopardFF16.cpp
rename to src/leopard/LeopardFF16.cpp
diff --git a/portable/leopard/LeopardFF16.h b/src/leopard/LeopardFF16.h
similarity index 100%
rename from portable/leopard/LeopardFF16.h
rename to src/leopard/LeopardFF16.h
diff --git a/portable/leopard/LeopardFF8.cpp b/src/leopard/LeopardFF8.cpp
similarity index 100%
rename from portable/leopard/LeopardFF8.cpp
rename to src/leopard/LeopardFF8.cpp
diff --git a/portable/leopard/LeopardFF8.h b/src/leopard/LeopardFF8.h
similarity index 100%
rename from portable/leopard/LeopardFF8.h
rename to src/leopard/LeopardFF8.h
diff --git a/portable/leopard/leopard.cpp b/src/leopard/leopard.cpp
similarity index 100%
rename from portable/leopard/leopard.cpp
rename to src/leopard/leopard.cpp
diff --git a/portable/leopard/leopard.h b/src/leopard/leopard.h
similarity index 100%
rename from portable/leopard/leopard.h
rename to src/leopard/leopard.h
diff --git a/portable/libpar3/CMakeLists.txt b/src/libpar3/CMakeLists.txt
similarity index 100%
rename from portable/libpar3/CMakeLists.txt
rename to src/libpar3/CMakeLists.txt
diff --git a/portable/libpar3/block.h b/src/libpar3/block.h
similarity index 100%
rename from portable/libpar3/block.h
rename to src/libpar3/block.h
diff --git a/portable/libpar3/block_check.c b/src/libpar3/block_check.c
similarity index 100%
rename from portable/libpar3/block_check.c
rename to src/libpar3/block_check.c
diff --git a/portable/libpar3/block_create.c b/src/libpar3/block_create.c
similarity index 100%
rename from portable/libpar3/block_create.c
rename to src/libpar3/block_create.c
diff --git a/portable/libpar3/block_map.c b/src/libpar3/block_map.c
similarity index 100%
rename from portable/libpar3/block_map.c
rename to src/libpar3/block_map.c
diff --git a/portable/libpar3/block_recover.c b/src/libpar3/block_recover.c
similarity index 100%
rename from portable/libpar3/block_recover.c
rename to src/libpar3/block_recover.c
diff --git a/portable/libpar3/common.c b/src/libpar3/common.c
similarity index 100%
rename from portable/libpar3/common.c
rename to src/libpar3/common.c
diff --git a/portable/libpar3/common.h b/src/libpar3/common.h
similarity index 100%
rename from portable/libpar3/common.h
rename to src/libpar3/common.h
diff --git a/portable/libpar3/file.c b/src/libpar3/file.c
similarity index 100%
rename from portable/libpar3/file.c
rename to src/libpar3/file.c
diff --git a/portable/libpar3/file.h b/src/libpar3/file.h
similarity index 100%
rename from portable/libpar3/file.h
rename to src/libpar3/file.h
diff --git a/portable/libpar3/galois.h b/src/libpar3/galois.h
similarity index 100%
rename from portable/libpar3/galois.h
rename to src/libpar3/galois.h
diff --git a/portable/libpar3/galois16.c b/src/libpar3/galois16.c
similarity index 100%
rename from portable/libpar3/galois16.c
rename to src/libpar3/galois16.c
diff --git a/portable/libpar3/galois8.c b/src/libpar3/galois8.c
similarity index 100%
rename from portable/libpar3/galois8.c
rename to src/libpar3/galois8.c
diff --git a/portable/libpar3/hash.c b/src/libpar3/hash.c
similarity index 100%
rename from portable/libpar3/hash.c
rename to src/libpar3/hash.c
diff --git a/portable/libpar3/hash.h b/src/libpar3/hash.h
similarity index 100%
rename from portable/libpar3/hash.h
rename to src/libpar3/hash.h
diff --git a/portable/libpar3/inside.h b/src/libpar3/inside.h
similarity index 100%
rename from portable/libpar3/inside.h
rename to src/libpar3/inside.h
diff --git a/portable/libpar3/inside_zip.c b/src/libpar3/inside_zip.c
similarity index 100%
rename from portable/libpar3/inside_zip.c
rename to src/libpar3/inside_zip.c
diff --git a/portable/libpar3/libpar3.c b/src/libpar3/libpar3.c
similarity index 100%
rename from portable/libpar3/libpar3.c
rename to src/libpar3/libpar3.c
diff --git a/portable/libpar3/libpar3.h b/src/libpar3/libpar3.h
similarity index 100%
rename from portable/libpar3/libpar3.h
rename to src/libpar3/libpar3.h
diff --git a/portable/libpar3/libpar3_create.c b/src/libpar3/libpar3_create.c
similarity index 100%
rename from portable/libpar3/libpar3_create.c
rename to src/libpar3/libpar3_create.c
diff --git a/portable/libpar3/libpar3_extra.c b/src/libpar3/libpar3_extra.c
similarity index 100%
rename from portable/libpar3/libpar3_extra.c
rename to src/libpar3/libpar3_extra.c
diff --git a/portable/libpar3/libpar3_inside.c b/src/libpar3/libpar3_inside.c
similarity index 100%
rename from portable/libpar3/libpar3_inside.c
rename to src/libpar3/libpar3_inside.c
diff --git a/portable/libpar3/libpar3_verify.c b/src/libpar3/libpar3_verify.c
similarity index 100%
rename from portable/libpar3/libpar3_verify.c
rename to src/libpar3/libpar3_verify.c
diff --git a/portable/libpar3/map.c b/src/libpar3/map.c
similarity index 100%
rename from portable/libpar3/map.c
rename to src/libpar3/map.c
diff --git a/portable/libpar3/map.h b/src/libpar3/map.h
similarity index 100%
rename from portable/libpar3/map.h
rename to src/libpar3/map.h
diff --git a/portable/libpar3/map_inside.c b/src/libpar3/map_inside.c
similarity index 100%
rename from portable/libpar3/map_inside.c
rename to src/libpar3/map_inside.c
diff --git a/portable/libpar3/map_simple.c b/src/libpar3/map_simple.c
similarity index 100%
rename from portable/libpar3/map_simple.c
rename to src/libpar3/map_simple.c
diff --git a/portable/libpar3/map_slide.c b/src/libpar3/map_slide.c
similarity index 100%
rename from portable/libpar3/map_slide.c
rename to src/libpar3/map_slide.c
diff --git a/portable/libpar3/packet.h b/src/libpar3/packet.h
similarity index 100%
rename from portable/libpar3/packet.h
rename to src/libpar3/packet.h
diff --git a/portable/libpar3/packet_add.c b/src/libpar3/packet_add.c
similarity index 100%
rename from portable/libpar3/packet_add.c
rename to src/libpar3/packet_add.c
diff --git a/portable/libpar3/packet_make.c b/src/libpar3/packet_make.c
similarity index 100%
rename from portable/libpar3/packet_make.c
rename to src/libpar3/packet_make.c
diff --git a/portable/libpar3/packet_parse.c b/src/libpar3/packet_parse.c
similarity index 100%
rename from portable/libpar3/packet_parse.c
rename to src/libpar3/packet_parse.c
diff --git a/portable/libpar3/read.c b/src/libpar3/read.c
similarity index 100%
rename from portable/libpar3/read.c
rename to src/libpar3/read.c
diff --git a/portable/libpar3/read.h b/src/libpar3/read.h
similarity index 100%
rename from portable/libpar3/read.h
rename to src/libpar3/read.h
diff --git a/portable/libpar3/reedsolomon.c b/src/libpar3/reedsolomon.c
similarity index 100%
rename from portable/libpar3/reedsolomon.c
rename to src/libpar3/reedsolomon.c
diff --git a/portable/libpar3/reedsolomon.h b/src/libpar3/reedsolomon.h
similarity index 100%
rename from portable/libpar3/reedsolomon.h
rename to src/libpar3/reedsolomon.h
diff --git a/portable/libpar3/reedsolomon16.c b/src/libpar3/reedsolomon16.c
similarity index 100%
rename from portable/libpar3/reedsolomon16.c
rename to src/libpar3/reedsolomon16.c
diff --git a/portable/libpar3/reedsolomon8.c b/src/libpar3/reedsolomon8.c
similarity index 100%
rename from portable/libpar3/reedsolomon8.c
rename to src/libpar3/reedsolomon8.c
diff --git a/portable/libpar3/repair.c b/src/libpar3/repair.c
similarity index 100%
rename from portable/libpar3/repair.c
rename to src/libpar3/repair.c
diff --git a/portable/libpar3/repair.h b/src/libpar3/repair.h
similarity index 100%
rename from portable/libpar3/repair.h
rename to src/libpar3/repair.h
diff --git a/portable/libpar3/verify.c b/src/libpar3/verify.c
similarity index 100%
rename from portable/libpar3/verify.c
rename to src/libpar3/verify.c
diff --git a/portable/libpar3/verify.h b/src/libpar3/verify.h
similarity index 100%
rename from portable/libpar3/verify.h
rename to src/libpar3/verify.h
diff --git a/portable/libpar3/verify_check.c b/src/libpar3/verify_check.c
similarity index 100%
rename from portable/libpar3/verify_check.c
rename to src/libpar3/verify_check.c
diff --git a/portable/libpar3/write.c b/src/libpar3/write.c
similarity index 100%
rename from portable/libpar3/write.c
rename to src/libpar3/write.c
diff --git a/portable/libpar3/write.h b/src/libpar3/write.h
similarity index 100%
rename from portable/libpar3/write.h
rename to src/libpar3/write.h
diff --git a/portable/libpar3/write_inside.c b/src/libpar3/write_inside.c
similarity index 100%
rename from portable/libpar3/write_inside.c
rename to src/libpar3/write_inside.c
diff --git a/portable/libpar3/write_trial.c b/src/libpar3/write_trial.c
similarity index 100%
rename from portable/libpar3/write_trial.c
rename to src/libpar3/write_trial.c
diff --git a/portable/man/par3.1 b/src/man/par3.1
similarity index 100%
rename from portable/man/par3.1
rename to src/man/par3.1
diff --git a/portable/mingw-w64-x86_64.cmake b/src/mingw-w64-x86_64.cmake
similarity index 100%
rename from portable/mingw-w64-x86_64.cmake
rename to src/mingw-w64-x86_64.cmake
diff --git a/portable/par3cmd/CMakeLists.txt b/src/par3cmd/CMakeLists.txt
similarity index 100%
rename from portable/par3cmd/CMakeLists.txt
rename to src/par3cmd/CMakeLists.txt
diff --git a/portable/par3cmd/locale_helpers.c b/src/par3cmd/locale_helpers.c
similarity index 100%
rename from portable/par3cmd/locale_helpers.c
rename to src/par3cmd/locale_helpers.c
diff --git a/portable/par3cmd/locale_helpers.h b/src/par3cmd/locale_helpers.h
similarity index 100%
rename from portable/par3cmd/locale_helpers.h
rename to src/par3cmd/locale_helpers.h
diff --git a/portable/par3cmd/main.c b/src/par3cmd/main.c
similarity index 100%
rename from portable/par3cmd/main.c
rename to src/par3cmd/main.c
diff --git a/portable/par3cmdline.sln b/src/par3cmdline.sln
similarity index 100%
rename from portable/par3cmdline.sln
rename to src/par3cmdline.sln
diff --git a/portable/par3cmdline.vcxproj b/src/par3cmdline.vcxproj
similarity index 100%
rename from portable/par3cmdline.vcxproj
rename to src/par3cmdline.vcxproj
diff --git a/portable/par3cmdline.vcxproj.filters b/src/par3cmdline.vcxproj.filters
similarity index 100%
rename from portable/par3cmdline.vcxproj.filters
rename to src/par3cmdline.vcxproj.filters
diff --git a/portable/par3cmdline.vcxproj.user b/src/par3cmdline.vcxproj.user
similarity index 100%
rename from portable/par3cmdline.vcxproj.user
rename to src/par3cmdline.vcxproj.user
diff --git a/portable/platform/linux/CMakeLists.txt b/src/platform/linux/CMakeLists.txt
similarity index 100%
rename from portable/platform/linux/CMakeLists.txt
rename to src/platform/linux/CMakeLists.txt
diff --git a/portable/platform/linux/filelength.c b/src/platform/linux/filelength.c
similarity index 100%
rename from portable/platform/linux/filelength.c
rename to src/platform/linux/filelength.c
diff --git a/portable/platform/linux/filesearch.c b/src/platform/linux/filesearch.c
similarity index 100%
rename from portable/platform/linux/filesearch.c
rename to src/platform/linux/filesearch.c
diff --git a/portable/platform/linux/get_absolute_path.c b/src/platform/linux/get_absolute_path.c
similarity index 100%
rename from portable/platform/linux/get_absolute_path.c
rename to src/platform/linux/get_absolute_path.c
diff --git a/portable/platform/linux/platform_linux.h b/src/platform/linux/platform_linux.h
similarity index 100%
rename from portable/platform/linux/platform_linux.h
rename to src/platform/linux/platform_linux.h
diff --git a/portable/platform/platform.h b/src/platform/platform.h
similarity index 100%
rename from portable/platform/platform.h
rename to src/platform/platform.h
diff --git a/portable/platform/windows/CMakeLists.txt b/src/platform/windows/CMakeLists.txt
similarity index 100%
rename from portable/platform/windows/CMakeLists.txt
rename to src/platform/windows/CMakeLists.txt
diff --git a/portable/platform/windows/get_absolute_path.c b/src/platform/windows/get_absolute_path.c
similarity index 100%
rename from portable/platform/windows/get_absolute_path.c
rename to src/platform/windows/get_absolute_path.c
diff --git a/portable/platform/windows/platform_windows.h b/src/platform/windows/platform_windows.h
similarity index 100%
rename from portable/platform/windows/platform_windows.h
rename to src/platform/windows/platform_windows.h
diff --git a/windows/src/blake3/blake3.c b/windows/src/blake3/blake3.c
deleted file mode 100644
index 1239433..0000000
--- a/windows/src/blake3/blake3.c
+++ /dev/null
@@ -1,616 +0,0 @@
-#include <assert.h>
-#include <stdbool.h>
-#include <string.h>
-
-#include "blake3.h"
-#include "blake3_impl.h"
-
-const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
-
-INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
-                             uint8_t flags) {
-  memcpy(self->cv, key, BLAKE3_KEY_LEN);
-  self->chunk_counter = 0;
-  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
-  self->buf_len = 0;
-  self->blocks_compressed = 0;
-  self->flags = flags;
-}
-
-INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8],
-                              uint64_t chunk_counter) {
-  memcpy(self->cv, key, BLAKE3_KEY_LEN);
-  self->chunk_counter = chunk_counter;
-  self->blocks_compressed = 0;
-  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
-  self->buf_len = 0;
-}
-
-INLINE size_t chunk_state_len(const blake3_chunk_state *self) {
-  return (BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed) +
-         ((size_t)self->buf_len);
-}
-
-INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self,
-                                   const uint8_t *input, size_t input_len) {
-  size_t take = BLAKE3_BLOCK_LEN - ((size_t)self->buf_len);
-  if (take > input_len) {
-    take = input_len;
-  }
-  uint8_t *dest = self->buf + ((size_t)self->buf_len);
-  memcpy(dest, input, take);
-  self->buf_len += (uint8_t)take;
-  return take;
-}
-
-INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) {
-  if (self->blocks_compressed == 0) {
-    return CHUNK_START;
-  } else {
-    return 0;
-  }
-}
-
-typedef struct {
-  uint32_t input_cv[8];
-  uint64_t counter;
-  uint8_t block[BLAKE3_BLOCK_LEN];
-  uint8_t block_len;
-  uint8_t flags;
-} output_t;
-
-INLINE output_t make_output(const uint32_t input_cv[8],
-                            const uint8_t block[BLAKE3_BLOCK_LEN],
-                            uint8_t block_len, uint64_t counter,
-                            uint8_t flags) {
-  output_t ret;
-  memcpy(ret.input_cv, input_cv, 32);
-  memcpy(ret.block, block, BLAKE3_BLOCK_LEN);
-  ret.block_len = block_len;
-  ret.counter = counter;
-  ret.flags = flags;
-  return ret;
-}
-
-// Chaining values within a given chunk (specifically the compress_in_place
-// interface) are represented as words. This avoids unnecessary bytes<->words
-// conversion overhead in the portable implementation. However, the hash_many
-// interface handles both user input and parent node blocks, so it accepts
-// bytes. For that reason, chaining values in the CV stack are represented as
-// bytes.
-INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
-  uint32_t cv_words[8];
-  memcpy(cv_words, self->input_cv, 32);
-  blake3_compress_in_place(cv_words, self->block, self->block_len,
-                           self->counter, self->flags);
-  store_cv_words(cv, cv_words);
-}
-
-INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
-                              size_t out_len) {
-  uint64_t output_block_counter = seek / 64;
-  size_t offset_within_block = seek % 64;
-  uint8_t wide_buf[64];
-  while (out_len > 0) {
-    blake3_compress_xof(self->input_cv, self->block, self->block_len,
-                        output_block_counter, self->flags | ROOT, wide_buf);
-    size_t available_bytes = 64 - offset_within_block;
-    size_t memcpy_len;
-    if (out_len > available_bytes) {
-      memcpy_len = available_bytes;
-    } else {
-      memcpy_len = out_len;
-    }
-    memcpy(out, wide_buf + offset_within_block, memcpy_len);
-    out += memcpy_len;
-    out_len -= memcpy_len;
-    output_block_counter += 1;
-    offset_within_block = 0;
-  }
-}
-
-INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
-                               size_t input_len) {
-  if (self->buf_len > 0) {
-    size_t take = chunk_state_fill_buf(self, input, input_len);
-    input += take;
-    input_len -= take;
-    if (input_len > 0) {
-      blake3_compress_in_place(
-          self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,
-          self->flags | chunk_state_maybe_start_flag(self));
-      self->blocks_compressed += 1;
-      self->buf_len = 0;
-      memset(self->buf, 0, BLAKE3_BLOCK_LEN);
-    }
-  }
-
-  while (input_len > BLAKE3_BLOCK_LEN) {
-    blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
-                             self->chunk_counter,
-                             self->flags | chunk_state_maybe_start_flag(self));
-    self->blocks_compressed += 1;
-    input += BLAKE3_BLOCK_LEN;
-    input_len -= BLAKE3_BLOCK_LEN;
-  }
-
-  size_t take = chunk_state_fill_buf(self, input, input_len);
-  input += take;
-  input_len -= take;
-}
-
-INLINE output_t chunk_state_output(const blake3_chunk_state *self) {
-  uint8_t block_flags =
-      self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END;
-  return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,
-                     block_flags);
-}
-
-INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
-                              const uint32_t key[8], uint8_t flags) {
-  return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT);
-}
-
-// Given some input larger than one chunk, return the number of bytes that
-// should go in the left subtree. This is the largest power-of-2 number of
-// chunks that leaves at least 1 byte for the right subtree.
-INLINE size_t left_len(size_t content_len) {
-  // Subtract 1 to reserve at least one byte for the right side. content_len
-  // should always be greater than BLAKE3_CHUNK_LEN.
-  size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;
-  return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN;
-}
-
-// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time
-// on a single thread. Write out the chunk chaining values and return the
-// number of chunks hashed. These chunks are never the root and never empty;
-// those cases use a different codepath.
-INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,
-                                       const uint32_t key[8],
-                                       uint64_t chunk_counter, uint8_t flags,
-                                       uint8_t *out) {
-#if defined(BLAKE3_TESTING)
-  assert(0 < input_len);
-  assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN);
-#endif
-
-  const uint8_t *chunks_array[MAX_SIMD_DEGREE];
-  size_t input_position = 0;
-  size_t chunks_array_len = 0;
-  while (input_len - input_position >= BLAKE3_CHUNK_LEN) {
-    chunks_array[chunks_array_len] = &input[input_position];
-    input_position += BLAKE3_CHUNK_LEN;
-    chunks_array_len += 1;
-  }
-
-  blake3_hash_many(chunks_array, chunks_array_len,
-                   BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,
-                   true, flags, CHUNK_START, CHUNK_END, out);
-
-  // Hash the remaining partial chunk, if there is one. Note that the empty
-  // chunk (meaning the empty message) is a different codepath.
-  if (input_len > input_position) {
-    uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;
-    blake3_chunk_state chunk_state;
-    chunk_state_init(&chunk_state, key, flags);
-    chunk_state.chunk_counter = counter;
-    chunk_state_update(&chunk_state, &input[input_position],
-                       input_len - input_position);
-    output_t output = chunk_state_output(&chunk_state);
-    output_chaining_value(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]);
-    return chunks_array_len + 1;
-  } else {
-    return chunks_array_len;
-  }
-}
-
-// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time
-// on a single thread. Write out the parent chaining values and return the
-// number of parents hashed. (If there's an odd input chaining value left over,
-// return it as an additional output.) These parents are never the root and
-// never empty; those cases use a different codepath.
-INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
-                                        size_t num_chaining_values,
-                                        const uint32_t key[8], uint8_t flags,
-                                        uint8_t *out) {
-#if defined(BLAKE3_TESTING)
-  assert(2 <= num_chaining_values);
-  assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2);
-#endif
-
-  const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];
-  size_t parents_array_len = 0;
-  while (num_chaining_values - (2 * parents_array_len) >= 2) {
-    parents_array[parents_array_len] =
-        &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN];
-    parents_array_len += 1;
-  }
-
-  blake3_hash_many(parents_array, parents_array_len, 1, key,
-                   0, // Parents always use counter 0.
-                   false, flags | PARENT,
-                   0, // Parents have no start flags.
-                   0, // Parents have no end flags.
-                   out);
-
-  // If there's an odd child left over, it becomes an output.
-  if (num_chaining_values > 2 * parents_array_len) {
-    memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],
-           &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN],
-           BLAKE3_OUT_LEN);
-    return parents_array_len + 1;
-  } else {
-    return parents_array_len;
-  }
-}
-
-// The wide helper function returns (writes out) an array of chaining values
-// and returns the length of that array. The number of chaining values returned
-// is the dyanmically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
-// if the input is shorter than that many chunks. The reason for maintaining a
-// wide array of chaining values going back up the tree, is to allow the
-// implementation to hash as many parents in parallel as possible.
-//
-// As a special case when the SIMD degree is 1, this function will still return
-// at least 2 outputs. This guarantees that this function doesn't perform the
-// root compression. (If it did, it would use the wrong flags, and also we
-// wouldn't be able to implement exendable ouput.) Note that this function is
-// not used when the whole input is only 1 chunk long; that's a different
-// codepath.
-//
-// Why not just have the caller split the input on the first update(), instead
-// of implementing this special rule? Because we don't want to limit SIMD or
-// multi-threading parallelism for that update().
-static size_t blake3_compress_subtree_wide(const uint8_t *input,
-                                           size_t input_len,
-                                           const uint32_t key[8],
-                                           uint64_t chunk_counter,
-                                           uint8_t flags, uint8_t *out) {
-  // Note that the single chunk case does *not* bump the SIMD degree up to 2
-  // when it is 1. If this implementation adds multi-threading in the future,
-  // this gives us the option of multi-threading even the 2-chunk case, which
-  // can help performance on smaller platforms.
-  if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
-    return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,
-                                    out);
-  }
-
-  // With more than simd_degree chunks, we need to recurse. Start by dividing
-  // the input into left and right subtrees. (Note that this is only optimal
-  // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree
-  // of 3 or something, we'll need a more complicated strategy.)
-  size_t left_input_len = left_len(input_len);
-  size_t right_input_len = input_len - left_input_len;
-  const uint8_t *right_input = &input[left_input_len];
-  uint64_t right_chunk_counter =
-      chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
-
-  // Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2 to
-  // account for the special case of returning 2 outputs when the SIMD degree
-  // is 1.
-  uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
-  size_t degree = blake3_simd_degree();
-  if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
-    // The special case: We always use a degree of at least two, to make
-    // sure there are two outputs. Except, as noted above, at the chunk
-    // level, where we allow degree=1. (Note that the 1-chunk-input case is
-    // a different codepath.)
-    degree = 2;
-  }
-  uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
-
-  // Recurse! If this implementation adds multi-threading support in the
-  // future, this is where it will go.
-  size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,
-                                               chunk_counter, flags, cv_array);
-  size_t right_n = blake3_compress_subtree_wide(
-      right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);
-
-  // The special case again. If simd_degree=1, then we'll have left_n=1 and
-  // right_n=1. Rather than compressing them into a single output, return
-  // them directly, to make sure we always have at least two outputs.
-  if (left_n == 1) {
-    memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
-    return 2;
-  }
-
-  // Otherwise, do one layer of parent node compression.
-  size_t num_chaining_values = left_n + right_n;
-  return compress_parents_parallel(cv_array, num_chaining_values, key, flags,
-                                   out);
-}
-
-// Hash a subtree with compress_subtree_wide(), and then condense the resulting
-// list of chaining values down to a single parent node. Don't compress that
-// last parent node, however. Instead, return its message bytes (the
-// concatenated chaining values of its children). This is necessary when the
-// first call to update() supplies a complete subtree, because the topmost
-// parent node of that subtree could end up being the root. It's also necessary
-// for extended output in the general case.
-//
-// As with compress_subtree_wide(), this function is not used on inputs of 1
-// chunk or less. That's a different codepath.
-INLINE void compress_subtree_to_parent_node(
-    const uint8_t *input, size_t input_len, const uint32_t key[8],
-    uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {
-#if defined(BLAKE3_TESTING)
-  assert(input_len > BLAKE3_CHUNK_LEN);
-#endif
-
-  uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
-  size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
-                                                chunk_counter, flags, cv_array);
-  assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
-
-  // If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
-  // compress_subtree_wide() returns more than 2 chaining values. Condense
-  // them into 2 by forming parent nodes repeatedly.
-  uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
-  // The second half of this loop condition is always true, and we just
-  // asserted it above. But GCC can't tell that it's always true, and if NDEBUG
-  // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
-  // warnings here. GCC 8.5 is particularly sensitive, so if you're changing
-  // this code, test it against that version.
-  while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
-    num_cvs =
-        compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
-    memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
-  }
-  memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
-}
-
-INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
-                             uint8_t flags) {
-  memcpy(self->key, key, BLAKE3_KEY_LEN);
-  chunk_state_init(&self->chunk, key, flags);
-  self->cv_stack_len = 0;
-}
-
-void blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
-
-void blake3_hasher_init_keyed(blake3_hasher *self,
-                              const uint8_t key[BLAKE3_KEY_LEN]) {
-  uint32_t key_words[8];
-  load_key_words(key, key_words);
-  hasher_init_base(self, key_words, KEYED_HASH);
-}
-
-void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
-                                       size_t context_len) {
-  blake3_hasher context_hasher;
-  hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT);
-  blake3_hasher_update(&context_hasher, context, context_len);
-  uint8_t context_key[BLAKE3_KEY_LEN];
-  blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
-  uint32_t context_key_words[8];
-  load_key_words(context_key, context_key_words);
-  hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL);
-}
-
-void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
-  blake3_hasher_init_derive_key_raw(self, context, strlen(context));
-}
-
-// As described in hasher_push_cv() below, we do "lazy merging", delaying
-// merges until right before the next CV is about to be added. This is
-// different from the reference implementation. Another difference is that we
-// aren't always merging 1 chunk at a time. Instead, each CV might represent
-// any power-of-two number of chunks, as long as the smaller-above-larger stack
-// order is maintained. Instead of the "count the trailing 0-bits" algorithm
-// described in the spec, we use a "count the total number of 1-bits" variant
-// that doesn't require us to retain the subtree size of the CV on top of the
-// stack. The principle is the same: each CV that should remain in the stack is
-// represented by a 1-bit in the total number of chunks (or bytes) so far.
-INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
-  size_t post_merge_stack_len = (size_t)popcnt(total_len);
-  while (self->cv_stack_len > post_merge_stack_len) {
-    uint8_t *parent_node =
-        &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN];
-    output_t output = parent_output(parent_node, self->key, self->chunk.flags);
-    output_chaining_value(&output, parent_node);
-    self->cv_stack_len -= 1;
-  }
-}
-
-// In reference_impl.rs, we merge the new CV with existing CVs from the stack
-// before pushing it. We can do that because we know more input is coming, so
-// we know none of the merges are root.
-//
-// This setting is different. We want to feed as much input as possible to
-// compress_subtree_wide(), without setting aside anything for the chunk_state.
-// If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
-// as a single subtree, if at all possible.
-//
-// This leads to two problems:
-// 1) This 64 KiB input might be the only call that ever gets made to update.
-//    In this case, the root node of the 64 KiB subtree would be the root node
-//    of the whole tree, and it would need to be ROOT finalized. We can't
-//    compress it until we know.
-// 2) This 64 KiB input might complete a larger tree, whose root node is
-//    similarly going to be the the root of the whole tree. For example, maybe
-//    we have 196 KiB (that is, 128 + 64) hashed so far. We can't compress the
-//    node at the root of the 256 KiB subtree until we know how to finalize it.
-//
-// The second problem is solved with "lazy merging". That is, when we're about
-// to add a CV to the stack, we don't merge it with anything first, as the
-// reference impl does. Instead we do merges using the *previous* CV that was
-// added, which is sitting on top of the stack, and we put the new CV
-// (unmerged) on top of the stack afterwards. This guarantees that we never
-// merge the root node until finalize().
-//
-// Solving the first problem requires an additional tool,
-// compress_subtree_to_parent_node(). That function always returns the top
-// *two* chaining values of the subtree it's compressing. We then do lazy
-// merging with each of them separately, so that the second CV will always
-// remain unmerged. (That also helps us support extendable output when we're
-// hashing an input all-at-once.)
-INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
-                           uint64_t chunk_counter) {
-  hasher_merge_cv_stack(self, chunk_counter);
-  memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv,
-         BLAKE3_OUT_LEN);
-  self->cv_stack_len += 1;
-}
-
-void blake3_hasher_update(blake3_hasher *self, const void *input,
-                          size_t input_len) {
-  // Explicitly checking for zero avoids causing UB by passing a null pointer
-  // to memcpy. This comes up in practice with things like:
-  //   std::vector<uint8_t> v;
-  //   blake3_hasher_update(&hasher, v.data(), v.size());
-  if (input_len == 0) {
-    return;
-  }
-
-  const uint8_t *input_bytes = (const uint8_t *)input;
-
-  // If we have some partial chunk bytes in the internal chunk_state, we need
-  // to finish that chunk first.
-  if (chunk_state_len(&self->chunk) > 0) {
-    size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk);
-    if (take > input_len) {
-      take = input_len;
-    }
-    chunk_state_update(&self->chunk, input_bytes, take);
-    input_bytes += take;
-    input_len -= take;
-    // If we've filled the current chunk and there's more coming, finalize this
-    // chunk and proceed. In this case we know it's not the root.
-    if (input_len > 0) {
-      output_t output = chunk_state_output(&self->chunk);
-      uint8_t chunk_cv[32];
-      output_chaining_value(&output, chunk_cv);
-      hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);
-      chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);
-    } else {
-      return;
-    }
-  }
-
-  // Now the chunk_state is clear, and we have more input. If there's more than
-  // a single chunk (so, definitely not the root chunk), hash the largest whole
-  // subtree we can, with the full benefits of SIMD (and maybe in the future,
-  // multi-threading) parallelism. Two restrictions:
-  // - The subtree has to be a power-of-2 number of chunks. Only subtrees along
-  //   the right edge can be incomplete, and we don't know where the right edge
-  //   is going to be until we get to finalize().
-  // - The subtree must evenly divide the total number of chunks up until this
-  //   point (if total is not 0). If the current incomplete subtree is only
-  //   waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have
-  //   to complete the current subtree first.
-  // Because we might need to break up the input to form powers of 2, or to
-  // evenly divide what we already have, this part runs in a loop.
-  while (input_len > BLAKE3_CHUNK_LEN) {
-    size_t subtree_len = round_down_to_power_of_2(input_len);
-    uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
-    // Shrink the subtree_len until it evenly divides the count so far. We know
-    // that subtree_len itself is a power of 2, so we can use a bitmasking
-    // trick instead of an actual remainder operation. (Note that if the caller
-    // consistently passes power-of-2 inputs of the same size, as is hopefully
-    // typical, this loop condition will always fail, and subtree_len will
-    // always be the full length of the input.)
-    //
-    // An aside: We don't have to shrink subtree_len quite this much. For
-    // example, if count_so_far is 1, we could pass 2 chunks to
-    // compress_subtree_to_parent_node. Since we'll get 2 CVs back, we'll still
-    // get the right answer in the end, and we might get to use 2-way SIMD
-    // parallelism. The problem with this optimization, is that it gets us
-    // stuck always hashing 2 chunks. The total number of chunks will remain
-    // odd, and we'll never graduate to higher degrees of parallelism. See
-    // https://github.com/BLAKE3-team/BLAKE3/issues/69.
-    while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
-      subtree_len /= 2;
-    }
-    // The shrunken subtree_len might now be 1 chunk long. If so, hash that one
-    // chunk by itself. Otherwise, compress the subtree into a pair of CVs.
-    uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
-    if (subtree_len <= BLAKE3_CHUNK_LEN) {
-      blake3_chunk_state chunk_state;
-      chunk_state_init(&chunk_state, self->key, self->chunk.flags);
-      chunk_state.chunk_counter = self->chunk.chunk_counter;
-      chunk_state_update(&chunk_state, input_bytes, subtree_len);
-      output_t output = chunk_state_output(&chunk_state);
-      uint8_t cv[BLAKE3_OUT_LEN];
-      output_chaining_value(&output, cv);
-      hasher_push_cv(self, cv, chunk_state.chunk_counter);
-    } else {
-      // This is the high-performance happy path, though getting here depends
-      // on the caller giving us a long enough input.
-      uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
-      compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,
-                                      self->chunk.chunk_counter,
-                                      self->chunk.flags, cv_pair);
-      hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);
-      hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],
-                     self->chunk.chunk_counter + (subtree_chunks / 2));
-    }
-    self->chunk.chunk_counter += subtree_chunks;
-    input_bytes += subtree_len;
-    input_len -= subtree_len;
-  }
-
-  // If there's any remaining input less than a full chunk, add it to the chunk
-  // state. In that case, also do a final merge loop to make sure the subtree
-  // stack doesn't contain any unmerged pairs. The remaining input means we
-  // know these merges are non-root. This merge loop isn't strictly necessary
-  // here, because hasher_push_chunk_cv already does its own merge loop, but it
-  // simplifies blake3_hasher_finalize below.
-  if (input_len > 0) {
-    chunk_state_update(&self->chunk, input_bytes, input_len);
-    hasher_merge_cv_stack(self, self->chunk.chunk_counter);
-  }
-}
-
-void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
-                            size_t out_len) {
-  blake3_hasher_finalize_seek(self, 0, out, out_len);
-}
-
-void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
-                                 uint8_t *out, size_t out_len) {
-  // Explicitly checking for zero avoids causing UB by passing a null pointer
-  // to memcpy. This comes up in practice with things like:
-  //   std::vector<uint8_t> v;
-  //   blake3_hasher_finalize(&hasher, v.data(), v.size());
-  if (out_len == 0) {
-    return;
-  }
-
-  // If the subtree stack is empty, then the current chunk is the root.
-  if (self->cv_stack_len == 0) {
-    output_t output = chunk_state_output(&self->chunk);
-    output_root_bytes(&output, seek, out, out_len);
-    return;
-  }
-  // If there are any bytes in the chunk state, finalize that chunk and do a
-  // roll-up merge between that chunk hash and every subtree in the stack. In
-  // this case, the extra merge loop at the end of blake3_hasher_update
-  // guarantees that none of the subtrees in the stack need to be merged with
-  // each other first. Otherwise, if there are no bytes in the chunk state,
-  // then the top of the stack is a chunk hash, and we start the merge from
-  // that.
-  output_t output;
-  size_t cvs_remaining;
-  if (chunk_state_len(&self->chunk) > 0) {
-    cvs_remaining = self->cv_stack_len;
-    output = chunk_state_output(&self->chunk);
-  } else {
-    // There are always at least 2 CVs in the stack in this case.
-    cvs_remaining = self->cv_stack_len - 2;
-    output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key,
-                           self->chunk.flags);
-  }
-  while (cvs_remaining > 0) {
-    cvs_remaining -= 1;
-    uint8_t parent_block[BLAKE3_BLOCK_LEN];
-    memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);
-    output_chaining_value(&output, &parent_block[32]);
-    output = parent_output(parent_block, self->key, self->chunk.flags);
-  }
-  output_root_bytes(&output, seek, out, out_len);
-}
-
-void blake3_hasher_reset(blake3_hasher *self) {
-  chunk_state_reset(&self->chunk, self->key, 0);
-  self->cv_stack_len = 0;
-}
diff --git a/windows/src/blake3/blake3.h b/windows/src/blake3/blake3.h
deleted file mode 100644
index 7caf9b4..0000000
--- a/windows/src/blake3/blake3.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef BLAKE3_H
-#define BLAKE3_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BLAKE3_VERSION_STRING "1.3.1"
-#define BLAKE3_KEY_LEN 32
-#define BLAKE3_OUT_LEN 32
-#define BLAKE3_BLOCK_LEN 64
-#define BLAKE3_CHUNK_LEN 1024
-#define BLAKE3_MAX_DEPTH 54
-
-// This struct is a private implementation detail. It has to be here because
-// it's part of blake3_hasher below.
-typedef struct {
-  uint32_t cv[8];
-  uint64_t chunk_counter;
-  uint8_t buf[BLAKE3_BLOCK_LEN];
-  uint8_t buf_len;
-  uint8_t blocks_compressed;
-  uint8_t flags;
-} blake3_chunk_state;
-
-typedef struct {
-  uint32_t key[8];
-  blake3_chunk_state chunk;
-  uint8_t cv_stack_len;
-  // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
-  // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
-  // requires a 4th entry, rather than merging everything down to 1, because we
-  // don't know whether more input is coming. This is different from how the
-  // reference implementation does things.
-  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
-} blake3_hasher;
-
-const char *blake3_version(void);
-void blake3_hasher_init(blake3_hasher *self);
-void blake3_hasher_init_keyed(blake3_hasher *self,
-                              const uint8_t key[BLAKE3_KEY_LEN]);
-void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
-void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
-                                       size_t context_len);
-void blake3_hasher_update(blake3_hasher *self, const void *input,
-                          size_t input_len);
-void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
-                            size_t out_len);
-void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
-                                 uint8_t *out, size_t out_len);
-void blake3_hasher_reset(blake3_hasher *self);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BLAKE3_H */
diff --git a/windows/src/blake3/blake3_avx2.c b/windows/src/blake3/blake3_avx2.c
deleted file mode 100644
index e76aa1a..0000000
--- a/windows/src/blake3/blake3_avx2.c
+++ /dev/null
@@ -1,326 +0,0 @@
-#include "blake3_impl.h"
-
-#include <immintrin.h>
-
-#define DEGREE 8
-
-INLINE __m256i loadu(const uint8_t src[32]) {
-  return _mm256_loadu_si256((const __m256i *)src);
-}
-
-INLINE void storeu(__m256i src, uint8_t dest[16]) {
-  _mm256_storeu_si256((__m256i *)dest, src);
-}
-
-INLINE __m256i addv(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); }
-
-// Note that clang-format doesn't like the name "xor" for some reason.
-INLINE __m256i xorv(__m256i a, __m256i b) { return _mm256_xor_si256(a, b); }
-
-INLINE __m256i set1(uint32_t x) { return _mm256_set1_epi32((int32_t)x); }
-
-INLINE __m256i rot16(__m256i x) {
-  return _mm256_shuffle_epi8(
-      x, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
-                         13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
-}
-
-INLINE __m256i rot12(__m256i x) {
-  return _mm256_or_si256(_mm256_srli_epi32(x, 12), _mm256_slli_epi32(x, 32 - 12));
-}
-
-INLINE __m256i rot8(__m256i x) {
-  return _mm256_shuffle_epi8(
-      x, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1,
-                         12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
-}
-
-INLINE __m256i rot7(__m256i x) {
-  return _mm256_or_si256(_mm256_srli_epi32(x, 7), _mm256_slli_epi32(x, 32 - 7));
-}
-
-INLINE void round_fn(__m256i v[16], __m256i m[16], size_t r) {
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
-  v[0] = addv(v[0], v[4]);
-  v[1] = addv(v[1], v[5]);
-  v[2] = addv(v[2], v[6]);
-  v[3] = addv(v[3], v[7]);
-  v[12] = xorv(v[12], v[0]);
-  v[13] = xorv(v[13], v[1]);
-  v[14] = xorv(v[14], v[2]);
-  v[15] = xorv(v[15], v[3]);
-  v[12] = rot16(v[12]);
-  v[13] = rot16(v[13]);
-  v[14] = rot16(v[14]);
-  v[15] = rot16(v[15]);
-  v[8] = addv(v[8], v[12]);
-  v[9] = addv(v[9], v[13]);
-  v[10] = addv(v[10], v[14]);
-  v[11] = addv(v[11], v[15]);
-  v[4] = xorv(v[4], v[8]);
-  v[5] = xorv(v[5], v[9]);
-  v[6] = xorv(v[6], v[10]);
-  v[7] = xorv(v[7], v[11]);
-  v[4] = rot12(v[4]);
-  v[5] = rot12(v[5]);
-  v[6] = rot12(v[6]);
-  v[7] = rot12(v[7]);
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
-  v[0] = addv(v[0], v[4]);
-  v[1] = addv(v[1], v[5]);
-  v[2] = addv(v[2], v[6]);
-  v[3] = addv(v[3], v[7]);
-  v[12] = xorv(v[12], v[0]);
-  v[13] = xorv(v[13], v[1]);
-  v[14] = xorv(v[14], v[2]);
-  v[15] = xorv(v[15], v[3]);
-  v[12] = rot8(v[12]);
-  v[13] = rot8(v[13]);
-  v[14] = rot8(v[14]);
-  v[15] = rot8(v[15]);
-  v[8] = addv(v[8], v[12]);
-  v[9] = addv(v[9], v[13]);
-  v[10] = addv(v[10], v[14]);
-  v[11] = addv(v[11], v[15]);
-  v[4] = xorv(v[4], v[8]);
-  v[5] = xorv(v[5], v[9]);
-  v[6] = xorv(v[6], v[10]);
-  v[7] = xorv(v[7], v[11]);
-  v[4] = rot7(v[4]);
-  v[5] = rot7(v[5]);
-  v[6] = rot7(v[6]);
-  v[7] = rot7(v[7]);
-
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
-  v[0] = addv(v[0], v[5]);
-  v[1] = addv(v[1], v[6]);
-  v[2] = addv(v[2], v[7]);
-  v[3] = addv(v[3], v[4]);
-  v[15] = xorv(v[15], v[0]);
-  v[12] = xorv(v[12], v[1]);
-  v[13] = xorv(v[13], v[2]);
-  v[14] = xorv(v[14], v[3]);
-  v[15] = rot16(v[15]);
-  v[12] = rot16(v[12]);
-  v[13] = rot16(v[13]);
-  v[14] = rot16(v[14]);
-  v[10] = addv(v[10], v[15]);
-  v[11] = addv(v[11], v[12]);
-  v[8] = addv(v[8], v[13]);
-  v[9] = addv(v[9], v[14]);
-  v[5] = xorv(v[5], v[10]);
-  v[6] = xorv(v[6], v[11]);
-  v[7] = xorv(v[7], v[8]);
-  v[4] = xorv(v[4], v[9]);
-  v[5] = rot12(v[5]);
-  v[6] = rot12(v[6]);
-  v[7] = rot12(v[7]);
-  v[4] = rot12(v[4]);
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
-  v[0] = addv(v[0], v[5]);
-  v[1] = addv(v[1], v[6]);
-  v[2] = addv(v[2], v[7]);
-  v[3] = addv(v[3], v[4]);
-  v[15] = xorv(v[15], v[0]);
-  v[12] = xorv(v[12], v[1]);
-  v[13] = xorv(v[13], v[2]);
-  v[14] = xorv(v[14], v[3]);
-  v[15] = rot8(v[15]);
-  v[12] = rot8(v[12]);
-  v[13] = rot8(v[13]);
-  v[14] = rot8(v[14]);
-  v[10] = addv(v[10], v[15]);
-  v[11] = addv(v[11], v[12]);
-  v[8] = addv(v[8], v[13]);
-  v[9] = addv(v[9], v[14]);
-  v[5] = xorv(v[5], v[10]);
-  v[6] = xorv(v[6], v[11]);
-  v[7] = xorv(v[7], v[8]);
-  v[4] = xorv(v[4], v[9]);
-  v[5] = rot7(v[5]);
-  v[6] = rot7(v[6]);
-  v[7] = rot7(v[7]);
-  v[4] = rot7(v[4]);
-}
-
-INLINE void transpose_vecs(__m256i vecs[DEGREE]) {
-  // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high
-  // is 22/33/66/77.
-  __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]);
-  __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]);
-  __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]);
-  __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]);
-  __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]);
-  __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]);
-  __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
-  __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
-
-  // Interleave 64-bit lates. The low unpack is lanes 00/22 and the high is
-  // 11/33.
-  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
-  __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
-  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);
-  __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367);
-  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);
-  __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145);
-  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);
-  __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367);
-
-  // Interleave 128-bit lanes.
-  vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20);
-  vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20);
-  vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20);
-  vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20);
-  vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31);
-  vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31);
-  vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31);
-  vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31);
-}
-
-INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
-                               size_t block_offset, __m256i out[16]) {
-  out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m256i)]);
-  out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m256i)]);
-  out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m256i)]);
-  out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m256i)]);
-  out[4] = loadu(&inputs[4][block_offset + 0 * sizeof(__m256i)]);
-  out[5] = loadu(&inputs[5][block_offset + 0 * sizeof(__m256i)]);
-  out[6] = loadu(&inputs[6][block_offset + 0 * sizeof(__m256i)]);
-  out[7] = loadu(&inputs[7][block_offset + 0 * sizeof(__m256i)]);
-  out[8] = loadu(&inputs[0][block_offset + 1 * sizeof(__m256i)]);
-  out[9] = loadu(&inputs[1][block_offset + 1 * sizeof(__m256i)]);
-  out[10] = loadu(&inputs[2][block_offset + 1 * sizeof(__m256i)]);
-  out[11] = loadu(&inputs[3][block_offset + 1 * sizeof(__m256i)]);
-  out[12] = loadu(&inputs[4][block_offset + 1 * sizeof(__m256i)]);
-  out[13] = loadu(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
-  out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
-  out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
-  for (size_t i = 0; i < 8; ++i) {
-    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
-  }
-  transpose_vecs(&out[0]);
-  transpose_vecs(&out[8]);
-}
-
-INLINE void load_counters(uint64_t counter, bool increment_counter,
-                          __m256i *out_lo, __m256i *out_hi) {
-  const __m256i mask = _mm256_set1_epi32(-(int32_t)increment_counter);
-  const __m256i add0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
-  const __m256i add1 = _mm256_and_si256(mask, add0);
-  __m256i l = _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), add1);
-  __m256i carry = _mm256_cmpgt_epi32(_mm256_xor_si256(add1, _mm256_set1_epi32(0x80000000)), 
-                                     _mm256_xor_si256(   l, _mm256_set1_epi32(0x80000000)));
-  __m256i h = _mm256_sub_epi32(_mm256_set1_epi32((int32_t)(counter >> 32)), carry);
-  *out_lo = l;
-  *out_hi = h;
-}
-
-static
-void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
-                       const uint32_t key[8], uint64_t counter,
-                       bool increment_counter, uint8_t flags,
-                       uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-  __m256i h_vecs[8] = {
-      set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
-      set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
-  };
-  __m256i counter_low_vec, counter_high_vec;
-  load_counters(counter, increment_counter, &counter_low_vec,
-                &counter_high_vec);
-  uint8_t block_flags = flags | flags_start;
-
-  for (size_t block = 0; block < blocks; block++) {
-    if (block + 1 == blocks) {
-      block_flags |= flags_end;
-    }
-    __m256i block_len_vec = set1(BLAKE3_BLOCK_LEN);
-    __m256i block_flags_vec = set1(block_flags);
-    __m256i msg_vecs[16];
-    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
-
-    __m256i v[16] = {
-        h_vecs[0],       h_vecs[1],        h_vecs[2],     h_vecs[3],
-        h_vecs[4],       h_vecs[5],        h_vecs[6],     h_vecs[7],
-        set1(IV[0]),     set1(IV[1]),      set1(IV[2]),   set1(IV[3]),
-        counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
-    };
-    round_fn(v, msg_vecs, 0);
-    round_fn(v, msg_vecs, 1);
-    round_fn(v, msg_vecs, 2);
-    round_fn(v, msg_vecs, 3);
-    round_fn(v, msg_vecs, 4);
-    round_fn(v, msg_vecs, 5);
-    round_fn(v, msg_vecs, 6);
-    h_vecs[0] = xorv(v[0], v[8]);
-    h_vecs[1] = xorv(v[1], v[9]);
-    h_vecs[2] = xorv(v[2], v[10]);
-    h_vecs[3] = xorv(v[3], v[11]);
-    h_vecs[4] = xorv(v[4], v[12]);
-    h_vecs[5] = xorv(v[5], v[13]);
-    h_vecs[6] = xorv(v[6], v[14]);
-    h_vecs[7] = xorv(v[7], v[15]);
-
-    block_flags = flags;
-  }
-
-  transpose_vecs(h_vecs);
-  storeu(h_vecs[0], &out[0 * sizeof(__m256i)]);
-  storeu(h_vecs[1], &out[1 * sizeof(__m256i)]);
-  storeu(h_vecs[2], &out[2 * sizeof(__m256i)]);
-  storeu(h_vecs[3], &out[3 * sizeof(__m256i)]);
-  storeu(h_vecs[4], &out[4 * sizeof(__m256i)]);
-  storeu(h_vecs[5], &out[5 * sizeof(__m256i)]);
-  storeu(h_vecs[6], &out[6 * sizeof(__m256i)]);
-  storeu(h_vecs[7], &out[7 * sizeof(__m256i)]);
-}
-
-#if !defined(BLAKE3_NO_SSE41)
-void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
-                            size_t blocks, const uint32_t key[8],
-                            uint64_t counter, bool increment_counter,
-                            uint8_t flags, uint8_t flags_start,
-                            uint8_t flags_end, uint8_t *out);
-#else
-void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
-                               size_t blocks, const uint32_t key[8],
-                               uint64_t counter, bool increment_counter,
-                               uint8_t flags, uint8_t flags_start,
-                               uint8_t flags_end, uint8_t *out);
-#endif
-
-void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
-                           size_t blocks, const uint32_t key[8],
-                           uint64_t counter, bool increment_counter,
-                           uint8_t flags, uint8_t flags_start,
-                           uint8_t flags_end, uint8_t *out) {
-  while (num_inputs >= DEGREE) {
-    blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
-                      flags_start, flags_end, out);
-    if (increment_counter) {
-      counter += DEGREE;
-    }
-    inputs += DEGREE;
-    num_inputs -= DEGREE;
-    out = &out[DEGREE * BLAKE3_OUT_LEN];
-  }
-#if !defined(BLAKE3_NO_SSE41)
-  blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
-                         increment_counter, flags, flags_start, flags_end, out);
-#else
-  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
-                            increment_counter, flags, flags_start, flags_end,
-                            out);
-#endif
-}
diff --git a/windows/src/blake3/blake3_avx512.c b/windows/src/blake3/blake3_avx512.c
deleted file mode 100644
index 9c35b08..0000000
--- a/windows/src/blake3/blake3_avx512.c
+++ /dev/null
@@ -1,1207 +0,0 @@
-#include "blake3_impl.h"
-
-#include <immintrin.h>
-
-#define _mm_shuffle_ps2(a, b, c)                                               \
-  (_mm_castps_si128(                                                           \
-      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
-
-INLINE __m128i loadu_128(const uint8_t src[16]) {
-  return _mm_loadu_si128((const __m128i *)src);
-}
-
-INLINE __m256i loadu_256(const uint8_t src[32]) {
-  return _mm256_loadu_si256((const __m256i *)src);
-}
-
-INLINE __m512i loadu_512(const uint8_t src[64]) {
-  return _mm512_loadu_si512((const __m512i *)src);
-}
-
-INLINE void storeu_128(__m128i src, uint8_t dest[16]) {
-  _mm_storeu_si128((__m128i *)dest, src);
-}
-
-INLINE void storeu_256(__m256i src, uint8_t dest[16]) {
-  _mm256_storeu_si256((__m256i *)dest, src);
-}
-
-INLINE __m128i add_128(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
-
-INLINE __m256i add_256(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); }
-
-INLINE __m512i add_512(__m512i a, __m512i b) { return _mm512_add_epi32(a, b); }
-
-INLINE __m128i xor_128(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
-
-INLINE __m256i xor_256(__m256i a, __m256i b) { return _mm256_xor_si256(a, b); }
-
-INLINE __m512i xor_512(__m512i a, __m512i b) { return _mm512_xor_si512(a, b); }
-
-INLINE __m128i set1_128(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
-
-INLINE __m256i set1_256(uint32_t x) { return _mm256_set1_epi32((int32_t)x); }
-
-INLINE __m512i set1_512(uint32_t x) { return _mm512_set1_epi32((int32_t)x); }
-
-INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
-  return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
-}
-
-INLINE __m128i rot16_128(__m128i x) { return _mm_ror_epi32(x, 16); }
-
-INLINE __m256i rot16_256(__m256i x) { return _mm256_ror_epi32(x, 16); }
-
-INLINE __m512i rot16_512(__m512i x) { return _mm512_ror_epi32(x, 16); }
-
-INLINE __m128i rot12_128(__m128i x) { return _mm_ror_epi32(x, 12); }
-
-INLINE __m256i rot12_256(__m256i x) { return _mm256_ror_epi32(x, 12); }
-
-INLINE __m512i rot12_512(__m512i x) { return _mm512_ror_epi32(x, 12); }
-
-INLINE __m128i rot8_128(__m128i x) { return _mm_ror_epi32(x, 8); }
-
-INLINE __m256i rot8_256(__m256i x) { return _mm256_ror_epi32(x, 8); }
-
-INLINE __m512i rot8_512(__m512i x) { return _mm512_ror_epi32(x, 8); }
-
-INLINE __m128i rot7_128(__m128i x) { return _mm_ror_epi32(x, 7); }
-
-INLINE __m256i rot7_256(__m256i x) { return _mm256_ror_epi32(x, 7); }
-
-INLINE __m512i rot7_512(__m512i x) { return _mm512_ror_epi32(x, 7); }
-
-/*
- * ----------------------------------------------------------------------------
- * compress_avx512
- * ----------------------------------------------------------------------------
- */
-
-INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
-               __m128i m) {
-  *row0 = add_128(add_128(*row0, m), *row1);
-  *row3 = xor_128(*row3, *row0);
-  *row3 = rot16_128(*row3);
-  *row2 = add_128(*row2, *row3);
-  *row1 = xor_128(*row1, *row2);
-  *row1 = rot12_128(*row1);
-}
-
-INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
-               __m128i m) {
-  *row0 = add_128(add_128(*row0, m), *row1);
-  *row3 = xor_128(*row3, *row0);
-  *row3 = rot8_128(*row3);
-  *row2 = add_128(*row2, *row3);
-  *row1 = xor_128(*row1, *row2);
-  *row1 = rot7_128(*row1);
-}
-
-// Note the optimization here of leaving row1 as the unrotated row, rather than
-// row0. All the message loads below are adjusted to compensate for this. See
-// discussion at https://github.com/sneves/blake2-avx2/pull/4
-INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
-  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
-  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
-  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
-}
-
-INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
-  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
-  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
-  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
-}
-
-INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
-                         const uint8_t block[BLAKE3_BLOCK_LEN],
-                         uint8_t block_len, uint64_t counter, uint8_t flags) {
-  rows[0] = loadu_128((uint8_t *)&cv[0]);
-  rows[1] = loadu_128((uint8_t *)&cv[4]);
-  rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
-  rows[3] = set4(counter_low(counter), counter_high(counter),
-                 (uint32_t)block_len, (uint32_t)flags);
-
-  __m128i m0 = loadu_128(&block[sizeof(__m128i) * 0]);
-  __m128i m1 = loadu_128(&block[sizeof(__m128i) * 1]);
-  __m128i m2 = loadu_128(&block[sizeof(__m128i) * 2]);
-  __m128i m3 = loadu_128(&block[sizeof(__m128i) * 3]);
-
-  __m128i t0, t1, t2, t3, tt;
-
-  // Round 1. The first round permutes the message words from the original
-  // input order, into the groups that get mixed in parallel.
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); //  6  4  2  0
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); //  7  5  3  1
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10  8
-  t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3));   // 12 10  8 14
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11  9
-  t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3));   // 13 11  9 15
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 2. This round and all following rounds apply a fixed permutation
-  // to the message words from the round before.
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 3
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 4
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 5
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 6
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 7
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-}
-
-void blake3_compress_xof_avx512(const uint32_t cv[8],
-                                const uint8_t block[BLAKE3_BLOCK_LEN],
-                                uint8_t block_len, uint64_t counter,
-                                uint8_t flags, uint8_t out[64]) {
-  __m128i rows[4];
-  compress_pre(rows, cv, block, block_len, counter, flags);
-  storeu_128(xor_128(rows[0], rows[2]), &out[0]);
-  storeu_128(xor_128(rows[1], rows[3]), &out[16]);
-  storeu_128(xor_128(rows[2], loadu_128((uint8_t *)&cv[0])), &out[32]);
-  storeu_128(xor_128(rows[3], loadu_128((uint8_t *)&cv[4])), &out[48]);
-}
-
-void blake3_compress_in_place_avx512(uint32_t cv[8],
-                                     const uint8_t block[BLAKE3_BLOCK_LEN],
-                                     uint8_t block_len, uint64_t counter,
-                                     uint8_t flags) {
-  __m128i rows[4];
-  compress_pre(rows, cv, block, block_len, counter, flags);
-  storeu_128(xor_128(rows[0], rows[2]), (uint8_t *)&cv[0]);
-  storeu_128(xor_128(rows[1], rows[3]), (uint8_t *)&cv[4]);
-}
-
-/*
- * ----------------------------------------------------------------------------
- * hash4_avx512
- * ----------------------------------------------------------------------------
- */
-
-INLINE void round_fn4(__m128i v[16], __m128i m[16], size_t r) {
-  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
-  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
-  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
-  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
-  v[0] = add_128(v[0], v[4]);
-  v[1] = add_128(v[1], v[5]);
-  v[2] = add_128(v[2], v[6]);
-  v[3] = add_128(v[3], v[7]);
-  v[12] = xor_128(v[12], v[0]);
-  v[13] = xor_128(v[13], v[1]);
-  v[14] = xor_128(v[14], v[2]);
-  v[15] = xor_128(v[15], v[3]);
-  v[12] = rot16_128(v[12]);
-  v[13] = rot16_128(v[13]);
-  v[14] = rot16_128(v[14]);
-  v[15] = rot16_128(v[15]);
-  v[8] = add_128(v[8], v[12]);
-  v[9] = add_128(v[9], v[13]);
-  v[10] = add_128(v[10], v[14]);
-  v[11] = add_128(v[11], v[15]);
-  v[4] = xor_128(v[4], v[8]);
-  v[5] = xor_128(v[5], v[9]);
-  v[6] = xor_128(v[6], v[10]);
-  v[7] = xor_128(v[7], v[11]);
-  v[4] = rot12_128(v[4]);
-  v[5] = rot12_128(v[5]);
-  v[6] = rot12_128(v[6]);
-  v[7] = rot12_128(v[7]);
-  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
-  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
-  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
-  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
-  v[0] = add_128(v[0], v[4]);
-  v[1] = add_128(v[1], v[5]);
-  v[2] = add_128(v[2], v[6]);
-  v[3] = add_128(v[3], v[7]);
-  v[12] = xor_128(v[12], v[0]);
-  v[13] = xor_128(v[13], v[1]);
-  v[14] = xor_128(v[14], v[2]);
-  v[15] = xor_128(v[15], v[3]);
-  v[12] = rot8_128(v[12]);
-  v[13] = rot8_128(v[13]);
-  v[14] = rot8_128(v[14]);
-  v[15] = rot8_128(v[15]);
-  v[8] = add_128(v[8], v[12]);
-  v[9] = add_128(v[9], v[13]);
-  v[10] = add_128(v[10], v[14]);
-  v[11] = add_128(v[11], v[15]);
-  v[4] = xor_128(v[4], v[8]);
-  v[5] = xor_128(v[5], v[9]);
-  v[6] = xor_128(v[6], v[10]);
-  v[7] = xor_128(v[7], v[11]);
-  v[4] = rot7_128(v[4]);
-  v[5] = rot7_128(v[5]);
-  v[6] = rot7_128(v[6]);
-  v[7] = rot7_128(v[7]);
-
-  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
-  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
-  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
-  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
-  v[0] = add_128(v[0], v[5]);
-  v[1] = add_128(v[1], v[6]);
-  v[2] = add_128(v[2], v[7]);
-  v[3] = add_128(v[3], v[4]);
-  v[15] = xor_128(v[15], v[0]);
-  v[12] = xor_128(v[12], v[1]);
-  v[13] = xor_128(v[13], v[2]);
-  v[14] = xor_128(v[14], v[3]);
-  v[15] = rot16_128(v[15]);
-  v[12] = rot16_128(v[12]);
-  v[13] = rot16_128(v[13]);
-  v[14] = rot16_128(v[14]);
-  v[10] = add_128(v[10], v[15]);
-  v[11] = add_128(v[11], v[12]);
-  v[8] = add_128(v[8], v[13]);
-  v[9] = add_128(v[9], v[14]);
-  v[5] = xor_128(v[5], v[10]);
-  v[6] = xor_128(v[6], v[11]);
-  v[7] = xor_128(v[7], v[8]);
-  v[4] = xor_128(v[4], v[9]);
-  v[5] = rot12_128(v[5]);
-  v[6] = rot12_128(v[6]);
-  v[7] = rot12_128(v[7]);
-  v[4] = rot12_128(v[4]);
-  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
-  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
-  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
-  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
-  v[0] = add_128(v[0], v[5]);
-  v[1] = add_128(v[1], v[6]);
-  v[2] = add_128(v[2], v[7]);
-  v[3] = add_128(v[3], v[4]);
-  v[15] = xor_128(v[15], v[0]);
-  v[12] = xor_128(v[12], v[1]);
-  v[13] = xor_128(v[13], v[2]);
-  v[14] = xor_128(v[14], v[3]);
-  v[15] = rot8_128(v[15]);
-  v[12] = rot8_128(v[12]);
-  v[13] = rot8_128(v[13]);
-  v[14] = rot8_128(v[14]);
-  v[10] = add_128(v[10], v[15]);
-  v[11] = add_128(v[11], v[12]);
-  v[8] = add_128(v[8], v[13]);
-  v[9] = add_128(v[9], v[14]);
-  v[5] = xor_128(v[5], v[10]);
-  v[6] = xor_128(v[6], v[11]);
-  v[7] = xor_128(v[7], v[8]);
-  v[4] = xor_128(v[4], v[9]);
-  v[5] = rot7_128(v[5]);
-  v[6] = rot7_128(v[6]);
-  v[7] = rot7_128(v[7]);
-  v[4] = rot7_128(v[4]);
-}
-
-INLINE void transpose_vecs_128(__m128i vecs[4]) {
-  // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is
-  // 22/33. Note that this doesn't split the vector into two lanes, as the
-  // AVX2 counterparts do.
-  __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
-  __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
-  __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
-  __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
-
-  // Interleave 64-bit lanes.
-  __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
-  __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
-  __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
-  __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
-
-  vecs[0] = abcd_0;
-  vecs[1] = abcd_1;
-  vecs[2] = abcd_2;
-  vecs[3] = abcd_3;
-}
-
-INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
-                                size_t block_offset, __m128i out[16]) {
-  out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
-  out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
-  out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
-  out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
-  out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
-  out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
-  out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
-  out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
-  out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
-  out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
-  out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
-  out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
-  out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
-  out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
-  out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
-  out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
-  for (size_t i = 0; i < 4; ++i) {
-    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
-  }
-  transpose_vecs_128(&out[0]);
-  transpose_vecs_128(&out[4]);
-  transpose_vecs_128(&out[8]);
-  transpose_vecs_128(&out[12]);
-}
-
-INLINE void load_counters4(uint64_t counter, bool increment_counter,
-                           __m128i *out_lo, __m128i *out_hi) {
-  uint64_t mask = (increment_counter ? ~0 : 0);
-  __m256i mask_vec = _mm256_set1_epi64x(mask);
-  __m256i deltas = _mm256_setr_epi64x(0, 1, 2, 3);
-  deltas = _mm256_and_si256(mask_vec, deltas);
-  __m256i counters =
-      _mm256_add_epi64(_mm256_set1_epi64x((int64_t)counter), deltas);
-  *out_lo = _mm256_cvtepi64_epi32(counters);
-  *out_hi = _mm256_cvtepi64_epi32(_mm256_srli_epi64(counters, 32));
-}
-
-static
-void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks,
-                         const uint32_t key[8], uint64_t counter,
-                         bool increment_counter, uint8_t flags,
-                         uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-  __m128i h_vecs[8] = {
-      set1_128(key[0]), set1_128(key[1]), set1_128(key[2]), set1_128(key[3]),
-      set1_128(key[4]), set1_128(key[5]), set1_128(key[6]), set1_128(key[7]),
-  };
-  __m128i counter_low_vec, counter_high_vec;
-  load_counters4(counter, increment_counter, &counter_low_vec,
-                 &counter_high_vec);
-  uint8_t block_flags = flags | flags_start;
-
-  for (size_t block = 0; block < blocks; block++) {
-    if (block + 1 == blocks) {
-      block_flags |= flags_end;
-    }
-    __m128i block_len_vec = set1_128(BLAKE3_BLOCK_LEN);
-    __m128i block_flags_vec = set1_128(block_flags);
-    __m128i msg_vecs[16];
-    transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
-
-    __m128i v[16] = {
-        h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
-        h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
-        set1_128(IV[0]), set1_128(IV[1]),  set1_128(IV[2]), set1_128(IV[3]),
-        counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
-    };
-    round_fn4(v, msg_vecs, 0);
-    round_fn4(v, msg_vecs, 1);
-    round_fn4(v, msg_vecs, 2);
-    round_fn4(v, msg_vecs, 3);
-    round_fn4(v, msg_vecs, 4);
-    round_fn4(v, msg_vecs, 5);
-    round_fn4(v, msg_vecs, 6);
-    h_vecs[0] = xor_128(v[0], v[8]);
-    h_vecs[1] = xor_128(v[1], v[9]);
-    h_vecs[2] = xor_128(v[2], v[10]);
-    h_vecs[3] = xor_128(v[3], v[11]);
-    h_vecs[4] = xor_128(v[4], v[12]);
-    h_vecs[5] = xor_128(v[5], v[13]);
-    h_vecs[6] = xor_128(v[6], v[14]);
-    h_vecs[7] = xor_128(v[7], v[15]);
-
-    block_flags = flags;
-  }
-
-  transpose_vecs_128(&h_vecs[0]);
-  transpose_vecs_128(&h_vecs[4]);
-  // The first four vecs now contain the first half of each output, and the
-  // second four vecs contain the second half of each output.
-  storeu_128(h_vecs[0], &out[0 * sizeof(__m128i)]);
-  storeu_128(h_vecs[4], &out[1 * sizeof(__m128i)]);
-  storeu_128(h_vecs[1], &out[2 * sizeof(__m128i)]);
-  storeu_128(h_vecs[5], &out[3 * sizeof(__m128i)]);
-  storeu_128(h_vecs[2], &out[4 * sizeof(__m128i)]);
-  storeu_128(h_vecs[6], &out[5 * sizeof(__m128i)]);
-  storeu_128(h_vecs[3], &out[6 * sizeof(__m128i)]);
-  storeu_128(h_vecs[7], &out[7 * sizeof(__m128i)]);
-}
-
-/*
- * ----------------------------------------------------------------------------
- * hash8_avx512
- * ----------------------------------------------------------------------------
- */
-
-INLINE void round_fn8(__m256i v[16], __m256i m[16], size_t r) {
-  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
-  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
-  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
-  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
-  v[0] = add_256(v[0], v[4]);
-  v[1] = add_256(v[1], v[5]);
-  v[2] = add_256(v[2], v[6]);
-  v[3] = add_256(v[3], v[7]);
-  v[12] = xor_256(v[12], v[0]);
-  v[13] = xor_256(v[13], v[1]);
-  v[14] = xor_256(v[14], v[2]);
-  v[15] = xor_256(v[15], v[3]);
-  v[12] = rot16_256(v[12]);
-  v[13] = rot16_256(v[13]);
-  v[14] = rot16_256(v[14]);
-  v[15] = rot16_256(v[15]);
-  v[8] = add_256(v[8], v[12]);
-  v[9] = add_256(v[9], v[13]);
-  v[10] = add_256(v[10], v[14]);
-  v[11] = add_256(v[11], v[15]);
-  v[4] = xor_256(v[4], v[8]);
-  v[5] = xor_256(v[5], v[9]);
-  v[6] = xor_256(v[6], v[10]);
-  v[7] = xor_256(v[7], v[11]);
-  v[4] = rot12_256(v[4]);
-  v[5] = rot12_256(v[5]);
-  v[6] = rot12_256(v[6]);
-  v[7] = rot12_256(v[7]);
-  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
-  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
-  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
-  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
-  v[0] = add_256(v[0], v[4]);
-  v[1] = add_256(v[1], v[5]);
-  v[2] = add_256(v[2], v[6]);
-  v[3] = add_256(v[3], v[7]);
-  v[12] = xor_256(v[12], v[0]);
-  v[13] = xor_256(v[13], v[1]);
-  v[14] = xor_256(v[14], v[2]);
-  v[15] = xor_256(v[15], v[3]);
-  v[12] = rot8_256(v[12]);
-  v[13] = rot8_256(v[13]);
-  v[14] = rot8_256(v[14]);
-  v[15] = rot8_256(v[15]);
-  v[8] = add_256(v[8], v[12]);
-  v[9] = add_256(v[9], v[13]);
-  v[10] = add_256(v[10], v[14]);
-  v[11] = add_256(v[11], v[15]);
-  v[4] = xor_256(v[4], v[8]);
-  v[5] = xor_256(v[5], v[9]);
-  v[6] = xor_256(v[6], v[10]);
-  v[7] = xor_256(v[7], v[11]);
-  v[4] = rot7_256(v[4]);
-  v[5] = rot7_256(v[5]);
-  v[6] = rot7_256(v[6]);
-  v[7] = rot7_256(v[7]);
-
-  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
-  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
-  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
-  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
-  v[0] = add_256(v[0], v[5]);
-  v[1] = add_256(v[1], v[6]);
-  v[2] = add_256(v[2], v[7]);
-  v[3] = add_256(v[3], v[4]);
-  v[15] = xor_256(v[15], v[0]);
-  v[12] = xor_256(v[12], v[1]);
-  v[13] = xor_256(v[13], v[2]);
-  v[14] = xor_256(v[14], v[3]);
-  v[15] = rot16_256(v[15]);
-  v[12] = rot16_256(v[12]);
-  v[13] = rot16_256(v[13]);
-  v[14] = rot16_256(v[14]);
-  v[10] = add_256(v[10], v[15]);
-  v[11] = add_256(v[11], v[12]);
-  v[8] = add_256(v[8], v[13]);
-  v[9] = add_256(v[9], v[14]);
-  v[5] = xor_256(v[5], v[10]);
-  v[6] = xor_256(v[6], v[11]);
-  v[7] = xor_256(v[7], v[8]);
-  v[4] = xor_256(v[4], v[9]);
-  v[5] = rot12_256(v[5]);
-  v[6] = rot12_256(v[6]);
-  v[7] = rot12_256(v[7]);
-  v[4] = rot12_256(v[4]);
-  v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
-  v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
-  v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
-  v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
-  v[0] = add_256(v[0], v[5]);
-  v[1] = add_256(v[1], v[6]);
-  v[2] = add_256(v[2], v[7]);
-  v[3] = add_256(v[3], v[4]);
-  v[15] = xor_256(v[15], v[0]);
-  v[12] = xor_256(v[12], v[1]);
-  v[13] = xor_256(v[13], v[2]);
-  v[14] = xor_256(v[14], v[3]);
-  v[15] = rot8_256(v[15]);
-  v[12] = rot8_256(v[12]);
-  v[13] = rot8_256(v[13]);
-  v[14] = rot8_256(v[14]);
-  v[10] = add_256(v[10], v[15]);
-  v[11] = add_256(v[11], v[12]);
-  v[8] = add_256(v[8], v[13]);
-  v[9] = add_256(v[9], v[14]);
-  v[5] = xor_256(v[5], v[10]);
-  v[6] = xor_256(v[6], v[11]);
-  v[7] = xor_256(v[7], v[8]);
-  v[4] = xor_256(v[4], v[9]);
-  v[5] = rot7_256(v[5]);
-  v[6] = rot7_256(v[6]);
-  v[7] = rot7_256(v[7]);
-  v[4] = rot7_256(v[4]);
-}
-
-INLINE void transpose_vecs_256(__m256i vecs[8]) {
-  // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high
-  // is 22/33/66/77.
-  __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]);
-  __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]);
-  __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]);
-  __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]);
-  __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]);
-  __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]);
-  __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
-  __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
-
-  // Interleave 64-bit lates. The low unpack is lanes 00/22 and the high is
-  // 11/33.
-  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
-  __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
-  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);
-  __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367);
-  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);
-  __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145);
-  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);
-  __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367);
-
-  // Interleave 128-bit lanes.
-  vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20);
-  vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20);
-  vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20);
-  vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20);
-  vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31);
-  vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31);
-  vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31);
-  vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31);
-}
-
-INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
-                                size_t block_offset, __m256i out[16]) {
-  out[0] = loadu_256(&inputs[0][block_offset + 0 * sizeof(__m256i)]);
-  out[1] = loadu_256(&inputs[1][block_offset + 0 * sizeof(__m256i)]);
-  out[2] = loadu_256(&inputs[2][block_offset + 0 * sizeof(__m256i)]);
-  out[3] = loadu_256(&inputs[3][block_offset + 0 * sizeof(__m256i)]);
-  out[4] = loadu_256(&inputs[4][block_offset + 0 * sizeof(__m256i)]);
-  out[5] = loadu_256(&inputs[5][block_offset + 0 * sizeof(__m256i)]);
-  out[6] = loadu_256(&inputs[6][block_offset + 0 * sizeof(__m256i)]);
-  out[7] = loadu_256(&inputs[7][block_offset + 0 * sizeof(__m256i)]);
-  out[8] = loadu_256(&inputs[0][block_offset + 1 * sizeof(__m256i)]);
-  out[9] = loadu_256(&inputs[1][block_offset + 1 * sizeof(__m256i)]);
-  out[10] = loadu_256(&inputs[2][block_offset + 1 * sizeof(__m256i)]);
-  out[11] = loadu_256(&inputs[3][block_offset + 1 * sizeof(__m256i)]);
-  out[12] = loadu_256(&inputs[4][block_offset + 1 * sizeof(__m256i)]);
-  out[13] = loadu_256(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
-  out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
-  out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
-  for (size_t i = 0; i < 8; ++i) {
-    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
-  }
-  transpose_vecs_256(&out[0]);
-  transpose_vecs_256(&out[8]);
-}
-
-INLINE void load_counters8(uint64_t counter, bool increment_counter,
-                           __m256i *out_lo, __m256i *out_hi) {
-  uint64_t mask = (increment_counter ? ~0 : 0);
-  __m512i mask_vec = _mm512_set1_epi64(mask);
-  __m512i deltas = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
-  deltas = _mm512_and_si512(mask_vec, deltas);
-  __m512i counters =
-      _mm512_add_epi64(_mm512_set1_epi64((int64_t)counter), deltas);
-  *out_lo = _mm512_cvtepi64_epi32(counters);
-  *out_hi = _mm512_cvtepi64_epi32(_mm512_srli_epi64(counters, 32));
-}
-
-static
-void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks,
-                         const uint32_t key[8], uint64_t counter,
-                         bool increment_counter, uint8_t flags,
-                         uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-  __m256i h_vecs[8] = {
-      set1_256(key[0]), set1_256(key[1]), set1_256(key[2]), set1_256(key[3]),
-      set1_256(key[4]), set1_256(key[5]), set1_256(key[6]), set1_256(key[7]),
-  };
-  __m256i counter_low_vec, counter_high_vec;
-  load_counters8(counter, increment_counter, &counter_low_vec,
-                 &counter_high_vec);
-  uint8_t block_flags = flags | flags_start;
-
-  for (size_t block = 0; block < blocks; block++) {
-    if (block + 1 == blocks) {
-      block_flags |= flags_end;
-    }
-    __m256i block_len_vec = set1_256(BLAKE3_BLOCK_LEN);
-    __m256i block_flags_vec = set1_256(block_flags);
-    __m256i msg_vecs[16];
-    transpose_msg_vecs8(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
-
-    __m256i v[16] = {
-        h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
-        h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
-        set1_256(IV[0]), set1_256(IV[1]),  set1_256(IV[2]), set1_256(IV[3]),
-        counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
-    };
-    round_fn8(v, msg_vecs, 0);
-    round_fn8(v, msg_vecs, 1);
-    round_fn8(v, msg_vecs, 2);
-    round_fn8(v, msg_vecs, 3);
-    round_fn8(v, msg_vecs, 4);
-    round_fn8(v, msg_vecs, 5);
-    round_fn8(v, msg_vecs, 6);
-    h_vecs[0] = xor_256(v[0], v[8]);
-    h_vecs[1] = xor_256(v[1], v[9]);
-    h_vecs[2] = xor_256(v[2], v[10]);
-    h_vecs[3] = xor_256(v[3], v[11]);
-    h_vecs[4] = xor_256(v[4], v[12]);
-    h_vecs[5] = xor_256(v[5], v[13]);
-    h_vecs[6] = xor_256(v[6], v[14]);
-    h_vecs[7] = xor_256(v[7], v[15]);
-
-    block_flags = flags;
-  }
-
-  transpose_vecs_256(h_vecs);
-  storeu_256(h_vecs[0], &out[0 * sizeof(__m256i)]);
-  storeu_256(h_vecs[1], &out[1 * sizeof(__m256i)]);
-  storeu_256(h_vecs[2], &out[2 * sizeof(__m256i)]);
-  storeu_256(h_vecs[3], &out[3 * sizeof(__m256i)]);
-  storeu_256(h_vecs[4], &out[4 * sizeof(__m256i)]);
-  storeu_256(h_vecs[5], &out[5 * sizeof(__m256i)]);
-  storeu_256(h_vecs[6], &out[6 * sizeof(__m256i)]);
-  storeu_256(h_vecs[7], &out[7 * sizeof(__m256i)]);
-}
-
-/*
- * ----------------------------------------------------------------------------
- * hash16_avx512
- * ----------------------------------------------------------------------------
- */
-
-INLINE void round_fn16(__m512i v[16], __m512i m[16], size_t r) {
-  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
-  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
-  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
-  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
-  v[0] = add_512(v[0], v[4]);
-  v[1] = add_512(v[1], v[5]);
-  v[2] = add_512(v[2], v[6]);
-  v[3] = add_512(v[3], v[7]);
-  v[12] = xor_512(v[12], v[0]);
-  v[13] = xor_512(v[13], v[1]);
-  v[14] = xor_512(v[14], v[2]);
-  v[15] = xor_512(v[15], v[3]);
-  v[12] = rot16_512(v[12]);
-  v[13] = rot16_512(v[13]);
-  v[14] = rot16_512(v[14]);
-  v[15] = rot16_512(v[15]);
-  v[8] = add_512(v[8], v[12]);
-  v[9] = add_512(v[9], v[13]);
-  v[10] = add_512(v[10], v[14]);
-  v[11] = add_512(v[11], v[15]);
-  v[4] = xor_512(v[4], v[8]);
-  v[5] = xor_512(v[5], v[9]);
-  v[6] = xor_512(v[6], v[10]);
-  v[7] = xor_512(v[7], v[11]);
-  v[4] = rot12_512(v[4]);
-  v[5] = rot12_512(v[5]);
-  v[6] = rot12_512(v[6]);
-  v[7] = rot12_512(v[7]);
-  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
-  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
-  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
-  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
-  v[0] = add_512(v[0], v[4]);
-  v[1] = add_512(v[1], v[5]);
-  v[2] = add_512(v[2], v[6]);
-  v[3] = add_512(v[3], v[7]);
-  v[12] = xor_512(v[12], v[0]);
-  v[13] = xor_512(v[13], v[1]);
-  v[14] = xor_512(v[14], v[2]);
-  v[15] = xor_512(v[15], v[3]);
-  v[12] = rot8_512(v[12]);
-  v[13] = rot8_512(v[13]);
-  v[14] = rot8_512(v[14]);
-  v[15] = rot8_512(v[15]);
-  v[8] = add_512(v[8], v[12]);
-  v[9] = add_512(v[9], v[13]);
-  v[10] = add_512(v[10], v[14]);
-  v[11] = add_512(v[11], v[15]);
-  v[4] = xor_512(v[4], v[8]);
-  v[5] = xor_512(v[5], v[9]);
-  v[6] = xor_512(v[6], v[10]);
-  v[7] = xor_512(v[7], v[11]);
-  v[4] = rot7_512(v[4]);
-  v[5] = rot7_512(v[5]);
-  v[6] = rot7_512(v[6]);
-  v[7] = rot7_512(v[7]);
-
-  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
-  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
-  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
-  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
-  v[0] = add_512(v[0], v[5]);
-  v[1] = add_512(v[1], v[6]);
-  v[2] = add_512(v[2], v[7]);
-  v[3] = add_512(v[3], v[4]);
-  v[15] = xor_512(v[15], v[0]);
-  v[12] = xor_512(v[12], v[1]);
-  v[13] = xor_512(v[13], v[2]);
-  v[14] = xor_512(v[14], v[3]);
-  v[15] = rot16_512(v[15]);
-  v[12] = rot16_512(v[12]);
-  v[13] = rot16_512(v[13]);
-  v[14] = rot16_512(v[14]);
-  v[10] = add_512(v[10], v[15]);
-  v[11] = add_512(v[11], v[12]);
-  v[8] = add_512(v[8], v[13]);
-  v[9] = add_512(v[9], v[14]);
-  v[5] = xor_512(v[5], v[10]);
-  v[6] = xor_512(v[6], v[11]);
-  v[7] = xor_512(v[7], v[8]);
-  v[4] = xor_512(v[4], v[9]);
-  v[5] = rot12_512(v[5]);
-  v[6] = rot12_512(v[6]);
-  v[7] = rot12_512(v[7]);
-  v[4] = rot12_512(v[4]);
-  v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
-  v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
-  v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
-  v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
-  v[0] = add_512(v[0], v[5]);
-  v[1] = add_512(v[1], v[6]);
-  v[2] = add_512(v[2], v[7]);
-  v[3] = add_512(v[3], v[4]);
-  v[15] = xor_512(v[15], v[0]);
-  v[12] = xor_512(v[12], v[1]);
-  v[13] = xor_512(v[13], v[2]);
-  v[14] = xor_512(v[14], v[3]);
-  v[15] = rot8_512(v[15]);
-  v[12] = rot8_512(v[12]);
-  v[13] = rot8_512(v[13]);
-  v[14] = rot8_512(v[14]);
-  v[10] = add_512(v[10], v[15]);
-  v[11] = add_512(v[11], v[12]);
-  v[8] = add_512(v[8], v[13]);
-  v[9] = add_512(v[9], v[14]);
-  v[5] = xor_512(v[5], v[10]);
-  v[6] = xor_512(v[6], v[11]);
-  v[7] = xor_512(v[7], v[8]);
-  v[4] = xor_512(v[4], v[9]);
-  v[5] = rot7_512(v[5]);
-  v[6] = rot7_512(v[6]);
-  v[7] = rot7_512(v[7]);
-  v[4] = rot7_512(v[4]);
-}
-
-// 0b10001000, or lanes a0/a2/b0/b2 in little-endian order
-#define LO_IMM8 0x88
-
-INLINE __m512i unpack_lo_128(__m512i a, __m512i b) {
-  return _mm512_shuffle_i32x4(a, b, LO_IMM8);
-}
-
-// 0b11011101, or lanes a1/a3/b1/b3 in little-endian order
-#define HI_IMM8 0xdd
-
-INLINE __m512i unpack_hi_128(__m512i a, __m512i b) {
-  return _mm512_shuffle_i32x4(a, b, HI_IMM8);
-}
-
-INLINE void transpose_vecs_512(__m512i vecs[16]) {
-  // Interleave 32-bit lanes. The _0 unpack is lanes
-  // 0/0/1/1/4/4/5/5/8/8/9/9/12/12/13/13, and the _2 unpack is lanes
-  // 2/2/3/3/6/6/7/7/10/10/11/11/14/14/15/15.
-  __m512i ab_0 = _mm512_unpacklo_epi32(vecs[0], vecs[1]);
-  __m512i ab_2 = _mm512_unpackhi_epi32(vecs[0], vecs[1]);
-  __m512i cd_0 = _mm512_unpacklo_epi32(vecs[2], vecs[3]);
-  __m512i cd_2 = _mm512_unpackhi_epi32(vecs[2], vecs[3]);
-  __m512i ef_0 = _mm512_unpacklo_epi32(vecs[4], vecs[5]);
-  __m512i ef_2 = _mm512_unpackhi_epi32(vecs[4], vecs[5]);
-  __m512i gh_0 = _mm512_unpacklo_epi32(vecs[6], vecs[7]);
-  __m512i gh_2 = _mm512_unpackhi_epi32(vecs[6], vecs[7]);
-  __m512i ij_0 = _mm512_unpacklo_epi32(vecs[8], vecs[9]);
-  __m512i ij_2 = _mm512_unpackhi_epi32(vecs[8], vecs[9]);
-  __m512i kl_0 = _mm512_unpacklo_epi32(vecs[10], vecs[11]);
-  __m512i kl_2 = _mm512_unpackhi_epi32(vecs[10], vecs[11]);
-  __m512i mn_0 = _mm512_unpacklo_epi32(vecs[12], vecs[13]);
-  __m512i mn_2 = _mm512_unpackhi_epi32(vecs[12], vecs[13]);
-  __m512i op_0 = _mm512_unpacklo_epi32(vecs[14], vecs[15]);
-  __m512i op_2 = _mm512_unpackhi_epi32(vecs[14], vecs[15]);
-
-  // Interleave 64-bit lates. The _0 unpack is lanes
-  // 0/0/0/0/4/4/4/4/8/8/8/8/12/12/12/12, the _1 unpack is lanes
-  // 1/1/1/1/5/5/5/5/9/9/9/9/13/13/13/13, the _2 unpack is lanes
-  // 2/2/2/2/6/6/6/6/10/10/10/10/14/14/14/14, and the _3 unpack is lanes
-  // 3/3/3/3/7/7/7/7/11/11/11/11/15/15/15/15.
-  __m512i abcd_0 = _mm512_unpacklo_epi64(ab_0, cd_0);
-  __m512i abcd_1 = _mm512_unpackhi_epi64(ab_0, cd_0);
-  __m512i abcd_2 = _mm512_unpacklo_epi64(ab_2, cd_2);
-  __m512i abcd_3 = _mm512_unpackhi_epi64(ab_2, cd_2);
-  __m512i efgh_0 = _mm512_unpacklo_epi64(ef_0, gh_0);
-  __m512i efgh_1 = _mm512_unpackhi_epi64(ef_0, gh_0);
-  __m512i efgh_2 = _mm512_unpacklo_epi64(ef_2, gh_2);
-  __m512i efgh_3 = _mm512_unpackhi_epi64(ef_2, gh_2);
-  __m512i ijkl_0 = _mm512_unpacklo_epi64(ij_0, kl_0);
-  __m512i ijkl_1 = _mm512_unpackhi_epi64(ij_0, kl_0);
-  __m512i ijkl_2 = _mm512_unpacklo_epi64(ij_2, kl_2);
-  __m512i ijkl_3 = _mm512_unpackhi_epi64(ij_2, kl_2);
-  __m512i mnop_0 = _mm512_unpacklo_epi64(mn_0, op_0);
-  __m512i mnop_1 = _mm512_unpackhi_epi64(mn_0, op_0);
-  __m512i mnop_2 = _mm512_unpacklo_epi64(mn_2, op_2);
-  __m512i mnop_3 = _mm512_unpackhi_epi64(mn_2, op_2);
-
-  // Interleave 128-bit lanes. The _0 unpack is
-  // 0/0/0/0/8/8/8/8/0/0/0/0/8/8/8/8, the _1 unpack is
-  // 1/1/1/1/9/9/9/9/1/1/1/1/9/9/9/9, and so on.
-  __m512i abcdefgh_0 = unpack_lo_128(abcd_0, efgh_0);
-  __m512i abcdefgh_1 = unpack_lo_128(abcd_1, efgh_1);
-  __m512i abcdefgh_2 = unpack_lo_128(abcd_2, efgh_2);
-  __m512i abcdefgh_3 = unpack_lo_128(abcd_3, efgh_3);
-  __m512i abcdefgh_4 = unpack_hi_128(abcd_0, efgh_0);
-  __m512i abcdefgh_5 = unpack_hi_128(abcd_1, efgh_1);
-  __m512i abcdefgh_6 = unpack_hi_128(abcd_2, efgh_2);
-  __m512i abcdefgh_7 = unpack_hi_128(abcd_3, efgh_3);
-  __m512i ijklmnop_0 = unpack_lo_128(ijkl_0, mnop_0);
-  __m512i ijklmnop_1 = unpack_lo_128(ijkl_1, mnop_1);
-  __m512i ijklmnop_2 = unpack_lo_128(ijkl_2, mnop_2);
-  __m512i ijklmnop_3 = unpack_lo_128(ijkl_3, mnop_3);
-  __m512i ijklmnop_4 = unpack_hi_128(ijkl_0, mnop_0);
-  __m512i ijklmnop_5 = unpack_hi_128(ijkl_1, mnop_1);
-  __m512i ijklmnop_6 = unpack_hi_128(ijkl_2, mnop_2);
-  __m512i ijklmnop_7 = unpack_hi_128(ijkl_3, mnop_3);
-
-  // Interleave 128-bit lanes again for the final outputs.
-  vecs[0] = unpack_lo_128(abcdefgh_0, ijklmnop_0);
-  vecs[1] = unpack_lo_128(abcdefgh_1, ijklmnop_1);
-  vecs[2] = unpack_lo_128(abcdefgh_2, ijklmnop_2);
-  vecs[3] = unpack_lo_128(abcdefgh_3, ijklmnop_3);
-  vecs[4] = unpack_lo_128(abcdefgh_4, ijklmnop_4);
-  vecs[5] = unpack_lo_128(abcdefgh_5, ijklmnop_5);
-  vecs[6] = unpack_lo_128(abcdefgh_6, ijklmnop_6);
-  vecs[7] = unpack_lo_128(abcdefgh_7, ijklmnop_7);
-  vecs[8] = unpack_hi_128(abcdefgh_0, ijklmnop_0);
-  vecs[9] = unpack_hi_128(abcdefgh_1, ijklmnop_1);
-  vecs[10] = unpack_hi_128(abcdefgh_2, ijklmnop_2);
-  vecs[11] = unpack_hi_128(abcdefgh_3, ijklmnop_3);
-  vecs[12] = unpack_hi_128(abcdefgh_4, ijklmnop_4);
-  vecs[13] = unpack_hi_128(abcdefgh_5, ijklmnop_5);
-  vecs[14] = unpack_hi_128(abcdefgh_6, ijklmnop_6);
-  vecs[15] = unpack_hi_128(abcdefgh_7, ijklmnop_7);
-}
-
-INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
-                                 size_t block_offset, __m512i out[16]) {
-  out[0] = loadu_512(&inputs[0][block_offset]);
-  out[1] = loadu_512(&inputs[1][block_offset]);
-  out[2] = loadu_512(&inputs[2][block_offset]);
-  out[3] = loadu_512(&inputs[3][block_offset]);
-  out[4] = loadu_512(&inputs[4][block_offset]);
-  out[5] = loadu_512(&inputs[5][block_offset]);
-  out[6] = loadu_512(&inputs[6][block_offset]);
-  out[7] = loadu_512(&inputs[7][block_offset]);
-  out[8] = loadu_512(&inputs[8][block_offset]);
-  out[9] = loadu_512(&inputs[9][block_offset]);
-  out[10] = loadu_512(&inputs[10][block_offset]);
-  out[11] = loadu_512(&inputs[11][block_offset]);
-  out[12] = loadu_512(&inputs[12][block_offset]);
-  out[13] = loadu_512(&inputs[13][block_offset]);
-  out[14] = loadu_512(&inputs[14][block_offset]);
-  out[15] = loadu_512(&inputs[15][block_offset]);
-  for (size_t i = 0; i < 16; ++i) {
-    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
-  }
-  transpose_vecs_512(out);
-}
-
-INLINE void load_counters16(uint64_t counter, bool increment_counter,
-                            __m512i *out_lo, __m512i *out_hi) {
-  const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter);
-  const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  const __m512i add1 = _mm512_and_si512(mask, add0);
-  __m512i l = _mm512_add_epi32(_mm512_set1_epi32((int32_t)counter), add1);
-  __mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT);
-  __m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32((int32_t)(counter >> 32)), carry, _mm512_set1_epi32((int32_t)(counter >> 32)), _mm512_set1_epi32(1));
-  *out_lo = l;
-  *out_hi = h;
-}
-
-static
-void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks,
-                          const uint32_t key[8], uint64_t counter,
-                          bool increment_counter, uint8_t flags,
-                          uint8_t flags_start, uint8_t flags_end,
-                          uint8_t *out) {
-  __m512i h_vecs[8] = {
-      set1_512(key[0]), set1_512(key[1]), set1_512(key[2]), set1_512(key[3]),
-      set1_512(key[4]), set1_512(key[5]), set1_512(key[6]), set1_512(key[7]),
-  };
-  __m512i counter_low_vec, counter_high_vec;
-  load_counters16(counter, increment_counter, &counter_low_vec,
-                  &counter_high_vec);
-  uint8_t block_flags = flags | flags_start;
-
-  for (size_t block = 0; block < blocks; block++) {
-    if (block + 1 == blocks) {
-      block_flags |= flags_end;
-    }
-    __m512i block_len_vec = set1_512(BLAKE3_BLOCK_LEN);
-    __m512i block_flags_vec = set1_512(block_flags);
-    __m512i msg_vecs[16];
-    transpose_msg_vecs16(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
-
-    __m512i v[16] = {
-        h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
-        h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
-        set1_512(IV[0]), set1_512(IV[1]),  set1_512(IV[2]), set1_512(IV[3]),
-        counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
-    };
-    round_fn16(v, msg_vecs, 0);
-    round_fn16(v, msg_vecs, 1);
-    round_fn16(v, msg_vecs, 2);
-    round_fn16(v, msg_vecs, 3);
-    round_fn16(v, msg_vecs, 4);
-    round_fn16(v, msg_vecs, 5);
-    round_fn16(v, msg_vecs, 6);
-    h_vecs[0] = xor_512(v[0], v[8]);
-    h_vecs[1] = xor_512(v[1], v[9]);
-    h_vecs[2] = xor_512(v[2], v[10]);
-    h_vecs[3] = xor_512(v[3], v[11]);
-    h_vecs[4] = xor_512(v[4], v[12]);
-    h_vecs[5] = xor_512(v[5], v[13]);
-    h_vecs[6] = xor_512(v[6], v[14]);
-    h_vecs[7] = xor_512(v[7], v[15]);
-
-    block_flags = flags;
-  }
-
-  // transpose_vecs_512 operates on a 16x16 matrix of words, but we only have 8
-  // state vectors. Pad the matrix with zeros. After transposition, store the
-  // lower half of each vector.
-  __m512i padded[16] = {
-      h_vecs[0],   h_vecs[1],   h_vecs[2],   h_vecs[3],
-      h_vecs[4],   h_vecs[5],   h_vecs[6],   h_vecs[7],
-      set1_512(0), set1_512(0), set1_512(0), set1_512(0),
-      set1_512(0), set1_512(0), set1_512(0), set1_512(0),
-  };
-  transpose_vecs_512(padded);
-  _mm256_mask_storeu_epi32(&out[0 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[0]));
-  _mm256_mask_storeu_epi32(&out[1 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[1]));
-  _mm256_mask_storeu_epi32(&out[2 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[2]));
-  _mm256_mask_storeu_epi32(&out[3 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[3]));
-  _mm256_mask_storeu_epi32(&out[4 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[4]));
-  _mm256_mask_storeu_epi32(&out[5 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[5]));
-  _mm256_mask_storeu_epi32(&out[6 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[6]));
-  _mm256_mask_storeu_epi32(&out[7 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[7]));
-  _mm256_mask_storeu_epi32(&out[8 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[8]));
-  _mm256_mask_storeu_epi32(&out[9 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[9]));
-  _mm256_mask_storeu_epi32(&out[10 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[10]));
-  _mm256_mask_storeu_epi32(&out[11 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[11]));
-  _mm256_mask_storeu_epi32(&out[12 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[12]));
-  _mm256_mask_storeu_epi32(&out[13 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[13]));
-  _mm256_mask_storeu_epi32(&out[14 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[14]));
-  _mm256_mask_storeu_epi32(&out[15 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[15]));
-}
-
-/*
- * ----------------------------------------------------------------------------
- * hash_many_avx512
- * ----------------------------------------------------------------------------
- */
-
-INLINE void hash_one_avx512(const uint8_t *input, size_t blocks,
-                            const uint32_t key[8], uint64_t counter,
-                            uint8_t flags, uint8_t flags_start,
-                            uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
-  uint32_t cv[8];
-  memcpy(cv, key, BLAKE3_KEY_LEN);
-  uint8_t block_flags = flags | flags_start;
-  while (blocks > 0) {
-    if (blocks == 1) {
-      block_flags |= flags_end;
-    }
-    blake3_compress_in_place_avx512(cv, input, BLAKE3_BLOCK_LEN, counter,
-                                    block_flags);
-    input = &input[BLAKE3_BLOCK_LEN];
-    blocks -= 1;
-    block_flags = flags;
-  }
-  memcpy(out, cv, BLAKE3_OUT_LEN);
-}
-
-void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
-                             size_t blocks, const uint32_t key[8],
-                             uint64_t counter, bool increment_counter,
-                             uint8_t flags, uint8_t flags_start,
-                             uint8_t flags_end, uint8_t *out) {
-  while (num_inputs >= 16) {
-    blake3_hash16_avx512(inputs, blocks, key, counter, increment_counter, flags,
-                         flags_start, flags_end, out);
-    if (increment_counter) {
-      counter += 16;
-    }
-    inputs += 16;
-    num_inputs -= 16;
-    out = &out[16 * BLAKE3_OUT_LEN];
-  }
-  while (num_inputs >= 8) {
-    blake3_hash8_avx512(inputs, blocks, key, counter, increment_counter, flags,
-                        flags_start, flags_end, out);
-    if (increment_counter) {
-      counter += 8;
-    }
-    inputs += 8;
-    num_inputs -= 8;
-    out = &out[8 * BLAKE3_OUT_LEN];
-  }
-  while (num_inputs >= 4) {
-    blake3_hash4_avx512(inputs, blocks, key, counter, increment_counter, flags,
-                        flags_start, flags_end, out);
-    if (increment_counter) {
-      counter += 4;
-    }
-    inputs += 4;
-    num_inputs -= 4;
-    out = &out[4 * BLAKE3_OUT_LEN];
-  }
-  while (num_inputs > 0) {
-    hash_one_avx512(inputs[0], blocks, key, counter, flags, flags_start,
-                    flags_end, out);
-    if (increment_counter) {
-      counter += 1;
-    }
-    inputs += 1;
-    num_inputs -= 1;
-    out = &out[BLAKE3_OUT_LEN];
-  }
-}
diff --git a/windows/src/blake3/blake3_dispatch.c b/windows/src/blake3/blake3_dispatch.c
deleted file mode 100644
index b498058..0000000
--- a/windows/src/blake3/blake3_dispatch.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include "blake3_impl.h"
-
-#if defined(IS_X86)
-#if defined(_MSC_VER)
-#include <intrin.h>
-#elif defined(__GNUC__)
-#include <immintrin.h>
-#else
-#error "Unimplemented!"
-#endif
-#endif
-
-#define MAYBE_UNUSED(x) (void)((x))
-
-#if defined(IS_X86)
-static uint64_t xgetbv() {
-#if defined(_MSC_VER)
-  return _xgetbv(0);
-#else
-  uint32_t eax = 0, edx = 0;
-  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
-  return ((uint64_t)edx << 32) | eax;
-#endif
-}
-
-static void cpuid(uint32_t out[4], uint32_t id) {
-#if defined(_MSC_VER)
-  __cpuid((int *)out, id);
-#elif defined(__i386__) || defined(_M_IX86)
-  __asm__ __volatile__("movl %%ebx, %1\n"
-                       "cpuid\n"
-                       "xchgl %1, %%ebx\n"
-                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
-                       : "a"(id));
-#else
-  __asm__ __volatile__("cpuid\n"
-                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
-                       : "a"(id));
-#endif
-}
-
-static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
-#if defined(_MSC_VER)
-  __cpuidex((int *)out, id, sid);
-#elif defined(__i386__) || defined(_M_IX86)
-  __asm__ __volatile__("movl %%ebx, %1\n"
-                       "cpuid\n"
-                       "xchgl %1, %%ebx\n"
-                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
-                       : "a"(id), "c"(sid));
-#else
-  __asm__ __volatile__("cpuid\n"
-                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
-                       : "a"(id), "c"(sid));
-#endif
-}
-
-#endif
-
-enum cpu_feature {
-  SSE2 = 1 << 0,
-  SSSE3 = 1 << 1,
-  SSE41 = 1 << 2,
-  AVX = 1 << 3,
-  AVX2 = 1 << 4,
-  AVX512F = 1 << 5,
-  AVX512VL = 1 << 6,
-  /* ... */
-  UNDEFINED = 1 << 30
-};
-
-#if !defined(BLAKE3_TESTING)
-static /* Allow the variable to be controlled manually for testing */
-#endif
-    enum cpu_feature g_cpu_features = UNDEFINED;
-
-#if !defined(BLAKE3_TESTING)
-static
-#endif
-    enum cpu_feature
-    get_cpu_features() {
-
-  if (g_cpu_features != UNDEFINED) {
-    return g_cpu_features;
-  } else {
-#if defined(IS_X86)
-    uint32_t regs[4] = {0};
-    uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
-    (void)edx;
-    enum cpu_feature features = 0;
-    cpuid(regs, 0);
-    const int max_id = *eax;
-    cpuid(regs, 1);
-#if defined(__amd64__) || defined(_M_X64)
-    features |= SSE2;
-#else
-    if (*edx & (1UL << 26))
-      features |= SSE2;
-#endif
-    if (*ecx & (1UL << 0))
-      features |= SSSE3;
-    if (*ecx & (1UL << 19))
-      features |= SSE41;
-
-    if (*ecx & (1UL << 27)) { // OSXSAVE
-      const uint64_t mask = xgetbv();
-      if ((mask & 6) == 6) { // SSE and AVX states
-        if (*ecx & (1UL << 28))
-          features |= AVX;
-        if (max_id >= 7) {
-          cpuidex(regs, 7, 0);
-          if (*ebx & (1UL << 5))
-            features |= AVX2;
-          if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
-            if (*ebx & (1UL << 31))
-              features |= AVX512VL;
-            if (*ebx & (1UL << 16))
-              features |= AVX512F;
-          }
-        }
-      }
-    }
-    g_cpu_features = features;
-    return features;
-#else
-    /* How to detect NEON? */
-    return 0;
-#endif
-  }
-}
-
-void blake3_compress_in_place(uint32_t cv[8],
-                              const uint8_t block[BLAKE3_BLOCK_LEN],
-                              uint8_t block_len, uint64_t counter,
-                              uint8_t flags) {
-#if defined(IS_X86)
-  const enum cpu_feature features = get_cpu_features();
-  MAYBE_UNUSED(features);
-#if !defined(BLAKE3_NO_AVX512)
-  if (features & AVX512VL) {
-    blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE41)
-  if (features & SSE41) {
-    blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE2)
-  if (features & SSE2) {
-    blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
-    return;
-  }
-#endif
-#endif
-  blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
-}
-
-void blake3_compress_xof(const uint32_t cv[8],
-                         const uint8_t block[BLAKE3_BLOCK_LEN],
-                         uint8_t block_len, uint64_t counter, uint8_t flags,
-                         uint8_t out[64]) {
-#if defined(IS_X86)
-  const enum cpu_feature features = get_cpu_features();
-  MAYBE_UNUSED(features);
-#if !defined(BLAKE3_NO_AVX512)
-  if (features & AVX512VL) {
-    blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE41)
-  if (features & SSE41) {
-    blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE2)
-  if (features & SSE2) {
-    blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
-    return;
-  }
-#endif
-#endif
-  blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
-}
-
-void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
-                      size_t blocks, const uint32_t key[8], uint64_t counter,
-                      bool increment_counter, uint8_t flags,
-                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-#if defined(IS_X86)
-  const enum cpu_feature features = get_cpu_features();
-  MAYBE_UNUSED(features);
-#if !defined(BLAKE3_NO_AVX512)
-  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
-    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
-                            increment_counter, flags, flags_start, flags_end,
-                            out);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_AVX2)
-  if (features & AVX2) {
-    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
-                          increment_counter, flags, flags_start, flags_end,
-                          out);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE41)
-  if (features & SSE41) {
-    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
-                           increment_counter, flags, flags_start, flags_end,
-                           out);
-    return;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE2)
-  if (features & SSE2) {
-    blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
-                          increment_counter, flags, flags_start, flags_end,
-                          out);
-    return;
-  }
-#endif
-#endif
-
-#if BLAKE3_USE_NEON == 1
-  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
-                        increment_counter, flags, flags_start, flags_end, out);
-  return;
-#endif
-
-  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
-                            increment_counter, flags, flags_start, flags_end,
-                            out);
-}
-
-// The dynamically detected SIMD degree of the current platform.
-size_t blake3_simd_degree(void) {
-#if defined(IS_X86)
-  const enum cpu_feature features = get_cpu_features();
-  MAYBE_UNUSED(features);
-#if !defined(BLAKE3_NO_AVX512)
-  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
-    return 16;
-  }
-#endif
-#if !defined(BLAKE3_NO_AVX2)
-  if (features & AVX2) {
-    return 8;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE41)
-  if (features & SSE41) {
-    return 4;
-  }
-#endif
-#if !defined(BLAKE3_NO_SSE2)
-  if (features & SSE2) {
-    return 4;
-  }
-#endif
-#endif
-#if BLAKE3_USE_NEON == 1
-  return 4;
-#endif
-  return 1;
-}
diff --git a/windows/src/blake3/blake3_impl.h b/windows/src/blake3/blake3_impl.h
deleted file mode 100644
index cc5672f..0000000
--- a/windows/src/blake3/blake3_impl.h
+++ /dev/null
@@ -1,282 +0,0 @@
-#ifndef BLAKE3_IMPL_H
-#define BLAKE3_IMPL_H
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "blake3.h"
-
-// internal flags
-enum blake3_flags {
-  CHUNK_START         = 1 << 0,
-  CHUNK_END           = 1 << 1,
-  PARENT              = 1 << 2,
-  ROOT                = 1 << 3,
-  KEYED_HASH          = 1 << 4,
-  DERIVE_KEY_CONTEXT  = 1 << 5,
-  DERIVE_KEY_MATERIAL = 1 << 6,
-};
-
-// This C implementation tries to support recent versions of GCC, Clang, and
-// MSVC.
-#if defined(_MSC_VER)
-#define INLINE static __forceinline
-#else
-#define INLINE static inline __attribute__((always_inline))
-#endif
-
-#if defined(__x86_64__) || defined(_M_X64) 
-#define IS_X86
-#define IS_X86_64
-#endif
-
-#if defined(__i386__) || defined(_M_IX86)
-#define IS_X86
-#define IS_X86_32
-#endif
-
-#if defined(__aarch64__) || defined(_M_ARM64)
-#define IS_AARCH64
-#endif
-
-#if defined(IS_X86)
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-#include <immintrin.h>
-#endif
-
-#if !defined(BLAKE3_USE_NEON) 
-  // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
-  #if defined(IS_AARCH64)
-    #define BLAKE3_USE_NEON 1
-  #else
-    #define BLAKE3_USE_NEON 0
-  #endif
-#endif
-
-#if defined(IS_X86)
-#define MAX_SIMD_DEGREE 16
-#elif BLAKE3_USE_NEON == 1
-#define MAX_SIMD_DEGREE 4
-#else
-#define MAX_SIMD_DEGREE 1
-#endif
-
-// There are some places where we want a static size that's equal to the
-// MAX_SIMD_DEGREE, but also at least 2.
-#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
-
-static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
-                               0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
-                               0x1F83D9ABUL, 0x5BE0CD19UL};
-
-static const uint8_t MSG_SCHEDULE[7][16] = {
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
-    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
-    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
-    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
-    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
-    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
-};
-
-/* Find index of the highest set bit */
-/* x is assumed to be nonzero.       */
-static unsigned int highest_one(uint64_t x) {
-#if defined(__GNUC__) || defined(__clang__)
-  return 63 ^ __builtin_clzll(x);
-#elif defined(_MSC_VER) && defined(IS_X86_64)
-  unsigned long index;
-  _BitScanReverse64(&index, x);
-  return index;
-#elif defined(_MSC_VER) && defined(IS_X86_32)
-  if(x >> 32) {
-    unsigned long index;
-    _BitScanReverse(&index, (unsigned long)(x >> 32));
-    return 32 + index;
-  } else {
-    unsigned long index;
-    _BitScanReverse(&index, (unsigned long)x);
-    return index;
-  }
-#else
-  unsigned int c = 0;
-  if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
-  if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
-  if(x & 0x000000000000ff00ULL) { x >>=  8; c +=  8; }
-  if(x & 0x00000000000000f0ULL) { x >>=  4; c +=  4; }
-  if(x & 0x000000000000000cULL) { x >>=  2; c +=  2; }
-  if(x & 0x0000000000000002ULL) {           c +=  1; }
-  return c;
-#endif
-}
-
-// Count the number of 1 bits.
-INLINE unsigned int popcnt(uint64_t x) {
-#if defined(__GNUC__) || defined(__clang__)
-  return __builtin_popcountll(x);
-#else
-  unsigned int count = 0;
-  while (x != 0) {
-    count += 1;
-    x &= x - 1;
-  }
-  return count;
-#endif
-}
-
-// Largest power of two less than or equal to x. As a special case, returns 1
-// when x is 0. 
-INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
-  return 1ULL << highest_one(x | 1);
-}
-
-INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
-
-INLINE uint32_t counter_high(uint64_t counter) {
-  return (uint32_t)(counter >> 32);
-}
-
-INLINE uint32_t load32(const void *src) {
-  const uint8_t *p = (const uint8_t *)src;
-  return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
-         ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
-}
-
-INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
-                           uint32_t key_words[8]) {
-  key_words[0] = load32(&key[0 * 4]);
-  key_words[1] = load32(&key[1 * 4]);
-  key_words[2] = load32(&key[2 * 4]);
-  key_words[3] = load32(&key[3 * 4]);
-  key_words[4] = load32(&key[4 * 4]);
-  key_words[5] = load32(&key[5 * 4]);
-  key_words[6] = load32(&key[6 * 4]);
-  key_words[7] = load32(&key[7 * 4]);
-}
-
-INLINE void store32(void *dst, uint32_t w) {
-  uint8_t *p = (uint8_t *)dst;
-  p[0] = (uint8_t)(w >> 0);
-  p[1] = (uint8_t)(w >> 8);
-  p[2] = (uint8_t)(w >> 16);
-  p[3] = (uint8_t)(w >> 24);
-}
-
-INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
-  store32(&bytes_out[0 * 4], cv_words[0]);
-  store32(&bytes_out[1 * 4], cv_words[1]);
-  store32(&bytes_out[2 * 4], cv_words[2]);
-  store32(&bytes_out[3 * 4], cv_words[3]);
-  store32(&bytes_out[4 * 4], cv_words[4]);
-  store32(&bytes_out[5 * 4], cv_words[5]);
-  store32(&bytes_out[6 * 4], cv_words[6]);
-  store32(&bytes_out[7 * 4], cv_words[7]);
-}
-
-void blake3_compress_in_place(uint32_t cv[8],
-                              const uint8_t block[BLAKE3_BLOCK_LEN],
-                              uint8_t block_len, uint64_t counter,
-                              uint8_t flags);
-
-void blake3_compress_xof(const uint32_t cv[8],
-                         const uint8_t block[BLAKE3_BLOCK_LEN],
-                         uint8_t block_len, uint64_t counter, uint8_t flags,
-                         uint8_t out[64]);
-
-void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
-                      size_t blocks, const uint32_t key[8], uint64_t counter,
-                      bool increment_counter, uint8_t flags,
-                      uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-size_t blake3_simd_degree(void);
-
-
-// Declarations for implementation-specific functions.
-void blake3_compress_in_place_portable(uint32_t cv[8],
-                                       const uint8_t block[BLAKE3_BLOCK_LEN],
-                                       uint8_t block_len, uint64_t counter,
-                                       uint8_t flags);
-
-void blake3_compress_xof_portable(const uint32_t cv[8],
-                                  const uint8_t block[BLAKE3_BLOCK_LEN],
-                                  uint8_t block_len, uint64_t counter,
-                                  uint8_t flags, uint8_t out[64]);
-
-void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
-                               size_t blocks, const uint32_t key[8],
-                               uint64_t counter, bool increment_counter,
-                               uint8_t flags, uint8_t flags_start,
-                               uint8_t flags_end, uint8_t *out);
-
-#if defined(IS_X86)
-#if !defined(BLAKE3_NO_SSE2)
-void blake3_compress_in_place_sse2(uint32_t cv[8],
-                                   const uint8_t block[BLAKE3_BLOCK_LEN],
-                                   uint8_t block_len, uint64_t counter,
-                                   uint8_t flags);
-void blake3_compress_xof_sse2(const uint32_t cv[8],
-                              const uint8_t block[BLAKE3_BLOCK_LEN],
-                              uint8_t block_len, uint64_t counter,
-                              uint8_t flags, uint8_t out[64]);
-void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
-                           size_t blocks, const uint32_t key[8],
-                           uint64_t counter, bool increment_counter,
-                           uint8_t flags, uint8_t flags_start,
-                           uint8_t flags_end, uint8_t *out);
-#endif
-#if !defined(BLAKE3_NO_SSE41)
-void blake3_compress_in_place_sse41(uint32_t cv[8],
-                                    const uint8_t block[BLAKE3_BLOCK_LEN],
-                                    uint8_t block_len, uint64_t counter,
-                                    uint8_t flags);
-void blake3_compress_xof_sse41(const uint32_t cv[8],
-                               const uint8_t block[BLAKE3_BLOCK_LEN],
-                               uint8_t block_len, uint64_t counter,
-                               uint8_t flags, uint8_t out[64]);
-void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
-                            size_t blocks, const uint32_t key[8],
-                            uint64_t counter, bool increment_counter,
-                            uint8_t flags, uint8_t flags_start,
-                            uint8_t flags_end, uint8_t *out);
-#endif
-#if !defined(BLAKE3_NO_AVX2)
-void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
-                           size_t blocks, const uint32_t key[8],
-                           uint64_t counter, bool increment_counter,
-                           uint8_t flags, uint8_t flags_start,
-                           uint8_t flags_end, uint8_t *out);
-#endif
-#if !defined(BLAKE3_NO_AVX512)
-void blake3_compress_in_place_avx512(uint32_t cv[8],
-                                     const uint8_t block[BLAKE3_BLOCK_LEN],
-                                     uint8_t block_len, uint64_t counter,
-                                     uint8_t flags);
-
-void blake3_compress_xof_avx512(const uint32_t cv[8],
-                                const uint8_t block[BLAKE3_BLOCK_LEN],
-                                uint8_t block_len, uint64_t counter,
-                                uint8_t flags, uint8_t out[64]);
-
-void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
-                             size_t blocks, const uint32_t key[8],
-                             uint64_t counter, bool increment_counter,
-                             uint8_t flags, uint8_t flags_start,
-                             uint8_t flags_end, uint8_t *out);
-#endif
-#endif
-
-#if BLAKE3_USE_NEON == 1
-void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
-                           size_t blocks, const uint32_t key[8],
-                           uint64_t counter, bool increment_counter,
-                           uint8_t flags, uint8_t flags_start,
-                           uint8_t flags_end, uint8_t *out);
-#endif
-
-
-#endif /* BLAKE3_IMPL_H */
diff --git a/windows/src/blake3/blake3_portable.c b/windows/src/blake3/blake3_portable.c
deleted file mode 100644
index 062dd1b..0000000
--- a/windows/src/blake3/blake3_portable.c
+++ /dev/null
@@ -1,160 +0,0 @@
-#include "blake3_impl.h"
-#include <string.h>
-
-INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
-  return (w >> c) | (w << (32 - c));
-}
-
-INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
-              uint32_t x, uint32_t y) {
-  state[a] = state[a] + state[b] + x;
-  state[d] = rotr32(state[d] ^ state[a], 16);
-  state[c] = state[c] + state[d];
-  state[b] = rotr32(state[b] ^ state[c], 12);
-  state[a] = state[a] + state[b] + y;
-  state[d] = rotr32(state[d] ^ state[a], 8);
-  state[c] = state[c] + state[d];
-  state[b] = rotr32(state[b] ^ state[c], 7);
-}
-
-INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
-  // Select the message schedule based on the round.
-  const uint8_t *schedule = MSG_SCHEDULE[round];
-
-  // Mix the columns.
-  g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
-  g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
-  g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
-  g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
-
-  // Mix the rows.
-  g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
-  g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
-  g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
-  g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
-}
-
-INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
-                         const uint8_t block[BLAKE3_BLOCK_LEN],
-                         uint8_t block_len, uint64_t counter, uint8_t flags) {
-  uint32_t block_words[16];
-  block_words[0] = load32(block + 4 * 0);
-  block_words[1] = load32(block + 4 * 1);
-  block_words[2] = load32(block + 4 * 2);
-  block_words[3] = load32(block + 4 * 3);
-  block_words[4] = load32(block + 4 * 4);
-  block_words[5] = load32(block + 4 * 5);
-  block_words[6] = load32(block + 4 * 6);
-  block_words[7] = load32(block + 4 * 7);
-  block_words[8] = load32(block + 4 * 8);
-  block_words[9] = load32(block + 4 * 9);
-  block_words[10] = load32(block + 4 * 10);
-  block_words[11] = load32(block + 4 * 11);
-  block_words[12] = load32(block + 4 * 12);
-  block_words[13] = load32(block + 4 * 13);
-  block_words[14] = load32(block + 4 * 14);
-  block_words[15] = load32(block + 4 * 15);
-
-  state[0] = cv[0];
-  state[1] = cv[1];
-  state[2] = cv[2];
-  state[3] = cv[3];
-  state[4] = cv[4];
-  state[5] = cv[5];
-  state[6] = cv[6];
-  state[7] = cv[7];
-  state[8] = IV[0];
-  state[9] = IV[1];
-  state[10] = IV[2];
-  state[11] = IV[3];
-  state[12] = counter_low(counter);
-  state[13] = counter_high(counter);
-  state[14] = (uint32_t)block_len;
-  state[15] = (uint32_t)flags;
-
-  round_fn(state, &block_words[0], 0);
-  round_fn(state, &block_words[0], 1);
-  round_fn(state, &block_words[0], 2);
-  round_fn(state, &block_words[0], 3);
-  round_fn(state, &block_words[0], 4);
-  round_fn(state, &block_words[0], 5);
-  round_fn(state, &block_words[0], 6);
-}
-
-void blake3_compress_in_place_portable(uint32_t cv[8],
-                                       const uint8_t block[BLAKE3_BLOCK_LEN],
-                                       uint8_t block_len, uint64_t counter,
-                                       uint8_t flags) {
-  uint32_t state[16];
-  compress_pre(state, cv, block, block_len, counter, flags);
-  cv[0] = state[0] ^ state[8];
-  cv[1] = state[1] ^ state[9];
-  cv[2] = state[2] ^ state[10];
-  cv[3] = state[3] ^ state[11];
-  cv[4] = state[4] ^ state[12];
-  cv[5] = state[5] ^ state[13];
-  cv[6] = state[6] ^ state[14];
-  cv[7] = state[7] ^ state[15];
-}
-
-void blake3_compress_xof_portable(const uint32_t cv[8],
-                                  const uint8_t block[BLAKE3_BLOCK_LEN],
-                                  uint8_t block_len, uint64_t counter,
-                                  uint8_t flags, uint8_t out[64]) {
-  uint32_t state[16];
-  compress_pre(state, cv, block, block_len, counter, flags);
-
-  store32(&out[0 * 4], state[0] ^ state[8]);
-  store32(&out[1 * 4], state[1] ^ state[9]);
-  store32(&out[2 * 4], state[2] ^ state[10]);
-  store32(&out[3 * 4], state[3] ^ state[11]);
-  store32(&out[4 * 4], state[4] ^ state[12]);
-  store32(&out[5 * 4], state[5] ^ state[13]);
-  store32(&out[6 * 4], state[6] ^ state[14]);
-  store32(&out[7 * 4], state[7] ^ state[15]);
-  store32(&out[8 * 4], state[8] ^ cv[0]);
-  store32(&out[9 * 4], state[9] ^ cv[1]);
-  store32(&out[10 * 4], state[10] ^ cv[2]);
-  store32(&out[11 * 4], state[11] ^ cv[3]);
-  store32(&out[12 * 4], state[12] ^ cv[4]);
-  store32(&out[13 * 4], state[13] ^ cv[5]);
-  store32(&out[14 * 4], state[14] ^ cv[6]);
-  store32(&out[15 * 4], state[15] ^ cv[7]);
-}
-
-INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
-                              const uint32_t key[8], uint64_t counter,
-                              uint8_t flags, uint8_t flags_start,
-                              uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
-  uint32_t cv[8];
-  memcpy(cv, key, BLAKE3_KEY_LEN);
-  uint8_t block_flags = flags | flags_start;
-  while (blocks > 0) {
-    if (blocks == 1) {
-      block_flags |= flags_end;
-    }
-    blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
-                                      block_flags);
-    input = &input[BLAKE3_BLOCK_LEN];
-    blocks -= 1;
-    block_flags = flags;
-  }
-  store_cv_words(out, cv);
-}
-
-void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
-                               size_t blocks, const uint32_t key[8],
-                               uint64_t counter, bool increment_counter,
-                               uint8_t flags, uint8_t flags_start,
-                               uint8_t flags_end, uint8_t *out) {
-  while (num_inputs > 0) {
-    hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
-                      flags_end, out);
-    if (increment_counter) {
-      counter += 1;
-    }
-    inputs += 1;
-    num_inputs -= 1;
-    out = &out[BLAKE3_OUT_LEN];
-  }
-}
diff --git a/windows/src/blake3/blake3_sse2.c b/windows/src/blake3/blake3_sse2.c
deleted file mode 100644
index f4449ac..0000000
--- a/windows/src/blake3/blake3_sse2.c
+++ /dev/null
@@ -1,566 +0,0 @@
-#include "blake3_impl.h"
-
-#include <immintrin.h>
-
-#define DEGREE 4
-
-#define _mm_shuffle_ps2(a, b, c)                                               \
-  (_mm_castps_si128(                                                           \
-      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
-
-INLINE __m128i loadu(const uint8_t src[16]) {
-  return _mm_loadu_si128((const __m128i *)src);
-}
-
-INLINE void storeu(__m128i src, uint8_t dest[16]) {
-  _mm_storeu_si128((__m128i *)dest, src);
-}
-
-INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
-
-// Note that clang-format doesn't like the name "xor" for some reason.
-INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
-
-INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
-
-INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
-  return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
-}
-
-INLINE __m128i rot16(__m128i x) {
-  return _mm_shufflehi_epi16(_mm_shufflelo_epi16(x, 0xB1), 0xB1);
-}
-
-INLINE __m128i rot12(__m128i x) {
-  return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
-}
-
-INLINE __m128i rot8(__m128i x) {
-  return xorv(_mm_srli_epi32(x, 8), _mm_slli_epi32(x, 32 - 8));
-}
-
-INLINE __m128i rot7(__m128i x) {
-  return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
-}
-
-INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
-               __m128i m) {
-  *row0 = addv(addv(*row0, m), *row1);
-  *row3 = xorv(*row3, *row0);
-  *row3 = rot16(*row3);
-  *row2 = addv(*row2, *row3);
-  *row1 = xorv(*row1, *row2);
-  *row1 = rot12(*row1);
-}
-
-INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
-               __m128i m) {
-  *row0 = addv(addv(*row0, m), *row1);
-  *row3 = xorv(*row3, *row0);
-  *row3 = rot8(*row3);
-  *row2 = addv(*row2, *row3);
-  *row1 = xorv(*row1, *row2);
-  *row1 = rot7(*row1);
-}
-
-// Note the optimization here of leaving row1 as the unrotated row, rather than
-// row0. All the message loads below are adjusted to compensate for this. See
-// discussion at https://github.com/sneves/blake2-avx2/pull/4
-INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
-  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
-  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
-  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
-}
-
-INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
-  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
-  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
-  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
-}
-
-INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) {
-  const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
-  __m128i mask = _mm_set1_epi16(imm8);
-  mask = _mm_and_si128(mask, bits);
-  mask = _mm_cmpeq_epi16(mask, bits);
-  return _mm_or_si128(_mm_and_si128(mask, b), _mm_andnot_si128(mask, a));
-}
-
-INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
-                         const uint8_t block[BLAKE3_BLOCK_LEN],
-                         uint8_t block_len, uint64_t counter, uint8_t flags) {
-  rows[0] = loadu((uint8_t *)&cv[0]);
-  rows[1] = loadu((uint8_t *)&cv[4]);
-  rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
-  rows[3] = set4(counter_low(counter), counter_high(counter),
-                 (uint32_t)block_len, (uint32_t)flags);
-
-  __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
-  __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
-  __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
-  __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
-
-  __m128i t0, t1, t2, t3, tt;
-
-  // Round 1. The first round permutes the message words from the original
-  // input order, into the groups that get mixed in parallel.
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); //  6  4  2  0
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); //  7  5  3  1
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10  8
-  t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3));   // 12 10  8 14
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11  9
-  t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3));   // 13 11  9 15
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 2. This round and all following rounds apply a fixed permutation
-  // to the message words from the round before.
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 3
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 4
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 5
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 6
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 7
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-}
-
-void blake3_compress_in_place_sse2(uint32_t cv[8],
-                                   const uint8_t block[BLAKE3_BLOCK_LEN],
-                                   uint8_t block_len, uint64_t counter,
-                                   uint8_t flags) {
-  __m128i rows[4];
-  compress_pre(rows, cv, block, block_len, counter, flags);
-  storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]);
-  storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]);
-}
-
-void blake3_compress_xof_sse2(const uint32_t cv[8],
-                              const uint8_t block[BLAKE3_BLOCK_LEN],
-                              uint8_t block_len, uint64_t counter,
-                              uint8_t flags, uint8_t out[64]) {
-  __m128i rows[4];
-  compress_pre(rows, cv, block, block_len, counter, flags);
-  storeu(xorv(rows[0], rows[2]), &out[0]);
-  storeu(xorv(rows[1], rows[3]), &out[16]);
-  storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]);
-  storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]);
-}
-
-INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
-  v[0] = addv(v[0], v[4]);
-  v[1] = addv(v[1], v[5]);
-  v[2] = addv(v[2], v[6]);
-  v[3] = addv(v[3], v[7]);
-  v[12] = xorv(v[12], v[0]);
-  v[13] = xorv(v[13], v[1]);
-  v[14] = xorv(v[14], v[2]);
-  v[15] = xorv(v[15], v[3]);
-  v[12] = rot16(v[12]);
-  v[13] = rot16(v[13]);
-  v[14] = rot16(v[14]);
-  v[15] = rot16(v[15]);
-  v[8] = addv(v[8], v[12]);
-  v[9] = addv(v[9], v[13]);
-  v[10] = addv(v[10], v[14]);
-  v[11] = addv(v[11], v[15]);
-  v[4] = xorv(v[4], v[8]);
-  v[5] = xorv(v[5], v[9]);
-  v[6] = xorv(v[6], v[10]);
-  v[7] = xorv(v[7], v[11]);
-  v[4] = rot12(v[4]);
-  v[5] = rot12(v[5]);
-  v[6] = rot12(v[6]);
-  v[7] = rot12(v[7]);
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
-  v[0] = addv(v[0], v[4]);
-  v[1] = addv(v[1], v[5]);
-  v[2] = addv(v[2], v[6]);
-  v[3] = addv(v[3], v[7]);
-  v[12] = xorv(v[12], v[0]);
-  v[13] = xorv(v[13], v[1]);
-  v[14] = xorv(v[14], v[2]);
-  v[15] = xorv(v[15], v[3]);
-  v[12] = rot8(v[12]);
-  v[13] = rot8(v[13]);
-  v[14] = rot8(v[14]);
-  v[15] = rot8(v[15]);
-  v[8] = addv(v[8], v[12]);
-  v[9] = addv(v[9], v[13]);
-  v[10] = addv(v[10], v[14]);
-  v[11] = addv(v[11], v[15]);
-  v[4] = xorv(v[4], v[8]);
-  v[5] = xorv(v[5], v[9]);
-  v[6] = xorv(v[6], v[10]);
-  v[7] = xorv(v[7], v[11]);
-  v[4] = rot7(v[4]);
-  v[5] = rot7(v[5]);
-  v[6] = rot7(v[6]);
-  v[7] = rot7(v[7]);
-
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
-  v[0] = addv(v[0], v[5]);
-  v[1] = addv(v[1], v[6]);
-  v[2] = addv(v[2], v[7]);
-  v[3] = addv(v[3], v[4]);
-  v[15] = xorv(v[15], v[0]);
-  v[12] = xorv(v[12], v[1]);
-  v[13] = xorv(v[13], v[2]);
-  v[14] = xorv(v[14], v[3]);
-  v[15] = rot16(v[15]);
-  v[12] = rot16(v[12]);
-  v[13] = rot16(v[13]);
-  v[14] = rot16(v[14]);
-  v[10] = addv(v[10], v[15]);
-  v[11] = addv(v[11], v[12]);
-  v[8] = addv(v[8], v[13]);
-  v[9] = addv(v[9], v[14]);
-  v[5] = xorv(v[5], v[10]);
-  v[6] = xorv(v[6], v[11]);
-  v[7] = xorv(v[7], v[8]);
-  v[4] = xorv(v[4], v[9]);
-  v[5] = rot12(v[5]);
-  v[6] = rot12(v[6]);
-  v[7] = rot12(v[7]);
-  v[4] = rot12(v[4]);
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
-  v[0] = addv(v[0], v[5]);
-  v[1] = addv(v[1], v[6]);
-  v[2] = addv(v[2], v[7]);
-  v[3] = addv(v[3], v[4]);
-  v[15] = xorv(v[15], v[0]);
-  v[12] = xorv(v[12], v[1]);
-  v[13] = xorv(v[13], v[2]);
-  v[14] = xorv(v[14], v[3]);
-  v[15] = rot8(v[15]);
-  v[12] = rot8(v[12]);
-  v[13] = rot8(v[13]);
-  v[14] = rot8(v[14]);
-  v[10] = addv(v[10], v[15]);
-  v[11] = addv(v[11], v[12]);
-  v[8] = addv(v[8], v[13]);
-  v[9] = addv(v[9], v[14]);
-  v[5] = xorv(v[5], v[10]);
-  v[6] = xorv(v[6], v[11]);
-  v[7] = xorv(v[7], v[8]);
-  v[4] = xorv(v[4], v[9]);
-  v[5] = rot7(v[5]);
-  v[6] = rot7(v[6]);
-  v[7] = rot7(v[7]);
-  v[4] = rot7(v[4]);
-}
-
-INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
-  // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is
-  // 22/33. Note that this doesn't split the vector into two lanes, as the
-  // AVX2 counterparts do.
-  __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
-  __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
-  __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
-  __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
-
-  // Interleave 64-bit lanes.
-  __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
-  __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
-  __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
-  __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
-
-  vecs[0] = abcd_0;
-  vecs[1] = abcd_1;
-  vecs[2] = abcd_2;
-  vecs[3] = abcd_3;
-}
-
-INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
-                               size_t block_offset, __m128i out[16]) {
-  out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
-  out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
-  out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
-  out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
-  out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
-  out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
-  out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
-  out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
-  out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
-  out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
-  out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
-  out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
-  out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
-  out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
-  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
-  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
-  for (size_t i = 0; i < 4; ++i) {
-    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
-  }
-  transpose_vecs(&out[0]);
-  transpose_vecs(&out[4]);
-  transpose_vecs(&out[8]);
-  transpose_vecs(&out[12]);
-}
-
-INLINE void load_counters(uint64_t counter, bool increment_counter,
-                          __m128i *out_lo, __m128i *out_hi) {
-  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
-  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
-  const __m128i add1 = _mm_and_si128(mask, add0);
-  __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
-  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)), 
-                                  _mm_xor_si128(   l, _mm_set1_epi32(0x80000000)));
-  __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
-  *out_lo = l;
-  *out_hi = h;
-}
-
-static
-void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks,
-                       const uint32_t key[8], uint64_t counter,
-                       bool increment_counter, uint8_t flags,
-                       uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-  __m128i h_vecs[8] = {
-      set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
-      set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
-  };
-  __m128i counter_low_vec, counter_high_vec;
-  load_counters(counter, increment_counter, &counter_low_vec,
-                &counter_high_vec);
-  uint8_t block_flags = flags | flags_start;
-
-  for (size_t block = 0; block < blocks; block++) {
-    if (block + 1 == blocks) {
-      block_flags |= flags_end;
-    }
-    __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
-    __m128i block_flags_vec = set1(block_flags);
-    __m128i msg_vecs[16];
-    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
-
-    __m128i v[16] = {
-        h_vecs[0],       h_vecs[1],        h_vecs[2],     h_vecs[3],
-        h_vecs[4],       h_vecs[5],        h_vecs[6],     h_vecs[7],
-        set1(IV[0]),     set1(IV[1]),      set1(IV[2]),   set1(IV[3]),
-        counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
-    };
-    round_fn(v, msg_vecs, 0);
-    round_fn(v, msg_vecs, 1);
-    round_fn(v, msg_vecs, 2);
-    round_fn(v, msg_vecs, 3);
-    round_fn(v, msg_vecs, 4);
-    round_fn(v, msg_vecs, 5);
-    round_fn(v, msg_vecs, 6);
-    h_vecs[0] = xorv(v[0], v[8]);
-    h_vecs[1] = xorv(v[1], v[9]);
-    h_vecs[2] = xorv(v[2], v[10]);
-    h_vecs[3] = xorv(v[3], v[11]);
-    h_vecs[4] = xorv(v[4], v[12]);
-    h_vecs[5] = xorv(v[5], v[13]);
-    h_vecs[6] = xorv(v[6], v[14]);
-    h_vecs[7] = xorv(v[7], v[15]);
-
-    block_flags = flags;
-  }
-
-  transpose_vecs(&h_vecs[0]);
-  transpose_vecs(&h_vecs[4]);
-  // The first four vecs now contain the first half of each output, and the
-  // second four vecs contain the second half of each output.
-  storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
-  storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
-  storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
-  storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
-  storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
-  storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
-  storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
-  storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
-}
-
-INLINE void hash_one_sse2(const uint8_t *input, size_t blocks,
-                          const uint32_t key[8], uint64_t counter,
-                          uint8_t flags, uint8_t flags_start,
-                          uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
-  uint32_t cv[8];
-  memcpy(cv, key, BLAKE3_KEY_LEN);
-  uint8_t block_flags = flags | flags_start;
-  while (blocks > 0) {
-    if (blocks == 1) {
-      block_flags |= flags_end;
-    }
-    blake3_compress_in_place_sse2(cv, input, BLAKE3_BLOCK_LEN, counter,
-                                  block_flags);
-    input = &input[BLAKE3_BLOCK_LEN];
-    blocks -= 1;
-    block_flags = flags;
-  }
-  memcpy(out, cv, BLAKE3_OUT_LEN);
-}
-
-void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
-                           size_t blocks, const uint32_t key[8],
-                           uint64_t counter, bool increment_counter,
-                           uint8_t flags, uint8_t flags_start,
-                           uint8_t flags_end, uint8_t *out) {
-  while (num_inputs >= DEGREE) {
-    blake3_hash4_sse2(inputs, blocks, key, counter, increment_counter, flags,
-                      flags_start, flags_end, out);
-    if (increment_counter) {
-      counter += DEGREE;
-    }
-    inputs += DEGREE;
-    num_inputs -= DEGREE;
-    out = &out[DEGREE * BLAKE3_OUT_LEN];
-  }
-  while (num_inputs > 0) {
-    hash_one_sse2(inputs[0], blocks, key, counter, flags, flags_start,
-                  flags_end, out);
-    if (increment_counter) {
-      counter += 1;
-    }
-    inputs += 1;
-    num_inputs -= 1;
-    out = &out[BLAKE3_OUT_LEN];
-  }
-}
diff --git a/windows/src/blake3/blake3_sse41.c b/windows/src/blake3/blake3_sse41.c
deleted file mode 100644
index 87a8dae..0000000
--- a/windows/src/blake3/blake3_sse41.c
+++ /dev/null
@@ -1,560 +0,0 @@
-#include "blake3_impl.h"
-
-#include <immintrin.h>
-
-#define DEGREE 4
-
-#define _mm_shuffle_ps2(a, b, c)                                               \
-  (_mm_castps_si128(                                                           \
-      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
-
-INLINE __m128i loadu(const uint8_t src[16]) {
-  return _mm_loadu_si128((const __m128i *)src);
-}
-
-INLINE void storeu(__m128i src, uint8_t dest[16]) {
-  _mm_storeu_si128((__m128i *)dest, src);
-}
-
-INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
-
-// Note that clang-format doesn't like the name "xor" for some reason.
-INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
-
-INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
-
-INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
-  return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
-}
-
-INLINE __m128i rot16(__m128i x) {
-  return _mm_shuffle_epi8(
-      x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
-}
-
-INLINE __m128i rot12(__m128i x) {
-  return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
-}
-
-INLINE __m128i rot8(__m128i x) {
-  return _mm_shuffle_epi8(
-      x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
-}
-
-INLINE __m128i rot7(__m128i x) {
-  return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
-}
-
-INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
-               __m128i m) {
-  *row0 = addv(addv(*row0, m), *row1);
-  *row3 = xorv(*row3, *row0);
-  *row3 = rot16(*row3);
-  *row2 = addv(*row2, *row3);
-  *row1 = xorv(*row1, *row2);
-  *row1 = rot12(*row1);
-}
-
-INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
-               __m128i m) {
-  *row0 = addv(addv(*row0, m), *row1);
-  *row3 = xorv(*row3, *row0);
-  *row3 = rot8(*row3);
-  *row2 = addv(*row2, *row3);
-  *row1 = xorv(*row1, *row2);
-  *row1 = rot7(*row1);
-}
-
-// Note the optimization here of leaving row1 as the unrotated row, rather than
-// row0. All the message loads below are adjusted to compensate for this. See
-// discussion at https://github.com/sneves/blake2-avx2/pull/4
-INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
-  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
-  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
-  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
-}
-
-INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
-  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
-  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
-  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
-}
-
-INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
-                         const uint8_t block[BLAKE3_BLOCK_LEN],
-                         uint8_t block_len, uint64_t counter, uint8_t flags) {
-  rows[0] = loadu((uint8_t *)&cv[0]);
-  rows[1] = loadu((uint8_t *)&cv[4]);
-  rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
-  rows[3] = set4(counter_low(counter), counter_high(counter),
-                 (uint32_t)block_len, (uint32_t)flags);
-
-  __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
-  __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
-  __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
-  __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
-
-  __m128i t0, t1, t2, t3, tt;
-
-  // Round 1. The first round permutes the message words from the original
-  // input order, into the groups that get mixed in parallel.
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); //  6  4  2  0
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); //  7  5  3  1
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10  8
-  t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3));   // 12 10  8 14
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11  9
-  t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3));   // 13 11  9 15
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 2. This round and all following rounds apply a fixed permutation
-  // to the message words from the round before.
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 3
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 4
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 5
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 6
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-  m0 = t0;
-  m1 = t1;
-  m2 = t2;
-  m3 = t3;
-
-  // Round 7
-  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
-  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
-  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
-  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
-  t1 = _mm_blend_epi16(tt, t1, 0xCC);
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
-  diagonalize(&rows[0], &rows[2], &rows[3]);
-  t2 = _mm_unpacklo_epi64(m3, m1);
-  tt = _mm_blend_epi16(t2, m2, 0xC0);
-  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
-  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
-  t3 = _mm_unpackhi_epi32(m1, m3);
-  tt = _mm_unpacklo_epi32(m2, t3);
-  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
-  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
-  undiagonalize(&rows[0], &rows[2], &rows[3]);
-}
-
-void blake3_compress_in_place_sse41(uint32_t cv[8],
-                                    const uint8_t block[BLAKE3_BLOCK_LEN],
-                                    uint8_t block_len, uint64_t counter,
-                                    uint8_t flags) {
-  __m128i rows[4];
-  compress_pre(rows, cv, block, block_len, counter, flags);
-  storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]);
-  storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]);
-}
-
-void blake3_compress_xof_sse41(const uint32_t cv[8],
-                               const uint8_t block[BLAKE3_BLOCK_LEN],
-                               uint8_t block_len, uint64_t counter,
-                               uint8_t flags, uint8_t out[64]) {
-  __m128i rows[4];
-  compress_pre(rows, cv, block, block_len, counter, flags);
-  storeu(xorv(rows[0], rows[2]), &out[0]);
-  storeu(xorv(rows[1], rows[3]), &out[16]);
-  storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]);
-  storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]);
-}
-
-INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
-  v[0] = addv(v[0], v[4]);
-  v[1] = addv(v[1], v[5]);
-  v[2] = addv(v[2], v[6]);
-  v[3] = addv(v[3], v[7]);
-  v[12] = xorv(v[12], v[0]);
-  v[13] = xorv(v[13], v[1]);
-  v[14] = xorv(v[14], v[2]);
-  v[15] = xorv(v[15], v[3]);
-  v[12] = rot16(v[12]);
-  v[13] = rot16(v[13]);
-  v[14] = rot16(v[14]);
-  v[15] = rot16(v[15]);
-  v[8] = addv(v[8], v[12]);
-  v[9] = addv(v[9], v[13]);
-  v[10] = addv(v[10], v[14]);
-  v[11] = addv(v[11], v[15]);
-  v[4] = xorv(v[4], v[8]);
-  v[5] = xorv(v[5], v[9]);
-  v[6] = xorv(v[6], v[10]);
-  v[7] = xorv(v[7], v[11]);
-  v[4] = rot12(v[4]);
-  v[5] = rot12(v[5]);
-  v[6] = rot12(v[6]);
-  v[7] = rot12(v[7]);
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
-  v[0] = addv(v[0], v[4]);
-  v[1] = addv(v[1], v[5]);
-  v[2] = addv(v[2], v[6]);
-  v[3] = addv(v[3], v[7]);
-  v[12] = xorv(v[12], v[0]);
-  v[13] = xorv(v[13], v[1]);
-  v[14] = xorv(v[14], v[2]);
-  v[15] = xorv(v[15], v[3]);
-  v[12] = rot8(v[12]);
-  v[13] = rot8(v[13]);
-  v[14] = rot8(v[14]);
-  v[15] = rot8(v[15]);
-  v[8] = addv(v[8], v[12]);
-  v[9] = addv(v[9], v[13]);
-  v[10] = addv(v[10], v[14]);
-  v[11] = addv(v[11], v[15]);
-  v[4] = xorv(v[4], v[8]);
-  v[5] = xorv(v[5], v[9]);
-  v[6] = xorv(v[6], v[10]);
-  v[7] = xorv(v[7], v[11]);
-  v[4] = rot7(v[4]);
-  v[5] = rot7(v[5]);
-  v[6] = rot7(v[6]);
-  v[7] = rot7(v[7]);
-
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
-  v[0] = addv(v[0], v[5]);
-  v[1] = addv(v[1], v[6]);
-  v[2] = addv(v[2], v[7]);
-  v[3] = addv(v[3], v[4]);
-  v[15] = xorv(v[15], v[0]);
-  v[12] = xorv(v[12], v[1]);
-  v[13] = xorv(v[13], v[2]);
-  v[14] = xorv(v[14], v[3]);
-  v[15] = rot16(v[15]);
-  v[12] = rot16(v[12]);
-  v[13] = rot16(v[13]);
-  v[14] = rot16(v[14]);
-  v[10] = addv(v[10], v[15]);
-  v[11] = addv(v[11], v[12]);
-  v[8] = addv(v[8], v[13]);
-  v[9] = addv(v[9], v[14]);
-  v[5] = xorv(v[5], v[10]);
-  v[6] = xorv(v[6], v[11]);
-  v[7] = xorv(v[7], v[8]);
-  v[4] = xorv(v[4], v[9]);
-  v[5] = rot12(v[5]);
-  v[6] = rot12(v[6]);
-  v[7] = rot12(v[7]);
-  v[4] = rot12(v[4]);
-  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
-  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
-  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
-  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
-  v[0] = addv(v[0], v[5]);
-  v[1] = addv(v[1], v[6]);
-  v[2] = addv(v[2], v[7]);
-  v[3] = addv(v[3], v[4]);
-  v[15] = xorv(v[15], v[0]);
-  v[12] = xorv(v[12], v[1]);
-  v[13] = xorv(v[13], v[2]);
-  v[14] = xorv(v[14], v[3]);
-  v[15] = rot8(v[15]);
-  v[12] = rot8(v[12]);
-  v[13] = rot8(v[13]);
-  v[14] = rot8(v[14]);
-  v[10] = addv(v[10], v[15]);
-  v[11] = addv(v[11], v[12]);
-  v[8] = addv(v[8], v[13]);
-  v[9] = addv(v[9], v[14]);
-  v[5] = xorv(v[5], v[10]);
-  v[6] = xorv(v[6], v[11]);
-  v[7] = xorv(v[7], v[8]);
-  v[4] = xorv(v[4], v[9]);
-  v[5] = rot7(v[5]);
-  v[6] = rot7(v[6]);
-  v[7] = rot7(v[7]);
-  v[4] = rot7(v[4]);
-}
-
-INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
-  // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is
-  // 22/33. Note that this doesn't split the vector into two lanes, as the
-  // AVX2 counterparts do.
-  __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
-  __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
-  __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
-  __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
-
-  // Interleave 64-bit lanes.
-  __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
-  __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
-  __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
-  __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
-
-  vecs[0] = abcd_0;
-  vecs[1] = abcd_1;
-  vecs[2] = abcd_2;
-  vecs[3] = abcd_3;
-}
-
-INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
-                               size_t block_offset, __m128i out[16]) {
-  out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
-  out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
-  out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
-  out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
-  out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
-  out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
-  out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
-  out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
-  out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
-  out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
-  out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
-  out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
-  out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
-  out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
-  out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
-  out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
-  for (size_t i = 0; i < 4; ++i) {
-    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
-  }
-  transpose_vecs(&out[0]);
-  transpose_vecs(&out[4]);
-  transpose_vecs(&out[8]);
-  transpose_vecs(&out[12]);
-}
-
-INLINE void load_counters(uint64_t counter, bool increment_counter,
-                          __m128i *out_lo, __m128i *out_hi) {
-  const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
-  const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
-  const __m128i add1 = _mm_and_si128(mask, add0);
-  __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
-  __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)), 
-                                  _mm_xor_si128(   l, _mm_set1_epi32(0x80000000)));
-  __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
-  *out_lo = l;
-  *out_hi = h;
-}
-
-static
-void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
-                        const uint32_t key[8], uint64_t counter,
-                        bool increment_counter, uint8_t flags,
-                        uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-  __m128i h_vecs[8] = {
-      set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
-      set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
-  };
-  __m128i counter_low_vec, counter_high_vec;
-  load_counters(counter, increment_counter, &counter_low_vec,
-                &counter_high_vec);
-  uint8_t block_flags = flags | flags_start;
-
-  for (size_t block = 0; block < blocks; block++) {
-    if (block + 1 == blocks) {
-      block_flags |= flags_end;
-    }
-    __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
-    __m128i block_flags_vec = set1(block_flags);
-    __m128i msg_vecs[16];
-    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
-
-    __m128i v[16] = {
-        h_vecs[0],       h_vecs[1],        h_vecs[2],     h_vecs[3],
-        h_vecs[4],       h_vecs[5],        h_vecs[6],     h_vecs[7],
-        set1(IV[0]),     set1(IV[1]),      set1(IV[2]),   set1(IV[3]),
-        counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
-    };
-    round_fn(v, msg_vecs, 0);
-    round_fn(v, msg_vecs, 1);
-    round_fn(v, msg_vecs, 2);
-    round_fn(v, msg_vecs, 3);
-    round_fn(v, msg_vecs, 4);
-    round_fn(v, msg_vecs, 5);
-    round_fn(v, msg_vecs, 6);
-    h_vecs[0] = xorv(v[0], v[8]);
-    h_vecs[1] = xorv(v[1], v[9]);
-    h_vecs[2] = xorv(v[2], v[10]);
-    h_vecs[3] = xorv(v[3], v[11]);
-    h_vecs[4] = xorv(v[4], v[12]);
-    h_vecs[5] = xorv(v[5], v[13]);
-    h_vecs[6] = xorv(v[6], v[14]);
-    h_vecs[7] = xorv(v[7], v[15]);
-
-    block_flags = flags;
-  }
-
-  transpose_vecs(&h_vecs[0]);
-  transpose_vecs(&h_vecs[4]);
-  // The first four vecs now contain the first half of each output, and the
-  // second four vecs contain the second half of each output.
-  storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
-  storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
-  storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
-  storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
-  storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
-  storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
-  storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
-  storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
-}
-
-INLINE void hash_one_sse41(const uint8_t *input, size_t blocks,
-                           const uint32_t key[8], uint64_t counter,
-                           uint8_t flags, uint8_t flags_start,
-                           uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
-  uint32_t cv[8];
-  memcpy(cv, key, BLAKE3_KEY_LEN);
-  uint8_t block_flags = flags | flags_start;
-  while (blocks > 0) {
-    if (blocks == 1) {
-      block_flags |= flags_end;
-    }
-    blake3_compress_in_place_sse41(cv, input, BLAKE3_BLOCK_LEN, counter,
-                                   block_flags);
-    input = &input[BLAKE3_BLOCK_LEN];
-    blocks -= 1;
-    block_flags = flags;
-  }
-  memcpy(out, cv, BLAKE3_OUT_LEN);
-}
-
-void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
-                            size_t blocks, const uint32_t key[8],
-                            uint64_t counter, bool increment_counter,
-                            uint8_t flags, uint8_t flags_start,
-                            uint8_t flags_end, uint8_t *out) {
-  while (num_inputs >= DEGREE) {
-    blake3_hash4_sse41(inputs, blocks, key, counter, increment_counter, flags,
-                       flags_start, flags_end, out);
-    if (increment_counter) {
-      counter += DEGREE;
-    }
-    inputs += DEGREE;
-    num_inputs -= DEGREE;
-    out = &out[DEGREE * BLAKE3_OUT_LEN];
-  }
-  while (num_inputs > 0) {
-    hash_one_sse41(inputs[0], blocks, key, counter, flags, flags_start,
-                   flags_end, out);
-    if (increment_counter) {
-      counter += 1;
-    }
-    inputs += 1;
-    num_inputs -= 1;
-    out = &out[BLAKE3_OUT_LEN];
-  }
-}
diff --git a/windows/src/block.h b/windows/src/block.h
deleted file mode 100644
index 875337c..0000000
--- a/windows/src/block.h
+++ /dev/null
@@ -1,27 +0,0 @@
-
-// For creation
-int count_slice_info(PAR3_CTX *par3_ctx);
-int set_slice_info(PAR3_CTX *par3_ctx);
-int calculate_recovery_count(PAR3_CTX *par3_ctx);
-
-int allocate_recovery_block(PAR3_CTX *par3_ctx);
-int create_recovery_block(PAR3_CTX *par3_ctx);
-int create_recovery_block_split(PAR3_CTX *par3_ctx);
-int create_recovery_block_cohort(PAR3_CTX *par3_ctx);
-
-
-// For verification
-int substitute_input_block(PAR3_CTX *par3_ctx);
-int find_identical_block(PAR3_CTX *par3_ctx);
-uint64_t aggregate_input_block(PAR3_CTX *par3_ctx);
-uint64_t aggregate_recovery_block(PAR3_CTX *par3_ctx);
-uint64_t aggregate_block_cohort(PAR3_CTX *par3_ctx, uint32_t *lost_count_cohort, uint32_t *lack_count_cohort);
-uint32_t check_possible_restore(PAR3_CTX *par3_ctx);
-
-
-// For repair
-int make_block_list(PAR3_CTX *par3_ctx, uint64_t lost_count, uint32_t lost_count_cohort);
-int recover_lost_block(PAR3_CTX *par3_ctx, char *temp_path, int lost_count);
-int recover_lost_block_split(PAR3_CTX *par3_ctx, char *temp_path, uint64_t lost_count);
-int recover_lost_block_cohort(PAR3_CTX *par3_ctx, char *temp_path);
-
diff --git a/windows/src/block_check.c b/windows/src/block_check.c
deleted file mode 100644
index 6ec13be..0000000
--- a/windows/src/block_check.c
+++ /dev/null
@@ -1,672 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <limits.h>
-
-#elif _WIN32
-#endif
-
-#include "libpar3.h"
-
-
-// Data Packets substitute for lost input blocks.
-int substitute_input_block(PAR3_CTX *par3_ctx)
-{
-	int flag_show = 0, flag_substitute;
-	int64_t slice_index;
-	uint64_t item_index, packet_count;
-	uint64_t block_size, block_count, block_index;
-	PAR3_PKT_CTX *packet_list;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	packet_list = par3_ctx->data_packet_list;
-	packet_count = par3_ctx->data_packet_count;
-
-	if ( (block_list == NULL) || (packet_list == NULL) )
-		return 0;
-
-	for (item_index = 0; item_index < packet_count; item_index++){
-		block_index = packet_list[item_index].index;
-		if (block_index >= block_count){
-			printf("Data Packet for block[%"PRIu64"] is wrong.\n", block_index);
-			return RET_LOGIC_ERROR;
-		}
-
-		// Update all slices in the block
-		flag_substitute = 0;
-		slice_index = block_list[block_index].slice;
-		while (slice_index != -1){
-			if (slice_list[slice_index].find_name == NULL){
-				flag_substitute++;
-				slice_list[slice_index].find_name = packet_list[item_index].name;
-				if (slice_list[slice_index].size == block_size){
-					slice_list[slice_index].find_offset = packet_list[item_index].offset + 56;
-				} else {
-					slice_list[slice_index].find_offset = packet_list[item_index].offset + 56 + slice_list[slice_index].tail_offset;
-				}
-			}
-			slice_index = slice_list[slice_index].next;
-		}
-
-		// Update found state
-		if (block_list[block_index].state & 1){	// Block of full size slice
-			block_list[block_index].state |= 4;
-		} else if (block_list[block_index].state & 2){	// Block of tail slices
-			block_list[block_index].state |= 8 | 16;
-		}
-		if (flag_substitute > 0){
-			if (par3_ctx->noise_level >= 2){
-				if (flag_show == 0){
-					flag_show++;
-					printf("\nSubstituting for lost blocks:\n\n");
-				}
-				printf("Map block[%2"PRId64"] to Data Packet.\n", block_index);
-			}
-		}
-	}
-
-	return 0;
-}
-
-// Find identical input blocks
-int find_identical_block(PAR3_CTX *par3_ctx)
-{
-	int flag_show = 0;
-	int64_t slice_index, find_index;
-	int64_t slice_index_i, slice_index_j;
-	uint64_t i, j, count;
-	uint64_t block_index_i, block_index_j;
-	PAR3_CMP_CTX *cmp_list;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-
-/*
-	// for debug
-	for (i = 0; i < par3_ctx->crc_count; i++){
-		printf("crc_list[%2"PRIu64"] = 0x%016"PRIx64" , block = %"PRIu64"\n", i, par3_ctx->crc_list[i].crc, par3_ctx->crc_list[i].index);
-	}
-	for (i = 0; i < par3_ctx->tail_count; i++){
-		printf("tail_list[%2"PRIu64"] = 0x%016"PRIx64" , slice = %"PRIu64"\n", i, par3_ctx->tail_list[i].crc, par3_ctx->tail_list[i].index);
-	}
-*/
-
-	// Compare full size blocks.
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	cmp_list = par3_ctx->crc_list;
-	count = par3_ctx->crc_count;
-	for (i = 0; i < count; i++){
-		for (j = i + 1; j < count; j++){
-			if (cmp_list[i].crc == cmp_list[j].crc){
-				// When CRC-64 of these blocks are same, compare hash values next.
-				block_index_i = cmp_list[i].index;
-				block_index_j = cmp_list[j].index;
-				if (memcmp(block_list[block_index_i].hash, block_list[block_index_j].hash, 16) == 0){
-					//printf("block[%"PRIu64"] and [%"PRIu64"] are same.\n", block_index_i, block_index_j);
-					if (block_list[block_index_i].state & 4){	// block[i] is found.
-						if ((block_list[block_index_j].state & 4) == 0){	// block[j] isn't found.
-							if (par3_ctx->noise_level >= 2){
-								if (flag_show == 0){
-									flag_show++;
-									printf("\nComparing lost slices to found slices:\n\n");
-								}
-								printf("Map block[%2"PRIu64"] to identical block[%2"PRIu64"].\n", block_index_j, block_index_i);
-							}
-							slice_index = block_list[block_index_j].slice;
-							find_index = block_list[block_index_i].slice;
-							// Search valid slice for this found block.
-							while ( (find_index != -1) && (slice_list[find_index].find_name == NULL) ){
-								find_index = slice_list[find_index].next;
-							}
-							if (find_index == -1){
-								// When there is no valid slice.
-								printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index_i);
-								return RET_LOGIC_ERROR;
-							}
-							// Copy reading source to another.
-							slice_list[slice_index].find_name = slice_list[find_index].find_name;
-							slice_list[slice_index].find_offset = slice_list[find_index].find_offset;
-							block_list[block_index_j].state |= 4;
-						}
-					} else if (block_list[block_index_j].state & 4){	// block[i] isn't found, and block[j] is found.
-						if (par3_ctx->noise_level >= 2){
-							if (flag_show == 0){
-								flag_show++;
-								printf("\nComparing lost slices to found slices:\n\n");
-							}
-							printf("Map block[%2"PRIu64"] to identical block[%2"PRIu64"].\n", block_index_i, block_index_j);
-						}
-						slice_index = block_list[block_index_i].slice;
-						find_index = block_list[block_index_j].slice;
-						// Search valid slice for this found block.
-						while ( (find_index != -1) && (slice_list[find_index].find_name == NULL) ){
-							find_index = slice_list[find_index].next;
-						}
-						if (find_index == -1){
-							// When there is no valid slice.
-							printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index_j);
-							return RET_LOGIC_ERROR;
-						}
-						// Copy reading source to another.
-						slice_list[slice_index].find_name = slice_list[find_index].find_name;
-						slice_list[slice_index].find_offset = slice_list[find_index].find_offset;
-						block_list[block_index_i].state |= 4;
-					}
-				}
-
-			} else {	// Because CRC list was sorted, no need to compare after different CRC.
-				break;
-			}
-		}
-
-		// When there are multiple slices for a block, map all slices.
-		block_index_i = cmp_list[i].index;
-		if (block_list[block_index_i].state & 4){	// block[i] has a valid slice.
-			slice_index = block_list[block_index_i].slice;
-			// Find valid slice.
-			find_index = slice_index;
-			while ( (find_index != -1) && (slice_list[find_index].find_name == NULL) ){
-				find_index = slice_list[find_index].next;
-			}
-			if (find_index == -1){
-				// When there is no valid slice.
-				printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index_i);
-				return RET_LOGIC_ERROR;
-			}
-			// Map other slices.
-			do {
-				if (slice_list[slice_index].find_name == NULL){
-					if (par3_ctx->noise_level >= 2){
-						if (flag_show == 0){
-							flag_show++;
-							printf("\nComparing lost slices to found slices:\n\n");
-						}
-						printf("Map slice[%2"PRId64"] to identical slice[%2"PRId64"] in block[%2"PRIu64"].\n",
-								slice_index, find_index, block_index_i);
-					}
-					slice_list[slice_index].find_name = slice_list[find_index].find_name;
-					slice_list[slice_index].find_offset = slice_list[find_index].find_offset;
-				}
-				slice_index = slice_list[slice_index].next;
-			} while (slice_index != -1);
-		}
-	}
-
-	// Compare chunk tail slices.
-	chunk_list = par3_ctx->chunk_list;
-	cmp_list = par3_ctx->tail_list;
-	count = par3_ctx->tail_count;
-	for (i = 0; i < count; i++){
-		for (j = i + 1; j < count; j++){
-			if (cmp_list[i].crc == cmp_list[j].crc){
-				// When CRC-64 of these slices are same, compare size and hash values next.
-				slice_index_i = cmp_list[i].index;
-				slice_index_j = cmp_list[j].index;
-				if (slice_list[slice_index_i].size == slice_list[slice_index_j].size){
-					if (memcmp(chunk_list[slice_list[slice_index_i].chunk].tail_hash, chunk_list[slice_list[slice_index_j].chunk].tail_hash, 16) == 0){
-						//printf("slice[%"PRIu64"] and [%"PRIu64"] are same.\n", slice_index_i, slice_index_j);
-						if (slice_list[slice_index_i].find_name != NULL){	// slice[i] is found.
-							if (slice_list[slice_index_j].find_name == NULL){	// slice[j] isn't found.
-								if (par3_ctx->noise_level >= 2){
-									if (flag_show == 0){
-										flag_show++;
-										printf("\nComparing lost slices to found slices:\n\n");
-									}
-									printf("Map slice[%2"PRIu64"] to identical slice[%2"PRIu64"].\n", slice_index_j, slice_index_i);
-								}
-								// Copy reading source to another.
-								block_index_j = slice_list[slice_index_j].block;
-								slice_list[slice_index_j].find_name = slice_list[slice_index_i].find_name;
-								slice_list[slice_index_j].find_offset = slice_list[slice_index_i].find_offset;
-								block_list[block_index_j].state |= 8;
-							}
-						} else if (slice_list[slice_index_j].find_name != NULL){	// slice[i] isn't found, and slice[j] is found.
-							if (par3_ctx->noise_level >= 2){
-								if (flag_show == 0){
-									flag_show++;
-									printf("\nComparing lost slices to found slices:\n\n");
-								}
-								printf("Map slice[%2"PRId64"] to identical slice[%2"PRId64"].\n", slice_index_i, slice_index_j);
-							}
-							// Copy reading source to another.
-							block_index_i = slice_list[slice_index_i].block;
-							slice_list[slice_index_i].find_name = slice_list[slice_index_j].find_name;
-							slice_list[slice_index_i].find_offset = slice_list[slice_index_j].find_offset;
-							block_list[block_index_i].state |= 8;
-						}
-					}
-				}
-
-			} else {	// Because CRC list was sorted, no need to compare after different CRC.
-				break;
-			}
-		}
-	}
-
-	return 0;
-}
-
-// Aggregate verified result of available input blocks
-uint64_t aggregate_input_block(PAR3_CTX *par3_ctx)
-{
-	int64_t slice_index;
-	uint64_t block_count, block_available, block_index;
-	uint64_t total_size, available_size, skip_count, old_count;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-
-	block_count = par3_ctx->block_count;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-
-	block_available = 0;
-	for (block_index = 0; block_index < block_count; block_index++){
-		if (block_list[block_index].state & (4 | 16)){
-			// When a block has a full slice, the whole block data is available.
-			block_available++;
-
-		} else if (block_list[block_index].state & 8){
-			// When a block has a tail slice, I need to check which data is available.
-			skip_count = old_count = 0;
-			available_size = 0;
-			total_size = block_list[block_index].size;	// total data size of chunk tails in this block
-			slice_index = block_list[block_index].slice;	// index of the first slice
-			while (slice_index != -1){
-				if (slice_list[slice_index].find_name != NULL){
-					if (slice_list[slice_index].tail_offset > available_size){
-						skip_count++;
-						//printf("block[%"PRIu64"]: skip_count = %"PRIu64"\n", block_index, skip_count);
-					} else if (slice_list[slice_index].tail_offset + slice_list[slice_index].size >= available_size){
-						available_size = slice_list[slice_index].tail_offset + slice_list[slice_index].size;
-						//printf("block[%"PRIu64"]: available = %"PRIu64" / %"PRIu64"\n", block_index, available_size, total_size);
-					}
-				//} else {
-				//	printf("slice[%"PRId64"] is missing.\n", slice_index);
-				}
-				slice_index = slice_list[slice_index].next;
-				if ( (slice_index == -1) && (available_size < total_size) && (skip_count != old_count) ){
-					//printf("block[%"PRIu64"]: skip_count = %"PRIu64" / %"PRIu64", try again\n", block_index, skip_count, old_count);
-					old_count = skip_count;
-					skip_count = 0;
-
-					// If a chunk tail was skipped, check again.
-					slice_index = block_list[block_index].slice;
-				}
-			}
-
-			// When whole data is available by all tail slices.
-			if (available_size == total_size){
-				block_list[block_index].state |= 16;
-				block_available++;
-			}
-		}
-	}
-
-	return block_available;
-}
-
-// Aggregate recovery blocks of each Matrix Packet, and return the max count
-uint64_t aggregate_recovery_block(PAR3_CTX *par3_ctx)
-{
-	uint8_t *packet_type, *buf;
-	uint8_t packet_checksum[16];
-	size_t offset, total_size;
-	uint64_t item_index, packet_size, packet_count;
-	uint64_t find_count, find_count_max;
-	PAR3_PKT_CTX *packet_list;
-
-	if (par3_ctx->matrix_packet_count == 0)
-		return 0;
-	if (par3_ctx->recv_packet_count == 0)
-		return 0;
-
-	buf = par3_ctx->matrix_packet;
-	total_size = par3_ctx->matrix_packet_size;
-	packet_list = par3_ctx->recv_packet_list;
-	packet_count = par3_ctx->recv_packet_count;
-
-	find_count_max = 0;
-	offset = 0;
-	while (offset + 48 < total_size){
-		memcpy(packet_checksum, buf + offset + 8, 16);
-		memcpy(&packet_size, buf + offset + 24, 8);
-		packet_type = buf + offset + 40;
-
-		// At this time, this supports only one Error Correction Codes at a time.
-
-		if (memcmp(packet_type, "PAR CAU\0", 8) == 0){	// Cauchy Matrix Packet
-			uint64_t hint_num;
-
-			// Search Recovery Data packet for this Matrix Packet
-			find_count = 0;
-			for (item_index = 0; item_index < packet_count; item_index++){
-				if (memcmp(packet_list[item_index].matrix, packet_checksum, 16) == 0){
-					find_count++;
-				}
-			}
-			// hint for number of recovery blocks
-			memcpy(&hint_num, buf + offset + 64, 8);
-			if (par3_ctx->noise_level >= 0){
-				printf("You have %"PRIu64" recovery blocks available for Cauchy Reed-Solomon Codes.\n", find_count);
-			}
-			if (par3_ctx->noise_level >= 1){
-				if (hint_num > 0){
-					printf("Number of recovery blocks would be %"PRIu64".\n", hint_num);
-				}
-			}
-			if (find_count > find_count_max){
-				find_count_max = find_count;
-				par3_ctx->ecc_method = 1;	// At this time, exclusive to others.
-				par3_ctx->max_recovery_block = hint_num;
-				par3_ctx->matrix_packet_offset = offset;
-			}
-
-		} else if (memcmp(packet_type, "PAR FFT\0", 8) == 0){	// FFT Matrix Packet
-			int8_t shift_num;
-			uint32_t extra_num;
-			uint64_t max_num;
-
-			// Search Recovery Data packet for this Matrix Packet
-			find_count = 0;
-			for (item_index = 0; item_index < packet_count; item_index++){
-				if (memcmp(packet_list[item_index].matrix, packet_checksum, 16) == 0){
-					find_count++;
-				}
-			}
-			// max number of recovery blocks
-			shift_num = buf[offset + 64];	// convert to signed integer
-			if ( (shift_num >= 0) && (shift_num <= 15) ){
-				max_num = (uint64_t)1 << shift_num;
-			} else {
-				max_num = 32768;
-			}
-			// number of interleaving blocks
-			extra_num = 0;
-			if ((packet_size > 65) && (packet_size <= 69)){	// Read 1 ~ 4 bytes of the last field
-				memcpy(&extra_num, buf + offset + 65, packet_size - 65);
-				max_num *= extra_num + 1;	// When interleaving, max count is multiplied by number of cohorts.
-			}
-			if (par3_ctx->noise_level >= 0){
-				printf("You have %"PRIu64" recovery blocks available for FFT based Reed-Solomon Codes.\n", find_count);
-			}
-			if (par3_ctx->noise_level >= 1){
-				printf("Max recovery block count = %"PRIu64"\n", max_num);
-				if (extra_num > 0){
-					printf("Number of cohort = %u (Interleaving = %u)\n", extra_num + 1, extra_num);
-					printf("Input block count per cohort = %"PRIu64"\n", (par3_ctx->block_count + extra_num) / (extra_num + 1));
-					printf("Max recovery block count per cohort = %"PRIu64"\n", max_num / (extra_num + 1));
-				}
-			}
-			if (find_count > find_count_max){
-				find_count_max = find_count;
-				par3_ctx->ecc_method = 8;
-				par3_ctx->interleave = extra_num;
-				par3_ctx->max_recovery_block = max_num;
-				par3_ctx->matrix_packet_offset = offset;
-			}
-
-		}
-
-		offset += packet_size;
-	}
-
-	return find_count_max;
-}
-
-// How many files to restore, when there are not enough blocks.
-uint32_t check_possible_restore(PAR3_CTX *par3_ctx)
-{
-	char *find_name;
-	uint32_t possible_count;
-	uint32_t file_count, file_index;
-	uint32_t chunk_index, chunk_num;
-	int64_t slice_index;
-	uint64_t block_size, chunk_size, file_size, slice_size;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_FILE_CTX *file_list;
-
-	if (par3_ctx->input_file_count == 0)
-		return 0;
-
-	file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-	file_list = par3_ctx->input_file_list;
-
-	possible_count = 0;
-	for (file_index = 0; file_index < file_count; file_index++){
-		// This input file is misnamed.
-		if (file_list[file_index].state & 4){
-			// Misnamed file will be corrected later.
-			//printf("misnamed file[%u]\n", file_index);
-			possible_count++;
-
-		// The input file is missing or damaged.
-		} else if (file_list[file_index].state & 3){
-			file_size = 0;
-			chunk_index = file_list[file_index].chunk;		// index of the first chunk
-			chunk_num = file_list[file_index].chunk_num;	// number of chunk descriptions
-			slice_index = file_list[file_index].slice;		// index of the first slice
-			//printf("chunk = %u+%u, slice = %"PRId64" ~, %s\n", chunk_index, chunk_num, slice_index, file_list[file_index].name);
-			while (chunk_num > 0){
-				chunk_size = chunk_list[chunk_index].size;
-				file_size += chunk_size;
-				while ( (chunk_size >= block_size) || (chunk_size >= 40) ){	// full size slice or chunk tail slice
-					slice_size = slice_list[slice_index].size;
-					find_name = slice_list[slice_index].find_name;
-					if (find_name == NULL){
-						//printf("slice[%"PRId64"] isn't found.\n", slice_index);
-						file_size--;
-						chunk_num = 1;
-						break;
-					}
-
-					slice_index++;
-					chunk_size -= slice_size;
-				}
-
-				chunk_index++;
-				chunk_num--;
-			}
-
-			//printf("file_size = %"PRIu64", %"PRIu64"\n", file_size, file_list[file_index].size);
-			if (file_size == file_list[file_index].size){
-				// Sign of repairable file
-				file_list[file_index].state |= 0x200;
-				possible_count++;
-			}
-		}
-	}
-	//printf("possible_count = %u\n", possible_count);
-
-	return possible_count;
-}
-
-// Make list of index for lost input blocks and using recovery blocks.
-int make_block_list(PAR3_CTX *par3_ctx, uint64_t lost_count, uint32_t lost_count_cohort)
-{
-	uint8_t *packet_checksum;
-	int *recv_id;
-	uint64_t count, index, id;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_PKT_CTX *packet_list;
-
-	if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-		// Make list of index (lost input blocks and using recovery blocks)
-		count = lost_count * 2;
-	} else if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		if (par3_ctx->interleave == 0){
-			// Make list of index (using recovery blocks)
-			count = lost_count;
-		} else {
-			// Make list of index (position)
-			count = lost_count_cohort;
-		}
-	} else {
-		// Make list of index (using recovery blocks)
-		count = lost_count;
-	}
-	recv_id = (int *) malloc(sizeof(int) * count);
-	if (recv_id == NULL){
-		printf("Failed to make list for using blocks\n");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->recv_id_list = recv_id;
-
-	if (par3_ctx->interleave > 0)
-		return 0;
-
-	// Get checksum of using Matrix Packet
-	packet_checksum = par3_ctx->matrix_packet + par3_ctx->matrix_packet_offset + 8;
-
-	// Set index of using recovery blocks
-	packet_list = par3_ctx->recv_packet_list;
-	count = par3_ctx->recv_packet_count;
-	id = 0;
-	for (index = 0; index < count; index++){
-		// Search only Recovery Data Packets belong to using Matrix Packet
-		if (memcmp(packet_list[index].matrix, packet_checksum, 16) == 0){
-			recv_id[id] = (int)(packet_list[index].index);
-			//printf("recv_id[%"PRIu64"] = %d\n", id, recv_id[id]);
-			id++;
-
-			// If there are more blocks than required, just ignore them.
-			// Cauchy Matrix should be invertible always.
-			// Or, is it safe to keep more for full rank ?
-			if (id >= lost_count)
-				break;
-		}
-	}
-
-	if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-		int *lost_id = recv_id + lost_count;
-
-		// Set index of lost input blocks
-		block_list = par3_ctx->block_list;
-		count = par3_ctx->block_count;
-		id = 0;
-		for (index = 0; index < count; index++){
-			if ((block_list[index].state & (4 | 16)) == 0){
-				if (id >= lost_count){
-					printf("Number of lost input block is wrong.\n");
-					return RET_LOGIC_ERROR;
-				}
-
-				lost_id[id] = (int)index;
-				//printf("lost_id[%"PRIu64"] = %d\n", id, lost_id[id]);
-				id++;
-			}
-		}
-	}
-
-	return 0;
-}
-
-// Aggregate input blocks and recovery blocks of each cohort, and return lacking count;
-uint64_t aggregate_block_cohort(PAR3_CTX *par3_ctx, uint32_t *lost_count_cohort, uint32_t *lack_count_cohort)
-{
-	uint8_t *packet_checksum;
-	uint32_t cohort_count, cohort_index;
-	uint32_t lack_count, lack_count_max, lack_count_min;
-	uint32_t lost_count_max, lost_count_min;
-	uint32_t recv_count_max, recv_count_min;
-	uint32_t *lost_list, *recv_list;
-	uint64_t count, index, id, lack_count_total;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_PKT_CTX *packet_list;
-
-	cohort_count = par3_ctx->interleave + 1;
-
-	// Allocate memory and zero fill
-	lost_list = (uint32_t *) calloc(cohort_count * 2, sizeof(uint32_t));
-	recv_list = lost_list + cohort_count;
-	par3_ctx->lost_list = lost_list;	// Store here to refer and release later
-
-	// Get checksum of using Matrix Packet
-	packet_checksum = par3_ctx->matrix_packet + par3_ctx->matrix_packet_offset + 8;
-
-	// Check index of using recovery blocks
-	packet_list = par3_ctx->recv_packet_list;
-	count = par3_ctx->recv_packet_count;
-	for (index = 0; index < count; index++){
-		// Search only Recovery Data Packets belong to using Matrix Packet
-		if (memcmp(packet_list[index].matrix, packet_checksum, 16) == 0){
-			id = packet_list[index].index;
-			//printf("recv_packet[%"PRIu64"] = %"PRIu64", ", index, id);
-			id %= cohort_count;	// modulo
-			recv_list[id] += 1;
-			//printf("recv_list[%"PRIu64"] = %"PRIu64"\n", id, recv_list[id]);
-		}
-	}
-
-	// Check index of lost input blocks
-	block_list = par3_ctx->block_list;
-	count = par3_ctx->block_count;
-	for (index = 0; index < count; index++){
-		if ((block_list[index].state & (4 | 16)) == 0){
-			id = index % cohort_count;
-			lost_list[id] += 1;
-			//printf("lost block[%"PRIu64"] : lost_list[%"PRIu64"] = %u\n", index, id, lost_list[id]);
-		}
-	}
-
-	// Check each cohort
-	lack_count_total = 0;
-	lack_count_max = 0;
-	lack_count_min = UINT_MAX;
-	lost_count_max = 0;
-	lost_count_min = UINT_MAX;
-	recv_count_max = 0;
-	recv_count_min = UINT_MAX;
-	for (cohort_index = 0; cohort_index < cohort_count; cohort_index++){
-		if (lost_list[cohort_index] > recv_list[cohort_index]){
-			// If number of lost blocks is larger than number of recovery blocks, sum the value.
-			lack_count = lost_list[cohort_index] - recv_list[cohort_index];
-			lack_count_total += lack_count;
-			if (lack_count_max < lack_count)
-				lack_count_max = lack_count;
-			if (lack_count_min > lack_count)
-				lack_count_min = lack_count;
-		} else {
-			lack_count_min = 0;	// This cohort is locally repairable.
-		}
-		if (lost_count_max < lost_list[cohort_index])
-			lost_count_max = lost_list[cohort_index];
-		if (lost_count_min > lost_list[cohort_index])
-			lost_count_min = lost_list[cohort_index];
-		if (recv_count_max < recv_list[cohort_index])
-			recv_count_max = recv_list[cohort_index];
-		if (recv_count_min > recv_list[cohort_index])
-			recv_count_min = recv_list[cohort_index];
-	}
-	// Use these values at repair
-	if (lost_count_cohort != NULL)
-		*lost_count_cohort = lost_count_max;
-	if (lack_count_cohort != NULL)
-		*lack_count_cohort = lack_count_max;
-	if (par3_ctx->noise_level >= 1){
-		// Show min & max numbers of recovery blocks and lost input blocks among cohorts.
-		printf("Recovery block count among cohorts  = %u ~ %u\n", recv_count_min, recv_count_max);
-		printf("Lost block count among cohorts      = %u ~ %u\n", lost_count_min, lost_count_max);
-		if (lack_count_total > 0){
-			// Show numbers of required recovery blocks among cohorts.
-			printf("Required block count among cohorts  = %u ~ %u\n", lack_count_min, lack_count_max);
-		}
-	}
-
-	return lack_count_total;
-}
-
diff --git a/windows/src/block_create.c b/windows/src/block_create.c
deleted file mode 100644
index 6493961..0000000
--- a/windows/src/block_create.c
+++ /dev/null
@@ -1,1345 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "libpar3.h"
-#include "common.h"
-#include "galois.h"
-#include "hash.h"
-#include "reedsolomon.h"
-#include "leopard/leopard.h"
-
-
-// When it uses Reed-Solomon Erasure Codes, it tries to allocate memory for all recovery blocks.
-int allocate_recovery_block(PAR3_CTX *par3_ctx)
-{
-	size_t alloc_size, region_size;
-
-	// Allocate tables before blocks.
-	if (par3_ctx->galois_poly == 0x1100B){	// 16-bit Galois Field (0x1100B).
-		par3_ctx->galois_table = gf16_create_table(par3_ctx->galois_poly);
-
-	} else if (par3_ctx->galois_poly == 0x11D){	// 8-bit Galois Field (0x11D).
-		par3_ctx->galois_table = gf8_create_table(par3_ctx->galois_poly);
-
-	} else {
-		printf("Galois Field (0x%X) isn't supported.\n", par3_ctx->galois_poly);
-		return RET_LOGIC_ERROR;
-	}
-	if (par3_ctx->galois_table == NULL){
-		printf("Failed to create tables for Galois Field (0x%X)\n", par3_ctx->galois_poly);
-		return RET_MEMORY_ERROR;
-	}
-
-	// Set memory alignment of block data to be 4.
-	// Increase at least 1 byte as checksum.
-	region_size = (par3_ctx->block_size + 4 + 3) & ~3;
-
-	// Limited memory usage
-	alloc_size = region_size * par3_ctx->recovery_block_count;
-	if ( (par3_ctx->memory_limit > 0) && (alloc_size > par3_ctx->memory_limit) )
-		return 0;
-
-	// Allocate memory to keep recovery blocks
-	par3_ctx->block_data = malloc(alloc_size);
-	//par3_ctx->block_data = NULL;	// For testing another method
-	if (par3_ctx->block_data != NULL){
-		par3_ctx->ecc_method |= 0x8000;	// Keep all recovery blocks on memory
-		if (par3_ctx->noise_level >= 2){
-			printf("\nAligned size of block data = %zu\n", region_size);
-			printf("Keep all recovery blocks on memory (%zu * %"PRIu64" = %zu)\n", region_size, par3_ctx->recovery_block_count, alloc_size);
-		}
-	}
-
-	return 0;
-}
-
-// This supports Reed-Solomon Erasure Codes on 8-bit or 16-bit Galois Field.
-// GF tables and recovery blocks were allocated already.
-int create_recovery_block(PAR3_CTX *par3_ctx)
-{
-	uint8_t *work_buf;
-	uint8_t gf_size;
-	int galois_poly;
-	int block_count, block_index;
-	int progress_old, progress_now;
-	uint32_t file_index, file_prev;
-	size_t block_size, region_size;
-	size_t data_size, read_size;
-	size_t tail_offset, tail_gap;
-	int64_t slice_index, file_offset;
-	PAR3_FILE_CTX *file_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_BLOCK_CTX *block_list;
-	FILE *fp;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	if (par3_ctx->recovery_block_count == 0)
-		return -1;
-
-	// GF tables and recovery blocks must be stored on memory.
-	if ( (par3_ctx->galois_table == NULL) || (par3_ctx->block_data == NULL) )
-		return -1;
-
-	// Only when it uses Reed-Solomon Erasure Codes.
-	if ((par3_ctx->ecc_method & 1) == 0)
-		return -1;
-
-	block_size = par3_ctx->block_size;
-	block_count = (int)(par3_ctx->block_count);
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	file_list = par3_ctx->input_file_list;
-	slice_list = par3_ctx->slice_list;
-	block_list = par3_ctx->block_list;
-
-	// Allocate memory to read one input block and parity.
-	region_size = (block_size + 4 + 3) & ~3;
-	work_buf = malloc(region_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing recovery blocks:\n");
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Reed-Solomon Erasure Codes
-	file_prev = 0xFFFFFFFF;
-	fp = NULL;
-	for (block_index = 0; block_index < block_count; block_index++){
-		// Read each input block from input files.
-		data_size = block_list[block_index].size;
-		if (block_list[block_index].state & 1){	// including full size data
-			slice_index = block_list[block_index].slice;
-			while (slice_index != -1){
-				if (slice_list[slice_index].size == block_size)
-					break;
-				slice_index = slice_list[slice_index].next;
-			}
-			if (slice_index == -1){	// When there is no valid slice.
-				printf("Mapping information for block[%d] is wrong.\n", block_index);
-				if (fp != NULL)
-					fclose(fp);
-				return RET_LOGIC_ERROR;
-			}
-
-			// Read one slice from a file.
-			file_index = slice_list[slice_index].file;
-			file_offset = slice_list[slice_index].offset;
-			read_size = data_size;
-			if (par3_ctx->noise_level >= 3){
-				printf("Reading %zu bytes of slice[%"PRId64"] for input block[%d]\n", read_size, slice_index, block_index);
-			}
-			if ( (fp == NULL) || (file_index != file_prev) ){
-				if (fp != NULL){	// Close previous input file.
-					fclose(fp);
-					fp = NULL;
-				}
-				fp = fopen(file_list[file_index].name, "rb");
-				if (fp == NULL){
-					perror("Failed to open Input File");
-					return RET_FILE_IO_ERROR;
-				}
-				file_prev = file_index;
-			}
-			if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-				perror("Failed to seek Input File");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			if (fread(work_buf, 1, read_size, fp) != read_size){
-				perror("Failed to read slice on Input File");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-		} else {	// tail data only (one tail or packed tails)
-			if (par3_ctx->noise_level >= 3){
-				printf("Reading %"PRIu64" bytes for input block[%d]\n", data_size, block_index);
-			}
-			tail_offset = 0;
-			while (tail_offset < data_size){	// Read tails until data end.
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					//printf("block = %"PRIu64", size = %zu, offset = %zu, slice = %"PRId64"\n", block_index, data_size, tail_offset, slice_index);
-					// Even when chunk tails are overlaped, it will find tail slice of next position.
-					if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-							&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-						break;
-					}
-					slice_index = slice_list[slice_index].next;
-				}
-				if (slice_index == -1){	// When there is no valid slice.
-					printf("Mapping information for block[%d] is wrong.\n", block_index);
-					if (fp != NULL)
-						fclose(fp);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Read one slice from a file.
-				tail_gap = tail_offset - slice_list[slice_index].tail_offset;	// This tail slice may start before tail_offset.
-				//printf("tail_gap for slice[%"PRId64"] = %zu.\n", slice_index, tail_gap);
-				file_index = slice_list[slice_index].file;
-				file_offset = slice_list[slice_index].offset + tail_gap;
-				read_size = slice_list[slice_index].size - tail_gap;
-				if ( (fp == NULL) || (file_index != file_prev) ){
-					if (fp != NULL){	// Close previous input file.
-						fclose(fp);
-						fp = NULL;
-					}
-					fp = fopen(file_list[file_index].name, "rb");
-					if (fp == NULL){
-						perror("Failed to open Input File");
-						return RET_FILE_IO_ERROR;
-					}
-					file_prev = file_index;
-				}
-				if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek Input File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fread(work_buf + tail_offset, 1, read_size, fp) != read_size){
-					perror("Failed to read tail slice on Input File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				tail_offset += read_size;
-			}
-		}
-		// Zero fill rest bytes
-		memset(work_buf + data_size, 0, region_size - data_size);
-
-		// At creating time, CRC of a block was set, even when the block includes multiple chunk tails.
-		// It appends chunk tails as tail packing, and calculates their total CRC for the block.
-		// But, after verification, a block without full size data doesn't have valid CRC value.
-		if (block_list[block_index].state & 64){
-			// Calculate checksum of block to confirm that input file was not changed.
-			if (crc64(work_buf, data_size, 0) != block_list[block_index].crc){
-				printf("Checksum of block[%d] is different.\n", block_index);
-				fclose(fp);
-				return RET_LOGIC_ERROR;
-			}
-		}
-
-		// Calculate parity bytes in the region
-		if (gf_size == 2){
-			gf16_region_create_parity(galois_poly, work_buf, region_size);
-		} else if (gf_size == 1){
-			gf8_region_create_parity(galois_poly, work_buf, region_size);
-		} else {
-			region_create_parity(work_buf, region_size);
-		}
-
-		// Multipy one input block for all recovery blocks.
-		rs_create_one_all(par3_ctx, block_index);
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 1) ){
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				// Because block_count is 16-bit value, "int" (32-bit signed integer) is enough.
-				progress_now = (block_index * 1000) / block_count;
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-	}
-	if (fp != NULL){
-		if (fclose(fp) != 0){
-			perror("Failed to close Input File");
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Release allocated memory
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 0){
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	return 0;
-}
-
-// This keeps all input blocks and recovery blocks partially by spliting every block.
-// GF tables and recovery blocks were allocated already.
-int create_recovery_block_split(PAR3_CTX *par3_ctx)
-{
-	char *name_prev, *file_name;
-	uint8_t *block_data, *buf_p;
-	uint8_t gf_size;
-	int ret, galois_poly;
-	int progress_old, progress_now;
-	uint32_t split_count;
-	uint32_t file_index, file_prev;
-	size_t io_size;
-	int64_t slice_index, file_offset;
-	uint64_t crc, block_index;
-	uint64_t block_size, block_count;
-	uint64_t recovery_block_count, first_recovery_block, max_recovery_block;
-	uint64_t alloc_size, region_size, split_size;
-	uint64_t data_size, part_size, split_offset;
-	uint64_t tail_offset, tail_gap;
-	uint64_t progress_total, progress_step;
-	PAR3_FILE_CTX *file_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_POS_CTX *position_list;
-	FILE *fp;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// For Leopard-RS library
-	uint32_t work_count;
-	uint8_t **original_data = NULL, **work_data = NULL;
-
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	recovery_block_count = par3_ctx->recovery_block_count;
-	first_recovery_block = par3_ctx->first_recovery_block;
-	max_recovery_block = par3_ctx->max_recovery_block;
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	file_list = par3_ctx->input_file_list;
-	slice_list = par3_ctx->slice_list;
-	block_list = par3_ctx->block_list;
-	position_list = par3_ctx->position_list;
-
-	if (recovery_block_count == 0)
-		return RET_LOGIC_ERROR;
-
-	// Set required memory size at first
-	if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		ret = leo_init();	// Initialize Leopard-RS library.
-		if (ret != 0){
-			printf("Failed to initialize Leopard-RS library (%d)\n", ret);
-			return RET_LOGIC_ERROR;
-		}
-		work_count = leo_encode_work_count((uint32_t)block_count, (uint32_t)max_recovery_block);
-		// max_recovery_block is equal or larger than (first_recovery_block + recovery_block_count).
-		//printf("Leopard-RS: work_count = %u\n", work_count);
-		// Leopard-RS requires multiple of 64 bytes for SIMD.
-		region_size = (block_size + 4 + 63) & ~63;
-		alloc_size = region_size * (block_count + work_count);
-
-	} else {	// Reed-Solomon Erasure Codes
-		// Mmeory alignment is 4 bytes.
-		region_size = (block_size + 4 + 3) & ~3;
-		alloc_size = region_size * (block_count + recovery_block_count);
-	}
-
-	// for test split
-	//par3_ctx->memory_limit = (alloc_size + 1) / 2;
-	//par3_ctx->memory_limit = (alloc_size + 2) / 3;
-
-	// Limited memory usage
-	if ( (par3_ctx->memory_limit > 0) && (alloc_size > par3_ctx->memory_limit) ){
-		split_count = (uint32_t)((alloc_size + par3_ctx->memory_limit - 1) / par3_ctx->memory_limit);
-		split_size = (block_size + split_count - 1) / split_count;	// This is splitted block size to fit in limited memory.
-		if (gf_size == 2){
-			// aligned to 2 bytes for 16-bit Galois Field
-			split_size = (split_size + 1) & ~1;
-		}
-		if (split_size > block_size)
-			split_size = block_size;
-		split_count = (uint32_t)((block_size + split_size - 1) / split_size);
-		if (par3_ctx->noise_level >= 1){
-			printf("\nSplit block to %u pieces of %"PRIu64" bytes.\n", split_count, split_size);
-		}
-	} else {
-		split_count = 1;
-		split_size = block_size;
-	}
-
-	// Allocate memory to keep all splitted blocks.
-	if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		// Leopard-RS requires alignment of 64 bytes.
-		region_size = (split_size + 4 + 63) & ~63;
-		alloc_size = region_size * (block_count + work_count);	// work_count is larger than recovery_block_count.
-		// Though Leopard-RS doesn't require memory alignment for SIMD, align to 32 bytes may be faster.
-		if (par3_ctx->noise_level >= 2){
-			printf("\nAligned size of block data = %"PRIu64"\n", region_size);
-			printf("Allocated memory size = %"PRIu64" * (%"PRIu64" + %u) = %"PRIu64"\n", region_size, block_count, work_count, alloc_size);
-		}
-	} else {	// Reed-Solomon Erasure Codes
-		region_size = (split_size + 4 + 3) & ~3;
-		alloc_size = region_size * (block_count + recovery_block_count);
-		if (par3_ctx->noise_level >= 2){
-			printf("\nAligned size of block data = %"PRIu64"\n", region_size);
-			printf("Allocated memory size = %"PRIu64" * (%"PRIu64" + %"PRIu64") = %"PRIu64"\n", region_size, block_count, recovery_block_count, alloc_size);
-		}
-	}
-	block_data = malloc(alloc_size);
-	if (block_data == NULL){
-		perror("Failed to allocate memory for block data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->block_data = block_data;
-
-	if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		// List of pointer
-		original_data = malloc(sizeof(block_data) * (block_count + work_count));
-		if (original_data == NULL){
-			perror("Failed to allocate memory for Leopard-RS");
-			return RET_MEMORY_ERROR;
-		}
-		buf_p = block_data;
-		for (block_index = 0; block_index < block_count; block_index++){
-			original_data[block_index] = buf_p;
-			buf_p += region_size;
-		}
-		work_data = original_data + block_count;
-		// Change order of recovery data to skip until first_recovery_block.
-		for (block_index = first_recovery_block; block_index < first_recovery_block + recovery_block_count; block_index++){
-			work_data[block_index] = buf_p;
-			buf_p += region_size;
-		}
-		for (block_index = 0; block_index < first_recovery_block; block_index++){
-			work_data[block_index] = buf_p;
-			buf_p += region_size;
-		}
-		for (block_index = first_recovery_block + recovery_block_count; block_index < work_count; block_index++){
-			work_data[block_index] = buf_p;
-			buf_p += region_size;
-		}
-		par3_ctx->matrix = original_data;	// Release this later
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing recovery blocks:\n");
-		progress_total = (block_count * recovery_block_count + block_count + recovery_block_count) * split_count;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// This file access style would support all Error Correction Codes.
-	name_prev = NULL;
-	fp = NULL;
-	for (split_offset = 0; split_offset < block_size; split_offset += split_size){
-		buf_p = block_data;	// Starting position of input blocks
-		file_prev = 0xFFFFFFFF;
-
-		// Read all input blocks on memory
-		for (block_index = 0; block_index < block_count; block_index++){
-			// Read each input block from input files.
-			data_size = block_list[block_index].size;
-			part_size = data_size - split_offset;
-			if (part_size > split_size)
-				part_size = split_size;
-
-			if (block_list[block_index].state & 1){	// including full size data
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					if (slice_list[slice_index].size == block_size)
-						break;
-					slice_index = slice_list[slice_index].next;
-				}
-				if (slice_index == -1){	// When there is no valid slice.
-					printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-					if (fp != NULL)
-						fclose(fp);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Read a part of slice from a file.
-				file_index = slice_list[slice_index].file;
-				file_offset = slice_list[slice_index].offset + split_offset;
-				io_size = part_size;
-				if (par3_ctx->noise_level >= 3){
-					printf("Reading %zu bytes of slice[%"PRId64"] for input block[%"PRIu64"]\n", io_size, slice_index, block_index);
-				}
-				if ( (fp == NULL) || (file_index != file_prev) ){
-					if (fp != NULL){	// Close previous input file.
-						fclose(fp);
-						fp = NULL;
-					}
-					fp = fopen(file_list[file_index].name, "rb");
-					if (fp == NULL){
-						perror("Failed to open Input File");
-						return RET_FILE_IO_ERROR;
-					}
-					file_prev = file_index;
-				}
-				if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek Input File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fread(buf_p, 1, io_size, fp) != io_size){
-					perror("Failed to read slice on Input File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-
-			} else if (data_size > split_offset){	// tail data only (one tail or packed tails)
-				if (par3_ctx->noise_level >= 3){
-					printf("Reading %"PRIu64" bytes for input block[%"PRIu64"]\n", part_size, block_index);
-				}
-				tail_offset = split_offset;
-				while (tail_offset < split_offset + part_size){	// Read tails until data end.
-					slice_index = block_list[block_index].slice;
-					while (slice_index != -1){
-						//printf("block = %"PRIu64", size = %zu, offset = %zu, slice = %"PRId64"\n", block_index, data_size, tail_offset, slice_index);
-						// Even when chunk tails are overlaped, it will find tail slice of next position.
-						if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-								&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-							break;
-						}
-						slice_index = slice_list[slice_index].next;
-					}
-					if (slice_index == -1){	// When there is no valid slice.
-						printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-						if (fp != NULL)
-							fclose(fp);
-						return RET_LOGIC_ERROR;
-					}
-
-					// Read one slice from a file.
-					tail_gap = tail_offset - slice_list[slice_index].tail_offset;	// This tail slice may start before tail_offset.
-					file_index = slice_list[slice_index].file;
-					file_offset = slice_list[slice_index].offset + tail_gap;
-					io_size = slice_list[slice_index].size - tail_gap;
-					if (io_size > part_size)
-						io_size = part_size;
-					//printf("tail_gap for slice[%"PRId64"] = %zu, io_size = %zu\n", slice_index, tail_gap, io_size);
-					if ( (fp == NULL) || (file_index != file_prev) ){
-						if (fp != NULL){	// Close previous input file.
-							fclose(fp);
-							fp = NULL;
-						}
-						fp = fopen(file_list[file_index].name, "rb");
-						if (fp == NULL){
-							perror("Failed to open Input File");
-							return RET_FILE_IO_ERROR;
-						}
-						file_prev = file_index;
-					}
-					if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-						perror("Failed to seek Input File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					if (fread(buf_p + tail_offset - split_offset, 1, io_size, fp) != io_size){
-						perror("Failed to read tail slice on Input File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					tail_offset += io_size;
-				}
-
-			} else {	// Zero fill partial input block
-				memset(buf_p, 0, region_size);
-			}
-
-			// Calculate checksum of block to confirm that input file was not changed.
-			if (split_offset == 0){
-				crc = 0;
-			} else {
-				memcpy(&crc, block_list[block_index].hash, 8);	// Use previous CRC value
-			}
-			if (data_size > split_offset){	// When there is slice data to process.
-				memset(buf_p + part_size, 0, region_size - part_size);	// Zero fill rest bytes
-				crc = crc64(buf_p, part_size, crc);
-
-				// Calculate parity bytes in the region
-				if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-					if (gf_size == 2){
-						leo_region_create_parity(buf_p, region_size);
-					} else {
-						region_create_parity(buf_p, region_size);
-					}
-				} else {
-					if (gf_size == 2){
-						gf16_region_create_parity(galois_poly, buf_p, region_size);
-					} else if (gf_size == 1){
-						gf8_region_create_parity(galois_poly, buf_p, region_size);
-					} else {
-						region_create_parity(buf_p, region_size);
-					}
-				}
-			}
-			// Intermediate CRC value is stored in "block_list[block_index].hash".
-			if (block_list[block_index].state & 64){
-				if (split_offset + split_size >= block_size){	// At the last
-					if (crc != block_list[block_index].crc){
-						printf("Checksum of block[%"PRIu64"] is different.\n", block_index);
-						fclose(fp);
-						return RET_LOGIC_ERROR;
-					}
-				} else {
-					memcpy(block_list[block_index].hash, &crc, 8);	// Save this CRC value
-				}
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step++;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			buf_p += region_size;	// Goto next partial block
-		}
-		if (fp != NULL){
-			if (fclose(fp) != 0){
-				perror("Failed to close Input File");
-				return RET_FILE_IO_ERROR;
-			}
-			fp = NULL;
-		}
-
-		// Create all recovery blocks on memory
-		if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-			rs_create_all(par3_ctx, region_size, progress_total, progress_step);
-
-		} else if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-			ret = leo_encode(region_size, (uint32_t)block_count, (uint32_t)max_recovery_block, work_count, original_data, work_data);
-			if (ret != 0){
-				printf("Failed to call Leopard-RS library (%d)\n", ret);
-				return RET_LOGIC_ERROR;
-			}
-
-		}
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step += block_count * recovery_block_count;
-			time_old = time(NULL);
-		}
-
-		// Write all recovery blocks on recovery files
-		part_size = block_size - split_offset;
-		if (part_size > split_size)
-			part_size = split_size;
-		io_size = part_size;
-		buf_p = block_data + region_size * block_count;	// Starting position of recovery blocks
-		for (block_index = 0; block_index < recovery_block_count; block_index++){
-			// Check parity of recovery block to confirm that calculation was correct.
-			if (par3_ctx->ecc_method & 8){
-				if (gf_size == 2){
-					ret = leo_region_check_parity(buf_p, region_size);
-				} else {
-					ret = region_check_parity(buf_p, region_size);
-				}
-			} else {
-				if (gf_size == 2){
-					ret = gf16_region_check_parity(galois_poly, buf_p, region_size);
-				} else if (gf_size == 1){
-					ret = gf8_region_check_parity(galois_poly, buf_p, region_size);
-				} else {
-					ret = region_check_parity(buf_p, region_size);
-				}
-			}
-			if (ret != 0){
-				printf("Parity of recovery block[%"PRIu64"] is different.\n", block_index);
-				if (fp != NULL)
-					fclose(fp);
-				return RET_LOGIC_ERROR;
-			}
-
-			// Position of Recovery Data Packet in recovery file
-			file_name = position_list[block_index].name;
-			file_offset = position_list[block_index].offset + 88 + split_offset;
-
-			// Calculate CRC of packet data to check error later.
-			position_list[block_index].crc = crc64(buf_p, part_size, position_list[block_index].crc);
-
-			// Write partial recovery block
-			if ( (fp == NULL) || (file_name != name_prev) ){
-				if (fp != NULL){	// Close previous recovery file.
-					fclose(fp);
-					fp = NULL;
-				}
-				fp = fopen(file_name, "r+b");	// Over-write on existing file
-				if (fp == NULL){
-					perror("Failed to open Recovery File");
-					return RET_FILE_IO_ERROR;
-				}
-				name_prev = file_name;
-			}
-			if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-				perror("Failed to seek Recovery File");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			if (fwrite(buf_p, 1, part_size, fp) != part_size){
-				perror("Failed to write Recovery Block on Recovery File");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step++;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			buf_p += region_size;
-		}
-	}
-
-/*
-{	// for debug
-FILE *fp2;
-buf_p = block_data + region_size * block_count;	// Starting position of recovery blocks
-
-fp2 = fopen("test.bin", "wb");
-fwrite(buf_p, 1, region_size * recovery_block_count, fp2);
-fclose(fp2);
-}
-*/
-
-	free(block_data);
-	par3_ctx->block_data = NULL;
-
-	// Allocate memory to read one Recovery Data Packet.
-	alloc_size = 80 + block_size;
-	buf_p = malloc(alloc_size);
-	if (buf_p == NULL){
-		perror("Failed to allocate memory for Recovery Data Packet");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = buf_p;
-
-	// Calculate checksum of every Recovery Data Packet
-	io_size = 64 + block_size;	// packet header after checksum and packet body
-	for (block_index = 0; block_index < recovery_block_count; block_index++){
-		// Position of Recovery Data Packet in recovery file
-		file_name = position_list[block_index].name;
-		file_offset = position_list[block_index].offset + 8;	// Offset of checksum
-
-		// Read packet data and write checksum
-		if ( (fp == NULL) || (file_name != name_prev) ){
-			if (fp != NULL){	// Close previous recovery file.
-				fclose(fp);
-				fp = NULL;
-			}
-			fp = fopen(file_name, "r+b");	// Over-write on existing file
-			if (fp == NULL){
-				perror("Failed to open Recovery File");
-				return RET_FILE_IO_ERROR;
-			}
-			name_prev = file_name;
-		}
-		if (_fseeki64(fp, file_offset + 16, SEEK_SET) != 0){
-			perror("Failed to seek Recovery File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf_p + 16, 1, io_size, fp) != io_size){
-			perror("Failed to read Recovery Data Packet");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Compare CRC of written packet data to confirm integrity.
-		crc = crc64(buf_p + 16, io_size, 0);
-		if (crc != position_list[block_index].crc){
-			printf("Packet data of recovery block[%"PRIu64"] is different.\n", block_index);
-			fclose(fp);
-			return RET_LOGIC_ERROR;
-		}
-
-		// Calculate checksum of this packet
-		blake3(buf_p + 16, io_size, buf_p);
-
-		// Write checksum
-		if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-			perror("Failed to seek Recovery File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fwrite(buf_p, 1, 16, fp) != 16){
-			perror("Failed to write checksum of Recovery Data Packet");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-	if (fclose(fp) != 0){
-		perror("Failed to close Recovery File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	// Release some allocated memory
-	free(buf_p);
-	par3_ctx->work_buf = NULL;
-	free(position_list);
-	par3_ctx->position_list = NULL;
-	if (par3_ctx->matrix){
-		free(par3_ctx->matrix);
-		par3_ctx->matrix = NULL;
-	}
-
-	return 0;
-}
-
-// At this time, interleaving is adapted only for FFT based Reed-Solomon Codes.
-// When there are multiple cohorts, it calculates recovery blocks in each cohort.
-// This keeps one cohort's all input blocks and recovery blocks partially by spliting every block.
-// GF tables and recovery blocks were allocated already.
-int create_recovery_block_cohort(PAR3_CTX *par3_ctx)
-{
-	char *name_prev, *file_name;
-	uint8_t *block_data, *buf_p;
-	uint8_t gf_size;
-	int ret, galois_poly;
-	int progress_old, progress_now;
-	uint32_t split_count;
-	uint32_t file_index, file_prev;
-	uint32_t cohort_count, cohort_index;
-	size_t io_size;
-	int64_t slice_index, file_offset;
-	uint64_t crc, block_index;
-	uint64_t block_size, block_count, recovery_block_count;
-	uint64_t block_count2, recovery_block_count2, first_recovery_block2, max_recovery_block2;
-	uint64_t alloc_size, region_size, split_size;
-	uint64_t data_size, part_size, split_offset;
-	uint64_t tail_offset, tail_gap;
-	uint64_t progress_total, progress_step;
-	PAR3_FILE_CTX *file_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_POS_CTX *position_list;
-	FILE *fp;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// For Leopard-RS library
-	uint32_t work_count;
-	uint8_t **original_data = NULL, **work_data = NULL;
-
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	recovery_block_count = par3_ctx->recovery_block_count;
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	file_list = par3_ctx->input_file_list;
-	slice_list = par3_ctx->slice_list;
-	block_list = par3_ctx->block_list;
-	position_list = par3_ctx->position_list;
-
-	if (recovery_block_count == 0)
-		return RET_LOGIC_ERROR;
-
-	// Set count for each cohort
-	cohort_count = (uint32_t)(par3_ctx->interleave) + 1;	// Minimum value is 2.
-	block_count2 = (block_count + cohort_count - 1) / cohort_count;	// round up
-	recovery_block_count2 = recovery_block_count / cohort_count;
-	first_recovery_block2 = par3_ctx->first_recovery_block / cohort_count;
-	max_recovery_block2 = par3_ctx->max_recovery_block / cohort_count;
-	// max_recovery_block2 is equal or larger than (first_recovery_block2 + recovery_block_count2).
-	//printf("cohort_count = %u, block_count2 = %"PRIu64"\n", cohort_count, block_count2);
-	//printf("recovery_block_count2 = %"PRIu64", first_recovery_block2 = %"PRIu64"\n", recovery_block_count2, first_recovery_block2);
-	//printf("max_recovery_block2 = %"PRIu64"\n", max_recovery_block2);
-
-	// Set required memory size at first
-	ret = leo_init();	// Initialize Leopard-RS library.
-	if (ret != 0){
-		printf("Failed to initialize Leopard-RS library (%d)\n", ret);
-		return RET_LOGIC_ERROR;
-	}
-	work_count = leo_encode_work_count((uint32_t)block_count2, (uint32_t)max_recovery_block2);
-	//printf("Leopard-RS: work_count = %u\n", work_count);
-	// Leopard-RS requires multiple of 64 bytes for SIMD.
-	region_size = (block_size + 4 + 63) & ~63;
-	alloc_size = region_size * (block_count2 + work_count);
-
-	// for test split
-	//par3_ctx->memory_limit = (alloc_size + 1) / 2;
-	//par3_ctx->memory_limit = (alloc_size + 2) / 3;
-
-	// Limited memory usage
-	if ( (par3_ctx->memory_limit > 0) && (alloc_size > par3_ctx->memory_limit) ){
-		split_count = (uint32_t)((alloc_size + par3_ctx->memory_limit - 1) / par3_ctx->memory_limit);
-		split_size = (block_size + split_count - 1) / split_count;	// This is splitted block size to fit in limited memory.
-		if (gf_size == 2){
-			// aligned to 2 bytes for 16-bit Galois Field
-			split_size = (split_size + 1) & ~1;
-		}
-		if (split_size > block_size)
-			split_size = block_size;
-		split_count = (uint32_t)((block_size + split_size - 1) / split_size);
-		if (par3_ctx->noise_level >= 1){
-			printf("\nSplit block to %u pieces of %"PRIu64" bytes.\n", split_count, split_size);
-		}
-	} else {
-		split_count = 1;
-		split_size = block_size;
-	}
-
-	// Allocate memory to keep all splitted blocks.
-	// Leopard-RS requires alignment of 64 bytes.
-	region_size = (split_size + 4 + 63) & ~63;
-	alloc_size = region_size * (block_count2 + work_count);	// work_count is larger than recovery_block_count.
-	// Though Leopard-RS doesn't require memory alignment for SIMD, align to 32 bytes may be faster.
-	if (par3_ctx->noise_level >= 2){
-		printf("\nAligned size of block data = %"PRIu64"\n", region_size);
-		printf("Allocated memory size = %"PRIu64" * (%"PRIu64" + %u) = %"PRIu64"\n", region_size, block_count2, work_count, alloc_size);
-	}
-	block_data = malloc(alloc_size);
-	if (block_data == NULL){
-		perror("Failed to allocate memory for block data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->block_data = block_data;
-
-	// List of pointer
-	original_data = malloc(sizeof(block_data) * (block_count2 + work_count));
-	if (original_data == NULL){
-		perror("Failed to allocate memory for Leopard-RS");
-		return RET_MEMORY_ERROR;
-	}
-	buf_p = block_data;
-	for (block_index = 0; block_index < block_count2; block_index++){
-		original_data[block_index] = buf_p;
-		buf_p += region_size;
-	}
-	work_data = original_data + block_count2;
-	// Change order of recovery data to skip until first_recovery_block.
-	for (block_index = first_recovery_block2; block_index < first_recovery_block2 + recovery_block_count2; block_index++){
-		work_data[block_index] = buf_p;
-		buf_p += region_size;
-	}
-	for (block_index = 0; block_index < first_recovery_block2; block_index++){
-		work_data[block_index] = buf_p;
-		buf_p += region_size;
-	}
-	for (block_index = first_recovery_block2 + recovery_block_count2; block_index < work_count; block_index++){
-		work_data[block_index] = buf_p;
-		buf_p += region_size;
-	}
-	par3_ctx->matrix = original_data;	// Release this later
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing recovery blocks:\n");
-		progress_total = (block_count2 * recovery_block_count + block_count + recovery_block_count) * split_count;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	name_prev = NULL;
-	fp = NULL;
-	// Process each cohort
-	for (cohort_index = 0; cohort_index < cohort_count; cohort_index++){
-		if ( (cohort_count < 10) && (par3_ctx->noise_level >= 1) ){
-			split_count = 0;
-			split_offset = block_count % cohort_count;
-			if ( (split_offset > 0) && (cohort_index >= split_offset) )
-				split_count++;
-			printf("cohort[%u] : dummy = %u, recovery = %"PRIu64"\n", cohort_index, split_count, recovery_block_count2);
-		}
-		for (split_offset = 0; split_offset < block_size; split_offset += split_size){
-			//printf("cohort_index = %u, split_offset = %"PRIu64"\n", cohort_index, split_offset);
-			buf_p = block_data;	// Starting position of input blocks
-			file_prev = 0xFFFFFFFF;
-
-			// Read all input blocks belong to the cohort on memory
-			for (block_index = cohort_index; block_index < block_count; block_index += cohort_count){
-				// Read each input block from input files.
-				data_size = block_list[block_index].size;
-				part_size = data_size - split_offset;
-				if (part_size > split_size)
-					part_size = split_size;
-
-				if (block_list[block_index].state & 1){	// including full size data
-					slice_index = block_list[block_index].slice;
-					while (slice_index != -1){
-						if (slice_list[slice_index].size == block_size)
-							break;
-						slice_index = slice_list[slice_index].next;
-					}
-					if (slice_index == -1){	// When there is no valid slice.
-						printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-						if (fp != NULL)
-							fclose(fp);
-						return RET_LOGIC_ERROR;
-					}
-
-					// Read a part of slice from a file.
-					file_index = slice_list[slice_index].file;
-					file_offset = slice_list[slice_index].offset + split_offset;
-					io_size = part_size;
-					if (par3_ctx->noise_level >= 3){
-						printf("Reading %zu bytes of slice[%"PRId64"] for input block[%"PRIu64"]\n", io_size, slice_index, block_index);
-					}
-					if ( (fp == NULL) || (file_index != file_prev) ){
-						if (fp != NULL){	// Close previous input file.
-							fclose(fp);
-							fp = NULL;
-						}
-						fp = fopen(file_list[file_index].name, "rb");
-						if (fp == NULL){
-							perror("Failed to open Input File");
-							return RET_FILE_IO_ERROR;
-						}
-						file_prev = file_index;
-					}
-					if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-						perror("Failed to seek Input File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					if (fread(buf_p, 1, io_size, fp) != io_size){
-						perror("Failed to read slice on Input File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-
-				} else if (data_size > split_offset){	// tail data only (one tail or packed tails)
-					if (par3_ctx->noise_level >= 3){
-						printf("Reading %"PRIu64" bytes for input block[%"PRIu64"]\n", part_size, block_index);
-					}
-					tail_offset = split_offset;
-					while (tail_offset < split_offset + part_size){	// Read tails until data end.
-						slice_index = block_list[block_index].slice;
-						while (slice_index != -1){
-							//printf("block = %"PRIu64", size = %zu, offset = %zu, slice = %"PRId64"\n", block_index, data_size, tail_offset, slice_index);
-							// Even when chunk tails are overlaped, it will find tail slice of next position.
-							if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-									&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-								break;
-							}
-							slice_index = slice_list[slice_index].next;
-						}
-						if (slice_index == -1){	// When there is no valid slice.
-							printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-							if (fp != NULL)
-								fclose(fp);
-							return RET_LOGIC_ERROR;
-						}
-
-						// Read one slice from a file.
-						tail_gap = tail_offset - slice_list[slice_index].tail_offset;	// This tail slice may start before tail_offset.
-						file_index = slice_list[slice_index].file;
-						file_offset = slice_list[slice_index].offset + tail_gap;
-						io_size = slice_list[slice_index].size - tail_gap;
-						if (io_size > part_size)
-							io_size = part_size;
-						//printf("tail_gap for slice[%"PRId64"] = %zu, io_size = %zu\n", slice_index, tail_gap, io_size);
-						if ( (fp == NULL) || (file_index != file_prev) ){
-							if (fp != NULL){	// Close previous input file.
-								fclose(fp);
-								fp = NULL;
-							}
-							fp = fopen(file_list[file_index].name, "rb");
-							if (fp == NULL){
-								perror("Failed to open Input File");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek Input File");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fread(buf_p + tail_offset - split_offset, 1, io_size, fp) != io_size){
-							perror("Failed to read tail slice on Input File");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-						tail_offset += io_size;
-					}
-
-				} else {	// Zero fill partial input block
-					memset(buf_p, 0, region_size);
-				}
-
-				// Calculate checksum of block to confirm that input file was not changed.
-				if (split_offset == 0){
-					crc = 0;
-				} else {
-					memcpy(&crc, block_list[block_index].hash, 8);	// Use previous CRC value
-				}
-				if (data_size > split_offset){	// When there is slice data to process.
-					memset(buf_p + part_size, 0, region_size - part_size);	// Zero fill rest bytes
-					crc = crc64(buf_p, part_size, crc);
-
-					// Calculate parity bytes in the region
-					if (gf_size == 2){
-						leo_region_create_parity(buf_p, region_size);
-					} else {
-						region_create_parity(buf_p, region_size);
-					}
-				}
-				if (block_list[block_index].state & 64){
-					if (split_offset + split_size >= block_size){	// At the last
-						if (crc != block_list[block_index].crc){
-							printf("Checksum of block[%"PRIu64"] is different.\n", block_index);
-							fclose(fp);
-							return RET_LOGIC_ERROR;
-						}
-					} else {
-						memcpy(block_list[block_index].hash, &crc, 8);	// Save this CRC value
-					}
-				}
-
-				// Print progress percent
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-					time_now = time(NULL);
-					if (time_now != time_old){
-						time_old = time_now;
-						progress_now = (int)((progress_step * 1000) / progress_total);
-						if (progress_now != progress_old){
-							progress_old = progress_now;
-							printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-						}
-					}
-				}
-
-				buf_p += region_size;	// Goto next partial block
-			}
-			if (fp != NULL){
-				if (fclose(fp) != 0){
-					perror("Failed to close Input File");
-					return RET_FILE_IO_ERROR;
-				}
-				fp = NULL;
-			}
-
-			// When the last input block doesn't exist in this cohort, zero fill it.
-			if (block_index < block_count2 * cohort_count){
-				//printf("zero fill %"PRIu64", block_count2 * cohort_count = %"PRIu64"\n", block_index, block_count2 * cohort_count);
-				memset(buf_p, 0, region_size);
-			}
-
-			// Create all recovery blocks on memory
-			ret = leo_encode(region_size, (uint32_t)block_count2, (uint32_t)max_recovery_block2, work_count, original_data, work_data);
-			if (ret != 0){
-				printf("Failed to call Leopard-RS library (%d)\n", ret);
-				return RET_LOGIC_ERROR;
-			}
-
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += block_count2 * recovery_block_count2;
-				time_old = time(NULL);
-			}
-
-			// Write all recovery blocks on recovery files
-			part_size = block_size - split_offset;
-			if (part_size > split_size)
-				part_size = split_size;
-			io_size = part_size;
-			buf_p = block_data + region_size * block_count2;	// Starting position of recovery blocks
-			for (block_index = cohort_index; block_index < recovery_block_count; block_index += cohort_count){
-				// Check parity of recovery block to confirm that calculation was correct.
-				if (gf_size == 2){
-					ret = leo_region_check_parity(buf_p, region_size);
-				} else {
-					ret = region_check_parity(buf_p, region_size);
-				}
-				if (ret != 0){
-					printf("Parity of recovery block[%"PRIu64"] is different.\n", block_index);
-					if (fp != NULL)
-						fclose(fp);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Position of Recovery Data Packet in recovery file
-				file_name = position_list[block_index].name;
-				file_offset = position_list[block_index].offset + 88 + split_offset;
-
-				// Calculate CRC of packet data to check error later.
-				position_list[block_index].crc = crc64(buf_p, part_size, position_list[block_index].crc);
-
-				// Write partial recovery block
-				if ( (fp == NULL) || (file_name != name_prev) ){
-					if (fp != NULL){	// Close previous recovery file.
-						fclose(fp);
-						fp = NULL;
-					}
-					fp = fopen(file_name, "r+b");	// Over-write on existing file
-					if (fp == NULL){
-						perror("Failed to open Recovery File");
-						return RET_FILE_IO_ERROR;
-					}
-					name_prev = file_name;
-				}
-				if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fwrite(buf_p, 1, part_size, fp) != part_size){
-					perror("Failed to write Recovery Block on Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-
-				// Print progress percent
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-					time_now = time(NULL);
-					if (time_now != time_old){
-						time_old = time_now;
-						progress_now = (int)((progress_step * 1000) / progress_total);
-						if (progress_now != progress_old){
-							progress_old = progress_now;
-							printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-						}
-					}
-				}
-
-				buf_p += region_size;
-			}
-		}
-	}
-/*
-{	// for debug
-FILE *fp2;
-buf_p = block_data + region_size * block_count2;	// Starting position of recovery blocks
-
-fp2 = fopen("test.bin", "wb");
-fwrite(buf_p, 1, region_size * recovery_block_count2, fp2);
-fclose(fp2);
-}
-*/
-
-	free(block_data);
-	par3_ctx->block_data = NULL;
-
-	// Allocate memory to read one Recovery Data Packet.
-	alloc_size = 80 + block_size;
-	buf_p = malloc(alloc_size);
-	if (buf_p == NULL){
-		perror("Failed to allocate memory for Recovery Data Packet");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = buf_p;
-
-	// Calculate checksum of every Recovery Data Packet
-	io_size = 64 + block_size;	// packet header after checksum and packet body
-	for (block_index = 0; block_index < recovery_block_count; block_index++){
-		// Position of Recovery Data Packet in recovery file
-		file_name = position_list[block_index].name;
-		file_offset = position_list[block_index].offset + 8;	// Offset of checksum
-
-		// Read packet data and write checksum
-		if ( (fp == NULL) || (file_name != name_prev) ){
-			if (fp != NULL){	// Close previous recovery file.
-				fclose(fp);
-				fp = NULL;
-			}
-			fp = fopen(file_name, "r+b");	// Over-write on existing file
-			if (fp == NULL){
-				perror("Failed to open Recovery File");
-				return RET_FILE_IO_ERROR;
-			}
-			name_prev = file_name;
-		}
-		if (_fseeki64(fp, file_offset + 16, SEEK_SET) != 0){
-			perror("Failed to seek Recovery File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf_p + 16, 1, io_size, fp) != io_size){
-			perror("Failed to read Recovery Data Packet");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Compare CRC of written packet data to confirm integrity.
-		crc = crc64(buf_p + 16, io_size, 0);
-		if (crc != position_list[block_index].crc){
-			printf("Packet data of recovery block[%"PRIu64"] is different.\n", block_index);
-			fclose(fp);
-			return RET_LOGIC_ERROR;
-		}
-
-		// Calculate checksum of this packet
-		blake3(buf_p + 16, io_size, buf_p);
-
-		// Write checksum
-		if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-			perror("Failed to seek Recovery File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fwrite(buf_p, 1, 16, fp) != 16){
-			perror("Failed to write checksum of Recovery Data Packet");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-	if (fclose(fp) != 0){
-		perror("Failed to close Recovery File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	// Release some allocated memory
-	free(buf_p);
-	par3_ctx->work_buf = NULL;
-	free(position_list);
-	par3_ctx->position_list = NULL;
-	if (par3_ctx->matrix){
-		free(par3_ctx->matrix);
-		par3_ctx->matrix = NULL;
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/block_map.c b/windows/src/block_map.c
deleted file mode 100644
index 0042621..0000000
--- a/windows/src/block_map.c
+++ /dev/null
@@ -1,501 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <limits.h>
-
-#elif _WIN32
-#endif
-
-#include "libpar3.h"
-#include "common.h"
-
-
-// Count how many number of input file slices, and allocate memory for them.
-int count_slice_info(PAR3_CTX *par3_ctx)
-{
-	uint32_t chunk_count;
-	uint64_t block_size, chunk_size, tail_size;
-	uint64_t block_count, slice_count;
-	PAR3_CHUNK_CTX *chunk_p;
-	PAR3_BLOCK_CTX *block_p;
-	PAR3_SLICE_CTX *slice_p;
-
-	// Copy variables from context to local.
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	chunk_count = par3_ctx->chunk_count;
-	chunk_p = par3_ctx->chunk_list;
-	if ( (chunk_count == 0) || (block_size == 0) || (block_count == 0) )
-		return RET_LOGIC_ERROR;
-
-	slice_count = 0;
-	while (chunk_count > 0){
-		chunk_size = chunk_p->size;
-		if (chunk_size != 0){
-			if (chunk_size >= block_size){
-				slice_count += chunk_size / block_size;
-				tail_size = chunk_size % block_size;
-			} else {
-				tail_size = chunk_size;
-			}
-			if (tail_size >= 40)
-				slice_count++;
-		}
-
-		chunk_p++;
-		chunk_count--;
-	}
-
-	par3_ctx->slice_count = slice_count;
-	if (par3_ctx->noise_level >= 2){
-		printf("Number of input file slice = %"PRIu64"\n", slice_count);
-	}
-
-	// Allocate memory for block and slice info.
-	if (par3_ctx->slice_list != NULL)
-		free(par3_ctx->slice_list);
-	par3_ctx->slice_list = malloc(sizeof(PAR3_SLICE_CTX) * slice_count);
-	if (par3_ctx->slice_list == NULL){
-		perror("Failed to allocate memory for input file slices");
-		return RET_MEMORY_ERROR;
-	}
-	if (par3_ctx->block_list != NULL)
-		free(par3_ctx->block_list);
-	par3_ctx->block_list = malloc(sizeof(PAR3_BLOCK_CTX) * block_count);
-	if (par3_ctx->block_list == NULL){
-		perror("Failed to allocate memory for input blocks");
-		return RET_MEMORY_ERROR;
-	}
-
-	// Initialize slice info.
-	slice_p = par3_ctx->slice_list;
-	while (slice_count > 0){
-		slice_p->next = -1;
-		slice_p->find_name = NULL;
-		slice_p->find_offset = 0;
-
-		slice_p++;
-		slice_count--;
-	}
-
-	// Initialize block info.
-	block_p = par3_ctx->block_list;
-	while (block_count > 0){
-		block_p->slice = -1;
-		block_p->size = 0;
-		block_p->crc = 0;
-		memset(block_p->hash, 0, 16);
-		block_p->state = 0;
-
-		block_p++;
-		block_count--;
-	}
-
-	return 0;
-}
-
-int set_slice_info(PAR3_CTX *par3_ctx)
-{
-	uint32_t num, num_pack, input_file_count;
-	uint32_t chunk_count, chunk_index, chunk_num;
-	int64_t index;
-	uint64_t block_size, chunk_size;
-	uint64_t block_count, file_offset, tail_offset;
-	uint64_t slice_count, slice_index, block_index;
-	uint64_t num_dedup;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p;
-	PAR3_SLICE_CTX *slice_p, *slice_list;
-	PAR3_BLOCK_CTX *block_list;
-
-	// Copy variables from context to local.
-	input_file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	chunk_count = par3_ctx->chunk_count;
-	slice_count = par3_ctx->slice_count;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	slice_p = slice_list;
-
-	num_dedup = 0;
-	num_pack = 0;
-	slice_index = 0;
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < input_file_count; num++){
-		if (file_p->size == 0){	// Skip empty files.
-			file_p++;
-			continue;
-		}
-
-		file_offset = 0;
-		chunk_index = file_p->chunk;	// index of the first chunk
-		chunk_num = file_p->chunk_num;	// number of chunk descriptions
-		file_p->slice = slice_index;	// index of the first slice
-		if (par3_ctx->noise_level >= 3){
-			printf("chunk = %u + %u, slice = %"PRIu64", file size = %"PRIu64" \"%s\"\n",
-					chunk_index, chunk_num, slice_index, file_p->size, file_p->name);
-		}
-
-		while (chunk_num > 0){	// check all chunk descriptions
-			if (chunk_index >= chunk_count){
-				printf("There are too many chunk descriptions. %u\n", chunk_index);
-				return RET_LOGIC_ERROR;
-			}
-			chunk_p = par3_ctx->chunk_list + chunk_index;
-			chunk_size = chunk_p->size;
-			if (chunk_size == 0){	// Unprotected Chunk Description
-				file_offset += chunk_p->block;
-				if (par3_ctx->noise_level >= 3){
-					printf("unprotected chunk size = %"PRIu64"\n", chunk_p->block);
-				}
-
-			} else {	// Protected Chunk Description
-				block_index = chunk_p->block;	// index of first input block holding chunk
-				if (par3_ctx->noise_level >= 3){
-					printf("chunk size = %"PRIu64", first block = %"PRIu64"\n", chunk_size, block_index);
-				}
-
-				while (chunk_size >= block_size){
-					if (slice_index >= slice_count){
-						printf("There are too many input file slices. %"PRIu64"\n", slice_index);
-						return RET_LOGIC_ERROR;
-					}
-					if (block_index >= block_count){
-						printf("There are too many input blocks. %"PRIu64"\n", block_index);
-						return RET_LOGIC_ERROR;
-					}
-					index = block_list[block_index].slice;
-					if (index != -1){
-						// If slice info was set elready, it's a same block.
-						while (slice_list[index].next != -1){
-							index = slice_list[index].next;
-						}
-						slice_list[index].next = slice_index;
-						num_dedup++;
-						if (par3_ctx->noise_level >= 3){
-							printf("old block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-									block_index, slice_index, chunk_index, num, file_offset);
-						}
-
-					} else {
-						block_list[block_index].slice = slice_index;
-						block_list[block_index].size = block_size;
-						block_list[block_index].state |= 1;
-						if (par3_ctx->noise_level >= 3){
-							printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-									block_index, slice_index, chunk_index, num, file_offset);
-						}
-					}
-
-					// set slice info
-					slice_p->chunk = chunk_index;
-					slice_p->file = num;
-					slice_p->offset = file_offset;
-					slice_p->size = block_size;
-					slice_p->block = block_index;
-					slice_p->tail_offset = 0;
-					slice_p++;
-					slice_index++;
-
-					block_index++;
-					file_offset += block_size;
-					chunk_size -= block_size;
-				}
-				if (chunk_size >= 40){	// Chunk tail size is equal or larger than 40 bytes.
-					if (slice_index >= slice_count){
-						printf("There are too many input file slices. %"PRIu64"\n", slice_index);
-						return RET_LOGIC_ERROR;
-					}
-					block_index = chunk_p->tail_block;	// index of block holding tail
-					if (block_index >= block_count){
-						printf("There are too many input blocks. %"PRIu64"\n", block_index);
-						return RET_LOGIC_ERROR;
-					}
-					tail_offset = chunk_p->tail_offset;
-					if (tail_offset + chunk_size > block_size){
-						printf("Chunk tail exceeds block size. %"PRIu64" + %"PRIu64"\n", tail_offset, chunk_size);
-						return RET_LOGIC_ERROR;
-					}
-					//printf("tail size = %"PRIu64", belong block = %"PRIu64", offset = %"PRIu64"\n", chunk_size, block_index, tail_offset);
-
-					index = block_list[block_index].slice;
-					if (index != -1){
-						// Search slice info to find same tail.
-						do {
-							//printf("slice[%2"PRIu64"].size = %"PRIu64", tail_offset = %"PRIu64"\n", index, slice_list[index].size, slice_list[index].tail_offset);
-							if ( (slice_list[index].size == chunk_size) && (slice_list[index].tail_offset == tail_offset) ){
-								break;
-							}
-							index = slice_list[index].next;
-						} while (index != -1);
-
-						if (index != -1){
-							num_dedup++;
-							if (par3_ctx->noise_level >= 3){
-								printf("o t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRIu64"\n",
-										block_index, slice_index, chunk_index, num, file_offset, chunk_size, tail_offset);
-							}
-						} else {
-							num_pack++;
-							if (block_list[block_index].size < tail_offset + chunk_size)
-								block_list[block_index].size = tail_offset + chunk_size;
-							if (par3_ctx->noise_level >= 3){
-								printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRIu64"\n",
-										block_index, slice_index, chunk_index, num, file_offset, chunk_size, tail_offset);
-							}
-						}
-
-						// If slice info was set elready, it may be a chunk tail in the same block.
-						index = block_list[block_index].slice;
-						while (slice_list[index].next != -1){
-							index = slice_list[index].next;
-						}
-						slice_list[index].next = slice_index;
-						//printf("slice[%2"PRIu64"].next = %"PRIu64"\n", index, slice_index);
-
-					} else {
-						block_list[block_index].slice = slice_index;
-						block_list[block_index].size = tail_offset + chunk_size;
-						block_list[block_index].state |= 2;
-						if (par3_ctx->noise_level >= 3){
-							printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRIu64"\n",
-									block_index, slice_index, chunk_index, num, file_offset, chunk_size, tail_offset);
-						}
-					}
-
-					// set slice info
-					slice_p->chunk = chunk_index;
-					slice_p->file = num;
-					slice_p->offset = file_offset;
-					slice_p->size = chunk_size;
-					slice_p->block = block_index;
-					slice_p->tail_offset = tail_offset;
-					slice_p++;
-					slice_index++;
-
-				} else if (chunk_size > 0){	// Chunk tail size = 1~39 bytes.
-					if (par3_ctx->noise_level >= 3){
-						printf("    block no  : slice no  chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-								chunk_index, num, file_offset, chunk_size);
-					}
-
-				}
-				file_offset += chunk_size;	// tail size
-			}
-
-			chunk_index++;	// goto next chunk
-			chunk_num--;
-		}
-
-		file_p++;
-	}
-
-	// Check every block has own slice.
-	for (block_index = 0; block_index < block_count; block_index++){
-		if (block_list[block_index].slice == -1){
-			printf("There is no slice for input block[%"PRIu64"].\n", block_index);
-			return RET_INSUFFICIENT_DATA;
-		}
-	}
-
-	// Check actual number of slices.
-	if (slice_index != slice_count){
-		printf("Number of input file slice = %"PRIu64" (max %"PRIu64")\n", slice_index, slice_count);
-		return RET_LOGIC_ERROR;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Tail packing = %u, Deduplication = %"PRIu64"\n", num_pack, num_dedup);
-	}
-
-	return 0;
-}
-
-// Calculate creating amount of recovery blocks from given redundancy
-int calculate_recovery_count(PAR3_CTX *par3_ctx)
-{
-	uint64_t total_count;
-
-	if (par3_ctx->block_count == 0){
-		par3_ctx->ecc_method = 0;
-		par3_ctx->redundancy_size = 0;
-		par3_ctx->recovery_block_count = 0;
-		par3_ctx->max_recovery_block = 0;
-		par3_ctx->interleave = 0;
-		return 0;	// There is no input block.
-	} else if (par3_ctx->ecc_method == 0){
-		// When using algothim was not specified.
-		par3_ctx->ecc_method = 1;	// At this time, select Cauchy Reed-Solomon Codes by default.
-	}
-
-	if ( (par3_ctx->recovery_block_count == 0) && (par3_ctx->redundancy_size == 0) )
-		return 0;	// Not specified
-
-	// When number of recovery blocks was not specified, set by redundancy.
-	if ( (par3_ctx->recovery_block_count == 0) && (par3_ctx->redundancy_size > 0) ){
-		// If redundancy_size is in range (0 ~ 250), it's a percent rate value.
-		if (par3_ctx->redundancy_size <= 250){
-			// When there is remainder at division, round up the quotient.
-			par3_ctx->recovery_block_count = (par3_ctx->block_count * par3_ctx->redundancy_size + 99) / 100;
-		}
-	}
-	if ( (par3_ctx->max_recovery_block == 0) && (par3_ctx->max_redundancy_size > 0) ){
-		if (par3_ctx->max_redundancy_size <= 250){
-			par3_ctx->max_recovery_block = (par3_ctx->block_count * par3_ctx->max_redundancy_size + 99) / 100;
-		}
-	}
-
-	// Test number of blocks
-	if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-		if (par3_ctx->noise_level >= 0){
-			printf("Cauchy Reed-Solomon Codes\n");
-		}
-
-		// When max recovery block count is set, it must be equal or larger than creating recovery blocks.
-		if ((par3_ctx->max_recovery_block > 0) && (par3_ctx->max_recovery_block < par3_ctx->recovery_block_count))
-			par3_ctx->max_recovery_block = par3_ctx->recovery_block_count;
-
-		// Check total number of blocks
-		total_count = par3_ctx->block_count + par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		if (total_count < par3_ctx->block_count + par3_ctx->max_recovery_block)
-			total_count = par3_ctx->block_count + par3_ctx->max_recovery_block;
-		if (total_count > 65536){
-			printf("Total block count %"PRIu64" are too many.\n", total_count);
-			return RET_LOGIC_ERROR;
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			printf("Recovery block count = %"PRIu64"\n", par3_ctx->recovery_block_count);
-			if (par3_ctx->max_recovery_block > 0){
-				printf("Max recovery block count = %"PRIu64"\n", par3_ctx->max_recovery_block);
-			}
-			printf("\n");
-		}
-
-	} else if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		uint64_t cohort_count, i;
-
-		if (par3_ctx->noise_level >= 0){
-			printf("FFT based Reed-Solomon Codes\n");
-		}
-
-		// Caution ! Too many interleaving is bad for recovery.
-		// Number of cohort must be equal or less than number of input blocks.
-		cohort_count = par3_ctx->interleave + 1; // Minimum value is 1.
-		if (cohort_count > par3_ctx->block_count){
-			cohort_count = par3_ctx->block_count;
-			printf("Number of cohort is decreased to %"PRIu64".\n", cohort_count);
-		}
-
-		// When there are too many block, it uses interleaving automatically.
-		if (cohort_count == 1){
-			// Check total number of blocks
-			total_count = par3_ctx->block_count + par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-			if (total_count < par3_ctx->block_count + par3_ctx->max_recovery_block)
-				total_count = par3_ctx->block_count + par3_ctx->max_recovery_block;
-			if (total_count > 65536){
-				cohort_count = (total_count + 65536 - 1) / 65536;
-				printf("Number of cohort is increased to %"PRIu64".\n", cohort_count);
-			}
-			total_count = par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-			if (total_count < par3_ctx->max_recovery_block)
-				total_count = par3_ctx->max_recovery_block;
-			if (total_count > 32768 * cohort_count){
-				cohort_count = (total_count + 32768 - 1) / 32768;
-				printf("Number of cohort is increased to %"PRIu64".\n", cohort_count);
-			}
-		}
-
-		if (cohort_count > UINT_MAX){
-			printf("There are too many cohorts %"PRIu64".\n", cohort_count);
-			return RET_LOGIC_ERROR;
-		}
-		if (cohort_count > 1){
-			par3_ctx->interleave = (uint32_t)(cohort_count - 1);
-			if (par3_ctx->noise_level >= 0){
-				printf("Number of cohort = %"PRIu64" (Interleaving time = %u)\n", cohort_count, par3_ctx->interleave);
-				i = (par3_ctx->block_count + cohort_count - 1) / cohort_count;	// round up
-				printf("Input block count = %"PRIu64" (%"PRIu64" per cohort)\n", par3_ctx->block_count, i);
-			}
-		}
-
-		// Number of recovery block will be multiple of number of cohorts.
-		i = par3_ctx->recovery_block_count % cohort_count;
-		if (i > 0){
-			if (par3_ctx->noise_level >= 1){
-				printf("Recovery block count is increased from %"PRIu64" to %"PRIu64"\n", par3_ctx->recovery_block_count, par3_ctx->recovery_block_count + cohort_count - i);
-			}
-			par3_ctx->recovery_block_count += cohort_count - i;	// add to the remainder
-		}
-		if (par3_ctx->max_recovery_block > 0){
-			// When max recovery block count is set, it must be equal or larger than creating recovery blocks.
-			if (par3_ctx->max_recovery_block < par3_ctx->recovery_block_count)
-				par3_ctx->max_recovery_block = par3_ctx->recovery_block_count;
-			i = par3_ctx->max_recovery_block % cohort_count;
-			if (i > 0)
-				par3_ctx->max_recovery_block += cohort_count - i;	// add to the remainder
-		}
-		// First recovery block will be lower.
-		i = par3_ctx->first_recovery_block % cohort_count;
-		if (i > 0){
-			if (par3_ctx->noise_level >= 1){
-				printf("First recovery block is decreased from %"PRIu64" to %"PRIu64"\n", par3_ctx->first_recovery_block, par3_ctx->first_recovery_block - i);
-			}
-			par3_ctx->first_recovery_block -= i;	// erase the remainder
-		}
-
-		// Check total number of blocks
-		total_count = par3_ctx->block_count + par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		if (total_count < par3_ctx->block_count + par3_ctx->max_recovery_block)
-			total_count = par3_ctx->block_count + par3_ctx->max_recovery_block;
-		if (total_count > 65536 * cohort_count){
-			if (cohort_count == 1){
-				printf("Total block count %"PRIu64" are too many.\n", total_count);
-			} else {
-				i = (total_count + cohort_count - 1) / cohort_count;	// round up
-				printf("Total block count %"PRIu64" (%"PRIu64" per cohort) are too many.\n", total_count, i);
-			}
-			return RET_LOGIC_ERROR;
-		}
-		// Leopard-RS library has a restriction; recovery_count <= 32768
-		// Though it's possible to solve this problem, I don't try at this time.
-		total_count = par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		if (total_count < par3_ctx->max_recovery_block)
-			total_count = par3_ctx->max_recovery_block;
-		if (total_count > 32768 * cohort_count){
-			if (cohort_count == 1){
-				printf("Recovery block count %"PRIu64" are too many.\n", total_count);
-			} else {
-				printf("Recovery block count %"PRIu64" (%"PRIu64" per cohort) are too many.\n", total_count, total_count / cohort_count);
-			}
-			return RET_LOGIC_ERROR;
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			if (cohort_count == 1){
-				printf("Recovery block count = %"PRIu64"\n", par3_ctx->recovery_block_count);
-			} else {
-				printf("Recovery block count = %"PRIu64" (%"PRIu64" per cohort)\n", par3_ctx->recovery_block_count, par3_ctx->recovery_block_count / cohort_count);
-			}
-			if (par3_ctx->max_recovery_block > 0){
-				if (cohort_count == 1){
-					printf("Max recovery block count = %"PRIu64"\n", par3_ctx->max_recovery_block);
-				} else {
-					printf("Max recovery block count = %"PRIu64" (%"PRIu64" per cohort)\n", par3_ctx->max_recovery_block, par3_ctx->max_recovery_block / cohort_count);
-				}
-			}
-			printf("\n");
-		}
-
-	} else {
-		printf("The specified Error Correction Codes (%u) isn't implemented yet.\n", par3_ctx->ecc_method);
-		return RET_LOGIC_ERROR;
-	}
-
-	return 0;
-}
diff --git a/windows/src/block_recover.c b/windows/src/block_recover.c
deleted file mode 100644
index 1ba0fd4..0000000
--- a/windows/src/block_recover.c
+++ /dev/null
@@ -1,2195 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#define _fileno fileno
-#define _chsize_s ftruncate
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#ifdef __linux__
-
-#include <unistd.h>
-
-#elif _WIN32
-
-// MSVC headers
-#include <io.h>
-
-#endif
-
-#include "libpar3.h"
-#include "galois.h"
-#include "hash.h"
-#include "reedsolomon.h"
-#include "leopard/leopard.h"
-
-
-/*
-This keeps all lost input blocks on memory.
-
-read every input blocks except lost blocks
- per each input block
-  restore lost input slices
-  recover (multiple & add to) lost input blocks
-
-read every using recovery blocks
- per each recovery block
-  recover (multiple & add to) lost input blocks
-
-restore lost input blocks
-
-write chunk tails
-*/
-int recover_lost_block(PAR3_CTX *par3_ctx, char *temp_path, int lost_count)
-{
-	void *gf_table, *matrix;
-	char *name_prev, *file_name;
-	uint8_t *work_buf, buf_tail[40];
-	uint8_t *block_data;
-	uint8_t gf_size;
-	int galois_poly, *lost_id, *recv_id;
-	int block_count, block_index;
-	int lost_index, ret;
-	int progress_old, progress_now, progress_step;
-	uint32_t file_count, file_index, file_prev;
-	uint32_t chunk_index, chunk_num;
-	size_t slice_size;
-	int64_t slice_index, file_offset;
-	uint64_t block_size, region_size, data_size;
-	uint64_t tail_offset, tail_gap;
-	uint64_t packet_count, packet_index;
-	uint64_t file_size, chunk_size;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_FILE_CTX *file_list;
-	PAR3_PKT_CTX *packet_list;
-	FILE *fp_read, *fp_write;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	//printf("\n ecc_method = 0x%x, lost_count = %d\n", par3_ctx->ecc_method, lost_count);
-
-	file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = (int)(par3_ctx->block_count);
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	gf_table = par3_ctx->galois_table;
-	matrix = par3_ctx->matrix;
-	recv_id = par3_ctx->recv_id_list;
-	lost_id = recv_id + lost_count;
-	block_data = par3_ctx->block_data;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-	file_list = par3_ctx->input_file_list;
-	packet_list = par3_ctx->recv_packet_list;
-	packet_count = par3_ctx->recv_packet_count;
-
-	region_size = (block_size + 4 + 3) & ~3;
-
-	// Zero fill lost blocks
-	memset(block_data, 0, region_size * lost_count);
-
-	// Allocate memory to read one input block
-	work_buf = malloc(region_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nRecovering lost input blocks:\n");
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Store available input blocks on memory
-	name_prev = NULL;
-	file_prev = 0xFFFFFFFF;
-	fp_read = NULL;
-	fp_write = NULL;
-	for (block_index = 0; block_index < block_count; block_index++){
-		data_size = block_list[block_index].size;
-
-		// Read block data from found file.
-		if (block_list[block_index].state & 4){	// Full size data is available.
-			slice_index = block_list[block_index].slice;
-			while (slice_index != -1){
-				if (slice_list[slice_index].size == block_size)
-					break;
-				slice_index = slice_list[slice_index].next;
-			}
-			if (slice_index == -1){	// When there is no valid slice.
-				printf("Mapping information for block[%d] is wrong.\n", block_index);
-				if (fp_read != NULL)
-					fclose(fp_read);
-				if (fp_write != NULL)
-					fclose(fp_write);
-				return RET_LOGIC_ERROR;
-			}
-
-			// Read one slice from a found file.
-			file_name = slice_list[slice_index].find_name;
-			file_offset = slice_list[slice_index].find_offset;
-			slice_size = slice_list[slice_index].size;
-			if (par3_ctx->noise_level >= 3){
-				printf("Reading %zu bytes of slice[%"PRId64"] for input block[%d]\n", slice_size, slice_index, block_index);
-			}
-			if ( (fp_read == NULL) || (file_name != name_prev) ){
-				if (fp_read != NULL){	// Close previous input file.
-					fclose(fp_read);
-					fp_read = NULL;
-				}
-				fp_read = fopen(file_name, "rb");
-				if (fp_read == NULL){
-					perror("Failed to open Input File");
-					if (fp_write != NULL)
-						fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-				name_prev = file_name;
-			}
-			if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-				perror("Failed to seek Input File");
-				fclose(fp_read);
-				if (fp_write != NULL)
-					fclose(fp_write);
-				return RET_FILE_IO_ERROR;
-			}
-			if (fread(work_buf, 1, slice_size, fp_read) != slice_size){
-				perror("Failed to read full slice on Input File");
-				fclose(fp_read);
-				if (fp_write != NULL)
-					fclose(fp_write);
-				return RET_FILE_IO_ERROR;
-			}
-
-		} else if (block_list[block_index].state & 16){	// All tail data is available. (one tail or packed tails)
-			if (par3_ctx->noise_level >= 3){
-				printf("Reading %"PRIu64" bytes for input block[%d]\n", data_size, block_index);
-			}
-			tail_offset = 0;
-			while (tail_offset < data_size){	// Read tails until data end.
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					//printf("block = %d, size = %"PRIu64", offset = %"PRIu64", slice = %"PRId64"\n", block_index, data_size, tail_offset, slice_index);
-					// Even when chunk tails are overlaped, it will find tail slice of next position.
-					if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-							&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-						break;
-					}
-					slice_index = slice_list[slice_index].next;
-				}
-				if (slice_index == -1){	// When there is no valid slice.
-					printf("Mapping information for block[%d] is wrong.\n", block_index);
-					if (fp_read != NULL)
-						fclose(fp_read);
-					if (fp_write != NULL)
-						fclose(fp_write);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Read one slice from a file.
-				tail_gap = tail_offset - slice_list[slice_index].tail_offset;	// This tail slice may start before tail_offset.
-				file_name = slice_list[slice_index].find_name;
-				file_offset = slice_list[slice_index].find_offset + tail_gap;
-				slice_size = slice_list[slice_index].size - tail_gap;
-				if ( (fp_read == NULL) || (file_name != name_prev) ){
-					if (fp_read != NULL){	// Close previous input file.
-						fclose(fp_read);
-						fp_read = NULL;
-					}
-					fp_read = fopen(file_name, "rb");
-					if (fp_read == NULL){
-						perror("Failed to open Input File");
-						if (fp_write != NULL)
-							fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-					name_prev = file_name;
-				}
-				if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek Input File");
-					fclose(fp_read);
-					if (fp_write != NULL)
-						fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fread(work_buf + tail_offset, 1, slice_size, fp_read) != slice_size){
-					perror("Failed to read tail slice on Input File");
-					fclose(fp_read);
-					if (fp_write != NULL)
-						fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-				tail_offset += slice_size;
-			}
-
-		} else {	// The input block was lost.
-			data_size = 0;	// Mark of lost block
-		}
-
-		if (data_size > 0){	// The block data is available.
-			// Zero fill rest bytes
-			memset(work_buf + data_size, 0, region_size - data_size);
-
-			// Restore lost input slices
-			slice_index = block_list[block_index].slice;
-			while (slice_index != -1){
-				file_index = slice_list[slice_index].file;
-				// If belong file is missing or damaged.
-				if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-					// Write one lost slice on temporary file.
-					slice_size = slice_list[slice_index].size;
-					file_offset = slice_list[slice_index].offset;
-					tail_offset = slice_list[slice_index].tail_offset;
-					if (par3_ctx->noise_level >= 3){
-						printf("Writing %zu bytes of slice[%"PRId64"] on file[%u]:%"PRId64" in input block[%d]\n", slice_size, slice_index, file_index, file_offset, block_index);
-					}
-					if ( (fp_write == NULL) || (file_index != file_prev) ){
-						if (fp_write != NULL){	// Close previous temporary file.
-							fclose(fp_write);
-							fp_write = NULL;
-						}
-						sprintf(temp_path + 22, "%u.tmp", file_index);
-						fp_write = fopen(temp_path, "r+b");
-						if (fp_write == NULL){
-							perror("Failed to open temporary file");
-							if (fp_read != NULL)
-								fclose(fp_read);
-							return RET_FILE_IO_ERROR;
-						}
-						file_prev = file_index;
-					}
-					if (_fseeki64(fp_write, file_offset, SEEK_SET) != 0){
-						perror("Failed to seek temporary file");
-						if (fp_read != NULL)
-							fclose(fp_read);
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-					if (fwrite(work_buf + tail_offset, 1, slice_size, fp_write) != slice_size){
-						perror("Failed to write slice on temporary file");
-						if (fp_read != NULL)
-							fclose(fp_read);
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-				}
-
-				// Goto next slice
-				slice_index = slice_list[slice_index].next;
-			}
-
-			// Calculate parity bytes in the region
-			if (gf_size == 2){
-				gf16_region_create_parity(galois_poly, work_buf, region_size);
-			} else if (gf_size == 1){
-				gf8_region_create_parity(galois_poly, work_buf, region_size);
-			} else {
-				region_create_parity(work_buf, region_size);
-			}
-
-			// Recover (multiple & add to) lost input blocks
-			rs_recover_one_all(par3_ctx, block_index, lost_count);
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step++;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					// Complexity is "block_count * lost_count * block_size".
-					// Because block_count is 16-bit value, "int" (32-bit signed integer) is enough.
-					progress_now = (progress_step * 1000) / block_count;
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-		}
-	}
-
-	// Read using recovery blocks
-	for (lost_index = 0; lost_index < lost_count; lost_index++){
-		block_index = recv_id[lost_index];
-
-		// Search packet for the recovery block
-		for (packet_index = 0; packet_index < packet_count; packet_index++){
-			if (packet_list[packet_index].index == block_index)
-				break;
-		}
-		if (packet_index >= packet_count){
-			printf("Packet information for block[%d] is wrong.\n", block_index);
-			if (fp_read != NULL)
-				fclose(fp_read);
-			if (fp_write != NULL)
-				fclose(fp_write);
-			return RET_LOGIC_ERROR;
-		}
-
-		// Read one Recovery Data Packet from a recovery file.
-		slice_size = block_size;
-		file_name = packet_list[packet_index].name;
-		file_offset = packet_list[packet_index].offset + 48 + 40;	// offset of the recovery block data
-		if (par3_ctx->noise_level >= 3){
-			printf("Reading Recovery Data[%"PRIu64"] for recovery block[%d]\n", packet_index, block_index);
-		}
-		if ( (fp_read == NULL) || (file_name != name_prev) ){
-			if (fp_read != NULL){	// Close previous recovery file.
-				fclose(fp_read);
-				fp_read = NULL;
-			}
-			fp_read = fopen(file_name, "rb");
-			if (fp_read == NULL){
-				perror("Failed to open recovery file");
-				if (fp_write != NULL)
-					fclose(fp_write);
-				return RET_FILE_IO_ERROR;
-			}
-			name_prev = file_name;
-		}
-		if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-			perror("Failed to seek recovery file");
-			fclose(fp_read);
-			if (fp_write != NULL)
-				fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(work_buf, 1, slice_size, fp_read) != slice_size){
-			perror("Failed to read recovery data on recovery file");
-			fclose(fp_read);
-			if (fp_write != NULL)
-				fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-		// Zero fill rest bytes
-		memset(work_buf + block_size, 0, region_size - block_size);
-
-		// Calculate parity bytes in the region
-		if (gf_size == 2){
-			gf16_region_create_parity(galois_poly, work_buf, region_size);
-		} else if (gf_size == 1){
-			gf8_region_create_parity(galois_poly, work_buf, region_size);
-		} else {
-			region_create_parity(work_buf, region_size);
-		}
-
-		// Recover (multiple & add to) lost input blocks
-		rs_recover_one_all(par3_ctx, lost_id[lost_index], lost_count);
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step++;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (progress_step * 1000) / block_count;
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-	}
-
-	// Close reading file
-	if (fp_read != NULL){
-		if (fclose(fp_read) != 0){
-			perror("Failed to close Input File");
-			if (fp_write != NULL)
-				fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-		fp_read = NULL;
-	}
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	// Restore lost input blocks
-	for (lost_index = 0; lost_index < lost_count; lost_index++){
-		block_index = lost_id[lost_index];
-		work_buf = block_data + region_size * lost_index;
-
-		// Check parity of recovered block to confirm that calculation was correct.
-		if (gf_size == 2){
-			ret = gf16_region_check_parity(galois_poly, work_buf, region_size);
-		} else if (gf_size == 1){
-			ret = gf8_region_check_parity(galois_poly, work_buf, region_size);
-		} else {
-			ret = region_check_parity(work_buf, region_size);
-		}
-		if (ret != 0){
-			printf("Parity of recovered block[%d] is different.\n", block_index);
-			if (fp_write != NULL)
-				fclose(fp_write);
-			return RET_LOGIC_ERROR;
-		}
-
-		slice_index = block_list[block_index].slice;
-		while (slice_index != -1){
-			file_index = slice_list[slice_index].file;
-			// If belong file is missing or damaged.
-			if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-				// Write one lost slice on temporary file.
-				slice_size = slice_list[slice_index].size;
-				file_offset = slice_list[slice_index].offset;
-				tail_offset = slice_list[slice_index].tail_offset;
-				if (par3_ctx->noise_level >= 3){
-					printf("Writing %zu bytes of slice[%"PRId64"] on file[%u]:%"PRId64" in lost block[%d]\n", slice_size, slice_index, file_index, file_offset, block_index);
-				}
-				if ( (fp_write == NULL) || (file_index != file_prev) ){
-					if (fp_write != NULL){	// Close previous temporary file.
-						fclose(fp_write);
-						fp_write = NULL;
-					}
-					sprintf(temp_path + 22, "%u.tmp", file_index);
-					fp_write = fopen(temp_path, "r+b");
-					if (fp_write == NULL){
-						perror("Failed to open temporary file");
-						return RET_FILE_IO_ERROR;
-					}
-					file_prev = file_index;
-				}
-				if (_fseeki64(fp_write, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek temporary file");
-					fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fwrite(work_buf + tail_offset, 1, slice_size, fp_write) != slice_size){
-					perror("Failed to write slice on temporary file");
-					fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-			}
-
-			// Goto next slice
-			slice_index = slice_list[slice_index].next;
-		}
-	}
-
-	// Write chunk tails on input files
-	for (file_index = 0; file_index < file_count; file_index++){
-		// The input file is missing or damaged.
-		if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-			file_size = 0;
-			chunk_index = file_list[file_index].chunk;		// index of the first chunk
-			chunk_num = file_list[file_index].chunk_num;	// number of chunk descriptions
-			slice_index = file_list[file_index].slice;		// index of the first slice
-			//printf("file[%d]: chunk = %u+%u, %s\n", file_index, chunk_index, chunk_num, file_list[file_index].name);
-			while (chunk_num > 0){
-				chunk_size = chunk_list[chunk_index].size;
-				if (chunk_size == 0){	// Unprotected Chunk Description
-					// Unprotected chunk will be filled by zeros after repair.
-					file_size += chunk_list[chunk_index].block;
-					if (chunk_num == 1){	// When unprotected chunk is the last in the input file, set end of file.
-						int file_no;
-						if (par3_ctx->noise_level >= 3){
-							printf("Zero padding unprotected chunk[%u] on file[%u]:%"PRId64"\n", chunk_index, file_index, file_size);
-						}
-						if ( (fp_write == NULL) || (file_index != file_prev) ){
-							if (fp_write != NULL){	// Close previous temporary file.
-								fclose(fp_write);
-								fp_write = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp_write = fopen(temp_path, "r+b");
-							if (fp_write == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						file_no = _fileno(fp_write);
-						if (file_no < 0){
-							perror("Failed to seek temporary file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						} else {
-							if (_chsize_s(file_no, file_size) != 0){
-								perror("Failed to resize temporary file");
-								fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-						}
-					}
-
-				} else {	// Protected Chunk Description
-					while ( (chunk_size >= block_size) || (chunk_size >= 40) ){	// full size slice or chunk tail slice
-						slice_size = slice_list[slice_index].size;
-						slice_index++;
-						file_size += slice_size;
-						chunk_size -= slice_size;
-					}
-					if (chunk_size > 0){	// tiny chunk tail
-						file_offset = file_size;	// Offset of chunk tail
-						slice_size = chunk_size;	// Tiny chunk tail was stored in File Packet.
-						file_size += slice_size;
-
-						// copy 1 ~ 39 bytes
-						memcpy(buf_tail, &(chunk_list[chunk_index].tail_crc), 8);
-						memcpy(buf_tail + 8, chunk_list[chunk_index].tail_hash, 16);
-						memcpy(buf_tail + 24, &(chunk_list[chunk_index].tail_block), 8);
-						memcpy(buf_tail + 32, &(chunk_list[chunk_index].tail_offset), 8);
-
-						// Write tail slice on temporary file.
-						if (par3_ctx->noise_level >= 3){
-							printf("Writing %zu bytes of chunk[%u] tail on file[%u]:%"PRId64"\n", slice_size, chunk_index, file_index, file_offset);
-						}
-						if ( (fp_write == NULL) || (file_index != file_prev) ){
-							if (fp_write != NULL){	// Close previous temporary file.
-								fclose(fp_write);
-								fp_write = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp_write = fopen(temp_path, "r+b");
-							if (fp_write == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						if (_fseeki64(fp_write, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek temporary file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fwrite(buf_tail, 1, slice_size, fp_write) != slice_size){
-							perror("Failed to write tiny slice on temporary file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-				}
-
-				chunk_index++;
-				chunk_num--;
-			}
-
-			if (file_size != file_list[file_index].size){
-				printf("file size is bad. %s\n", temp_path);
-				return RET_LOGIC_ERROR;
-			} else {
-				file_list[file_index].state |= 0x100;
-			}
-		}
-	}
-
-	// Close writing file
-	if (fp_write != NULL){
-		if (fclose(fp_write) != 0){
-			perror("Failed to close temporary File");
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-	}
-
-	// Release some allocated memory
-	free(recv_id);
-	par3_ctx->recv_id_list = NULL;
-	free(matrix);
-	par3_ctx->matrix = NULL;
-
-	return 0;
-}
-
-// This keeps all input blocks and recovery blocks partially by spliting every block.
-int recover_lost_block_split(PAR3_CTX *par3_ctx, char *temp_path, uint64_t lost_count)
-{
-	void *gf_table, *matrix;
-	char *name_prev, *file_name;
-	uint8_t buf_tail[40];
-	uint8_t *block_data, *buf_p;
-	uint8_t gf_size;
-	int galois_poly, *recv_id;
-	int ret;
-	int progress_old, progress_now;
-	uint32_t split_count;
-	uint32_t file_count, file_index, file_prev;
-	uint32_t chunk_index, chunk_num;
-	size_t io_size;
-	int64_t slice_index, file_offset;
-	uint64_t block_index, lost_index;
-	uint64_t block_size, block_count, max_recovery_block;
-	uint64_t alloc_size, region_size, split_size;
-	uint64_t data_size, part_size, split_offset;
-	uint64_t tail_offset, tail_gap;
-	uint64_t packet_count, packet_index;
-	uint64_t file_size, chunk_size;
-	uint64_t progress_total, progress_step;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_FILE_CTX *file_list;
-	PAR3_PKT_CTX *packet_list;
-	FILE *fp;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// For Leopard-RS library
-	uint32_t work_count;
-	uint8_t **original_data = NULL, **recovery_data = NULL, **work_data = NULL;
-
-	file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	max_recovery_block = par3_ctx->max_recovery_block;
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	gf_table = par3_ctx->galois_table;
-	matrix = par3_ctx->matrix;
-	recv_id = par3_ctx->recv_id_list;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-	file_list = par3_ctx->input_file_list;
-	packet_list = par3_ctx->recv_packet_list;
-	packet_count = par3_ctx->recv_packet_count;
-
-	// Set required memory size at first
-	if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		ret = leo_init();	// Initialize Leopard-RS library.
-		if (ret != 0){
-			printf("Failed to initialize Leopard-RS library (%d)\n", ret);
-			return RET_LOGIC_ERROR;
-		}
-		work_count = leo_decode_work_count((uint32_t)block_count, (uint32_t)max_recovery_block);
-		//printf("Leopard-RS: work_count = %u\n", work_count);
-		// Leopard-RS requires multiple of 64 bytes for SIMD.
-		region_size = (block_size + 4 + 63) & ~63;
-		alloc_size = region_size * (block_count + work_count);
-
-	} else {	// Reed-Solomon Erasure Codes
-		// Mmeory alignment is 4 bytes.
-		region_size = (block_size + 4 + 3) & ~3;
-		alloc_size = region_size * (block_count + lost_count);
-	}
-
-	// for test split
-	//par3_ctx->memory_limit = (alloc_size + 1) / 2;
-	//par3_ctx->memory_limit = (alloc_size + 2) / 3;
-
-	// Limited memory usage
-	if ( (par3_ctx->memory_limit > 0) && (alloc_size > par3_ctx->memory_limit) ){
-		split_count = (uint32_t)((alloc_size + par3_ctx->memory_limit - 1) / par3_ctx->memory_limit);
-		split_size = (block_size + split_count - 1) / split_count;	// This is splitted block size to fit in limited memory.
-		if (gf_size == 2){
-			// aligned to 2 bytes for 16-bit Galois Field
-			split_size = (split_size + 1) & ~1;
-		}
-		if (split_size > block_size)
-			split_size = block_size;
-		split_count = (uint32_t)((block_size + split_size - 1) / split_size);
-		if (par3_ctx->noise_level >= 1){
-			printf("\nSplit block to %u pieces of %"PRIu64" bytes.\n", split_count, split_size);
-		}
-	} else {
-		split_count = 1;
-		split_size = block_size;
-	}
-
-	// Allocate memory to keep all splitted blocks.
-	if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		// Leopard-RS requires alignment of 64 bytes.
-		// At reading time, it stores using recovery blocks in place of lost input blocks.
-		// Recovered lost blocks are stored in work buffer.
-		// So, it will write back recovered data from there.
-		region_size = (split_size + 4 + 63) & ~63;
-		alloc_size = region_size * (block_count + work_count);
-		if (par3_ctx->noise_level >= 2){
-			printf("\nAligned size of block data = %"PRIu64"\n", region_size);
-			printf("Allocated memory size = %"PRIu64" * (%"PRIu64" + %u) = %"PRIu64"\n", region_size, block_count, work_count, alloc_size);
-		}
-	} else {	// Reed-Solomon Erasure Codes
-		region_size = (split_size + 4 + 3) & ~3;
-		alloc_size = region_size * (block_count + lost_count);
-		if (par3_ctx->noise_level >= 2){
-			printf("\nAligned size of block data = %"PRIu64"\n", region_size);
-			printf("Allocated memory size = %"PRIu64" * (%"PRIu64" + %"PRIu64") = %"PRIu64"\n", region_size, block_count, lost_count, alloc_size);
-		}
-	}
-	block_data = malloc(alloc_size);
-	if (block_data == NULL){
-		perror("Failed to allocate memory for block data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->block_data = block_data;
-
-	if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		// List of pointer
-		original_data = malloc(sizeof(block_data) * (block_count + max_recovery_block + work_count));
-		if (original_data == NULL){
-			perror("Failed to allocate memory for Leopard-RS");
-			return RET_MEMORY_ERROR;
-		}
-		recovery_data = original_data + block_count;
-		for (block_index = 0; block_index < max_recovery_block; block_index++){
-			// At first, clear position of recovery block.
-			recovery_data[block_index] = NULL;
-		}
-		buf_p = block_data;
-		lost_index = 0;
-		for (block_index = 0; block_index < block_count; block_index++){
-			if ((block_list[block_index].state & (4 | 16)) == 0){	// lost input block
-				original_data[block_index] = NULL;
-				// Using recovery blocks will be stored in place of lost input blocks.
-				recovery_data[ recv_id[lost_index] ] = buf_p;
-				lost_index++;
-			} else {
-				original_data[block_index] = buf_p;
-			}
-			buf_p += region_size;
-		}
-		work_data = recovery_data + max_recovery_block;
-		for (block_index = 0; block_index < work_count; block_index++){
-			work_data[block_index] = buf_p;
-			buf_p += region_size;
-		}
-		par3_ctx->matrix = original_data;	// Release this later
-	}
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nRecovering lost input blocks:\n");
-		// block_count = Number of input block (read)
-		// lost_count = Number of using recovery block (read)
-		// block_count * lost_count = Number of multiplication
-		// block_count = Number of input block (write)
-		progress_total = (block_count * lost_count + block_count * 2 + lost_count) * split_count;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// This file access style would support all Error Correction Codes.
-	file_prev = 0xFFFFFFFF;
-	fp = NULL;
-	for (split_offset = 0; split_offset < block_size; split_offset += split_size){
-		buf_p = block_data;	// Starting position of input blocks
-		name_prev = NULL;
-
-		// Store available input blocks on memory
-		for (block_index = 0; block_index < block_count; block_index++){
-			data_size = block_list[block_index].size;
-			part_size = data_size - split_offset;
-			if (part_size > split_size)
-				part_size = split_size;
-
-			// Read block data from found file.
-			if (block_list[block_index].state & 4){	// Full size data is available.
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					if (slice_list[slice_index].size == block_size)
-						break;
-					slice_index = slice_list[slice_index].next;
-				}
-				if (slice_index == -1){	// When there is no valid slice.
-					printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-					if (fp != NULL)
-						fclose(fp);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Read a part of slice from a file.
-				file_name = slice_list[slice_index].find_name;
-				file_offset = slice_list[slice_index].find_offset + split_offset;
-				io_size = part_size;
-				if (par3_ctx->noise_level >= 3){
-					printf("Reading %zu bytes of slice[%"PRId64"] for input block[%"PRIu64"]\n", io_size, slice_index, block_index);
-				}
-				if ( (fp == NULL) || (file_name != name_prev) ){
-					if (fp != NULL){	// Close previous input file.
-						fclose(fp);
-						fp = NULL;
-					}
-					fp = fopen(file_name, "rb");
-					if (fp == NULL){
-						perror("Failed to open Input File");
-						return RET_FILE_IO_ERROR;
-					}
-					name_prev = file_name;
-				}
-				if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek Input File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fread(buf_p, 1, io_size, fp) != io_size){
-					perror("Failed to read slice on Input File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-
-			// All tail data is available. (one tail or packed tails)
-			} else if ( (data_size > split_offset) && (block_list[block_index].state & 16) ){
-				if (par3_ctx->noise_level >= 3){
-					printf("Reading %"PRIu64" bytes for input block[%"PRIu64"]\n", part_size, block_index);
-				}
-				tail_offset = split_offset;
-				while (tail_offset < split_offset + part_size){	// Read tails until data end.
-					slice_index = block_list[block_index].slice;
-					while (slice_index != -1){
-						//printf("block = %d, size = %"PRIu64", offset = %"PRIu64", slice = %"PRId64"\n", block_index, data_size, tail_offset, slice_index);
-						// Even when chunk tails are overlaped, it will find tail slice of next position.
-						if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-								&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-							break;
-						}
-						slice_index = slice_list[slice_index].next;
-					}
-					if (slice_index == -1){	// When there is no valid slice.
-						printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-						if (fp != NULL)
-							fclose(fp);
-						return RET_LOGIC_ERROR;
-					}
-
-					// Read one slice from a file.
-					tail_gap = tail_offset - slice_list[slice_index].tail_offset;	// This tail slice may start before tail_offset.
-					file_name = slice_list[slice_index].find_name;
-					file_offset = slice_list[slice_index].find_offset + tail_gap;
-					io_size = slice_list[slice_index].size - tail_gap;
-					if (io_size > part_size)
-						io_size = part_size;
-					if ( (fp == NULL) || (file_name != name_prev) ){
-						if (fp != NULL){	// Close previous input file.
-							fclose(fp);
-							fp = NULL;
-						}
-						fp = fopen(file_name, "rb");
-						if (fp == NULL){
-							perror("Failed to open Input File");
-							return RET_FILE_IO_ERROR;
-						}
-						name_prev = file_name;
-					}
-					if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-						perror("Failed to seek Input File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					if (fread(buf_p + tail_offset - split_offset, 1, io_size, fp) != io_size){
-						perror("Failed to read tail slice on Input File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					tail_offset += io_size;
-				}
-
-			} else {	// The input block was lost, or empty space in tail block.
-				if (block_list[block_index].state & 16){
-					// Zero fill partial input block
-					memset(buf_p, 0, region_size);
-				} else if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-					// Zero fill lost input block
-					memset(buf_p, 0, region_size);
-				}
-				data_size = 0;	// No need to calculate parity.
-			}
-
-			if (data_size > split_offset){	// When there is slice data to process.
-				memset(buf_p + part_size, 0, region_size - part_size);	// Zero fill rest bytes
-				// No need to calculate CRC of reading block, because it will check recovered block later.
-
-				if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-					if (gf_size == 2){
-						leo_region_create_parity(buf_p, region_size);
-					} else {
-						region_create_parity(buf_p, region_size);
-					}
-				} else {
-					// Calculate parity bytes in the region
-					if (gf_size == 2){
-						gf16_region_create_parity(galois_poly, buf_p, region_size);
-					} else if (gf_size == 1){
-						gf8_region_create_parity(galois_poly, buf_p, region_size);
-					} else {
-						region_create_parity(buf_p, region_size);
-					}
-				}
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step++;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			buf_p += region_size;	// Goto next partial block
-		}
-
-		// Read using recovery blocks
-		part_size = block_size - split_offset;
-		if (part_size > split_size)
-			part_size = split_size;
-		io_size = part_size;
-		for (lost_index = 0; lost_index < lost_count; lost_index++){
-			block_index = recv_id[lost_index];	// Index of the recovery block
-			if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-				buf_p = recovery_data[block_index];	// Address of the recovery block
-			}
-
-			// Search packet for the recovery block
-			for (packet_index = 0; packet_index < packet_count; packet_index++){
-				if (packet_list[packet_index].index == block_index)
-					break;
-			}
-			if (packet_index >= packet_count){
-				printf("Packet information for block[%"PRIu64"] is wrong.\n", block_index);
-				if (fp != NULL)
-					fclose(fp);
-				return RET_LOGIC_ERROR;
-			}
-
-			// Read one Recovery Data Packet from a recovery file.
-			file_name = packet_list[packet_index].name;
-			file_offset = packet_list[packet_index].offset + 48 + 40 + split_offset;	// offset of the recovery block data
-			if (par3_ctx->noise_level >= 3){
-				printf("Reading Recovery Data[%"PRIu64"] for recovery block[%"PRIu64"]\n", packet_index, block_index);
-			}
-			if ( (fp == NULL) || (file_name != name_prev) ){
-				if (fp != NULL){	// Close previous recovery file.
-					fclose(fp);
-					fp = NULL;
-				}
-				fp = fopen(file_name, "rb");
-				if (fp == NULL){
-					perror("Failed to open recovery file");
-					return RET_FILE_IO_ERROR;
-				}
-				name_prev = file_name;
-			}
-			if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-				perror("Failed to seek recovery file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			if (fread(buf_p, 1, io_size, fp) != io_size){
-				perror("Failed to read recovery data on recovery file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			memset(buf_p + part_size, 0, region_size - part_size);	// Zero fill rest bytes
-
-			if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-				// Because GF Multiplication doesn't work on FFT, it does XOR only.
-				if (gf_size == 2){
-					leo_region_create_parity(buf_p, region_size);
-				} else {
-					region_create_parity(buf_p, region_size);
-				}
-			} else {
-				// Calculate parity bytes in the region
-				if (gf_size == 2){
-					gf16_region_create_parity(galois_poly, buf_p, region_size);
-				} else if (gf_size == 1){
-					gf8_region_create_parity(galois_poly, buf_p, region_size);
-				} else {
-					region_create_parity(buf_p, region_size);
-				}
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step++;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			buf_p += region_size;	// Goto next partial block
-		}
-
-		// Close recovery file, because next reading will be Input File.
-		if (fp != NULL){
-			if (fclose(fp) != 0){
-				perror("Failed to close recovery file");
-				return RET_FILE_IO_ERROR;
-			}
-			fp = NULL;
-		}
-
-/*
-if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-	printf("\n read block ok, progress = %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-	for (block_index = 0; block_index < block_count; block_index++){
-		printf("original_data[%2"PRIu64"] = %p\n", block_index, original_data[block_index]);
-	}
-	for (block_index = 0; block_index < max_recovery_block; block_index++){
-		printf("recovery_data[%2"PRIu64"] = %p\n", block_index, recovery_data[block_index]);
-	}
-}
-*/
-
-		// Recover lost input blocks
-		if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-			rs_recover_all(par3_ctx, region_size, (int)lost_count, progress_total, progress_step);
-
-		} else if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-			ret = leo_decode(region_size,
-							(uint32_t)block_count, (uint32_t)max_recovery_block, work_count,
-							original_data, recovery_data, work_data);
-			if (ret != 0){
-				printf("Failed to call Leopard-RS library (%d)\n", ret);
-				return RET_LOGIC_ERROR;
-			}
-
-			// Restore recovered data
-			buf_p = block_data;
-			for (block_index = 0; block_index < block_count; block_index++){
-				if ((block_list[block_index].state & (4 | 16)) == 0){	// lost input block
-					memcpy(buf_p, work_data[block_index], region_size);
-				}
-				buf_p += region_size;
-			}
-
-		}
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step += block_count * lost_count;
-			time_old = time(NULL);
-		}
-
-		// Restore all input blocks
-		buf_p = block_data;
-		for (block_index = 0; block_index < block_count; block_index++){
-			if ((block_list[block_index].state & (4 | 16)) == 0){	// This input block was not complete.
-				// Check parity of recovered block to confirm that calculation was correct.
-				if (par3_ctx->ecc_method & 8){
-					if (gf_size == 2){
-						ret = leo_region_check_parity(buf_p, region_size);
-					} else {
-						ret = region_check_parity(buf_p, region_size);
-					}
-				} else {
-					if (gf_size == 2){
-						ret = gf16_region_check_parity(galois_poly, buf_p, region_size);
-					} else if (gf_size == 1){
-						ret = gf8_region_check_parity(galois_poly, buf_p, region_size);
-					} else {
-						ret = region_check_parity(buf_p, region_size);
-					}
-				}
-				if (ret != 0){
-					printf("Parity of recovered block[%"PRIu64"] is different.\n", block_index);
-					return RET_LOGIC_ERROR;
-				}
-			} else if ( (par3_ctx->ecc_method & 8) && (gf_size == 2) ){
-				leo_region_restore(buf_p, region_size);	// Return from ALTMAP
-			}
-
-			slice_index = block_list[block_index].slice;
-			while (slice_index != -1){
-				file_index = slice_list[slice_index].file;
-				// If belong file is missing or damaged.
-				if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-					data_size = slice_list[slice_index].size;
-					file_offset = slice_list[slice_index].offset;
-					tail_offset = slice_list[slice_index].tail_offset;
-					if ( (tail_offset + data_size > split_offset) && (tail_offset < split_offset + split_size) ){
-						// Write a part of lost slice on temporary file.
-						if (tail_offset < split_offset){
-							tail_gap = 0;	// This tail slice may start before split_offset.
-							file_offset = file_offset + split_offset - tail_offset;
-							part_size = tail_offset + data_size - split_offset;
-							if (part_size > split_size)
-								part_size = split_size;
-						} else {
-							tail_gap = tail_offset - split_offset;
-							part_size = data_size;
-							if (part_size > split_offset + split_size - tail_offset)
-								part_size = split_offset + split_size - tail_offset;
-						}
-						io_size = part_size;
-						if (par3_ctx->noise_level >= 3){
-							printf("Writing %zu bytes of slice[%"PRId64"] on file[%u]:%"PRId64" in block[%"PRIu64"]\n", io_size, slice_index, file_index, file_offset, block_index);
-						}
-						if ( (fp == NULL) || (file_index != file_prev) ){
-							if (fp != NULL){	// Close previous temporary file.
-								fclose(fp);
-								fp = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp = fopen(temp_path, "r+b");
-							if (fp == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek temporary file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fwrite(buf_p + tail_gap, 1, io_size, fp) != io_size){
-							perror("Failed to write slice on temporary file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-				}
-
-				// Goto next slice
-				slice_index = slice_list[slice_index].next;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step++;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			buf_p += region_size;	// Goto next partial block
-		}
-	}
-
-	// Write chunk tails on input files
-	for (file_index = 0; file_index < file_count; file_index++){
-		// The input file is missing or damaged.
-		if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-			file_size = 0;
-			chunk_index = file_list[file_index].chunk;		// index of the first chunk
-			chunk_num = file_list[file_index].chunk_num;	// number of chunk descriptions
-			slice_index = file_list[file_index].slice;		// index of the first slice
-			//printf("file[%d]: chunk = %u+%u, %s\n", file_index, chunk_index, chunk_num, file_list[file_index].name);
-			while (chunk_num > 0){
-				chunk_size = chunk_list[chunk_index].size;
-				if (chunk_size == 0){	// Unprotected Chunk Description
-					// Unprotected chunk will be filled by zeros after repair.
-					file_size += chunk_list[chunk_index].block;
-					if (chunk_num == 1){	// When unprotected chunk is the last in the input file, set end of file.
-						int file_no;
-						if (par3_ctx->noise_level >= 3){
-							printf("Zero padding unprotected chunk[%u] on file[%u]:%"PRId64"\n", chunk_index, file_index, file_size);
-						}
-						if ( (fp == NULL) || (file_index != file_prev) ){
-							if (fp != NULL){	// Close previous temporary file.
-								fclose(fp);
-								fp = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp = fopen(temp_path, "r+b");
-							if (fp == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						file_no = _fileno(fp);
-						if (file_no < 0){
-							perror("Failed to seek temporary file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						} else {
-							if (_chsize_s(file_no, file_size) != 0){
-								perror("Failed to resize temporary file");
-								fclose(fp);
-								return RET_FILE_IO_ERROR;
-							}
-						}
-					}
-
-				} else {	// Protected Chunk Description
-					while ( (chunk_size >= block_size) || (chunk_size >= 40) ){	// full size slice or chunk tail slice
-						data_size = slice_list[slice_index].size;
-						slice_index++;
-						file_size += data_size;
-						chunk_size -= data_size;
-					}
-					if (chunk_size > 0){	// tiny chunk tail
-						file_offset = file_size;	// Offset of chunk tail
-						io_size = chunk_size;	// Tiny chunk tail was stored in File Packet.
-						file_size += io_size;
-
-						// copy 1 ~ 39 bytes
-						memcpy(buf_tail, &(chunk_list[chunk_index].tail_crc), 8);
-						memcpy(buf_tail + 8, chunk_list[chunk_index].tail_hash, 16);
-						memcpy(buf_tail + 24, &(chunk_list[chunk_index].tail_block), 8);
-						memcpy(buf_tail + 32, &(chunk_list[chunk_index].tail_offset), 8);
-
-						// Write tail slice on temporary file.
-						if (par3_ctx->noise_level >= 3){
-							printf("Writing %zu bytes of chunk[%u] tail on file[%u]:%"PRId64"\n", io_size, chunk_index, file_index, file_offset);
-						}
-						if ( (fp == NULL) || (file_index != file_prev) ){
-							if (fp != NULL){	// Close previous temporary file.
-								fclose(fp);
-								fp = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp = fopen(temp_path, "r+b");
-							if (fp == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek temporary file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fwrite(buf_tail, 1, io_size, fp) != io_size){
-							perror("Failed to write tiny slice on temporary file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-				}
-
-				chunk_index++;
-				chunk_num--;
-			}
-
-			if (file_size != file_list[file_index].size){
-				printf("file size is bad. %s\n", temp_path);
-				return RET_LOGIC_ERROR;
-			} else {
-				file_list[file_index].state |= 0x100;
-			}
-		}
-	}
-
-	// Close writing file
-	if (fp != NULL){
-		if (fclose(fp) != 0){
-			perror("Failed to close temporary file");
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-	}
-
-	// Release some allocated memory
-	free(par3_ctx->recv_id_list);
-	par3_ctx->recv_id_list = NULL;
-	if (par3_ctx->matrix){
-		free(par3_ctx->matrix);
-		par3_ctx->matrix = NULL;
-	}
-
-	return 0;
-}
-
-// At this time, interleaving is adapted only for FFT based Reed-Solomon Codes.
-// When there are multiple cohorts, it recovers lost blocks in each cohort.
-// This keeps one cohort's all input blocks and recovery blocks partially by spliting every block.
-int recover_lost_block_cohort(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	void *gf_table, *matrix;
-	char *name_prev, *file_name;
-	uint8_t buf_tail[40];
-	uint8_t *block_data, *buf_p;
-	uint8_t gf_size;
-	uint8_t *packet_checksum;
-	int galois_poly;
-	int ret;
-	int progress_old, progress_now;
-	uint32_t split_count;
-	uint32_t file_count, file_index, file_prev;
-	uint32_t chunk_index, chunk_num;
-	uint32_t cohort_count, cohort_index;
-	uint32_t lost_index, *lost_id;
-	uint32_t *lost_list, *recv_list;
-	size_t io_size;
-	int64_t slice_index, file_offset;
-	uint64_t block_index;
-	uint64_t block_size, block_count;
-	uint64_t block_count2, max_recovery_block2;
-	uint64_t alloc_size, region_size, split_size;
-	uint64_t data_size, part_size, split_offset;
-	uint64_t tail_offset, tail_gap;
-	uint64_t packet_count, packet_index;
-	uint64_t file_size, chunk_size;
-	uint64_t progress_total, progress_step;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_FILE_CTX *file_list;
-	PAR3_PKT_CTX *packet_list;
-	FILE *fp_read, *fp_write;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// For Leopard-RS library
-	uint32_t work_count;
-	uint8_t **original_data = NULL, **recovery_data = NULL, **work_data = NULL;
-
-	file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	gf_table = par3_ctx->galois_table;
-	matrix = par3_ctx->matrix;
-	lost_id = par3_ctx->recv_id_list;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-	file_list = par3_ctx->input_file_list;
-	packet_list = par3_ctx->recv_packet_list;
-	packet_count = par3_ctx->recv_packet_count;
-	packet_checksum = par3_ctx->matrix_packet + par3_ctx->matrix_packet_offset + 8;
-
-	// Set count for each cohort
-	cohort_count = (uint32_t)(par3_ctx->interleave) + 1;	// Minimum value is 2.
-	block_count2 = (block_count + cohort_count - 1) / cohort_count;	// round up
-	max_recovery_block2 = par3_ctx->max_recovery_block / cohort_count;
-	lost_list = par3_ctx->lost_list;	// This was set at aggregate_block_cohort().
-	recv_list = lost_list + cohort_count;
-	//printf("cohort_count = %u, block_count2 = %"PRIu64", max_recovery_block2 = %"PRIu64"\n", cohort_count, block_count2, max_recovery_block2);
-	//for (cohort_index = 0; cohort_index < cohort_count; cohort_index++){
-	//	printf("lost_count2 = %u, recovery_block_count2 = %u\n", lost_list[cohort_index], recv_list[cohort_index]);
-	//}
-
-	// Set required memory size at first
-	ret = leo_init();	// Initialize Leopard-RS library.
-	if (ret != 0){
-		printf("Failed to initialize Leopard-RS library (%d)\n", ret);
-		return RET_LOGIC_ERROR;
-	}
-	work_count = leo_decode_work_count((uint32_t)block_count2, (uint32_t)max_recovery_block2);
-	//printf("Leopard-RS: work_count = %u\n", work_count);
-	// Leopard-RS requires multiple of 64 bytes for SIMD.
-	region_size = (block_size + 4 + 63) & ~63;
-	alloc_size = region_size * (block_count2 + work_count);
-
-	// for test split
-	//par3_ctx->memory_limit = (alloc_size + 1) / 2;
-	//par3_ctx->memory_limit = (alloc_size + 2) / 3;
-
-	// Limited memory usage
-	if ( (par3_ctx->memory_limit > 0) && (alloc_size > par3_ctx->memory_limit) ){
-		split_count = (uint32_t)((alloc_size + par3_ctx->memory_limit - 1) / par3_ctx->memory_limit);
-		split_size = (block_size + split_count - 1) / split_count;	// This is splitted block size to fit in limited memory.
-		if (gf_size == 2){
-			// aligned to 2 bytes for 16-bit Galois Field
-			split_size = (split_size + 1) & ~1;
-		}
-		if (split_size > block_size)
-			split_size = block_size;
-		split_count = (uint32_t)((block_size + split_size - 1) / split_size);
-		if (par3_ctx->noise_level >= 1){
-			printf("\nSplit block to %u pieces of %"PRIu64" bytes.\n", split_count, split_size);
-		}
-	} else {
-		split_count = 1;
-		split_size = block_size;
-	}
-
-	// Allocate memory to keep all splitted blocks.
-	// Leopard-RS requires alignment of 64 bytes.
-	// At reading time, it stores using recovery blocks in place of lost input blocks.
-	// Recovered lost blocks are stored in work buffer.
-	// So, it will write back recovered data from there.
-	region_size = (split_size + 4 + 63) & ~63;
-	alloc_size = region_size * (block_count2 + work_count);
-	if (alloc_size < block_size)
-		alloc_size = block_size;	// This buffer size must be enough large to copy a slice.
-	if (par3_ctx->noise_level >= 2){
-		printf("\nAligned size of block data = %"PRIu64"\n", region_size);
-		printf("Allocated memory size = %"PRIu64" * (%"PRIu64" + %u) = %"PRIu64"\n", region_size, block_count2, work_count, alloc_size);
-	}
-	block_data = malloc(alloc_size);
-	if (block_data == NULL){
-		perror("Failed to allocate memory for block data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->block_data = block_data;
-
-	// List of pointer
-	original_data = malloc(sizeof(block_data) * (block_count2 + max_recovery_block2 + work_count));
-	if (original_data == NULL){
-		perror("Failed to allocate memory for Leopard-RS");
-		return RET_MEMORY_ERROR;
-	}
-	recovery_data = original_data + block_count2;
-	buf_p = block_data + region_size * block_count2;
-	work_data = recovery_data + max_recovery_block2;
-	for (block_index = 0; block_index < work_count; block_index++){
-		work_data[block_index] = buf_p;
-		buf_p += region_size;
-	}
-	par3_ctx->matrix = original_data;	// Release this later
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nRecovering lost input blocks:\n");
-		progress_total = 0;
-		if (par3_ctx->noise_level <= 1){
-			for (cohort_index = 0; cohort_index < cohort_count; cohort_index++){
-				if (lost_list[cohort_index] > recv_list[cohort_index])
-					continue;
-				if (lost_list[cohort_index] == 0){
-					// block_count2 = Number of input block (read & write)
-					progress_total += block_count2;
-				} else {
-					// block_count2 = Number of input block (read)
-					// lost_count2 = Number of using recovery block (read)
-					// block_count2 * lost_count2 = Number of multiplication
-					// block_count2 = Number of input block (write)
-					progress_total += block_count2 * lost_list[cohort_index] + block_count2 * 2 + lost_list[cohort_index];
-				}
-			}
-			progress_total *= split_count;
-		}
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	fp_read = NULL;
-	name_prev = NULL;
-	fp_write = NULL;
-	file_prev = 0xFFFFFFFF;
-	// Process each cohort
-	for (cohort_index = 0; cohort_index < cohort_count; cohort_index++){
-		if (lost_list[cohort_index] > recv_list[cohort_index]){	// Cannot recover blocks in this cohort.
-			//printf("cohort[%u] : lost = %u, recovery = %u\n", cohort_index, lost_list[cohort_index], recv_list[cohort_index]);
-			continue;
-		}
-		if (lost_list[cohort_index] == 0){	// No need to recover blocks in this cohort.
-			if ( (cohort_count < 10) && (par3_ctx->noise_level >= 1) ){
-				printf("cohort[%u] : no lost\n", cohort_index);
-			}
-
-			// Restore missing or damaged files by copying all input blocks
-			buf_p = block_data;
-			for (block_index = cohort_index; block_index < block_count; block_index += cohort_count){
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					file_index = slice_list[slice_index].file;
-					// If belong file is missing or damaged.
-					if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-						// Read slice data from another file.
-						file_name = slice_list[slice_index].find_name;
-						file_offset = slice_list[slice_index].find_offset;
-						io_size = slice_list[slice_index].size;
-						if (par3_ctx->noise_level >= 3){
-							printf("Reading %zu bytes of slice[%"PRId64"] for input block[%"PRIu64"]\n", io_size, slice_index, block_index);
-						}
-						if ( (fp_read == NULL) || (file_name != name_prev) ){
-							if (fp_read != NULL){	// Close previous input file.
-								fclose(fp_read);
-								fp_read = NULL;
-							}
-							fp_read = fopen(file_name, "rb");
-							if (fp_read == NULL){
-								perror("Failed to open Input File");
-								if (fp_write != NULL)
-									fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-							name_prev = file_name;
-						}
-						if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek Input File");
-							fclose(fp_read);
-							if (fp_write != NULL)
-								fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fread(buf_p, 1, io_size, fp_read) != io_size){
-							perror("Failed to read slice on Input File");
-							fclose(fp_read);
-							if (fp_write != NULL)
-								fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-
-						// Write slice data on temporary file.
-						file_offset = slice_list[slice_index].offset;
-						if (par3_ctx->noise_level >= 3){
-							printf("Writing %zu bytes of slice[%"PRId64"] on file[%u]\n", io_size, slice_index, file_index);
-						}
-						if ( (fp_write == NULL) || (file_index != file_prev) ){
-							if (fp_write != NULL){	// Close previous temporary file.
-								fclose(fp_write);
-								fp_write = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp_write = fopen(temp_path, "r+b");
-							if (fp_write == NULL){
-								perror("Failed to open temporary file");
-								fclose(fp_read);
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						if (_fseeki64(fp_write, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek temporary file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fwrite(buf_p, 1, io_size, fp_write) != io_size){
-							perror("Failed to write slice on temporary file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-
-					// Goto next slice
-					slice_index = slice_list[slice_index].next;
-				}
-
-				// Print progress percent
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-					time_now = time(NULL);
-					if (time_now != time_old){
-						time_old = time_now;
-						progress_now = (int)((progress_step * 1000) / progress_total);
-						if (progress_now != progress_old){
-							progress_old = progress_now;
-							printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-						}
-					}
-				}
-			}
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				// When the last input block doesn't exist in this cohort.
-				if (block_index < block_count2 * cohort_count){
-					progress_step++;
-				}
-			}
-			continue;
-		}
-
-		if ( (cohort_count < 10) && (par3_ctx->noise_level >= 1) ){
-			printf("cohort[%u] : lost = %u, recovery = %u\n", cohort_index, lost_list[cohort_index], recv_list[cohort_index]);
-		}
-		for (split_offset = 0; split_offset < block_size; split_offset += split_size){
-			//printf("cohort_index = %u, split_offset = %"PRIu64"\n", cohort_index, split_offset);
-			buf_p = block_data;	// Starting position of input blocks
-			lost_index = 0;
-
-			// Close writing file, because it will read many times and won't write for a while.
-			if (fp_write != NULL){
-				if (fclose(fp_write) != 0){
-					perror("Failed to close temporary file");
-					if (fp_read != NULL)
-						fclose(fp_read);
-					return RET_FILE_IO_ERROR;
-				}
-				fp_write = NULL;
-			}
-
-			// Store available input blocks on memory
-			for (block_index = cohort_index; block_index < block_count; block_index += cohort_count){
-				original_data[block_index / cohort_count] = buf_p;	// At first, set position of block data.
-				data_size = block_list[block_index].size;
-				part_size = data_size - split_offset;
-				if (part_size > split_size)
-					part_size = split_size;
-
-				// Read block data from found file.
-				if (block_list[block_index].state & 4){	// Full size data is available.
-					slice_index = block_list[block_index].slice;
-					while (slice_index != -1){
-						if (slice_list[slice_index].size == block_size)
-							break;
-						slice_index = slice_list[slice_index].next;
-					}
-					if (slice_index == -1){	// When there is no valid slice.
-						printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-						if (fp_read != NULL)
-							fclose(fp_read);
-						return RET_LOGIC_ERROR;
-					}
-
-					// Read a part of slice from a file.
-					file_name = slice_list[slice_index].find_name;
-					file_offset = slice_list[slice_index].find_offset + split_offset;
-					io_size = part_size;
-					if (par3_ctx->noise_level >= 3){
-						printf("Reading %zu bytes of slice[%"PRId64"] for input block[%"PRIu64"]\n", io_size, slice_index, block_index);
-					}
-					if ( (fp_read == NULL) || (file_name != name_prev) ){
-						if (fp_read != NULL){	// Close previous input file.
-							fclose(fp_read);
-							fp_read = NULL;
-						}
-						fp_read = fopen(file_name, "rb");
-						if (fp_read == NULL){
-							perror("Failed to open Input File");
-							return RET_FILE_IO_ERROR;
-						}
-						name_prev = file_name;
-					}
-					if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-						perror("Failed to seek Input File");
-						fclose(fp_read);
-						return RET_FILE_IO_ERROR;
-					}
-					if (fread(buf_p, 1, io_size, fp_read) != io_size){
-						perror("Failed to read slice on Input File");
-						fclose(fp_read);
-						return RET_FILE_IO_ERROR;
-					}
-
-				// All tail data is available. (one tail or packed tails)
-				} else if ( (data_size > split_offset) && (block_list[block_index].state & 16) ){
-					if (par3_ctx->noise_level >= 3){
-						printf("Reading %"PRIu64" bytes for input block[%"PRIu64"]\n", part_size, block_index);
-					}
-					tail_offset = split_offset;
-					while (tail_offset < split_offset + part_size){	// Read tails until data end.
-						slice_index = block_list[block_index].slice;
-						while (slice_index != -1){
-							//printf("block = %d, size = %"PRIu64", offset = %"PRIu64", slice = %"PRId64"\n", block_index, data_size, tail_offset, slice_index);
-							// Even when chunk tails are overlaped, it will find tail slice of next position.
-							if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-									&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-								break;
-							}
-							slice_index = slice_list[slice_index].next;
-						}
-						if (slice_index == -1){	// When there is no valid slice.
-							printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-							if (fp_read != NULL)
-								fclose(fp_read);
-							return RET_LOGIC_ERROR;
-						}
-
-						// Read one slice from a file.
-						tail_gap = tail_offset - slice_list[slice_index].tail_offset;	// This tail slice may start before tail_offset.
-						file_name = slice_list[slice_index].find_name;
-						file_offset = slice_list[slice_index].find_offset + tail_gap;
-						io_size = slice_list[slice_index].size - tail_gap;
-						if (io_size > part_size)
-							io_size = part_size;
-						if ( (fp_read == NULL) || (file_name != name_prev) ){
-							if (fp_read != NULL){	// Close previous input file.
-								fclose(fp_read);
-								fp_read = NULL;
-							}
-							fp_read = fopen(file_name, "rb");
-							if (fp_read == NULL){
-								perror("Failed to open Input File");
-								return RET_FILE_IO_ERROR;
-							}
-							name_prev = file_name;
-						}
-						if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek Input File");
-							fclose(fp_read);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fread(buf_p + tail_offset - split_offset, 1, io_size, fp_read) != io_size){
-							perror("Failed to read tail slice on Input File");
-							fclose(fp_read);
-							return RET_FILE_IO_ERROR;
-						}
-						tail_offset += io_size;
-					}
-
-				} else {	// The input block was lost, or empty space in tail block.
-					if (block_list[block_index].state & 16){
-						// Zero fill partial input block
-						memset(buf_p, 0, region_size);
-					} else {	// Set index of this lost block
-						original_data[block_index / cohort_count] = NULL;	// Erase address
-						// Using recovery blocks will be stored in place of lost input blocks.
-						lost_id[lost_index] = (uint32_t)(block_index / cohort_count);
-						lost_index++;
-					}
-					data_size = 0;	// No need to calculate parity.
-				}
-
-				if (data_size > split_offset){	// When there is slice data to process.
-					memset(buf_p + part_size, 0, region_size - part_size);	// Zero fill rest bytes
-					// No need to calculate CRC of reading block, because it will check recovered block later.
-
-					if (gf_size == 2){
-						leo_region_create_parity(buf_p, region_size);
-					} else {
-						region_create_parity(buf_p, region_size);
-					}
-				}
-
-				// Print progress percent
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-					time_now = time(NULL);
-					if (time_now != time_old){
-						time_old = time_now;
-						progress_now = (int)((progress_step * 1000) / progress_total);
-						if (progress_now != progress_old){
-							progress_old = progress_now;
-							printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-						}
-					}
-				}
-
-				buf_p += region_size;	// Goto next partial block
-			}
-
-			// When the last input block doesn't exist in this cohort, zero fill it.
-			if (block_index < block_count2 * cohort_count){
-				//printf("zero fill %"PRIu64", block_count2 * cohort_count = %"PRIu64"\n", block_index, block_count2 * cohort_count);
-				memset(buf_p, 0, region_size);
-				original_data[block_index / cohort_count] = buf_p;
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-				}
-			}
-			//printf("\n read input block ok, lost_index = %u, progress = %"PRIu64" / %"PRIu64"\n", lost_index, progress_step, progress_total);
-
-			// At first, clear position of recovery block.
-			for (block_index = 0; block_index < max_recovery_block2; block_index++){
-				recovery_data[block_index] = NULL;
-			}
-			lost_index = 0;
-
-			// Read using recovery blocks
-			part_size = block_size - split_offset;
-			if (part_size > split_size)
-				part_size = split_size;
-			io_size = part_size;
-			// Search packet for the recovery block
-			for (packet_index = 0; packet_index < packet_count; packet_index++){
-				if (memcmp(packet_list[packet_index].matrix, packet_checksum, 16) != 0)
-					continue;	// Search only Recovery Data Packets belong to using Matrix Packet
-
-				block_index = packet_list[packet_index].index;	// Index of the recovery block
-				if (block_index % cohort_count != cohort_index)
-					continue;	// Ignore useless recovery block in other cohorts.
-
-				//printf("lost_index = %u, recovery block = %"PRIu64" \n", lost_index, block_index);
-				buf_p = block_data + region_size * lost_id[lost_index];	// Address of the recovery block
-				// Set position of lost input block = address of using recovery block
-				recovery_data[block_index / cohort_count] = buf_p;
-				lost_index++;
-
-				// Read one Recovery Data Packet from a recovery file.
-				file_name = packet_list[packet_index].name;
-				file_offset = packet_list[packet_index].offset + 48 + 40 + split_offset;	// offset of the recovery block data
-				if (par3_ctx->noise_level >= 3){
-					printf("Reading Recovery Data[%"PRIu64"] for recovery block[%"PRIu64"]\n", packet_index, block_index);
-				}
-				if ( (fp_read == NULL) || (file_name != name_prev) ){
-					if (fp_read != NULL){	// Close previous recovery file.
-						fclose(fp_read);
-						fp_read = NULL;
-					}
-					fp_read = fopen(file_name, "rb");
-					if (fp_read == NULL){
-						perror("Failed to open recovery file");
-						return RET_FILE_IO_ERROR;
-					}
-					name_prev = file_name;
-				}
-				if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek recovery file");
-					fclose(fp_read);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fread(buf_p, 1, io_size, fp_read) != io_size){
-					perror("Failed to read recovery data on recovery file");
-					fclose(fp_read);
-					return RET_FILE_IO_ERROR;
-				}
-				memset(buf_p + part_size, 0, region_size - part_size);	// Zero fill rest bytes
-
-				if (gf_size == 2){
-					leo_region_create_parity(buf_p, region_size);
-				} else {
-					region_create_parity(buf_p, region_size);
-				}
-
-				// Print progress percent
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-					time_now = time(NULL);
-					if (time_now != time_old){
-						time_old = time_now;
-						progress_now = (int)((progress_step * 1000) / progress_total);
-						if (progress_now != progress_old){
-							progress_old = progress_now;
-							printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-						}
-					}
-				}
-
-				// Exit loop, when it read enough recovery blocks.
-				if (lost_index == lost_list[cohort_index])
-					break;
-			}
-
-			// Close recovery file, because next reading will be Input File.
-			if (fp_read != NULL){
-				if (fclose(fp_read) != 0){
-					perror("Failed to close recovery file");
-					return RET_FILE_IO_ERROR;
-				}
-				fp_read = NULL;
-			}
-
-/*
-printf("\n read recovery block ok, lost_index = %u, progress = %"PRIu64" / %"PRIu64"\n", lost_index, progress_step, progress_total);
-
-for (block_index = 0; block_index < block_count2; block_index++){
-	printf("original_data[%2"PRIu64"] = %p\n", block_index, original_data[block_index]);
-}
-for (block_index = 0; block_index < max_recovery_block2; block_index++){
-	printf("recovery_data[%2"PRIu64"] = %p\n", block_index, recovery_data[block_index]);
-}
-for (block_index = 0; block_index < work_count; block_index++){
-	printf("work_data[%2"PRIu64"] = %p\n", block_index, work_data[block_index]);
-}
-
-{	// for debug
-FILE *fp2;
-fp2 = fopen("after_read.bin", "wb");
-fwrite(block_data, 1, region_size * (block_count2 + work_count), fp2);
-fclose(fp2);
-}
-*/
-
-			// Recover lost input blocks
-			ret = leo_decode(region_size,
-							(uint32_t)block_count2, (uint32_t)max_recovery_block2, work_count,
-							original_data, recovery_data, work_data);
-			if (ret != 0){
-				printf("Failed to call Leopard-RS library (%d)\n", ret);
-				return RET_LOGIC_ERROR;
-			}
-			//printf("\n decode ok, progress = %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-
-			// Restore recovered data
-			buf_p = block_data;
-			for (block_index = 0; block_index < block_count2; block_index++){
-				if (original_data[block_index] == NULL){	// lost input block
-					memcpy(buf_p, work_data[block_index], region_size);
-				}
-				buf_p += region_size;
-			}
-
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += block_count2 * lost_index;
-				time_old = time(NULL);
-			}
-
-/*
-{	// for debug
-FILE *fp2;
-fp2 = fopen("after_recover.bin", "wb");
-fwrite(block_data, 1, region_size * (block_count2 + work_count), fp2);
-fclose(fp2);
-}
-printf("\n recover ok, progress = %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-*/
-
-			// Restore all input blocks
-			buf_p = block_data;
-			for (block_index = cohort_index; block_index < block_count; block_index += cohort_count){
-				if ((block_list[block_index].state & (4 | 16)) == 0){	// This input block was not complete.
-					// Check parity of recovered block to confirm that calculation was correct.
-					if (gf_size == 2){
-						ret = leo_region_check_parity(buf_p, region_size);
-					} else {
-						ret = region_check_parity(buf_p, region_size);
-					}
-					if (ret != 0){
-						printf("Parity of recovered block[%"PRIu64"] is different.\n", block_index);
-						return RET_LOGIC_ERROR;
-					}
-				} else if (gf_size == 2){
-					leo_region_restore(buf_p, region_size);	// Return from ALTMAP
-				}
-
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					file_index = slice_list[slice_index].file;
-					// If belong file is missing or damaged.
-					if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-						data_size = slice_list[slice_index].size;
-						file_offset = slice_list[slice_index].offset;
-						tail_offset = slice_list[slice_index].tail_offset;
-						if ( (tail_offset + data_size > split_offset) && (tail_offset < split_offset + split_size) ){
-							// Write a part of lost slice on temporary file.
-							if (tail_offset < split_offset){
-								tail_gap = 0;	// This tail slice may start before split_offset.
-								file_offset = file_offset + split_offset - tail_offset;
-								part_size = tail_offset + data_size - split_offset;
-								if (part_size > split_size)
-									part_size = split_size;
-							} else {
-								tail_gap = tail_offset - split_offset;
-								part_size = data_size;
-								if (part_size > split_offset + split_size - tail_offset)
-									part_size = split_offset + split_size - tail_offset;
-							}
-							io_size = part_size;
-							if (par3_ctx->noise_level >= 3){
-								printf("Writing %zu bytes of slice[%"PRId64"] on file[%u]:%"PRId64" in block[%"PRIu64"]\n", io_size, slice_index, file_index, file_offset, block_index);
-							}
-							if ( (fp_write == NULL) || (file_index != file_prev) ){
-								if (fp_write != NULL){	// Close previous temporary file.
-									fclose(fp_write);
-									fp_write = NULL;
-								}
-								sprintf(temp_path + 22, "%u.tmp", file_index);
-								fp_write = fopen(temp_path, "r+b");
-								if (fp_write == NULL){
-									perror("Failed to open temporary file");
-									return RET_FILE_IO_ERROR;
-								}
-								file_prev = file_index;
-							}
-							if (_fseeki64(fp_write, file_offset, SEEK_SET) != 0){
-								perror("Failed to seek temporary file");
-								fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-							if (fwrite(buf_p + tail_gap, 1, io_size, fp_write) != io_size){
-								perror("Failed to write slice on temporary file");
-								fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-						}
-					}
-
-					// Goto next slice
-					slice_index = slice_list[slice_index].next;
-				}
-
-				// Print progress percent
-				if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-					progress_step++;
-					time_now = time(NULL);
-					if (time_now != time_old){
-						time_old = time_now;
-						progress_now = (int)((progress_step * 1000) / progress_total);
-						if (progress_now != progress_old){
-							progress_old = progress_now;
-							printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-						}
-					}
-				}
-
-				buf_p += region_size;	// Goto next partial block
-			}
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				// When the last input block doesn't exist in this cohort.
-				if (block_index < block_count2 * cohort_count){
-					progress_step++;
-				}
-			}
-			//printf("\n restore ok, progress = %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-	}
-
-	// Close reading file
-	if (fp_read != NULL){
-		if (fclose(fp_read) != 0){
-			perror("Failed to close file");
-			if (fp_write != NULL)
-				fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-		fp_read = NULL;
-	}
-
-	// Write chunk tails on input files
-	for (file_index = 0; file_index < file_count; file_index++){
-		// The input file is missing or damaged.
-		if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-			file_size = 0;
-			chunk_index = file_list[file_index].chunk;		// index of the first chunk
-			chunk_num = file_list[file_index].chunk_num;	// number of chunk descriptions
-			slice_index = file_list[file_index].slice;		// index of the first slice
-			//printf("file[%d]: chunk = %u+%u, %s\n", file_index, chunk_index, chunk_num, file_list[file_index].name);
-			while (chunk_num > 0){
-				chunk_size = chunk_list[chunk_index].size;
-				chunk_size = chunk_list[chunk_index].size;
-				if (chunk_size == 0){	// Unprotected Chunk Description
-					// Unprotected chunk will be filled by zeros after repair.
-					file_size += chunk_list[chunk_index].block;
-					if (chunk_num == 1){	// When unprotected chunk is the last in the input file, set end of file.
-						int file_no;
-						if (par3_ctx->noise_level >= 3){
-							printf("Zero padding unprotected chunk[%u] on file[%u]:%"PRId64"\n", chunk_index, file_index, file_size);
-						}
-						if ( (fp_write == NULL) || (file_index != file_prev) ){
-							if (fp_write != NULL){	// Close previous temporary file.
-								fclose(fp_write);
-								fp_write = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp_write = fopen(temp_path, "r+b");
-							if (fp_write == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						file_no = _fileno(fp_write);
-						if (file_no < 0){
-							perror("Failed to seek temporary file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						} else {
-							if (_chsize_s(file_no, file_size) != 0){
-								perror("Failed to resize temporary file");
-								fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-						}
-					}
-
-				} else {	// Protected Chunk Description
-					while ( (chunk_size >= block_size) || (chunk_size >= 40) ){	// full size slice or chunk tail slice
-						data_size = slice_list[slice_index].size;
-						slice_index++;
-						file_size += data_size;
-						chunk_size -= data_size;
-					}
-					if (chunk_size > 0){	// tiny chunk tail
-						file_offset = file_size;	// Offset of chunk tail
-						io_size = chunk_size;	// Tiny chunk tail was stored in File Packet.
-						file_size += io_size;
-
-						// copy 1 ~ 39 bytes
-						memcpy(buf_tail, &(chunk_list[chunk_index].tail_crc), 8);
-						memcpy(buf_tail + 8, chunk_list[chunk_index].tail_hash, 16);
-						memcpy(buf_tail + 24, &(chunk_list[chunk_index].tail_block), 8);
-						memcpy(buf_tail + 32, &(chunk_list[chunk_index].tail_offset), 8);
-
-						// Write tail slice on temporary file.
-						if (par3_ctx->noise_level >= 3){
-							printf("Writing %zu bytes of chunk[%u] tail on file[%u]:%"PRId64"\n", io_size, chunk_index, file_index, file_offset);
-						}
-						if ( (fp_write == NULL) || (file_index != file_prev) ){
-							if (fp_write != NULL){	// Close previous temporary file.
-								fclose(fp_write);
-								fp_write = NULL;
-							}
-							sprintf(temp_path + 22, "%u.tmp", file_index);
-							fp_write = fopen(temp_path, "r+b");
-							if (fp_write == NULL){
-								perror("Failed to open temporary file");
-								return RET_FILE_IO_ERROR;
-							}
-							file_prev = file_index;
-						}
-						if (_fseeki64(fp_write, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek temporary file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fwrite(buf_tail, 1, io_size, fp_write) != io_size){
-							perror("Failed to write tiny slice on temporary file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-				}
-
-				chunk_index++;
-				chunk_num--;
-			}
-
-			if (file_size != file_list[file_index].size){
-				printf("file size is bad. %s\n", temp_path);
-				return RET_LOGIC_ERROR;
-			} else {
-				file_list[file_index].state |= 0x100;
-			}
-		}
-	}
-
-	// Close writing file
-	if (fp_write != NULL){
-		if (fclose(fp_write) != 0){
-			perror("Failed to close temporary file");
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-	}
-
-	// Release some allocated memory
-	free(par3_ctx->recv_id_list);
-	par3_ctx->recv_id_list = NULL;
-	if (par3_ctx->matrix){
-		free(par3_ctx->matrix);
-		par3_ctx->matrix = NULL;
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/common.c b/windows/src/common.c
deleted file mode 100644
index bf76d2b..0000000
--- a/windows/src/common.c
+++ /dev/null
@@ -1,628 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-
-#ifdef __linux__
-
-/* This definition of _MAX_FNAME works for GCC on POSIX systems */
-#include <limits.h>
-#include <sys/stat.h>
-#define _MAX_FNAME NAME_MAX
-
-#define _strnicmp strncasecmp
-#define _stricmp strcasecmp
-
-#elif _WIN32
-// MSVC headers
-#include <search.h>
-#include <io.h>
-#endif
-
-#include "common.h"
-
-
-
-#ifdef __linux__
-
-// Reproduce Windows system call with Linux's fstat 
-int64_t _filelengthi64(int fd) {
-  struct stat buffer;
-  int result;
-
-  result = fstat(fd, &buffer);
-  if (result == 0) {
-    // success
-    return buffer.st_size;
-  }
-  else {
-    // failure
-    return (int64_t) -1;  // this should sign-extend
-  }
-}
-
-
-/* This is based on code from: https://stackoverflow.com/questions/50119172/how-to-get-the-file-length-in-c-on-linux  
-   Why lseek instead of fstat??
-int64_t _filelengthi64(int filedes)
-{
-    off_t pos = lseek(filedes, 0, SEEK_CUR);
-    if (pos != (off_t)-1)
-    {
-        off_t size = lseek(filedes, 0, SEEK_END);
-        lseek(filedes, pos, SEEK_SET);
-        return (int64_t)size;
-    }
-    return (int64_t)-1;
-}
-*/
-
-
-
-#elif _WIN32
-#endif
-
-// return pointer of filename
-char * offset_file_name(char *file_path)
-{
-	int i;
-
-	for (i = (int)strlen(file_path) - 2; i >= 0; i--){
-		if ((file_path[i] == '\\') || (file_path[i] == '/'))
-			break;
-	}
-	i++;
-
-	return file_path + i;
-}
-
-// detect device name on Windows OS
-static int check_device_name(char *name, int len)
-{
-	if (len >= 3){
-		if ((name[3] == 0) || (name[3] == '.')){
-			if (_strnicmp(name, "CON", 3) == 0)
-				return 1;
-			if (_strnicmp(name, "PRN", 3) == 0)
-				return 1;
-			if (_strnicmp(name, "AUX", 3) == 0)
-				return 1;
-			if (_strnicmp(name, "NUL", 3) == 0)
-				return 1;
-		}
-		if (len >= 4){
-			if ((name[4] == 0) || (name[4] == '.')){
-				if (_strnicmp(name, "COM", 3) == 0){
-					if ((name[3] >= 0x31) && (name[3] <= 0x39))
-						return 1;
-				}
-				if (_strnicmp(name, "LPT", 3) == 0){
-					if ((name[3] >= 0x31) && (name[3] <= 0x39))
-						return 1;
-				}
-			}
-		}
-	}
-
-	return 0;
-}
-
-// Sanitize invalid filename on Windows OS.
-// filename must be UTF-8.
-// return 0 = ok, 1 = sanitize, 2 = warn
-int sanitize_file_name(char *name)
-{
-	int i, ret = 0, len = 0;
-
-	// erase control character 1~31. (return, tab, etc)
-	while (name[len] != 0){
-		if ( (name[len] >= 1) && (name[len] <= 31) ){
-			name[len] = '_';
-			ret |= 1;
-		}
-		len++;
-	}
-
-	// sanitize invalid character on Windows OS. ( \ / : * ? " < > | )
-	for (i = 0; i < len; i++){
-		if ( (name[i] == '\\') || (name[i] == '/') || (name[i] == ':') || (name[i] == '*') || (name[i] == '?')
-				 || (name[i] == '"') || (name[i] == '<') || (name[i] == '>') || (name[i] == '|') ){
-			name[i] = '_';
-			ret |= 1;
-		}
-	}
-
-	// refuse directory traversal (..)
-	if (name[0] == '.'){
-		if (name[1] == 0){
-			name[0] = '_';
-			ret |= 1;
-		} else if ( (name[1] == '.') && (name[2] == 0) ){
-			name[0] = '_';
-			name[1] = '_';
-			ret |= 1;
-		}
-	}
-
-	// warn " " at the top, "." or " " at the last.
-	if (name[0] == ' ')
-		ret |= 2;
-	if ( (len >= 2) && ( (name[len - 1] == '.') || (name[len - 1] == ' ') ) )
-		ret |= 2;
-
-	// warn device name on Windows OS.
-	if (check_device_name(name, len) != 0)
-		ret |= 2;
-
-	return ret;
-}
-
-
-#ifdef __linux__
-
-int get_absolute_path(char *absolute_path, char *relative_path, size_t max) {
-  // Linux is case-sensative, so it doesn't have to be lower/upper-cased.
-
-  // allocate buffer, in case max is less than PATH_MAX
-  char buf[PATH_MAX+1];
-
-  if (realpath(relative_path, buf) != NULL) {
-    strncpy(absolute_path, buf, max);
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
-
-#elif _WIN32
-// convert relative path to absolute path
-int get_absolute_path(char *absolute_path, char *relative_path, size_t max)
-{
-	char *tmp_p;
-	size_t len;
-
-	// MSVC
-	struct _finddatai64_t c_file;
-	intptr_t handle;
-
-	// This function replaces "/" to "\" automatically.
-	if (_fullpath(absolute_path, relative_path, max) == NULL){
-		perror("Failed to make absolute path");
-		return 1;
-	}
-
-	// When the file exists, check each path component.
-	handle = _findfirst64(absolute_path, &c_file);
-	if (handle != (intptr_t) -1){
-		_findclose(handle);
-
-		// Even when case insensitive, use the original case for path component.
-		len = strlen(c_file.name);
-		tmp_p = strrchr(absolute_path, '\\');
-		if (tmp_p != NULL){
-			memcpy(tmp_p + 1, c_file.name, len);
-		}
-
-		// Check drive letter.
-		tmp_p = absolute_path;
-		if (tmp_p[1] == ':'){
-			if ( (tmp_p[0] >= 'a') && (tmp_p[0] <= 'z') ){
-				// Convert from lower case to upper case.
-				tmp_p[0] -= 'a' - 'A';
-			}
-			tmp_p = strchr(tmp_p, '\\');
-			if (tmp_p != NULL){
-				tmp_p[0] = '/';
-				tmp_p++;
-			}
-		}
-
-		// Check each path component.
-		tmp_p = strchr(tmp_p, '\\');
-		while (tmp_p != NULL){
-			tmp_p[0] = 0;
-
-			//printf("find = %s\n", absolute_path);
-			handle = _findfirst64(absolute_path, &c_file);
-			if (handle != (intptr_t) -1){
-				_findclose(handle);
-
-				//printf("component = %s\n", c_file.name);
-				len = strlen(c_file.name);
-				memcpy(tmp_p - len, c_file.name, len);
-			}
-
-			// Replace directory mark from Windows OS style "\" to UNIX style "/" for compatibility.
-			tmp_p[0] = '/';
-			tmp_p = strchr(tmp_p + 1, '\\');
-		}
-
-	} else {
-		// Even when the file doesn't exist, replace directory mark.
-		tmp_p = absolute_path;
-		tmp_p = strchr(tmp_p, '\\');
-		while (tmp_p != NULL){
-			tmp_p[0] = '/';
-			tmp_p = strchr(tmp_p + 1, '\\');
-		}
-	}
-
-	return 0;
-}
-#endif
-
-// copy filename, remove cover, replace directory mark
-size_t path_copy(char *dst, char *src, size_t max)
-{
-	char *tmp_p;
-	size_t len;
-
-	tmp_p = src;
-	len = strlen(tmp_p);
-	if ( (tmp_p[0] == '"') && (tmp_p[len - 1] == '"') ){
-		tmp_p++;
-		len -= 2;
-	}
-	if (len >= max){
-		dst[0] = 0;
-		return 0;
-	}
-
-	memcpy(dst, tmp_p, len);
-	dst[len] = 0;
-
-	// Replace directory mark from Windows OS style "\" to UNIX style "/" for compatibility.
-	tmp_p = strchr(dst, '\\');
-	while (tmp_p != NULL){
-		tmp_p[0] = '/';
-		tmp_p = strchr(tmp_p + 1, '\\');
-	}
-
-	return len;
-}
-
-// Erase return code at the end of text
-size_t trim_text(uint8_t *text, size_t len)
-{
-	// Space, Carriage Return (\r), Line Feed (\n), Tab (\t)
-	while ( (len > 0) && ( (text[len - 1] == ' ') || (text[len - 1] == '\n') || (text[len - 1] == '\r') || (text[len - 1] == '\t') ) ){
-		text[len - 1] = 0;
-		len--;
-	}
-
-	return len;
-}
-
-
-// Because Argz Functions don't exit on MSVC, I made similar functions.
-
-// add a name to the end of names
-// return 0 for success, else ENOMEM for memory error
-int namez_add(char **namez, size_t *namez_len, size_t *namez_max, const char *str)
-{
-	char *list_buf;
-	size_t list_len, list_max, len;
-
-	if (str == NULL)
-		return 0;
-	if (str[0] == 0)
-		return 0;
-
-	list_buf = *namez;
-	list_len = *namez_len;
-	list_max = *namez_max;
-
-	len = strlen(str);
-
-	if (list_buf == NULL){	// allocate memory at first
-		size_t alloc_size;
-
-		// first buffer size is multiple of 1024, larger than _MAX_FNAME * 4
-		alloc_size = _MAX_FNAME * 4;
-		if (alloc_size & 1023){
-			alloc_size = (alloc_size & ~1023) + 1024;
-		}
-		//printf("alloc_size = %d\n", alloc_size);
-
-		list_buf = malloc(alloc_size);
-		if (list_buf == NULL)
-			return 8;
-		list_len = 0;
-		list_max = alloc_size;
-	} else if (list_len + len >= list_max){	// increase memory area
-		char *tmp_p;
-		size_t alloc_size;
-
-		// additional size is multiple of 1024, larger than _MAX_FNAME * 2
-		alloc_size = _MAX_FNAME * 2;
-		if (alloc_size & 1023){
-			alloc_size = (alloc_size & ~1023) + 1024;
-		}
-		//printf("alloc_size = %d\n", alloc_size);
-
-		tmp_p = realloc(list_buf, list_max + alloc_size);
-		if (tmp_p == NULL)
-			return 8;
-		list_buf = tmp_p;
-		list_max += alloc_size;
-	}
-
-	memcpy(list_buf + list_len, str, len);
-	list_len += len;
-	list_buf[list_len] = 0;
-	list_len++;
-
-	*namez = list_buf;
-	*namez_len = list_len;
-	*namez_max = list_max;
-	return 0;
-}
-
-// return count of names
-int namez_count(char *namez, size_t namez_len)
-{
-	int num;
-	size_t off, len;
-
-	if (namez == NULL)
-		return 0;
-	if (namez[0] == 0)
-		return 0;
-
-	num = 0;
-	off = 0;
-	while (off < namez_len){
-		num++;
-		len = strlen(namez + off);
-		off += len + 1;
-	}
-
-	return num;
-}
-
-// remove an entry from names
-// return 0 for success, 1 for cannot find
-int namez_delete(char *namez, size_t *namez_len, char *entry)
-{
-	size_t list_len, len, off;
-
-	if (entry == NULL)
-		return 0;
-	if (entry[0] == 0)
-		return 0;
-
-	list_len = *namez_len;
-
-	if (namez == NULL)
-		return 1;
-
-	if ( (entry > namez) && ((size_t)(entry - namez) < list_len) ){
-		// entry is an item on the list
-		off = (size_t)(entry - namez);
-	} else {
-		// if entry is outside, search entry at first
-		off = 0;
-		while (off < list_len){
-			if (_stricmp(namez + off, entry) == 0){
-				break;
-			}
-			len = strlen(namez + off);
-			off += len + 1;
-		}
-		if (off >= list_len)
-			return 1;
-	}
-
-	len = strlen(entry) + 1;
-	memmove(namez + off, namez + off + len, list_len - off - len);
-	list_len -= len;
-
-	*namez_len = list_len;
-	return 0;
-}
-
-// search a match from names
-// return found position, or NULL for cannot find
-char * namez_search(char *namez, size_t namez_len, char *match)
-{
-	size_t len, off;
-
-	if (match == NULL)
-		return NULL;
-	if (match[0] == 0)
-		return NULL;
-	if (namez == NULL)
-		return NULL;
-
-	off = 0;
-	while (off < namez_len){
-		if (_stricmp(namez + off, match) == 0){
-			return namez + off;
-		}
-		len = strlen(namez + off);
-		off += len + 1;
-	}
-
-	return NULL;
-}
-
-// get a name by the index
-// return found position, or NULL for outside
-char * namez_get(char *namez, size_t namez_len, int index)
-{
-	size_t len, off;
-
-	if (index < 0)
-		return NULL;
-	if (namez == NULL)
-		return NULL;
-
-	off = 0;
-	while (off < namez_len){
-		if (index == 0)
-			return namez + off;
-		index--;
-		len = strlen(namez + off);
-		off += len + 1;
-	}
-
-	return NULL;
-}
-
-static int compare_string( const void *arg1, const void *arg2 )
-{
-	return strcmp( * ( char** ) arg1, * ( char** ) arg2 );
-}
-
-// sort names
-// return number of names for success, else -1 for error
-int namez_sort(char *namez, size_t namez_len)
-{
-	char *list_buf, **list_name;
-	int num, max;
-	size_t off, len;
-
-	if (namez == NULL)
-		return 0;
-	if (namez[0] == 0)
-		return 0;
-
-	// get count of names at first
-	max = 0;
-	off = 0;
-	while (off < namez_len){
-		max++;
-		len = strlen(namez + off);
-		off += len + 1;
-	}
-	if (max <= 1)
-		return max;	// when there is only one name, no need to sort.
-
-	// allocate memory for temporary area
-	list_buf = malloc(namez_len);
-	if (list_buf == NULL)
-		return -1;
-	memcpy(list_buf, namez, namez_len);
-
-	// allocate memory for offset
-	list_name = malloc(max * sizeof(char *));
-	if (list_name == NULL){
-		free(list_buf);
-		return -1;
-	}
-
-	// set offset of names
-	num = 0;
-	off = 0;
-	while (off < namez_len){
-		list_name[num] = list_buf + off;
-		//printf("list_name[%d] = %s\n", num, list_buf + off);
-		num++;
-		len = strlen(list_buf + off);
-		off += len + 1;
-	}
-
-	// quick sort
-	qsort( (void *)list_name, (size_t)max, sizeof(char *), compare_string );
-
-	// put back
-	off = 0;
-	for (num = 0; num < max; num++){
-		//printf("list_name[%d] = %s\n", num, list_name[num]);
-		len = strlen(list_name[num]);
-		memcpy(namez + off, list_name[num], len);
-		off += len;
-		namez[off] = 0;
-		off++;
-	}
-
-	free(list_buf);
-	free(list_name);
-	return max;
-}
-
-// return the maximum length of names
-size_t namez_maxlen(char *namez, size_t namez_len)
-{
-	size_t off, len, max_len;
-
-	if (namez == NULL)
-		return 0;
-	if (namez[0] == 0)
-		return 0;
-
-	max_len = 0;
-	off = 0;
-	while (off < namez_len){
-		len = strlen(namez + off);
-		if (max_len < len)
-			max_len = len;
-
-		off += len + 1;
-	}
-
-	return max_len;
-}
-
-
-// Combine 8 or 16 bytes to little endian 32-bit integer.
-unsigned int mem_or8(unsigned char buf[8])
-{
-	unsigned int *p4 = (unsigned int *)buf;
-
-	return (p4[0] | p4[1]);
-}
-unsigned int mem_or16(unsigned char buf[16])
-{
-	unsigned int *p4 = (unsigned int *)buf;
-
-	return (p4[0] | p4[1] | p4[2] | p4[3]);
-}
-
-
-// Popcount
-// https://en.wikipedia.org/wiki/Hamming_weight
-int popcount32(uint32_t x)
-{
-	x -= (x >> 1) & 0x55555555;						//put count of each 2 bits into those 2 bits
-	x = (x & 0x33333333) + ((x >> 2) & 0x33333333);	//put count of each 4 bits into those 4 bits 
-	x = (x + (x >> 4)) & 0x0f0f0f0f;				//put count of each 8 bits into those 8 bits 
-	x += x >>  8;									//put count of each 16 bits into their lowest 8 bits
-	x += x >> 16;									//put count of each 32 bits into their lowest 8 bits
-	return x & 0x7f;
-}
-
-// Return log2 of integer (round up)
-// log2(0) = error, log2(1) = 0, log2(2) = 1, log2(3) = 2, log2(4) = 2, log2(5) = 3, ...
-int roundup_log2(uint64_t x)
-{
-	int n = 0;
-
-	while (x > 1){
-		x = (x + 1) >> 1;	// This line same as "x = (x + 1) / 2;".
-		n++;
-	}
-
-	return n;
-}
-
-// Return next power of two at or above given value.
-uint64_t next_pow2(uint64_t x)
-{
-	uint64_t y = 1;	// 2 power 0
-
-	if (x == 0)
-		return 0;	// Special case
-	if (x >= 0x8000000000000000)
-		return 0x8000000000000000;	// Aboid over flow
-
-	while (y < x){
-		y = y << 1;
-	}
-
-	return y;
-}
-
diff --git a/windows/src/common.h b/windows/src/common.h
deleted file mode 100644
index 17957cc..0000000
--- a/windows/src/common.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef __COMMON_H__
-#define __COMMON_H__
-
-#if __linux__
-
-#include <linux/limits.h>
-#define _MAX_PATH PATH_MAX
-
-// Windows system call 
-int64_t _filelengthi64(int fd);
-
-#elif _WIN32
-
-#endif
-
-
-
-char * offset_file_name(char *file_path);
-int sanitize_file_name(char *name);
-
-int get_absolute_path(char *absolute_path, char *relative_path, size_t max);
-size_t path_copy(char *dst, char *src, size_t max);
-
-size_t trim_text(uint8_t *text, size_t len);
-
-int namez_add(char **namez, size_t *namez_len, size_t *namez_max, const char *str);
-int namez_count(char *namez, size_t namez_len);
-int namez_delete(char *namez, size_t *namez_len, char *entry);
-char * namez_search(char *namez, size_t namez_len, char *match);
-char * namez_get(char *namez, size_t namez_len, int index);
-int namez_sort(char *namez, size_t namez_len);
-size_t namez_maxlen(char *namez, size_t namez_len);
-
-unsigned int mem_or8(unsigned char buf[8]);
-unsigned int mem_or16(unsigned char buf[16]);
-
-int popcount32(uint32_t x);
-int roundup_log2(uint64_t x);
-uint64_t next_pow2(uint64_t x);
-
-#endif // __COMMON_H__
diff --git a/windows/src/file.c b/windows/src/file.c
deleted file mode 100644
index f5cd4a3..0000000
--- a/windows/src/file.c
+++ /dev/null
@@ -1,604 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _stat64 stat
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#ifdef __linux__
-
-#warning "Assuming this Linux system uses 64-bit time."
-/* There doesn't seem to be a preprocessor test for 64-bit time_t! 
-   sizeof() is not available to the preprocessor.  */
-/* NOTE: ctime is not threadsafe.  It returns a pointer to a static buffer.
-   we should consider using ctime_r().  */
-#define __time64_t int64_t
-#define _ctime64 ctime
-
-#define _chmod chmod
-
-#include <sys/stat.h>
-#include <utime.h>
-
-// permission to write by owner
-#define _S_IWRITE S_IWUSR
-
-#define _utime utime
-#define _utimbuf utimbuf
-
-#elif _WIN32
-
-// MSVC headers
-#include <io.h>
-#include <sys/stat.h>
-#include <sys/utime.h>
-
-#endif
-
-#include "libpar3.h"
-#include "packet.h"
-
-
-/*
-Converting a time_t value to a FILETIME
-https://docs.microsoft.com/en-us/windows/win32/sysinfo/converting-a-time-t-value-to-a-file-time
-*/
-static uint64_t TimetToFileTime(uint64_t unix_time)
-{
-	uint64_t file_time;
-
-	file_time = (unix_time * 10000000LL) + 116444736000000000LL;
-
-	return file_time;
-}
-
-static uint64_t FileTimeToTimet(uint64_t file_time)
-{
-	uint64_t unix_time;
-
-	unix_time = (file_time - 116444736000000000LL) / 10000000LL;
-
-	return unix_time;
-}
-
-
-// File System Specific Packets (optional packets)
-/*
-https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?view=msvc-170
-st_gid and st_uid are not supported on Windows OS.
-st_atime may not be supported on Windows OS.
-st_ctime has different property on Windows OS.
-
-Bit of st_mode;
-_S_IFDIR  = 0x4000, Directory
-_S_IFREG  = 0x8000, Regular file
-_S_IREAD  = 0x0100, Read permission, owner
-_S_IWRITE = 0x0080, Write permission, owner
-_S_IEXEC  = 0x0040, Execute/search permission, owner
-
-At this time, it stores st_mtime and st_mode for compatibility.
-*/
-
-// UNIX Permissions Packet
-// 0 = write ok, 1 = failed (no checksum)
-// Return checksum of UNIX Permissions Packet in *checksum.
-int make_unix_permission_packet(PAR3_CTX *par3_ctx, char *file_name, uint8_t *checksum)
-{
-	uint8_t pkt_buf[84];	// This is the minimum size of this packet.
-	int ret;
-	size_t packet_size;
-	struct _stat64 stat_buf;
-
-	// Store infomation, only when scuucess.
-	if (_stat64(file_name, &stat_buf) != 0)
-		return 1;
-
-/*
-	printf("Status information of \"%s\"\n", file_name);
-	//printf("st_mtime = %016"PRIx64"\n", stat_buf.st_mtime);
-	printf("st_mtime = %s", _ctime64(&(stat_buf.st_mtime)));
-	printf("st_mode = 0x%04x\n\n", stat_buf.st_mode);
-*/
-
-	// It makes a packet on stack memory temporary.
-	packet_size = 48;
-	memset(pkt_buf + packet_size, 0xFF, 16);	// atime and ctime are not set.
-	packet_size += 16;
-	if (par3_ctx->file_system & 1){
-		memcpy(pkt_buf + packet_size, &(stat_buf.st_mtime), 8);	// mtime
-	} else {
-		memset(pkt_buf + packet_size, 0xFF, 8);	// Default value when mtime isn't set.
-	}
-	packet_size += 8;
-	memset(pkt_buf + packet_size, 0xFF, 8);	// owner UID and group GID are not set.
-	packet_size += 8;
-	if (par3_ctx->file_system & 2){
-		ret = stat_buf.st_mode & 0x0FFF;	// lower 12-bit of i_mode
-	} else {
-		ret = 0xFFFF;	// When this item isn't used, store an invalid value.
-	}
-	memcpy(pkt_buf + packet_size, &ret, 2);
-	packet_size += 2;
-	memset(pkt_buf + packet_size, 0, 2);	// length of string (no names)
-	packet_size += 2;
-	// Packet size = 48 + 36 = 84
-	make_packet_header(pkt_buf, packet_size, par3_ctx->set_id, "PAR UNX\0", 1);
-
-	// Check existing packets if this packet was made already.
-	ret = check_packet_exist(par3_ctx->file_system_packet, par3_ctx->file_system_packet_size, pkt_buf, packet_size);
-	//printf("ret = %d, size = %zu\n", ret, par3_ctx->file_system_packet_size);
-	if (ret == 0){
-		memcpy(par3_ctx->file_system_packet + par3_ctx->file_system_packet_size, pkt_buf, packet_size);
-		par3_ctx->file_system_packet_size += packet_size;
-		par3_ctx->file_system_packet_count += 1;
-	}
-
-	// Write checksum of this packet
-	memcpy(checksum, pkt_buf + 8, 16);
-
-	return 0;
-}
-
-
-
-// FAT Permissions Packet
-// 0 = write ok, 1 = failed (no checksum)
-// Return checksum of FAT Permissions Packet in *checksum.
-int make_fat_permission_packet(PAR3_CTX *par3_ctx, char *file_name, uint8_t *checksum)
-{
-	uint8_t pkt_buf[74];	// This is size of this packet.
-	int ret;
-	size_t packet_size;
-	uint64_t file_time;
-	struct _stat64 stat_buf;
-
-	// Store infomation, only when scuucess.
-	if (_stat64(file_name, &stat_buf) != 0)
-		return 1;
-
-/*
-	printf("Status information of \"%s\"\n", file_name);
-	printf("st_mtime = %s", _ctime64(&(stat_buf.st_mtime)));
-*/
-
-	// It makes a packet on stack memory temporary.
-	packet_size = 48;
-	memset(pkt_buf + packet_size, 0xFF, 16);	// CreationTimestamp and LastAccessTimestamp are not set.
-	packet_size += 16;
-	file_time = TimetToFileTime(stat_buf.st_mtime);	// Convert UNIX time to Windows FILETIME.
-	memcpy(pkt_buf + packet_size, &file_time, 8);	// LastWriteTimestamp
-	packet_size += 8;
-	memset(pkt_buf + packet_size, 0xFF, 2);	// FileAttributes isn't set.
-	packet_size += 2;
-	// Packet size = 48 + 26 = 74
-	make_packet_header(pkt_buf, packet_size, par3_ctx->set_id, "PAR FAT\0", 1);
-
-	// Check existing packets if this packet was made already.
-	ret = check_packet_exist(par3_ctx->file_system_packet, par3_ctx->file_system_packet_size, pkt_buf, packet_size);
-	//printf("ret = %d, size = %zu\n", ret, par3_ctx->file_system_packet_size);
-	if (ret == 0){
-		memcpy(par3_ctx->file_system_packet + par3_ctx->file_system_packet_size, pkt_buf, packet_size);
-		par3_ctx->file_system_packet_size += packet_size;
-		par3_ctx->file_system_packet_count += 1;
-	}
-
-	// Write checksum of this packet
-	memcpy(checksum, pkt_buf + 8, 16);
-
-	return 0;
-}
-
-
-// For showing file list
-static void show_file_system_info(PAR3_CTX *par3_ctx, uint8_t *checksum)
-{
-	uint8_t *packet_checksum, *packet_type, *buf;
-	size_t offset, total_size;
-	uint32_t item_value4;
-	uint64_t packet_size;
-	__time64_t item_value8;
-
-	buf = par3_ctx->file_system_packet;
-	total_size = par3_ctx->file_system_packet_size;
-
-	offset = 0;
-	while (offset + 48 < total_size){
-		packet_checksum = buf + offset + 8;
-		memcpy(&packet_size, buf + offset + 24, 8);
-		packet_type = buf + offset + 40;
-
-		if (memcmp(packet_checksum, checksum, 16) == 0){
-			//printf("packet_size = %"PRIu64"\n", packet_size);
-			if (memcmp(packet_type, "PAR UNX\0", 8) == 0){	// UNIX Permissions Packet
-				if (par3_ctx->file_system & 3){
-					printf("UNIX Permissions: ");
-					if (par3_ctx->file_system & 2){	// i_mode
-						item_value4 = 0;
-						memcpy(&item_value4, buf + offset + 48 + 32, 2);
-						if ((item_value4 & 0xF000) == 0){	// i_mode must be 12-bit value.
-							printf("i_mode = 0x%03x", item_value4);
-							if (par3_ctx->file_system & 1){
-								printf(" , ");
-							} else {
-								printf("\n");
-							}
-						}
-					}
-					if (par3_ctx->file_system & 1){	// mtime
-						memcpy(&item_value8, buf + offset + 48 + 16, 8);
-						if (item_value8 != 0xFFFFFFFFFFFFFFFF){
-							printf("mtime = %s", _ctime64(&item_value8));
-						}
-					}
-				}
-
-			} else if (memcmp(packet_type, "PAR FAT\0", 8) == 0){	// FAT Permissions Packet
-				if (par3_ctx->file_system & 0x10000){
-					printf("FAT Permissions: ");
-					memcpy(&item_value8, buf + offset + 48 + 16, 8);
-					if (item_value8 != 0xFFFFFFFFFFFFFFFF){
-						item_value8 = FileTimeToTimet(item_value8);
-						printf("LastWriteTime = %s", _ctime64(&item_value8));
-					}
-				}
-			}
-		}
-
-		offset += packet_size;
-	}
-}
-
-// packet_type: 1 = file, 2 = directory, 3 = root
-void read_file_system_option(PAR3_CTX *par3_ctx, int packet_type, int64_t offset)
-{
-	uint8_t *tmp_p;
-	int len;
-
-	if (packet_type == 1){	// File Packet
-		tmp_p = par3_ctx->file_packet;
-		if (offset + 48 + 2 + 24 + 1 > (int64_t)(par3_ctx->file_packet_size))
-			return;
-		tmp_p += offset + 48;
-		// Check name's length
-		len = 0;
-		memcpy(&len, tmp_p, 2);
-		//printf("file name length = %d\n", len);
-		if (offset + 48 + 2 + len + 24 + 1 > (int64_t)(par3_ctx->file_packet_size))
-			return;
-		tmp_p += 2 + len + 24;
-		// Check options
-		len = 0;
-		memcpy(&len, tmp_p, 1);
-		tmp_p += 1;
-
-	} else if (packet_type == 2){	// Directory Packet
-		tmp_p = par3_ctx->dir_packet;
-		if (offset + 48 + 2 + 4 > (int64_t)(par3_ctx->dir_packet_size))
-			return;
-		tmp_p += offset + 48;
-		// Check name's length
-		len = 0;
-		memcpy(&len, tmp_p, 2);
-		//printf("dir name length = %d\n", len);
-		if (offset + 48 + 2 + len + 4 > (int64_t)(par3_ctx->dir_packet_size))
-			return;
-		tmp_p += 2 + len;
-		// Check options
-		len = 0;
-		memcpy(&len, tmp_p, 4);
-		tmp_p += 4;
-
-	} else {
-		return;
-	}
-	//printf("number of options = %d\n", len);
-
-	while (len > 0){
-		show_file_system_info(par3_ctx, tmp_p);
-
-		// Goto next option
-		tmp_p += 16;
-		len--;
-	}
-}
-
-
-// For verification
-static int check_file_system_info(PAR3_CTX *par3_ctx, uint8_t *checksum, void *stat_p)
-{
-	uint8_t *packet_checksum, *packet_type, *buf;
-	int ret;
-	size_t offset, total_size;
-	uint32_t item_value4;
-	uint64_t packet_size;
-	__time64_t item_value8;
-	struct _stat64 *stat_buf;
-
-	stat_buf = stat_p;
-	//printf("i_mode = 0x%04x ", stat_buf->st_mode);
-	//printf("mtime = %s", _ctime64(&(stat_buf->st_mtime)));
-
-	buf = par3_ctx->file_system_packet;
-	total_size = par3_ctx->file_system_packet_size;
-
-	ret = 0;
-	offset = 0;
-	while (offset + 48 < total_size){
-		packet_checksum = buf + offset + 8;
-		memcpy(&packet_size, buf + offset + 24, 8);
-		packet_type = buf + offset + 40;
-
-		if (memcmp(packet_checksum, checksum, 16) == 0){
-			//printf("packet_size = %"PRIu64"\n", packet_size);
-			if (memcmp(packet_type, "PAR UNX\0", 8) == 0){	// UNIX Permissions Packet
-				if (par3_ctx->file_system & 3){
-					if (par3_ctx->file_system & 2){	// i_mode
-						item_value4 = 0;
-						memcpy(&item_value4, buf + offset + 48 + 32, 2);
-						if ((item_value4 & 0xF000) == 0){	// i_mode must be 12-bit value.
-							//printf("i_mode = 0x%04x\n", item_value4);
-							if (item_value4 != (stat_buf->st_mode & 0x0FFF)){
-								// Permission is different.
-								ret |= 0x20000;
-							}
-						}
-					}
-					if (par3_ctx->file_system & 1){	// mtime
-						memcpy(&item_value8, buf + offset + 48 + 16, 8);
-						if (item_value8 != 0xFFFFFFFFFFFFFFFF){
-							//printf("mtime = %s", _ctime64(&item_value8));
-							if (item_value8 != stat_buf->st_mtime){
-								// Timestamp is different.
-								ret |= 0x10000;
-							}
-						}
-					}
-				}
-
-			} else if (memcmp(packet_type, "PAR FAT\0", 8) == 0){	// FAT Permissions Packet
-				if (par3_ctx->file_system & 0x10000){	// LastWriteTimestamp
-					memcpy(&item_value8, buf + offset + 48 + 16, 8);
-					if (item_value8 != 0xFFFFFFFFFFFFFFFF){
-						item_value8 = FileTimeToTimet(item_value8);
-						//printf("LastWriteTime = %s", _ctime64(&item_value8));
-						if (item_value8 != stat_buf->st_mtime){
-							// Timestamp is different.
-							ret |= 0x10000;
-						}
-					}
-				}
-			}
-		}
-
-		offset += packet_size;
-	}
-
-	return ret;
-}
-
-
-// packet_type: 1 = file, 2 = directory, 3 = root
-int check_file_system_option(PAR3_CTX *par3_ctx, int packet_type, int64_t offset, void *stat_p)
-{
-	uint8_t *tmp_p;
-	int len, ret;
-
-	if (packet_type == 1){	// File Packet
-		tmp_p = par3_ctx->file_packet;
-		if (offset + 48 + 2 + 24 + 1 > (int64_t)(par3_ctx->file_packet_size))
-			return 0;
-		tmp_p += offset + 48;
-		// Check length of filename
-		len = 0;
-		memcpy(&len, tmp_p, 2);
-		//printf("filename length = %d\n", len);
-		if (offset + 48 + 2 + len + 24 + 1 > (int64_t)(par3_ctx->file_packet_size))
-			return 0;
-		tmp_p += 2 + len + 24;
-		// Check options
-		len = 0;
-		memcpy(&len, tmp_p, 1);
-		tmp_p += 1;
-
-	} else if (packet_type == 2){	// Directory Packet
-		tmp_p = par3_ctx->dir_packet;
-		if (offset + 48 + 2 + 4 > (int64_t)(par3_ctx->dir_packet_size))
-			return 0;
-		tmp_p += offset + 48;
-		// Check name's length
-		len = 0;
-		memcpy(&len, tmp_p, 2);
-		//printf("dir name length = %d\n", len);
-		if (offset + 48 + 2 + len + 4 > (int64_t)(par3_ctx->dir_packet_size))
-			return 0;
-		tmp_p += 2 + len;
-		// Check options
-		len = 0;
-		memcpy(&len, tmp_p, 4);
-		tmp_p += 4;
-
-	} else {
-		return 0;
-	}
-	//printf("number of options = %d\n", len);
-
-	ret = 0;
-	while (len > 0){
-		ret |= check_file_system_info(par3_ctx, tmp_p, stat_p);
-
-		// Goto next option
-		tmp_p += 16;
-		len--;
-	}
-
-	return ret;
-}
-
-
-
-// For repair
-static int reset_file_system_info(PAR3_CTX *par3_ctx, uint8_t *checksum, char *file_name)
-{
-	uint8_t *packet_checksum, *packet_type, *buf;
-	int ret;
-	size_t offset, total_size;
-	uint32_t item_value4;
-	uint64_t packet_size;
-	__time64_t item_value8;
-	struct _stat64 stat_buf;
-
-	buf = par3_ctx->file_system_packet;
-	total_size = par3_ctx->file_system_packet_size;
-
-	ret = 0;
-	offset = 0;
-	while (offset + 48 < total_size){
-		packet_checksum = buf + offset + 8;
-		memcpy(&packet_size, buf + offset + 24, 8);
-		packet_type = buf + offset + 40;
-
-		if (memcmp(packet_checksum, checksum, 16) == 0){
-			//printf("packet_size = %"PRIu64"\n", packet_size);
-			if (memcmp(packet_type, "PAR UNX\0", 8) == 0){	// UNIX Permissions Packet
-				// Recover infomation, only when scuucess.
-				if (_stat64(file_name, &stat_buf) == 0){
-					//printf("i_mode = 0x%04x ", stat_buf.st_mode);
-					//printf("mtime = %s", _ctime64(&(stat_buf.st_mtime)));
-
-					if (par3_ctx->file_system & 3){
-						if (par3_ctx->file_system & 1){	// mtime
-							memcpy(&item_value8, buf + offset + 48 + 16, 8);
-							if (item_value8 != 0xFFFFFFFFFFFFFFFF){
-								//printf("mtime = %s", _ctime64(&item_value8));
-								if (item_value8 != stat_buf.st_mtime){	// Timestamp is different.
-									struct _utimbuf ut;
-
-									// When there is no write permission, set temporary.
-									if ((stat_buf.st_mode & _S_IWRITE) == 0){
-										if (_chmod(file_name, stat_buf.st_mode | _S_IWRITE) == 0){
-											stat_buf.st_mode |= _S_IWRITE;
-										}
-									}
-
-									// After get write permission, change timestamp.
-									ut.actime = stat_buf.st_atime;	// Reuse current atime
-									ut.modtime = item_value8;		// Recover to stored mtime
-									if (_utime(file_name, &ut) != 0)
-										ret |= 0x10000;	// Failed to reset timestamp
-									// Caution ! this cannot modify directory on Windows OS.
-								}
-							}
-						}
-						if (par3_ctx->file_system & 2){	// i_mode
-							item_value4 = 0;
-							memcpy(&item_value4, buf + offset + 48 + 32, 2);
-							if ((item_value4 & 0xF000) == 0){	// i_mode must be 12-bit value.
-								if (item_value4 != (stat_buf.st_mode & 0x0FFF)){	// Permission is different.
-									//printf("i_mode = 0x%04x, 0x%04x\n", item_value4, stat_buf.st_mode & 0x0FFF);
-									if (_chmod(file_name, item_value4) != 0)
-										ret |= 0x20000;	// Failed to reset permissions
-								}
-							}
-						}
-					}
-				}
-
-			} else if (memcmp(packet_type, "PAR FAT\0", 8) == 0){	// FAT Permissions Packet
-				// Recover infomation, only when scuucess.
-				if (_stat64(file_name, &stat_buf) == 0){
-					//printf("mtime = %s", _ctime64(&(stat_buf.st_mtime)));
-
-					if (par3_ctx->file_system & 0x10000){	// LastWriteTimestamp
-						memcpy(&item_value8, buf + offset + 48 + 16, 8);
-						if (item_value8 != 0xFFFFFFFFFFFFFFFF){
-							item_value8 = FileTimeToTimet(item_value8);
-							//printf("LastWriteTime = %s", _ctime64(&item_value8));
-							if (item_value8 != stat_buf.st_mtime){	// Timestamp is different.
-								struct _utimbuf ut;
-
-								ut.actime = stat_buf.st_atime;	// Reuse current atime
-								ut.modtime = item_value8;		// Recover to stored mtime
-								if (_utime(file_name, &ut) != 0)
-									ret |= 0x10000;	// Failed to reset timestamp
-								// Caution ! UNIX time is low resolution than Windows FILETIME.
-							}
-						}
-					}
-				}
-			}
-		}
-
-		offset += packet_size;
-	}
-
-	return ret;
-}
-
-
-// packet_type: 1 = file, 2 = directory, 3 = root
-int test_file_system_option(PAR3_CTX *par3_ctx, int packet_type, int64_t offset, char *file_name)
-{
-	uint8_t *tmp_p;
-	int len, ret;
-
-	if (packet_type == 1){	// File Packet
-
-		tmp_p = par3_ctx->file_packet;
-		if (offset + 48 + 2 + 24 + 1 > (int64_t)(par3_ctx->file_packet_size))
-			return 0;
-		tmp_p += offset + 48;
-		// Check length of filename
-		len = 0;
-		memcpy(&len, tmp_p, 2);
-		//printf("filename length = %d\n", len);
-		if (offset + 48 + 2 + len + 24 + 1 > (int64_t)(par3_ctx->file_packet_size))
-			return 0;
-		tmp_p += 2 + len + 24;
-		// Check options
-		len = 0;
-		memcpy(&len, tmp_p, 1);
-		tmp_p += 1;
-
-	} else if (packet_type == 2){	// Directory Packet
-		tmp_p = par3_ctx->dir_packet;
-		if (offset + 48 + 2 + 4 > (int64_t)(par3_ctx->dir_packet_size))
-			return 0;
-		tmp_p += offset + 48;
-		// Check name's length
-		len = 0;
-		memcpy(&len, tmp_p, 2);
-		//printf("dir name length = %d\n", len);
-		if (offset + 48 + 2 + len + 4 > (int64_t)(par3_ctx->dir_packet_size))
-			return 0;
-		tmp_p += 2 + len;
-		// Check options
-		len = 0;
-		memcpy(&len, tmp_p, 4);
-		tmp_p += 4;
-
-	} else {
-		return 0;
-	}
-	//printf("number of options = %d\n", len);
-
-	ret = 0;
-	while (len > 0){
-		ret |= reset_file_system_info(par3_ctx, tmp_p, file_name);
-
-		// Goto next option
-		tmp_p += 16;
-		len--;
-	}
-
-	return ret;
-}
-
diff --git a/windows/src/file.h b/windows/src/file.h
deleted file mode 100644
index 3083bbc..0000000
--- a/windows/src/file.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-// File System Specific Packets
-
-// UNIX Permissions Packet
-int make_unix_permission_packet(PAR3_CTX *par3_ctx, char *file_name, uint8_t *checksum);
-
-// FAT Permissions Packet
-int make_fat_permission_packet(PAR3_CTX *par3_ctx, char *file_name, uint8_t *checksum);
-
-// For showing file list
-void read_file_system_option(PAR3_CTX *par3_ctx, int packet_type, int64_t offset);
-
-// For verification
-int check_file_system_option(PAR3_CTX *par3_ctx, int packet_type, int64_t offset, void *stat_p);
-
-// For repair
-int test_file_system_option(PAR3_CTX *par3_ctx, int packet_type, int64_t offset, char *file_name);
-
diff --git a/windows/src/galois.h b/windows/src/galois.h
deleted file mode 100644
index c1ad84e..0000000
--- a/windows/src/galois.h
+++ /dev/null
@@ -1,36 +0,0 @@
-
-// For 8-bit Galois Field
-uint8_t * gf8_create_table(int prim_poly);
-
-int gf8_multiply(uint8_t *galois_log_table, int x, int y);
-int gf8_divide(uint8_t *galois_log_table, int x, int y);
-int gf8_reciprocal(uint8_t *galois_log_table, int y);
-
-void gf8_region_multiply(uint8_t *galois_log_table,
-						uint8_t *region,	/* Region to multiply */
-						int multby,			/* Number to multiply by */
-						size_t nbytes,		/* Number of bytes in region */
-						uint8_t *r2,		/* If r2 != NULL, products go here */
-						int add);
-
-void gf8_region_create_parity(int prim_poly, uint8_t *buf, size_t region_size);
-int gf8_region_check_parity(int galois_poly, uint8_t *buf, size_t region_size);
-
-
-// For 16-bit Galois Field
-uint16_t * gf16_create_table(int prim_poly);
-
-int gf16_multiply(uint16_t *galois_log_table, int x, int y);
-int gf16_divide(uint16_t *galois_log_table, int x, int y);
-int gf16_reciprocal(uint16_t *galois_log_table, int y);
-
-void gf16_region_multiply(uint16_t *galois_log_table,
-						uint8_t *region,	/* Region to multiply */
-						int multby,			/* Number to multiply by */
-						size_t nbytes,		/* Number of bytes in region */
-						uint8_t *r2,		/* If r2 != NULL, products go here */
-						int add);
-
-void gf16_region_create_parity(int prim_poly, uint8_t *buf, size_t region_size);
-int gf16_region_check_parity(int galois_poly, uint8_t *buf, size_t region_size);
-
diff --git a/windows/src/galois16.c b/windows/src/galois16.c
deleted file mode 100644
index 47ab36e..0000000
--- a/windows/src/galois16.c
+++ /dev/null
@@ -1,350 +0,0 @@
-// This is based on source code of Jerasure (v1.2), and modified for 16-bit Galois Field.
-
-/* Galois.c
- * James S. Plank
- * April, 2007
-
-Galois.tar - Fast Galois Field Arithmetic Library in C/C++
-Copright (C) 2007 James S. Plank
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-
-James S. Plank
-Department of Electrical Engineering and Computer Science
-University of Tennessee
-Knoxville, TN 37996
-plank@cs.utk.edu
-
- */
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-
-// Create tables for 16-bit Galois Field
-// Return main pointer of tables.
-uint16_t * gf16_create_table(int prim_poly)
-{
-	int j, b;
-	uint16_t *galois_log_table, *galois_ilog_table;
-
-	// Allocate tables on memory
-	// To fit CPU cache memory, table uses 16-bit integer.
-	galois_log_table = malloc(sizeof(uint16_t) * 65536 * 2);
-	if (galois_log_table == NULL)
-		return NULL;
-	galois_ilog_table = galois_log_table + 65536;
-
-	// galois_log_table[0] is invalid, because power of 2 never becomes 0.
-	galois_log_table[0] = prim_poly;	// Instead of invalid value, set generator polynomial.
-	galois_ilog_table[65535] = 1;	// 2 power 0 is 1. 2 power 65535 is 1.
-
-	b = 1;
-	for (j = 0; j < 65535; j++) {
-		galois_log_table[b] = j;
-		galois_ilog_table[j] = b;
-		b = b << 1;
-		if (b & 65536)
-			b = (b ^ prim_poly) & 65535;
-	}
-
-	return galois_log_table;
-}
-
-
-// Return (x * y)
-int gf16_multiply(uint16_t *galois_log_table, int x, int y)
-{
-	int sum_j;
-	uint16_t *galois_ilog_table;
-
-	if (x == 0 || y == 0)
-		return 0;
-	galois_ilog_table = galois_log_table + 65536;
-
-	sum_j = galois_log_table[x] + galois_log_table[y];
-	if (sum_j >= 65535)
-		sum_j -= 65535;
-
-	return galois_ilog_table[sum_j];
-}
-
-// Return (x / y)
-int gf16_divide(uint16_t *galois_log_table, int x, int y)
-{
-	int sum_j;
-	uint16_t *galois_ilog_table;
-
-	if (y == 0)
-		return -1;	// Error: division by zero
-	if (x == 0)
-		return 0;
-	galois_ilog_table = galois_log_table + 65536;
-
-	sum_j = galois_log_table[x] - galois_log_table[y];
-	if (sum_j < 0)
-		sum_j += 65535;
-
-	return galois_ilog_table[sum_j];
-}
-
-// Return (1 / y)
-int gf16_reciprocal(uint16_t *galois_log_table, int y)
-{
-	uint16_t *galois_ilog_table;
-
-	if (y == 0)
-		return -1;	// Error: division by zero
-	galois_ilog_table = galois_log_table + 65536;
-
-	return galois_ilog_table[ 65535 - galois_log_table[y] ];
-}
-
-
-// This is based on GF-Complete, Revision 1.03.
-// gf_w16_split_8_16_lazy_multiply_region
-
-/*
-
-Copyright (c) 2013, James S. Plank, Ethan L. Miller, Kevin M. Greenan,
-Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
- - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in
-   the documentation and/or other materials provided with the
-   distribution.
-
- - Neither the name of the University of Tennessee nor the names of its
-   contributors may be used to endorse or promote products derived
-   from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
-WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-*/
-void gf16_region_multiply(uint16_t *galois_log_table,
-						uint8_t *region,	/* Region to multiply */
-						int multby,			/* Number to multiply by */
-						size_t nbytes,		/* Number of bytes in region */
-						uint8_t *r2,		/* If r2 != NULL, products go here */
-						int add)
-{
-	uint16_t *ur1, *ur2;
-	int prod, v;
-	size_t i;
-
-	ur1 = (uint16_t *) region;
-	ur2 = (r2 == NULL) ? ur1 : (uint16_t *) r2;
-	nbytes /= 2;	// Convert unit from byte to count.
-
-	if (multby == 0) {
-		if (add == 0){
-			for (i = 0; i < nbytes; i++) {
-				ur2[i] = 0;
-			}
-		}
-
-	} else if (multby == 1) {
-		if (add == 0){
-			if (r2 != NULL){
-				for (i = 0; i < nbytes; i++) {
-					ur2[i] = ur1[i];
-				}
-			}
-		} else {
-			if (r2 != NULL){
-				for (i = 0; i < nbytes; i++) {
-					ur2[i] ^= ur1[i];
-				}
-			} else {
-				for (i = 0; i < nbytes; i++) {
-					ur2[i] = 0;
-				}
-			}
-		}
-
-	// Use 8-bit split tables, only when nbytes is enough long.
-	} else if (nbytes >= 1000){
-		int j, k, prim_poly;
-		uint16_t htable[256], ltable[256];
-
-		// This table setup requires a bit time.
-		prim_poly = galois_log_table[0] | 0x10000;
-		v = multby;
-		ltable[0] = 0;
-		for (j = 1; j < 256; j <<= 1) {
-			for (k = 0; k < j; k++)
-				ltable[k^j] = (v ^ ltable[k]);
-
-			// v = v * 2
-			v = (v & (1 << 15)) ? ((v << 1) ^ prim_poly) : (v << 1);
-		}
-		htable[0] = 0;
-		for (j = 1; j < 256; j <<= 1) {
-			for (k = 0; k < j; k++)
-				htable[k^j] = (v ^ htable[k]);
-
-			// v = v * 2
-			v = (v & (1 << 15)) ? ((v << 1) ^ prim_poly) : (v << 1);
-		}
-
-		if ( (r2 == NULL) || (add == 0) ) {
-			for (i = 0; i < nbytes; i++) {
-				v = ur1[i];
-				if (v == 0) {
-					ur2[i] = 0;
-				} else {
-				    prod = htable[v >> 8];
-				    prod ^= ltable[v & 0xFF];
-					ur2[i] = prod;
-				}
-			}
-		} else {
-			for (i = 0; i < nbytes; i++) {
-				v = ur1[i];
-				if (v != 0) {
-				    prod = htable[v >> 8];
-				    prod ^= ltable[v & 0xFF];
-					ur2[i] ^= prod;
-				}
-			}
-		}
-
-	// Use Log & iLog tables
-	} else {
-		uint16_t *galois_ilog_table;
-
-		galois_ilog_table = galois_log_table + 65536;
-		v = galois_log_table[multby];
-
-		if ( (r2 == NULL) || (add == 0) ) {
-			for (i = 0; i < nbytes; i++) {
-				if (ur1[i] == 0) {
-					ur2[i] = 0;
-				} else {
-					prod = galois_log_table[ur1[i]] + v;
-					if (prod >= 65535)
-						prod -= 65535;
-					ur2[i] = galois_ilog_table[prod];
-				}
-			}
-		} else {
-			for (i = 0; i < nbytes; i++) {
-				if (ur1[i] != 0) {
-					prod = galois_log_table[ur1[i]] + v;
-					if (prod >= 65535)
-						prod -= 65535;
-					ur2[i] ^= galois_ilog_table[prod];
-				}
-			}
-		}
-	}
-}
-
-
-// Create parity bytes in the region
-void gf16_region_create_parity(int prim_poly, uint8_t *buf, size_t region_size)
-{
-	uint32_t sum, temp, mask;
-
-	prim_poly &= 0xFFFF;	// reduce to 16-bit value
-
-	// XOR all block data to 4 bytes
-	sum = 0;
-	while (region_size > 4){
-		temp = *((uint32_t *)buf);
-
-		// store highest bits of each 16-bit integer
-		mask = (sum & 0x80008000) >> 15;	// 0x00010001 or 0x00000000
-
-		// When SIMD is used, multiple of 2 is faster.
-		// previous value multiply by 2
-		//sum = (sum & 0x7FFF7FFF) << 1;
-
-		// If multiple of 3 is good, it's possible by XOR to the original value.
-		// previous value multiply by 3
-		sum ^= (sum & 0x7FFF7FFF) << 1;
-
-		// prim_poly may be 0x100B
-		sum ^= mask * prim_poly;	// 0x100B100B or 0x00000000
-
-	 	// add new 4 bytes
-		sum ^= temp;
-
-		region_size -= 4;
-		buf += 4;
-	}
-
-	((uint32_t *)buf)[0] = sum;
-}
-
-// Check parity bytes in the region
-int gf16_region_check_parity(int galois_poly, uint8_t *buf, size_t region_size)
-{
-	uint32_t sum, temp, mask;
-
-	galois_poly &= 0xFFFF;	// reduce to 16-bit value
-
-	// XOR all block data to 4 bytes
-	sum = 0;
-	while (region_size > 4){
-		temp = *((uint32_t *)buf);
-
-		// store highest bits of each 8-bit integer
-		mask = (sum & 0x80008000) >> 15;	// 0x00010001 or 0x00000000
-
-		// previous value multiply by 2
-		//sum = (sum & 0x7FFF7FFF) << 1;
-
-		// previous value multiply by 3
-		sum ^= (sum & 0x7FFF7FFF) << 1;
-
-		// galois_poly may be 0x100B
-		sum ^= mask * galois_poly;	// 0x100B100B or 0x00000000
-
-	 	// add new 4 bytes
-		sum ^= temp;
-
-		region_size -= 4;
-		buf += 4;
-	}
-
-	// Parity is 4 bytes.
-	if (((uint32_t *)buf)[0] != sum)
-		return 1;
-
-	return 0;
-}
-
diff --git a/windows/src/galois8.c b/windows/src/galois8.c
deleted file mode 100644
index 8e640c2..0000000
--- a/windows/src/galois8.c
+++ /dev/null
@@ -1,292 +0,0 @@
-// This is based on source code of Jerasure (v1.2), and modified for 8-bit Galois Field.
-
-/* Galois.c
- * James S. Plank
- * April, 2007
-
-Galois.tar - Fast Galois Field Arithmetic Library in C/C++
-Copright (C) 2007 James S. Plank
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-
-James S. Plank
-Department of Electrical Engineering and Computer Science
-University of Tennessee
-Knoxville, TN 37996
-plank@cs.utk.edu
-
- */
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-
-// Create tables for 8-bit Galois Field
-// Return main pointer of tables.
-uint8_t * gf8_create_table(int prim_poly)
-{
-	int j, b;
-	int x, y, logx, sum_j;
-	uint8_t *galois_log_table, *galois_ilog_table, *galois_mult_table;
-
-	// Allocate tables on memory
-	// To fit CPU cache memory, table uses 8-bit integer.
-	galois_log_table = malloc(sizeof(uint8_t) * 256 * (1 + 1 + 256));
-	if (galois_log_table == NULL)
-		return NULL;
-	galois_ilog_table = galois_log_table + 256;
-	galois_mult_table = galois_log_table + 256 * 2;
-
-	// galois_log_table[0] is invalid, because power of 2 never becomes 0.
-	galois_log_table[0] = prim_poly;	// Instead of invalid value, set generator polynomial.
-	galois_ilog_table[255] = 1;	// 2 power 0 is 1. 2 power 255 is 1.
-
-	b = 1;
-	for (j = 0; j < 255; j++) {
-		galois_log_table[b] = j;
-		galois_ilog_table[j] = b;
-		b = b << 1;
-		if (b & 256)
-			b = (b ^ prim_poly) & 255;
-	}
-
-	// Set multiply tables for x = 0
-	j = 0;
-	galois_mult_table[j] = 0;	// y = 0
-	j++;
-	for (y = 1; y < 256; y++){	// y > 0
-		galois_mult_table[j] = 0;
-		j++;
-	}
-
-	for (x = 1; x < 256; x++){	// x > 0
-		galois_mult_table[j] = 0;	// y = 0
-		j++;
-		logx = galois_log_table[x];
-		for (y = 1; y < 256; y++){	// y > 0
-			sum_j = logx + galois_log_table[y];
-			if (sum_j >= 255)
-				sum_j -= 255;
-			galois_mult_table[j] = galois_ilog_table[sum_j];
-			j++;
-		}
-	}
-
-	return galois_log_table;
-}
-
-
-// Return (x * y)
-/*
-// Normal slow version
-int gf8_multiply(uint8_t *galois_log_table, int x, int y)
-{
-	int sum_j;
-	int *galois_ilog_table;
-
-	if (x == 0 || y == 0)
-		return 0;
-	galois_ilog_table = galois_log_table + 256;
-
-	sum_j = galois_log_table[x] + galois_log_table[y];
-	if (sum_j >= 255)
-		sum_j -= 255;
-
-	return galois_ilog_table[sum_j];
-}
-*/
-
-// Using galois_mult_table
-int gf8_multiply(uint8_t *galois_log_table, int x, int y)
-{
-	uint8_t *galois_mult_table;
-
-	galois_mult_table = galois_log_table + 256 * 2;
-
-	return galois_mult_table[(x << 8) | y];
-}
-
-// Return (x / y)
-int gf8_divide(uint8_t *galois_log_table, int x, int y)
-{
-	int sum_j;
-	uint8_t *galois_ilog_table;
-
-	if (y == 0)
-		return -1;	// Error: division by zero
-	if (x == 0)
-		return 0;
-	galois_ilog_table = galois_log_table + 256;
-
-	sum_j = galois_log_table[x] - galois_log_table[y];
-	if (sum_j < 0)
-		sum_j += 255;
-
-	return galois_ilog_table[sum_j];
-}
-
-// Return (1 / y)
-int gf8_reciprocal(uint8_t *galois_log_table, int y)
-{
-	uint8_t *galois_ilog_table;
-
-	if (y == 0)
-		return -1;	// Error: division by zero
-	galois_ilog_table = galois_log_table + 256;
-
-	return galois_ilog_table[ 255 - galois_log_table[y] ];
-}
-
-
-// Simplify and support size_t for 64-bit build
-void gf8_region_multiply(uint8_t *galois_log_table,
-						uint8_t *region,	/* Region to multiply */
-						int multby,			/* Number to multiply by */
-						size_t nbytes,		/* Number of bytes in region */
-						uint8_t *r2,		/* If r2 != NULL, products go here */
-						int add)
-{
-	size_t i;
-
-	if (multby == 0) {
-		if (add == 0){
-			if (r2 == NULL)
-				r2 = region;
-
-			for (i = 0; i < nbytes; i++) {
-				r2[i] = 0;
-			}
-		}
-
-	} else if (multby == 1) {
-		if (add == 0){
-			if (r2 != NULL){
-				for (i = 0; i < nbytes; i++) {
-					r2[i] = region[i];
-				}
-			}
-		} else {
-			if (r2 != NULL){
-				for (i = 0; i < nbytes; i++) {
-					r2[i] ^= region[i];
-				}
-			} else {
-				for (i = 0; i < nbytes; i++) {
-					region[i] = 0;
-				}
-			}
-		}
-
-	} else {
-		uint8_t prod;
-		uint8_t *galois_mult_table;
-
-		galois_mult_table = galois_log_table + 256 * 2;
-		galois_mult_table += multby * 256;	// Shift mult_table offset by multby
-
-		if ( (r2 == NULL) || (add == 0) ) {
-			if (r2 == NULL)
-				r2 = region;
-
-			for (i = 0; i < nbytes; i++) {
-				prod = galois_mult_table[ region[i] ];
-				r2[i] = prod;
-			}
-		} else {
-			for (i = 0; i < nbytes; i++) {
-				prod = galois_mult_table[ region[i] ];
-				r2[i] ^= prod;
-			}
-		}
-	}
-}
-
-
-// Create parity bytes in the region
-void gf8_region_create_parity(int prim_poly, uint8_t *buf, size_t region_size)
-{
-	uint32_t sum, temp, mask;
-
-	prim_poly &= 0xFF;	// reduce to 8-bit value
-
-	// XOR all block data to 4 bytes
-	sum = 0;
-	while (region_size > 4){
-		temp = *((uint32_t *)buf);
-
-		// store highest bits of each 8-bit integer
-		mask = (sum & 0x80808080) >> 7;	// 0x01010101 or 0x00000000
-
-		// When SIMD is used, multiple of 2 is faster.
-		// previous value multiply by 2
-		//sum = (sum & 0x7F7F7F7F) << 1;
-
-		// If multiple of 3 is good, it's possible by XOR to the original value.
-		// previous value multiply by 3
-		sum ^= (sum & 0x7F7F7F7F) << 1;
-
-		// prim_poly may be 0x1D
-		sum ^= mask * prim_poly;	// 0x1D1D1D1D or 0x00000000
-
-	 	// add new 4 bytes
-		sum ^= temp;
-
-		region_size -= 4;
-		buf += 4;
-	}
-
-	((uint32_t *)buf)[0] = sum;
-}
-
-// Check parity bytes in the region
-int gf8_region_check_parity(int galois_poly, uint8_t *buf, size_t region_size)
-{
-	uint32_t sum, temp, mask;
-
-	galois_poly &= 0xFF;	// reduce to 8-bit value
-
-	// XOR all block data to 4 bytes
-	sum = 0;
-	while (region_size > 4){
-		temp = *((uint32_t *)buf);
-
-		// store highest bits of each 8-bit integer
-		mask = (sum & 0x80808080) >> 7;	// 0x01010101 or 0x00000000
-
-		// previous value multiply by 2
-		//sum = (sum & 0x7F7F7F7F) << 1;
-
-		// previous value multiply by 3
-		sum ^= (sum & 0x7F7F7F7F) << 1;
-
-		// galois_poly may be 0x1D
-		sum ^= mask * galois_poly;	// 0x1D1D1D1D or 0x00000000
-
-	 	// add new 4 bytes
-		sum ^= temp;
-
-		region_size -= 4;
-		buf += 4;
-	}
-
-	// Parity is 4 bytes.
-	if (((uint32_t *)buf)[0] != sum)
-		return 1;
-
-	return 0;
-}
-
diff --git a/windows/src/hash.c b/windows/src/hash.c
deleted file mode 100644
index 1d349b9..0000000
--- a/windows/src/hash.c
+++ /dev/null
@@ -1,624 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-// MSVC headers
-#include <search.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "hash.h"
-
-
-/*
-CRC-64-ISO
-https://reveng.sourceforge.io/crc-catalogue/17plus.htm
-
-The Go Authors (26 January 2017), The Go Programming Language, module src/hash/crc64/crc64_test.go
-https://go.dev/src/hash/crc64/crc64_test.go
-
-#define CRC64_POLY	0xD800000000000000	// CRC-64-ISO (little endian)
-*/
-
-// Basic function, which calculates each byte.
-/*
-uint64_t crc64(const uint8_t *buf, size_t size, uint64_t crc)
-{
-	uint64_t A;
-
-	crc = ~crc;
-	for (size_t i = 0; i < size; ++i){
-		A = crc ^ buf[i];
-		A = A << 56;
-		crc = (crc >> 8) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-	}
-	return ~crc;
-}
-*/
-
-/*
-Fast CRCs are from;
-[1] Gam D. Nguyen, Fast CRCs, IEEE Transactions on Computers, vol. 58, no.
-10, pp. 1321-1331, Oct. 2009.
-*/
-
-// Fast CRC function, which calculates 4 bytes per loop.
-uint64_t crc64(const uint8_t *buf, size_t size, uint64_t crc)
-{
-	uint64_t A;
-
-	crc = ~crc;	// bit flipping at first
-
-	// calculate each byte until 4-bytes alignment
-	while ((size > 0) && (((size_t)buf) & 3)){
-		A = crc ^ (*buf++);
-		A = A << 56;
-		crc = (crc >> 8) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-		size--;
-	}
-
-	// calculate 4-bytes per loop
-	while (size >= 4){
-		A = crc ^ (*((uint32_t *)buf));
-		A = A << 32;
-
-		// Below is same as this line;
-		// crc = (crc >> 32) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-		A = A ^ (A >> 1);
-		crc = (crc >> 32) ^ A ^ (A >> 3);
-
-		size -= 4;
-		buf += 4;
-	}
-
-	// calculate remaining bytes
-	while (size > 0){
-		A = crc ^ (*buf++);
-		A = A << 56;
-		crc = (crc >> 8) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-		size--;
-	}
-
-	return ~crc;	// bit flipping again
-}
-
-// This updates CRC-64 of zeros without bit flipping.
-static uint64_t crc64_update_zero(size_t size, uint64_t crc)
-{
-	uint64_t A;
-
-	// calculate 4-bytes per loop
-	while (size >= 4){
-		A = crc << 32;
-
-		// Below is same as this line;
-		// crc = (crc >> 32) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-		A = A ^ (A >> 1);
-		crc = (crc >> 32) ^ A ^ (A >> 3);
-
-		size -= 4;
-	}
-
-	while (size > 0){
-		A = crc << 56;
-		crc = (crc >> 8) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-		size--;
-	}
-
-	return crc;
-}
-
-// Updates CRC-64 with zeros
-uint64_t crc64_zero(size_t size, uint64_t crc)
-{
-	crc = ~crc;	// bit flipping at first
-
-	crc = crc64_update_zero(size, crc);
-
-	return ~crc;	// bit flipping again
-}
-
-// This return window_mask.
-static uint64_t init_slide_window(uint64_t window_size, uint64_t window_table[256])
-{
-	int i;
-	uint64_t rr, window_mask;
-
-	window_table[0] = 0; // This is always 0.
-	for (i = 1; i < 256; i++){
-		// calculate instant table of CRC-64-ISO
-		rr = i;
-		rr = rr << 56;
-		rr = rr ^ (rr >> 1) ^ (rr >> 3) ^ (rr >> 4);
-		window_table[i] = crc64_update_zero(window_size, rr);
-	}
-
-	window_mask = crc64_update_zero(window_size, ~0) ^ (~0);
-	//printf("window_mask = 0x%016I64X, 0x%016I64X\n", window_mask, rr);
-
-	return window_mask;
-}
-
-// table setup for slide window search
-void init_crc_slide_table(PAR3_CTX *par3_ctx, int flag_usage)
-{
-	if (flag_usage & 1){
-		// Creation needs block size only for deduplication.
-		par3_ctx->window_mask = init_slide_window(par3_ctx->block_size, par3_ctx->window_table);
-	}
-	if (flag_usage & 2){
-		// Verification needs 2 sizes for find blocks and chunk tails.
-		par3_ctx->window_mask40 = init_slide_window(40, par3_ctx->window_table40);
-	}
-}
-
-// Slide the CRC-64-ISO along a buffer by one byte (removing the old and adding the new).
-// crc = window_mask ^ crc_slide_byte(window_mask ^ crc, buffer[window], buffer[0], window_table);
-uint64_t crc_slide_byte(uint64_t crc, uint8_t byteNew, uint8_t byteOld, uint64_t window_table[256])
-{
-	uint64_t A;
-
-	// CRC-64-ISO doesn't use table look-up.
-	A = crc ^ byteNew;
-	A = A << 56;
-	crc = (crc >> 8) ^ A ^ (A >> 1) ^ (A >> 3) ^ (A >> 4);
-
-	return crc ^ window_table[byteOld];
-}
-
-
-// Compare CRC-64 values
-static int compare_crc( const void *arg1, const void *arg2 )
-{
-	PAR3_CMP_CTX *cmp1_p, *cmp2_p;
-
-	cmp1_p = ( PAR3_CMP_CTX * ) arg1;
-	cmp2_p = ( PAR3_CMP_CTX * ) arg2;
-
-	if (cmp1_p->crc < cmp2_p->crc)
-		return -1;
-	if (cmp1_p->crc > cmp2_p->crc)
-		return 1;
-
-	return 0;
-}
-
-// Compare CRC-64 of blocks
-// Return index of a block, which has the same CRC-64 and fingerprint hash.
-// When no match, return -1 ~ -2. When fingerprint hash was calculated, return -3.
-int64_t crc_list_compare(PAR3_CTX *par3_ctx, uint64_t crc, uint8_t *buf, uint8_t hash[16])
-{
-	uint64_t count, index;
-	PAR3_CMP_CTX cmp_key, *cmp_p, *cmp2_p;
-	PAR3_BLOCK_CTX *block_list;
-
-	count = par3_ctx->crc_count;
-	if (count == 0)
-		return -1;
-
-	// Binary search
-	cmp_key.crc = crc;
-	cmp_p = (PAR3_CMP_CTX *)bsearch( &cmp_key, par3_ctx->crc_list, (size_t)count, sizeof(PAR3_CMP_CTX), compare_crc );
-	if (cmp_p == NULL)
-		return -2;
-
-	block_list = par3_ctx->block_list;
-	blake3(buf, par3_ctx->block_size, hash);
-	if (memcmp(hash, block_list[cmp_p->index].hash, 16) == 0)
-		return cmp_p->index;
-
-	// Search lower items of same CRC-64
-	cmp2_p = cmp_p;
-	index = cmp_p - par3_ctx->crc_list;
-	while (index > 0){
-		cmp2_p--;
-		if (cmp2_p->crc != crc)
-			break;
-		if (memcmp(hash, block_list[cmp2_p->index].hash, 16) == 0)
-			return cmp2_p->index;
-		index--;
-	}
-
-	// Search higher items of same CRC-64
-	cmp2_p = cmp_p;
-	index = cmp_p - par3_ctx->crc_list;
-	while (index + 1 < count){
-		cmp2_p++;
-		if (cmp2_p->crc != crc)
-			break;
-		if (memcmp(hash, block_list[cmp2_p->index].hash, 16) == 0)
-			return cmp2_p->index;
-		index++;
-	}
-
-	return -3;
-}
-
-// Add new crc in list and sort items.
-void crc_list_add(PAR3_CTX *par3_ctx, uint64_t crc, uint64_t index)
-{
-	uint64_t count;
-
-	count = par3_ctx->crc_count;
-
-	// Add new item.
-	par3_ctx->crc_list[count].crc = crc;
-	par3_ctx->crc_list[count].index = index;
-	count++;
-
-	// Quick sort items.
-	qsort( (void *)(par3_ctx->crc_list), (size_t)count, sizeof(PAR3_CMP_CTX), compare_crc );
-
-	par3_ctx->crc_count = count;
-}
-
-// Make list of crc for seaching full size blocks and chunk tails.
-int crc_list_make(PAR3_CTX *par3_ctx)
-{
-	uint64_t full_count, tail_count, index;
-	uint64_t block_size, block_count, chunk_count, slice_count;
-	PAR3_BLOCK_CTX *block_p;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_SLICE_CTX *slice_p;
-	PAR3_CMP_CTX *crc_list, *tail_list;
-
-	if (par3_ctx->block_count == 0){
-		par3_ctx->crc_count = 0;
-		par3_ctx->tail_count = 0;
-		return 0;
-	}
-
-	// Allocate list of CRC-64 (double size for local copy)
-	block_count = par3_ctx->block_count;
-	crc_list = malloc(sizeof(PAR3_CMP_CTX) * block_count * 2);
-	if (crc_list == NULL){
-		perror("Failed to allocate memory for comparison of CRC-64");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->crc_list = crc_list;
-
-	// At this time, number of tails is unknown.
-	// When a chunk size is multiple of block size, the chunk has no tail.
-	chunk_count = par3_ctx->chunk_count;
-	tail_list = malloc(sizeof(PAR3_CMP_CTX) * chunk_count * 2);
-	if (tail_list == NULL){
-		perror("Failed to allocate memory for comparison of CRC-64");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->tail_list = tail_list;
-
-	full_count = 0;
-	tail_count = 0;
-	block_p = par3_ctx->block_list;
-	chunk_list = par3_ctx->chunk_list;
-	slice_count = par3_ctx->slice_count;
-	slice_p = par3_ctx->slice_list;
-	block_size = par3_ctx->block_size;
-
-	// Find block of full size data, and set CRC of the block.
-	for (index = 0; index < block_count; index++){
-		// Even if checksum doesn't exist, the block is included.
-		if (block_p->state & 1){
-			crc_list[full_count].crc = block_p->crc;
-			crc_list[full_count].index = index;
-			full_count++;
-		}
-
-		block_p++;
-	}
-
-	// Find slice for chunk tail, and set CRC of the chunk.
-	for (index = 0; index < slice_count; index++){
-		if (slice_p->size < block_size){	// This slice is a chunk tail.
-			tail_list[tail_count].crc = chunk_list[slice_p->chunk].tail_crc;
-			tail_list[tail_count].index = index;
-			tail_count++;
-		}
-
-		slice_p++;
-	}
-
-	// Re-allocate memory for actual number of CRC-64
-	if (full_count < block_count){
-		if (full_count > 0){
-			crc_list = realloc(par3_ctx->crc_list, sizeof(PAR3_CMP_CTX) * full_count * 2);
-			if (crc_list == NULL){
-				perror("Failed to re-allocate memory for comparison of CRC-64");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->crc_list = crc_list;
-		} else {
-			free(par3_ctx->crc_list);
-			par3_ctx->crc_list = NULL;
-		}
-	}
-	if (tail_count < chunk_count){
-		if (tail_count > 0){
-			tail_list = realloc(par3_ctx->tail_list, sizeof(PAR3_CMP_CTX) * tail_count * 2);
-			if (tail_list == NULL){
-				perror("Failed to re-allocate memory for comparison of CRC-64");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->tail_list = tail_list;
-		} else {
-			free(par3_ctx->tail_list);
-			par3_ctx->tail_list = NULL;
-		}
-	}
-
-	// Quick sort items.
-	if (full_count > 1){
-		// CRC for full size block
-		qsort( (void *)crc_list, (size_t)full_count, sizeof(PAR3_CMP_CTX), compare_crc );
-	}
-	if (tail_count > 1){
-		// CRC for chunk tail
-		qsort( (void *)tail_list, (size_t)tail_count, sizeof(PAR3_CMP_CTX), compare_crc );
-	}
-
-	par3_ctx->crc_count = full_count;
-	par3_ctx->tail_count = tail_count;
-
-	return 0;
-}
-
-// Replace crc of a block, and sort again.
-void crc_list_replace(PAR3_CTX *par3_ctx, uint64_t crc, uint64_t index)
-{
-	int64_t i, count;
-	PAR3_CMP_CTX *crc_list;
-
-	if ( (par3_ctx->crc_list == NULL) || (par3_ctx->crc_count == 0) )
-		return;
-
-	crc_list = par3_ctx->crc_list;
-	count = par3_ctx->crc_count;
-
-	// Search the item and replace the value.
-	for (i = 0; i < count; i++){
-		if (crc_list[i].index == index){
-			crc_list[i].crc = crc;
-			i = -1;
-			break;
-		}
-	}
-
-	if ( (count > 1) && (i == -1) ){
-		// Quick sort items.
-		qsort( (void *)crc_list, (size_t)count, sizeof(PAR3_CMP_CTX), compare_crc );
-	}
-}
-
-// Compare CRC-64 of blocks or chunk tails
-// Return index of the first item, which has the same CRC-64.
-// When no match, return -1 ~ -2
-int64_t cmp_list_search(PAR3_CTX *par3_ctx, uint64_t crc, PAR3_CMP_CTX *cmp_list, int64_t count)
-{
-	int64_t index;
-	PAR3_CMP_CTX cmp_key, *cmp_p;
-
-	if (count == 0)
-		return -1;
-
-	// Binary search
-	cmp_key.crc = crc;
-	cmp_p = (PAR3_CMP_CTX *)bsearch( &cmp_key, cmp_list, (size_t)count, sizeof(PAR3_CMP_CTX), compare_crc );
-	if (cmp_p == NULL)
-		return -2;
-
-	// Search lower items of same CRC-64
-	index = cmp_p - cmp_list;
-	while (index > 0){
-		cmp_p--;
-		if (cmp_p->crc != crc)
-			break;
-		index--;
-	}
-
-	return index;
-}
-
-// Compare CRC-64 of blocks or chunk tails
-// When no match, return -1 ~ -3
-int64_t cmp_list_search_index(PAR3_CTX *par3_ctx, uint64_t crc, int64_t id, PAR3_CMP_CTX *cmp_list, int64_t count)
-{
-	int64_t index;
-	PAR3_CMP_CTX cmp_key, *cmp_p, *cmp2_p;
-
-	if (count == 0)
-		return -1;
-
-	// Binary search
-	cmp_key.crc = crc;
-	cmp_p = (PAR3_CMP_CTX *)bsearch( &cmp_key, cmp_list, (size_t)count, sizeof(PAR3_CMP_CTX), compare_crc );
-	if (cmp_p == NULL)
-		return -2;
-
-	// Search lower items of same CRC-64
-	cmp2_p = cmp_p;
-	index = cmp_p - cmp_list;
-	if (cmp_p->index == id)
-		return index;
-	while (index > 0){
-		cmp2_p--;
-		if (cmp2_p->crc != crc)
-			break;
-		if (cmp2_p->index == id)
-			return index;
-		index--;
-	}
-
-	// Search higher items of same CRC-64
-	cmp2_p = cmp_p;
-	index = cmp_p - cmp_list;
-	while (index + 1 < count){
-		cmp2_p++;
-		if (cmp2_p->crc != crc)
-			break;
-		if (cmp2_p->index == id)
-			return index;
-		index++;
-	}
-
-	return -3;
-}
-
-
-/*
-This BLAKE3 code is non-SIMD subset of portable version from below;
-https://github.com/BLAKE3-team/BLAKE3
-
-The official C implementation of BLAKE3.
-
-This work is released into the public domain with CC0 1.0. Alternatively, it is
-licensed under the Apache License 2.0.
-*/
-
-// One time calculation, which returns 16-bytes hash value.
-void blake3(const uint8_t *buf, size_t size, uint8_t *hash)
-{
-	// Initialize the hasher.
-	blake3_hasher hasher;
-	blake3_hasher_init(&hasher);
-
-	blake3_hasher_update(&hasher, buf, size);
-
-	// Finalize the hash.
-	blake3_hasher_finalize(&hasher, hash, 16);
-}
-
-
-// Create parity bytes in the region
-void region_create_parity(uint8_t *buf, size_t region_size)
-{
-	uint32_t sum;
-
-	// XOR all block data to 4 bytes
-	sum = 0;
-	while (region_size > 4){
-		sum ^= *((uint32_t *)buf);
-
-		region_size -= 4;
-		buf += 4;
-	}
-
-	// Parity is saved at the last 4-bytes.
-	((uint32_t *)buf)[0] = sum;
-}
-
-// Check parity bytes in the region
-int region_check_parity(uint8_t *buf, size_t region_size)
-{
-	uint32_t sum;
-
-	// XOR all block data to 4 bytes
-	sum = 0;
-	while (region_size > 4){
-		sum ^= *((uint32_t *)buf);
-
-		region_size -= 4;
-		buf += 4;
-	}
-
-	// Parity is saved at the last 4-bytes.
-	if (((uint32_t *)buf)[0] != sum)
-		return 1;
-
-	return 0;
-}
-
-
-// Create parity bytes in the region for Leopard-RS (ALTMAP)
-// region_size must be multiple of 64.
-void leo_region_create_parity(uint8_t *buf, size_t region_size)
-{
-	uint8_t temp_buf[64];
-	size_t i;
-	uint32_t sum;
-
-	// XOR all block data to 4 bytes.
-	sum = 0;
-	while (region_size >= 64){
-		if (region_size == 64){	// Parity is saved at the last 4-bytes.
-			for (i = 0; i < 60; i += 4){
-				sum ^= *((uint32_t *)buf);
-				buf += 4;
-			}
-			((uint32_t *)buf)[0] = sum;
-			buf += 4;
-		} else {
-			for (i = 0; i < 64; i += 4){
-				sum ^= *((uint32_t *)buf);
-				buf += 4;
-			}
-		}
-
-		// move to ALTMAP
-		buf -= 64;
-		for (i = 0; i < 32; i++){
-			temp_buf[i     ] = buf[i * 2    ];
-			temp_buf[i + 32] = buf[i * 2 + 1];
-		}
-		memcpy(buf, temp_buf, 64);
-
-		buf += 64;
-		region_size -= 64;
-	}
-}
-
-// Check parity bytes in the region for Leopard-RS (ALTMAP)
-int leo_region_check_parity(uint8_t *buf, size_t region_size)
-{
-	uint8_t temp_buf[64];
-	size_t i;
-	uint32_t sum;
-
-	// XOR all block data to 4 bytes.
-	sum = 0;
-	while (region_size >= 64){
-		// return from ALTMAP
-		for (i = 0; i < 32; i++){
-			temp_buf[i * 2    ] = buf[i     ];
-			temp_buf[i * 2 + 1] = buf[i + 32];
-		}
-		memcpy(buf, temp_buf, 64);
-
-		if (region_size == 64){	// Parity is saved at the last 4-bytes.
-			for (i = 0; i < 60; i += 4){
-				sum ^= *((uint32_t *)buf);
-				buf += 4;
-			}
-			if (((uint32_t *)buf)[0] != sum)
-				return 1;
-		} else {
-			for (i = 0; i < 64; i += 4){
-				sum ^= *((uint32_t *)buf);
-				buf += 4;
-			}
-		}
-
-		region_size -= 64;
-	}
-
-	return 0;
-}
-
-// Restore region bytes from ALTMAP for Leopard-RS
-void leo_region_restore(uint8_t *buf, size_t region_size)
-{
-	uint8_t temp_buf[64];
-	size_t i;
-
-	while (region_size >= 64){
-		// return from ALTMAP
-		for (i = 0; i < 32; i++){
-			temp_buf[i * 2    ] = buf[i     ];
-			temp_buf[i * 2 + 1] = buf[i + 32];
-		}
-		memcpy(buf, temp_buf, 64);
-
-		buf += 64;
-		region_size -= 64;
-	}
-}
-
diff --git a/windows/src/hash.h b/windows/src/hash.h
deleted file mode 100644
index 58858ce..0000000
--- a/windows/src/hash.h
+++ /dev/null
@@ -1,32 +0,0 @@
-
-// CRC-64-ISO
-uint64_t crc64(const uint8_t *buf, size_t size, uint64_t crc);
-uint64_t crc64_zero(size_t size, uint64_t crc);
-
-// table setup for slide window search
-void init_crc_slide_table(PAR3_CTX *par3_ctx, int flag_usage);
-uint64_t crc_slide_byte(uint64_t crc, uint8_t byteNew, uint8_t byteOld, uint64_t window_table[256]);
-
-// for sort and search CRC-64
-int64_t crc_list_compare(PAR3_CTX *par3_ctx, uint64_t crc, uint8_t *buf, uint8_t hash[16]);
-void crc_list_add(PAR3_CTX *par3_ctx, uint64_t crc, uint64_t index);
-int crc_list_make(PAR3_CTX *par3_ctx);
-void crc_list_replace(PAR3_CTX *par3_ctx, uint64_t crc, uint64_t index);
-
-int64_t cmp_list_search(PAR3_CTX *par3_ctx, uint64_t crc, PAR3_CMP_CTX *cmp_list, int64_t count);
-int64_t cmp_list_search_index(PAR3_CTX *par3_ctx, uint64_t crc, int64_t id, PAR3_CMP_CTX *cmp_list, int64_t count);
-
-
-// BLAKE3
-void blake3(const uint8_t *buf, size_t size, uint8_t *hash);
-
-
-// parity bytes in the region
-void region_create_parity(uint8_t *buf, size_t region_size);
-int region_check_parity(uint8_t *buf, size_t region_size);
-
-// parity bytes in the region for Leopard-RS (ALTMAP)
-void leo_region_create_parity(uint8_t *buf, size_t region_size);
-int leo_region_check_parity(uint8_t *buf, size_t region_size);
-void leo_region_restore(uint8_t *buf, size_t region_size);
-
diff --git a/windows/src/inside.h b/windows/src/inside.h
deleted file mode 100644
index 1dd9019..0000000
--- a/windows/src/inside.h
+++ /dev/null
@@ -1,9 +0,0 @@
-
-int check_outside_format(PAR3_CTX *par3_ctx, int *format_type, int *copy_size);
-uint64_t inside_zip_size(PAR3_CTX *par3_ctx, uint64_t block_size, int footer_size,
-			uint64_t *block_count, uint64_t *recv_block_count, int *packet_repeat_count);
-
-int delete_inside_data(PAR3_CTX *par3_ctx);
-
-int copy_inside_data(PAR3_CTX *par3_ctx, char *temp_path);
-
diff --git a/windows/src/inside_zip.c b/windows/src/inside_zip.c
deleted file mode 100644
index 05c2eb1..0000000
--- a/windows/src/inside_zip.c
+++ /dev/null
@@ -1,812 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#define _fileno fileno
-#define _chsize_s ftruncate
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <unistd.h>
-
-#elif _WIN32
-
-// MSVC headers
-#include <io.h>
-
-#endif
-
-#include "libpar3.h"
-#include "hash.h"
-#include "inside.h"
-#include "common.h"
-
-
-#define ZIP_SEARCH_SIZE	1024
-
-// Check ZIP file format and total size of footer sections
-// format_type : 0 = Unknown, 1 = PAR3, 2 = ZIP, 3 = 7z
-// copy_size   : 0 = 7z, 22 or 98 or more = ZIP
-int check_outside_format(PAR3_CTX *par3_ctx, int *format_type, int *copy_size)
-{
-	uint8_t buf[ZIP_SEARCH_SIZE];
-	int64_t file_size, read_size, offset;
-	FILE *fp;
-
-	*format_type = 0;
-	*copy_size = 0;
-	file_size = par3_ctx->total_file_size;
-
-	//printf("ZIP filename = \"%s\"\n", par3_ctx->par_filename);
-	fp = fopen(par3_ctx->par_filename, "rb");
-	if (fp == NULL){
-		perror("Failed to open Outside file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Check file format
-	// 7z =  Signature of starting 6-bytes
-	// zip = local file header signature (starting 4-bytes) and
-	//       end of central directory record (last 22-bytes)
-	if (fread(buf, 1, 32, fp) != 32){
-		perror("Failed to read Outside file");
-		fclose(fp);
-		return RET_FILE_IO_ERROR;
-	}
-	if (((uint32_t *)buf)[0] == 0x04034b50){	// ZIP archive
-		int footer_size = 0;
-		int64_t ecdr_size;	// end of central directory record
-		int64_t cdh_size, cdh_offset;	// central directory header
-		*format_type = 2;
-
-		// Read some bytes from the last of ZIP file
-		read_size = ZIP_SEARCH_SIZE;
-		if (read_size > file_size)
-			read_size = file_size;
-		if (_fseeki64(fp, - read_size, SEEK_END) != 0){	// Seek from end of file
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf, 1, read_size, fp) != read_size){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Search ZIP signature from the last
-		offset = read_size - 22;
-		while (offset >= 0){
-			if (((uint32_t *)(buf + offset))[0] == 0x06054b50){	// End of central directory record
-				ecdr_size = read_size - offset;
-				cdh_size = 0;
-				memcpy(&cdh_size, buf + offset + 12, 4);	// size of the central directory
-				cdh_offset = 0;
-				memcpy(&cdh_offset, buf + offset + 16, 4);	// offset of start of central directory
-				if (cdh_offset + cdh_size + ecdr_size == file_size){
-					footer_size = (int)ecdr_size;
-					break;
-				} else if ( (cdh_size == 0xFFFFFFFF) || (cdh_offset == 0xFFFFFFFF) ){
-					// This ZIP file may be ZIP64 format.
-					offset -= 19;
-				} else if (cdh_offset + cdh_size + ecdr_size < file_size){
-					fclose(fp);
-					printf("There is additional data in ZIP file already.\n");
-					return RET_LOGIC_ERROR;
-				}
-			} else if (((uint32_t *)(buf + offset))[0] == 0x06064b50){	// Zip64 end of central directory record
-				ecdr_size = read_size - offset;
-				memcpy(&cdh_size, buf + offset + 40, 8);	// size of the central directory
-				memcpy(&cdh_offset, buf + offset + 48, 8);	 // offset of start of central directory
-				if (cdh_offset + cdh_size + ecdr_size == file_size){
-					footer_size = (int)ecdr_size;
-					break;
-				} else if (cdh_offset + cdh_size + ecdr_size < file_size){
-					fclose(fp);
-					printf("There is additional data in ZIP file already.\n");
-					return RET_LOGIC_ERROR;
-				}
-			}
-
-			offset--;
-		}
-		if (footer_size == 0){	// Not found
-			fclose(fp);
-			printf("Invalid ZIP file format\n");
-			return RET_LOGIC_ERROR;
-		}
-		*copy_size = footer_size;
-
-	} else if ( (((uint16_t *)buf)[0] == 0x7A37) && (((uint32_t *)(buf + 2))[0] == 0x1C27AFBC) ){	// 7z archive
-		int64_t header_size;
-		*format_type = 3;
-
-		// Check size in Start Header
-		memcpy(&offset, buf + 12, 8);		// NextHeaderOffset
-		memcpy(&header_size, buf + 20, 8);	// NextHeaderSize
-		if (32 + offset + header_size < file_size){
-			fclose(fp);
-			printf("There is additional data in 7z file already.\n");
-			return RET_LOGIC_ERROR;
-		} else if (32 + offset + header_size != file_size){
-			fclose(fp);
-			printf("Invalid 7z file format\n");
-			return RET_LOGIC_ERROR;
-		}
-
-		// Check the end of archive file
-		if (_fseeki64(fp, 32 + offset + header_size - 2, SEEK_SET) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf, 1, 2, fp) != 2){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (((uint16_t *)buf)[0] != 0x0000){	// Property ID (0x00 = kEnd) and Size (0 bytes)
-			fclose(fp);
-			printf("Invalid 7z file format\n");
-			return RET_LOGIC_ERROR;
-		}
-
-	} else {	// Unknown format
-		fclose(fp);
-		printf("Unknown file format\n");
-		return RET_LOGIC_ERROR;
-	}
-
-	if (fclose(fp) != 0){
-		perror("Failed to close Outside file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	return 0;
-}
-
-
-// It appends PAR3 packets after a ZIP file.
-// It appends the last three ZIP sections after PAR3 packets, too.
-// Zip64 end of central directory record, Zip64 end of central directory locator, End of central directory record
-// [ Original ZIP file ] [ PAR3 packets ] [ Duplicated ZIP section ]
-uint64_t inside_zip_size(PAR3_CTX *par3_ctx,
-		uint64_t block_size,	// Block size to calculate total packet size
-		int footer_size,		// Copy size after appending recovery data
-		uint64_t *block_count,
-		uint64_t *recv_block_count,
-		int *packet_repeat_count)
-{
-	int repeat_count, redundancy_percent;
-	int footer_block_count, tail_block_count;
-	uint64_t i;
-	uint64_t input_block_count, data_block_count, recovery_block_count;
-	uint64_t data_size, data_tail_size, footer_tail_size;
-	uint64_t common_packet_size, total_packet_size;
-	uint64_t start_packet_size, ext_data_packet_size;
-	uint64_t matrix_packet_size, recv_data_packet_size;
-	uint64_t file_packet_size,  root_packet_size;
-
-	if (par3_ctx->redundancy_size <= 250){
-		redundancy_percent = par3_ctx->redundancy_size;
-	} else {
-		redundancy_percent = 0;
-	}
-
-	// Because it duplicates ZIP sections, there are 3 protected chunks.
-	// [ data chunk ] [ footer chunk ] [ unprotected chunk ] [ duplicated footer chunk ]
-	data_size = par3_ctx->total_file_size - footer_size;
-	if (par3_ctx->noise_level >= 2){
-		printf("data_size = %"PRId64", footer_size = %d, block_size = %"PRId64"\n", data_size, footer_size, block_size);
-	}
-	// On the other hand, there are 1 protected chunk in 7-Zip.
-	// [ data chunk ] [ unprotected chunk ]
-
-	// How many blocks in 1st protected chunk
-	tail_block_count = 0;
-	data_block_count = data_size / block_size;
-	data_tail_size = data_size % block_size;
-	if (data_tail_size >= 40)
-		tail_block_count++;
-
-	// How many blocks in 2nd protected chunk
-	footer_block_count = (int)(footer_size / block_size);
-	footer_tail_size = footer_size % block_size;
-	if (footer_tail_size >= 40){
-		if (data_tail_size >= 40){	// Try tail packing
-			if (data_tail_size + footer_tail_size <= block_size){
-				// Tail packing is possible.
-			} else {
-				tail_block_count++;
-			}
-		} else {
-			tail_block_count++;
-		}
-	}
-	// Because data in 2nd chunk and 3rd chunk are identical, deduplication works.
-
-	// Create at least 1 recovery block
-	input_block_count = data_block_count + footer_block_count + tail_block_count;
-	if (redundancy_percent == 0){
-		recovery_block_count = 1;
-	} else {
-		recovery_block_count = (input_block_count * redundancy_percent + 99) / 100;	// Round up
-		if (recovery_block_count < 1)
-			recovery_block_count = 1;
-	}
-
-	if (par3_ctx->noise_level >= 2){
-		printf("data_block = %"PRId64", footer_block = %d, tail_block = %d\n", data_block_count, footer_block_count, tail_block_count);
-		printf("input_block_count = %"PRId64", recovery_block_count = %"PRId64"\n", input_block_count, recovery_block_count);
-	}
-
-	// Creator Packet
-	if (par3_ctx->noise_level >= 1){
-		printf("Creator Packet size = %"PRId64"\n", par3_ctx->creator_packet_size);
-	}
-
-	// Start Packet
-	start_packet_size = 48 + 33 + 1;	// Assume GF(2^8) at first
-	if (input_block_count + recovery_block_count > 256)
-		start_packet_size++;	// Use GF(2^16) for many blocks
-	common_packet_size = start_packet_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("Start Packet size = %"PRId64"\n", start_packet_size);
-	}
-
-	// External Data Packet
-	// Count full size blocks only in each protected chunk
-	ext_data_packet_size = 48 + 8 + 24 * data_block_count;	// 1st chunk
-	if (footer_block_count > 0)
-		ext_data_packet_size += 48 + 8 + 24 * footer_block_count;	// 2nd chunk
-	common_packet_size += ext_data_packet_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("External Data Packet size = %"PRId64"\n", ext_data_packet_size);
-	}
-
-	// Matrix Packet
-	// Cauchy Matrix Packet
-	matrix_packet_size = 48 + 24;
-	common_packet_size += matrix_packet_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("Cauchy Matrix Packet size = %"PRId64"\n", matrix_packet_size);
-	}
-
-	// Recovery Data Packet
-	recv_data_packet_size = 48 + 40 + block_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("Recovery Data Packet size = %"PRId64"\n", recv_data_packet_size);
-	}
-
-	// File Packet
-	i = strlen(par3_ctx->par_filename);
-	file_packet_size = 48 + 2 + i + 25;
-	// Protected Chunk Description (data chunk)
-	file_packet_size += 8;	// length of protected chunk
-	if (data_size >= block_size)
-		file_packet_size += 8;	// index of first input block holding chunk
-	if (data_tail_size >= 40){
-		file_packet_size += 40;
-	} else {
-		file_packet_size += data_tail_size;
-	}
-	if (footer_size > 0){
-		// Protected Chunk Description (footer chunk)
-		file_packet_size += 8;	// length of protected chunk
-		if (footer_size >= block_size)
-			file_packet_size += 8;	// index of first input block holding chunk
-		if (footer_tail_size >= 40){
-			file_packet_size += 40;
-		} else {
-			file_packet_size += footer_tail_size;
-		}
-	}
-	// Unprotected Chunk Description (par3 packets)
-	file_packet_size += 16;
-	if (footer_size > 0){
-		// Protected Chunk Description (duplicated footer chunk)
-		file_packet_size += 8;	// length of protected chunk
-		if (footer_size >= block_size)
-			file_packet_size += 8;	// index of first input block holding chunk
-		if (footer_tail_size >= 40){
-			file_packet_size += 40;
-		} else {
-			file_packet_size += footer_tail_size;
-		}
-	}
-	common_packet_size += file_packet_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("File Packet size = %"PRId64"\n", file_packet_size);
-	}
-
-	// Root Packet
-	root_packet_size = 48 + 13 + 16;
-	common_packet_size += root_packet_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("Root Packet size = %"PRId64"\n", root_packet_size);
-	}
-
-	// How many times to duplicate common packets
-	// number of blocks = 1 ~ 3 : number of copies = 2
-	// number of blocks = 4 ~ 7 : number of copies = 3
-	// number of blocks = 8 ~ 15 : number of copies = 4
-	// number of blocks = 16 ~ 31 : number of copies = 5
-	// number of blocks = 32 ~ 63 : number of copies = 6
-	// number of blocks = 64 ~ 127 : number of copies = 7
-	// number of blocks = 128 ~ 255 : number of copies = 8
-	// number of blocks = 256 ~ 511 : number of copies = 9
-	// number of blocks = 512 ~ 1023 : number of copies = 10
-	// number of blocks = 1024 ~ 2047 : number of copies = 11
-	// number of blocks = 2048 ~ 4095 : number of copies = 12
-	// number of blocks = 4096 ~ 8191 : number of copies = 13
-	// number of blocks = 8192 ~ 16383 : number of copies = 14
-	// number of blocks = 16384 ~ 32767 : number of copies = 15
-	// number of blocks = 32768 ~ 65535 : number of copies = 16
-	repeat_count = 2;
-	for (i = 4; i <= recovery_block_count; i *= 2)	// log2(recovery_block_count)
-		repeat_count++;
-	// Limit repetition by redundancy
-	// Redundancy = 0 ~ 5% : Max 4 times
-	// Redundancy = 6 ~ 10% : Max "redundancy - 1" times
-	// Redundancy = 11% : 11 * 100 / 111 = 9.91, Max 9 times
-	// Redundancy = 20% : 20 * 100 / 120 = 16.66, Max 16 times
-	if (redundancy_percent <= 5){
-		if (repeat_count > 4)
-			repeat_count = 4;
-	} else if (redundancy_percent <= 10){
-		if (repeat_count > redundancy_percent - 1)
-			repeat_count = redundancy_percent - 1;
-	} else if (redundancy_percent < 20){	// n * 100 / (100 + n)
-		int limit_count = (redundancy_percent * 100) / (100 + redundancy_percent);
-		if (repeat_count > limit_count)
-			repeat_count = limit_count;
-	}
-	if (par3_ctx->repetition_limit > 0){	// Limit repetition of packets in each file.
-		int limit_count = par3_ctx->repetition_limit - 1;	// Additional copies
-		if (repeat_count > limit_count)
-			repeat_count = limit_count;
-	}
-	if (par3_ctx->noise_level >= 2){
-		printf("repeat_count = %d\n", repeat_count);
-	}
-
-	// Calculate total size of PAR3 packets (repeated multiple times)
-	total_packet_size = par3_ctx->creator_packet_size;
-	total_packet_size += common_packet_size * repeat_count;
-	total_packet_size += recv_data_packet_size * recovery_block_count;
-	if (par3_ctx->noise_level >= 1){
-		printf("Common packet size = %"PRId64"\n", common_packet_size);
-		printf("Total packet size = %"PRId64"\n\n", total_packet_size);
-	}
-
-	*block_count = input_block_count;
-	*recv_block_count = recovery_block_count;
-	*packet_repeat_count = repeat_count;
-	return total_packet_size;
-}
-
-
-// Check ZIP file format and delete inside data
-// At this time, this supports appended data only.
-int delete_inside_data(PAR3_CTX *par3_ctx)
-{
-	uint8_t buf[ZIP_SEARCH_SIZE];
-	int file_no;
-	int64_t file_size, read_size, offset;
-	FILE *fp;
-
-	file_size = par3_ctx->total_file_size;
-
-	//printf("ZIP filename = \"%s\"\n", par3_ctx->par_filename);
-	fp = fopen(par3_ctx->par_filename, "r+b");
-	if (fp == NULL){
-		perror("Failed to open Outside file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Check file format
-	// 7z =  Signature of starting 6-bytes
-	// zip = local file header signature (starting 4-bytes) and
-	//       end of central directory record (last 22-bytes)
-	if (fread(buf, 1, 32, fp) != 32){
-		perror("Failed to read Outside file");
-		fclose(fp);
-		return RET_FILE_IO_ERROR;
-	}
-	if (((uint32_t *)buf)[0] == 0x04034b50){	// ZIP archive
-		int footer_size = 0;
-		int64_t ecdr_size;	// end of central directory record
-		int64_t cdh_size, cdh_offset;	// central directory header
-
-		// Read some bytes from the last of ZIP file
-		read_size = ZIP_SEARCH_SIZE;
-		if (read_size > file_size)
-			read_size = file_size;
-		if (_fseeki64(fp, - read_size, SEEK_END) != 0){	// Seek from end of file
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf, 1, read_size, fp) != read_size){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Search ZIP signature from the last
-		offset = read_size - 22;
-		while (offset >= 0){
-			if (((uint32_t *)(buf + offset))[0] == 0x06054b50){	// End of central directory record
-				ecdr_size = read_size - offset;
-				cdh_size = 0;
-				memcpy(&cdh_size, buf + offset + 12, 4);	// size of the central directory
-				cdh_offset = 0;
-				memcpy(&cdh_offset, buf + offset + 16, 4);	// offset of start of central directory
-				if (cdh_offset + cdh_size + ecdr_size == file_size){
-					fclose(fp);
-					printf("There isn't additional data in ZIP file yet.\n");
-					return RET_LOGIC_ERROR;
-				} else if ( (cdh_size == 0xFFFFFFFF) || (cdh_offset == 0xFFFFFFFF) ){
-					// This ZIP file may be ZIP64 format.
-					offset -= 20;	// Skip [zip64 end of central directory locator]
-				} else if (cdh_offset + cdh_size + ecdr_size < file_size){
-					footer_size = (int)ecdr_size;
-					break;
-				}
-			} else if (((uint32_t *)(buf + offset))[0] == 0x06064b50){	// Zip64 end of central directory record
-				ecdr_size = read_size - offset;
-				memcpy(&cdh_size, buf + offset + 40, 8);	// size of the central directory
-				memcpy(&cdh_offset, buf + offset + 48, 8);	 // offset of start of central directory
-				if (cdh_offset + cdh_size + ecdr_size == file_size){
-					fclose(fp);
-					printf("There isn't additional data in ZIP file yet.\n");
-					return RET_LOGIC_ERROR;
-				} else if (cdh_offset + cdh_size + ecdr_size < file_size){
-					footer_size = (int)ecdr_size;
-					break;
-				}
-			}
-
-			offset--;
-		}
-		if (footer_size == 0){	// Not found
-			fclose(fp);
-			printf("Invalid ZIP file format\n");
-			return RET_LOGIC_ERROR;
-		}
-
-		// Check ZIP signature at original position
-		if (_fseeki64(fp, cdh_offset, SEEK_SET) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf, 1, 4, fp) != 4){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (((uint32_t *)buf)[0] != 0x02014b50){	// central file header signature
-			fclose(fp);
-			printf("Invalid ZIP file format\n");
-			return RET_LOGIC_ERROR;
-		}
-		if (_fseeki64(fp, cdh_offset + cdh_size, SEEK_SET) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf, 1, 4, fp) != 4){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if ( (((uint32_t *)buf)[0] != 0x06054b50)	&&	// end of central directory record
-				(((uint32_t *)buf)[0] != 0x02014b50) ){	// zip64 end of central directory record
-			fclose(fp);
-			printf("Invalid ZIP file format\n");
-			return RET_LOGIC_ERROR;
-		}
-		if (par3_ctx->noise_level >= 0){
-			printf("Original ZIP file size = %"PRId64"\n", cdh_offset + cdh_size + ecdr_size);
-		}
-
-		// Delete appended data by resizing to the original ZIP file
-		file_no = _fileno(fp);
-		if (file_no < 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		} else {
-			if (_chsize_s(file_no, cdh_offset + cdh_size + ecdr_size) != 0){
-				perror("Failed to resize Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-		}
-
-	} else if ( (((uint16_t *)buf)[0] == 0x7A37) && (((uint32_t *)(buf + 2))[0] == 0x1C27AFBC) ){	// 7z archive
-		int64_t header_size;
-
-		// Check size in Start Header
-		memcpy(&offset, buf + 12, 8);		// NextHeaderOffset
-		memcpy(&header_size, buf + 20, 8);	// NextHeaderSize
-		if (32 + offset + header_size == file_size){
-			fclose(fp);
-			printf("There isn't additional data in 7z file yet.\n");
-			return RET_LOGIC_ERROR;
-		} else if (32 + offset + header_size > file_size){
-			fclose(fp);
-			printf("Invalid 7z file format\n");
-			return RET_LOGIC_ERROR;
-		}
-		if (par3_ctx->noise_level >= 0){
-			printf("Original 7z file size = %"PRId64"\n", 32 + offset + header_size);
-		}
-
-		// Check end mark at original position
-		if (_fseeki64(fp, 32 + offset + header_size - 2, SEEK_SET) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf, 1, 2, fp) != 2){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		if (((uint16_t *)buf)[0] != 0x0000){	// Property ID (0x00 = kEnd) and Size (0 bytes)
-			fclose(fp);
-			printf("Invalid 7z file format\n");
-			return RET_LOGIC_ERROR;
-		}
-
-		// Delete appended data by resizing to the original 7z file
-		file_no = _fileno(fp);
-		if (file_no < 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		} else {
-			if (_chsize_s(file_no, 32 + offset + header_size) != 0){
-				perror("Failed to resize Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-		}
-
-	} else {	// Unknown format
-		fclose(fp);
-		printf("Unknown file format\n");
-		return RET_LOGIC_ERROR;
-	}
-
-	if (fclose(fp) != 0){
-		perror("Failed to close ZIP file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	return 0;
-}
-
-// Copy complete PAR3 packets from damaged file to repaired file
-int copy_inside_data(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	uint8_t *buf, buf_hash[16];
-	uint32_t chunk_num;
-	uint64_t offset, chunk_offset, slice_offset, file_offset;
-	uint64_t chunk_size, slice_size;
-	uint64_t packet_size, total_packet_size;
-	uint64_t slice_count, slice_index;
-	uint64_t alloc_size, buf_size;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_p;
-	FILE *fp_read, *fp_write;
-
-	slice_count = par3_ctx->slice_count;
-	slice_list = par3_ctx->slice_list;
-	chunk_p = par3_ctx->chunk_list;
-	chunk_num = par3_ctx->chunk_count;
-
-	// Get range of target unprotected chunk
-	offset = 0;
-	chunk_offset = 0;
-	if (chunk_num < 2)
-		return RET_LOGIC_ERROR;
-	while (chunk_num > 0){	// check all chunk descriptions
-		chunk_size = chunk_p->size;
-		if (chunk_size == 0){	// Unprotected Chunk Description
-			chunk_size = chunk_p->block;
-			chunk_offset = offset;
-			break;
-		} else {	// Protected Chunk Description
-			offset += chunk_size;
-		}
-		chunk_p++;
-		chunk_num--;
-	}
-	if (chunk_offset == 0)
-		return RET_LOGIC_ERROR;
-	if (par3_ctx->noise_level >= 2){
-		printf("\nUnprotected Chunk: offset = %"PRId64", size = %"PRId64"\n", chunk_offset, chunk_size);
-	}
-
-	// Buffer size must be larger than each packet size.
-	// The minimum size is 4 KB to reduce time of file access.
-	alloc_size = (chunk_size + 4095) & ~4095;
-	packet_size = 0;
-	if ( (par3_ctx->memory_limit != 0) && (alloc_size > par3_ctx->memory_limit) ){
-		// Size of Recovery Data Packet at first
-		packet_size = 48 + 40 + par3_ctx->block_size;
-		if (packet_size < par3_ctx->creator_packet_size)
-			packet_size = par3_ctx->creator_packet_size;
-		if (packet_size < par3_ctx->comment_packet_size)
-			packet_size = par3_ctx->comment_packet_size;
-		if (packet_size < par3_ctx->start_packet_size)
-			packet_size = par3_ctx->start_packet_size;
-		if (packet_size < par3_ctx->matrix_packet_size)
-			packet_size = par3_ctx->matrix_packet_size;
-		if (packet_size < par3_ctx->file_packet_size)
-			packet_size = par3_ctx->file_packet_size;
-		if (packet_size < par3_ctx->dir_packet_size)
-			packet_size = par3_ctx->dir_packet_size;
-		if (packet_size < par3_ctx->root_packet_size)
-			packet_size = par3_ctx->root_packet_size;
-		// If packet size is larger than memory limit, use packet size.
-		if (packet_size > par3_ctx->memory_limit){
-			alloc_size = (packet_size + 4095) & ~4095;
-		} else {
-			alloc_size = par3_ctx->memory_limit;
-		}
-	}
-	if (par3_ctx->noise_level >= 3){
-		printf("alloc_size = %"PRId64", packet_size = %"PRId64"\n", alloc_size, packet_size);
-	}
-
-	// Allocate buffer to keep PAR3 packet
-	// To check completeness of the packet, it needs to read the entire bytes on memory.
-	buf = malloc(alloc_size);
-	if (buf == NULL){
-		perror("Failed to allocate memory for PAR3 packet");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = buf;
-
-	// Search PAR3 packets in outside file
-	fp_read = fopen(par3_ctx->par_filename, "rb");
-	if (fp_read == NULL){
-		perror("Failed to open Outside file");
-		return RET_FILE_IO_ERROR;
-	}
-	fp_write = fopen(temp_path, "wb");
-	if (fp_write == NULL){
-		perror("Failed to open temporary file");
-		fclose(fp_read);
-		return RET_FILE_IO_ERROR;
-	}
-	if (_fseeki64(fp_write, chunk_offset, SEEK_SET) != 0){
-		perror("Failed to seek temporary file");
-		fclose(fp_read);
-		fclose(fp_write);
-		return RET_FILE_IO_ERROR;
-	}
-
-	file_offset = 0;
-	total_packet_size = 0;
-	while (total_packet_size < chunk_size){
-		// Skip found input file slices in damaged file
-		slice_index = 0;
-		while (slice_index < slice_count){
-			if (slice_list[slice_index].find_name != NULL){
-				slice_offset = slice_list[slice_index].find_offset;
-				slice_size = slice_list[slice_index].size;
-				if ( (slice_offset + slice_size > file_offset) && (slice_offset < file_offset + 48) ){
-					//printf("file_offset = %"PRId64", slice_index = %"PRId64"\n", file_offset, slice_index);
-					file_offset = slice_offset + slice_size;
-					// Check again from the first slice
-					slice_index = 0;
-					continue;
-				}
-			}
-			slice_index++;
-		}
-		if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp_read);
-			fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Read some packets at once
-		buf_size = fread(buf, 1, alloc_size, fp_read);
-		if (par3_ctx->noise_level >= 3){
-			printf("file_offset = %"PRId64", buf_size = %"PRId64"\n", file_offset, buf_size);
-		}
-		offset = 0;
-		while (offset + 48 < buf_size){
-			if (memcmp(buf + offset, "PAR3\0PKT", 8) == 0){	// check Magic sequence
-				// read packet size
-				memcpy(&packet_size, buf + (offset + 24), 8);
-				if (packet_size <= 48){	// If packet is too small, just ignore it.
-					offset += 8;
-					continue;
-				}
-				// If packet exceeds buffer, read more bytes.
-				if (offset + packet_size > buf_size){
-					// slide data to top
-					memmove(buf, buf + offset, buf_size - offset);
-					file_offset += offset;
-					// read following data
-					buf_size = buf_size - offset + fread(buf + buf_size - offset, 1, offset, fp_read);
-					if (par3_ctx->noise_level >= 3){
-						printf("file_offset = %"PRId64", buf_size = %"PRId64", offset = %"PRId64", packet_size = %"PRId64"\n", file_offset, buf_size, offset, packet_size);
-					}
-					offset = 0;
-					if (packet_size > buf_size){
-						offset += 8;
-						continue;
-					}
-				}
-
-				// check fingerprint hash of the packet
-				blake3(buf + (offset + 24), packet_size - 24, buf_hash);
-				if (memcmp(buf + (offset + 8), buf_hash, 16) != 0){
-					// If checksum is different, ignore the packet.
-					offset += 8;
-					continue;
-				}
-				if (par3_ctx->noise_level >= 3){
-					printf("Complete packet: offset = %"PRId64" + %"PRId64", size = %"PRId64"\n", file_offset, offset, packet_size);
-				}
-
-				// write packet on temporary file
-				if (fwrite(buf + offset, 1, packet_size, fp_write) != packet_size){
-					perror("Failed to write packet on temporary file");
-					fclose(fp_read);
-					fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-				total_packet_size += packet_size;
-
-				offset += packet_size;
-			} else {
-				offset++;
-			}
-		}
-		file_offset += offset;
-
-		// Exit at end of file
-		if (feof(fp_read) != 0)
-			break;
-	}
-
-	if (fclose(fp_read) != 0){
-		perror("Failed to close Outside file");
-		fclose(fp_write);
-		return RET_FILE_IO_ERROR;
-	}
-	if (fclose(fp_write) != 0){
-		perror("Failed to close temporary file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	free(buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 2){
-		printf("Total size of copied complete packets = %"PRId64"\n", total_packet_size);
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/leopard/LeopardCommon.cpp b/windows/src/leopard/LeopardCommon.cpp
deleted file mode 100644
index 9632915..0000000
--- a/windows/src/leopard/LeopardCommon.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "LeopardCommon.h"
-
-#include <thread>
-
-namespace leopard {
-
-
-//------------------------------------------------------------------------------
-// Runtime CPU Architecture Check
-//
-// Feature checks stolen shamelessly from
-// https://github.com/jedisct1/libsodium/blob/master/src/libsodium/sodium/runtime.c
-
-#if defined(HAVE_ANDROID_GETCPUFEATURES)
-    #include <cpu-features.h>
-#endif
-
-#if defined(LEO_TRY_NEON)
-# if defined(IOS) && defined(__ARM_NEON__)
-    // Requires iPhone 5S or newer
-# else
-    // Remember to add LOCAL_STATIC_LIBRARIES := cpufeatures
-    bool CpuHasNeon = false; // V6 / V7
-    bool CpuHasNeon64 = false; // 64-bit
-# endif
-#endif
-
-
-#if !defined(LEO_TARGET_MOBILE)
-
-#ifdef _MSC_VER
-    #include <intrin.h> // __cpuid
-    #pragma warning(disable: 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
-#endif
-
-#ifdef LEO_TRY_AVX2
-    bool CpuHasAVX2 = false;
-#endif
-
-bool CpuHasSSSE3 = false;
-
-#define CPUID_EBX_AVX2    0x00000020
-#define CPUID_ECX_SSSE3   0x00000200
-
-static void _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
-{
-#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
-    __cpuid((int *) cpu_info, cpu_info_type);
-#else //if defined(HAVE_CPUID)
-    cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
-# ifdef __i386__
-    __asm__ __volatile__ ("pushfl; pushfl; "
-                          "popl %0; "
-                          "movl %0, %1; xorl %2, %0; "
-                          "pushl %0; "
-                          "popfl; pushfl; popl %0; popfl" :
-                          "=&r" (cpu_info[0]), "=&r" (cpu_info[1]) :
-                          "i" (0x200000));
-    if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0) {
-        return; /* LCOV_EXCL_LINE */
-    }
-# endif
-# ifdef __i386__
-    __asm__ __volatile__ ("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1" :
-                          "=a" (cpu_info[0]), "=&r" (cpu_info[1]),
-                          "=c" (cpu_info[2]), "=d" (cpu_info[3]) :
-                          "0" (cpu_info_type), "2" (0U));
-# elif defined(__x86_64__)
-    __asm__ __volatile__ ("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1" :
-                          "=a" (cpu_info[0]), "=&r" (cpu_info[1]),
-                          "=c" (cpu_info[2]), "=d" (cpu_info[3]) :
-                          "0" (cpu_info_type), "2" (0U));
-# else
-    __asm__ __volatile__ ("cpuid" :
-                          "=a" (cpu_info[0]), "=b" (cpu_info[1]),
-                          "=c" (cpu_info[2]), "=d" (cpu_info[3]) :
-                          "0" (cpu_info_type), "2" (0U));
-# endif
-#endif
-}
-
-#elif defined(LEO_USE_SSE2NEON)
-bool CpuHasSSSE3 = true;
-#endif // defined(LEO_TARGET_MOBILE)
-
-
-void InitializeCPUArch()
-{
-#if defined(LEO_TRY_NEON) && defined(HAVE_ANDROID_GETCPUFEATURES)
-    AndroidCpuFamily family = android_getCpuFamily();
-    if (family == ANDROID_CPU_FAMILY_ARM)
-    {
-        if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON)
-            CpuHasNeon = true;
-    }
-    else if (family == ANDROID_CPU_FAMILY_ARM64)
-    {
-        CpuHasNeon = true;
-        if (android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_ASIMD)
-            CpuHasNeon64 = true;
-    }
-#endif
-
-#if !defined(LEO_TARGET_MOBILE)
-    unsigned int cpu_info[4];
-
-    _cpuid(cpu_info, 1);
-    CpuHasSSSE3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0);
-
-#if defined(LEO_TRY_AVX2)
-    _cpuid(cpu_info, 7);
-    CpuHasAVX2 = ((cpu_info[1] & CPUID_EBX_AVX2) != 0);
-#endif // LEO_TRY_AVX2
-
-#ifndef LEO_USE_SSSE3_OPT
-    CpuHasSSSE3 = false;
-#endif // LEO_USE_SSSE3_OPT
-#ifndef LEO_USE_AVX2_OPT
-    CpuHasAVX2 = false;
-#endif // LEO_USE_AVX2_OPT
-
-#endif // LEO_TARGET_MOBILE
-}
-
-
-//------------------------------------------------------------------------------
-// XOR Memory
-
-void xor_mem(
-    void * LEO_RESTRICT vx, const void * LEO_RESTRICT vy,
-    uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(vx);
-        const LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<const LEO_M256 *>(vy);
-        while (bytes >= 128)
-        {
-            const LEO_M256 x0 = _mm256_xor_si256(_mm256_loadu_si256(x32),     _mm256_loadu_si256(y32));
-            const LEO_M256 x1 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 1), _mm256_loadu_si256(y32 + 1));
-            const LEO_M256 x2 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 2), _mm256_loadu_si256(y32 + 2));
-            const LEO_M256 x3 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 3), _mm256_loadu_si256(y32 + 3));
-            _mm256_storeu_si256(x32, x0);
-            _mm256_storeu_si256(x32 + 1, x1);
-            _mm256_storeu_si256(x32 + 2, x2);
-            _mm256_storeu_si256(x32 + 3, x3);
-            x32 += 4, y32 += 4;
-            bytes -= 128;
-        };
-        if (bytes > 0)
-        {
-            const LEO_M256 x0 = _mm256_xor_si256(_mm256_loadu_si256(x32),     _mm256_loadu_si256(y32));
-            const LEO_M256 x1 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 1), _mm256_loadu_si256(y32 + 1));
-            _mm256_storeu_si256(x32, x0);
-            _mm256_storeu_si256(x32 + 1, x1);
-        }
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(vx);
-    const LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<const LEO_M128 *>(vy);
-    do
-    {
-        const LEO_M128 x0 = _mm_xor_si128(_mm_loadu_si128(x16),     _mm_loadu_si128(y16));
-        const LEO_M128 x1 = _mm_xor_si128(_mm_loadu_si128(x16 + 1), _mm_loadu_si128(y16 + 1));
-        const LEO_M128 x2 = _mm_xor_si128(_mm_loadu_si128(x16 + 2), _mm_loadu_si128(y16 + 2));
-        const LEO_M128 x3 = _mm_xor_si128(_mm_loadu_si128(x16 + 3), _mm_loadu_si128(y16 + 3));
-        _mm_storeu_si128(x16, x0);
-        _mm_storeu_si128(x16 + 1, x1);
-        _mm_storeu_si128(x16 + 2, x2);
-        _mm_storeu_si128(x16 + 3, x3);
-        x16 += 4, y16 += 4;
-        bytes -= 64;
-    } while (bytes > 0);
-}
-
-#ifdef LEO_M1_OPT
-
-void xor_mem_2to1(
-    void * LEO_RESTRICT x,
-    const void * LEO_RESTRICT y,
-    const void * LEO_RESTRICT z,
-    uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        const LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<const LEO_M256 *>(y);
-        const LEO_M256 * LEO_RESTRICT z32 = reinterpret_cast<const LEO_M256 *>(z);
-        while (bytes >= 128)
-        {
-            LEO_M256 x0 = _mm256_xor_si256(_mm256_loadu_si256(x32), _mm256_loadu_si256(y32));
-            x0 = _mm256_xor_si256(x0, _mm256_loadu_si256(z32));
-            LEO_M256 x1 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 1), _mm256_loadu_si256(y32 + 1));
-            x1 = _mm256_xor_si256(x1, _mm256_loadu_si256(z32 + 1));
-            LEO_M256 x2 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 2), _mm256_loadu_si256(y32 + 2));
-            x2 = _mm256_xor_si256(x2, _mm256_loadu_si256(z32 + 2));
-            LEO_M256 x3 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 3), _mm256_loadu_si256(y32 + 3));
-            x3 = _mm256_xor_si256(x3, _mm256_loadu_si256(z32 + 3));
-            _mm256_storeu_si256(x32, x0);
-            _mm256_storeu_si256(x32 + 1, x1);
-            _mm256_storeu_si256(x32 + 2, x2);
-            _mm256_storeu_si256(x32 + 3, x3);
-            x32 += 4, y32 += 4, z32 += 4;
-            bytes -= 128;
-        };
-
-        if (bytes > 0)
-        {
-            LEO_M256 x0 = _mm256_xor_si256(_mm256_loadu_si256(x32),     _mm256_loadu_si256(y32));
-            x0 = _mm256_xor_si256(x0, _mm256_loadu_si256(z32));
-            LEO_M256 x1 = _mm256_xor_si256(_mm256_loadu_si256(x32 + 1), _mm256_loadu_si256(y32 + 1));
-            x1 = _mm256_xor_si256(x1, _mm256_loadu_si256(z32 + 1));
-            _mm256_storeu_si256(x32, x0);
-            _mm256_storeu_si256(x32 + 1, x1);
-        }
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-    const LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<const LEO_M128 *>(y);
-    const LEO_M128 * LEO_RESTRICT z16 = reinterpret_cast<const LEO_M128 *>(z);
-    do
-    {
-        LEO_M128 x0 = _mm_xor_si128(_mm_loadu_si128(x16), _mm_loadu_si128(y16));
-        x0 = _mm_xor_si128(x0, _mm_loadu_si128(z16));
-        LEO_M128 x1 = _mm_xor_si128(_mm_loadu_si128(x16 + 1), _mm_loadu_si128(y16 + 1));
-        x1 = _mm_xor_si128(x1, _mm_loadu_si128(z16 + 1));
-        LEO_M128 x2 = _mm_xor_si128(_mm_loadu_si128(x16 + 2), _mm_loadu_si128(y16 + 2));
-        x2 = _mm_xor_si128(x2, _mm_loadu_si128(z16 + 2));
-        LEO_M128 x3 = _mm_xor_si128(_mm_loadu_si128(x16 + 3), _mm_loadu_si128(y16 + 3));
-        x3 = _mm_xor_si128(x3, _mm_loadu_si128(z16 + 3));
-        _mm_storeu_si128(x16, x0);
-        _mm_storeu_si128(x16 + 1, x1);
-        _mm_storeu_si128(x16 + 2, x2);
-        _mm_storeu_si128(x16 + 3, x3);
-        x16 += 4, y16 += 4, z16 += 4;
-        bytes -= 64;
-    } while (bytes > 0);
-}
-
-#endif // LEO_M1_OPT
-
-#ifdef LEO_USE_VECTOR4_OPT
-
-void xor_mem4(
-    void * LEO_RESTRICT vx_0, const void * LEO_RESTRICT vy_0,
-    void * LEO_RESTRICT vx_1, const void * LEO_RESTRICT vy_1,
-    void * LEO_RESTRICT vx_2, const void * LEO_RESTRICT vy_2,
-    void * LEO_RESTRICT vx_3, const void * LEO_RESTRICT vy_3,
-    uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        LEO_M256 * LEO_RESTRICT       x32_0 = reinterpret_cast<LEO_M256 *>      (vx_0);
-        const LEO_M256 * LEO_RESTRICT y32_0 = reinterpret_cast<const LEO_M256 *>(vy_0);
-        LEO_M256 * LEO_RESTRICT       x32_1 = reinterpret_cast<LEO_M256 *>      (vx_1);
-        const LEO_M256 * LEO_RESTRICT y32_1 = reinterpret_cast<const LEO_M256 *>(vy_1);
-        LEO_M256 * LEO_RESTRICT       x32_2 = reinterpret_cast<LEO_M256 *>      (vx_2);
-        const LEO_M256 * LEO_RESTRICT y32_2 = reinterpret_cast<const LEO_M256 *>(vy_2);
-        LEO_M256 * LEO_RESTRICT       x32_3 = reinterpret_cast<LEO_M256 *>      (vx_3);
-        const LEO_M256 * LEO_RESTRICT y32_3 = reinterpret_cast<const LEO_M256 *>(vy_3);
-        while (bytes >= 128)
-        {
-            const LEO_M256 x0_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0),     _mm256_loadu_si256(y32_0));
-            const LEO_M256 x1_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 1), _mm256_loadu_si256(y32_0 + 1));
-            const LEO_M256 x2_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 2), _mm256_loadu_si256(y32_0 + 2));
-            const LEO_M256 x3_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 3), _mm256_loadu_si256(y32_0 + 3));
-            _mm256_storeu_si256(x32_0, x0_0);
-            _mm256_storeu_si256(x32_0 + 1, x1_0);
-            _mm256_storeu_si256(x32_0 + 2, x2_0);
-            _mm256_storeu_si256(x32_0 + 3, x3_0);
-            x32_0 += 4, y32_0 += 4;
-            const LEO_M256 x0_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1),     _mm256_loadu_si256(y32_1));
-            const LEO_M256 x1_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 1), _mm256_loadu_si256(y32_1 + 1));
-            const LEO_M256 x2_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 2), _mm256_loadu_si256(y32_1 + 2));
-            const LEO_M256 x3_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 3), _mm256_loadu_si256(y32_1 + 3));
-            _mm256_storeu_si256(x32_1, x0_1);
-            _mm256_storeu_si256(x32_1 + 1, x1_1);
-            _mm256_storeu_si256(x32_1 + 2, x2_1);
-            _mm256_storeu_si256(x32_1 + 3, x3_1);
-            x32_1 += 4, y32_1 += 4;
-            const LEO_M256 x0_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2),     _mm256_loadu_si256(y32_2));
-            const LEO_M256 x1_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 1), _mm256_loadu_si256(y32_2 + 1));
-            const LEO_M256 x2_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 2), _mm256_loadu_si256(y32_2 + 2));
-            const LEO_M256 x3_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 3), _mm256_loadu_si256(y32_2 + 3));
-            _mm256_storeu_si256(x32_2, x0_2);
-            _mm256_storeu_si256(x32_2 + 1, x1_2);
-            _mm256_storeu_si256(x32_2 + 2, x2_2);
-            _mm256_storeu_si256(x32_2 + 3, x3_2);
-            x32_2 += 4, y32_2 += 4;
-            const LEO_M256 x0_3 = _mm256_xor_si256(_mm256_loadu_si256(x32_3),     _mm256_loadu_si256(y32_3));
-            const LEO_M256 x1_3 = _mm256_xor_si256(_mm256_loadu_si256(x32_3 + 1), _mm256_loadu_si256(y32_3 + 1));
-            const LEO_M256 x2_3 = _mm256_xor_si256(_mm256_loadu_si256(x32_3 + 2), _mm256_loadu_si256(y32_3 + 2));
-            const LEO_M256 x3_3 = _mm256_xor_si256(_mm256_loadu_si256(x32_3 + 3), _mm256_loadu_si256(y32_3 + 3));
-            _mm256_storeu_si256(x32_3,     x0_3);
-            _mm256_storeu_si256(x32_3 + 1, x1_3);
-            _mm256_storeu_si256(x32_3 + 2, x2_3);
-            _mm256_storeu_si256(x32_3 + 3, x3_3);
-            x32_3 += 4, y32_3 += 4;
-            bytes -= 128;
-        }
-        if (bytes > 0)
-        {
-            const LEO_M256 x0_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0),     _mm256_loadu_si256(y32_0));
-            const LEO_M256 x1_0 = _mm256_xor_si256(_mm256_loadu_si256(x32_0 + 1), _mm256_loadu_si256(y32_0 + 1));
-            const LEO_M256 x0_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1),     _mm256_loadu_si256(y32_1));
-            const LEO_M256 x1_1 = _mm256_xor_si256(_mm256_loadu_si256(x32_1 + 1), _mm256_loadu_si256(y32_1 + 1));
-            _mm256_storeu_si256(x32_0, x0_0);
-            _mm256_storeu_si256(x32_0 + 1, x1_0);
-            _mm256_storeu_si256(x32_1, x0_1);
-            _mm256_storeu_si256(x32_1 + 1, x1_1);
-            const LEO_M256 x0_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2),     _mm256_loadu_si256(y32_2));
-            const LEO_M256 x1_2 = _mm256_xor_si256(_mm256_loadu_si256(x32_2 + 1), _mm256_loadu_si256(y32_2 + 1));
-            const LEO_M256 x0_3 = _mm256_xor_si256(_mm256_loadu_si256(x32_3),     _mm256_loadu_si256(y32_3));
-            const LEO_M256 x1_3 = _mm256_xor_si256(_mm256_loadu_si256(x32_3 + 1), _mm256_loadu_si256(y32_3 + 1));
-            _mm256_storeu_si256(x32_2,     x0_2);
-            _mm256_storeu_si256(x32_2 + 1, x1_2);
-            _mm256_storeu_si256(x32_3,     x0_3);
-            _mm256_storeu_si256(x32_3 + 1, x1_3);
-        }
-        return;
-    }
-#endif // LEO_TRY_AVX2
-    LEO_M128 * LEO_RESTRICT       x16_0 = reinterpret_cast<LEO_M128 *>      (vx_0);
-    const LEO_M128 * LEO_RESTRICT y16_0 = reinterpret_cast<const LEO_M128 *>(vy_0);
-    LEO_M128 * LEO_RESTRICT       x16_1 = reinterpret_cast<LEO_M128 *>      (vx_1);
-    const LEO_M128 * LEO_RESTRICT y16_1 = reinterpret_cast<const LEO_M128 *>(vy_1);
-    LEO_M128 * LEO_RESTRICT       x16_2 = reinterpret_cast<LEO_M128 *>      (vx_2);
-    const LEO_M128 * LEO_RESTRICT y16_2 = reinterpret_cast<const LEO_M128 *>(vy_2);
-    LEO_M128 * LEO_RESTRICT       x16_3 = reinterpret_cast<LEO_M128 *>      (vx_3);
-    const LEO_M128 * LEO_RESTRICT y16_3 = reinterpret_cast<const LEO_M128 *>(vy_3);
-    do
-    {
-        const LEO_M128 x0_0 = _mm_xor_si128(_mm_loadu_si128(x16_0),     _mm_loadu_si128(y16_0));
-        const LEO_M128 x1_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 1), _mm_loadu_si128(y16_0 + 1));
-        const LEO_M128 x2_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 2), _mm_loadu_si128(y16_0 + 2));
-        const LEO_M128 x3_0 = _mm_xor_si128(_mm_loadu_si128(x16_0 + 3), _mm_loadu_si128(y16_0 + 3));
-        _mm_storeu_si128(x16_0, x0_0);
-        _mm_storeu_si128(x16_0 + 1, x1_0);
-        _mm_storeu_si128(x16_0 + 2, x2_0);
-        _mm_storeu_si128(x16_0 + 3, x3_0);
-        x16_0 += 4, y16_0 += 4;
-        const LEO_M128 x0_1 = _mm_xor_si128(_mm_loadu_si128(x16_1),     _mm_loadu_si128(y16_1));
-        const LEO_M128 x1_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 1), _mm_loadu_si128(y16_1 + 1));
-        const LEO_M128 x2_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 2), _mm_loadu_si128(y16_1 + 2));
-        const LEO_M128 x3_1 = _mm_xor_si128(_mm_loadu_si128(x16_1 + 3), _mm_loadu_si128(y16_1 + 3));
-        _mm_storeu_si128(x16_1, x0_1);
-        _mm_storeu_si128(x16_1 + 1, x1_1);
-        _mm_storeu_si128(x16_1 + 2, x2_1);
-        _mm_storeu_si128(x16_1 + 3, x3_1);
-        x16_1 += 4, y16_1 += 4;
-        const LEO_M128 x0_2 = _mm_xor_si128(_mm_loadu_si128(x16_2),     _mm_loadu_si128(y16_2));
-        const LEO_M128 x1_2 = _mm_xor_si128(_mm_loadu_si128(x16_2 + 1), _mm_loadu_si128(y16_2 + 1));
-        const LEO_M128 x2_2 = _mm_xor_si128(_mm_loadu_si128(x16_2 + 2), _mm_loadu_si128(y16_2 + 2));
-        const LEO_M128 x3_2 = _mm_xor_si128(_mm_loadu_si128(x16_2 + 3), _mm_loadu_si128(y16_2 + 3));
-        _mm_storeu_si128(x16_2, x0_2);
-        _mm_storeu_si128(x16_2 + 1, x1_2);
-        _mm_storeu_si128(x16_2 + 2, x2_2);
-        _mm_storeu_si128(x16_2 + 3, x3_2);
-        x16_2 += 4, y16_2 += 4;
-        const LEO_M128 x0_3 = _mm_xor_si128(_mm_loadu_si128(x16_3),     _mm_loadu_si128(y16_3));
-        const LEO_M128 x1_3 = _mm_xor_si128(_mm_loadu_si128(x16_3 + 1), _mm_loadu_si128(y16_3 + 1));
-        const LEO_M128 x2_3 = _mm_xor_si128(_mm_loadu_si128(x16_3 + 2), _mm_loadu_si128(y16_3 + 2));
-        const LEO_M128 x3_3 = _mm_xor_si128(_mm_loadu_si128(x16_3 + 3), _mm_loadu_si128(y16_3 + 3));
-        _mm_storeu_si128(x16_3,     x0_3);
-        _mm_storeu_si128(x16_3 + 1, x1_3);
-        _mm_storeu_si128(x16_3 + 2, x2_3);
-        _mm_storeu_si128(x16_3 + 3, x3_3);
-        x16_3 += 4, y16_3 += 4;
-        bytes -= 64;
-    } while (bytes > 0);
-}
-
-#endif // LEO_USE_VECTOR4_OPT
-
-void VectorXOR_Threads(
-    const uint64_t bytes,
-    unsigned count,
-    void** x,
-    void** y)
-{
-#ifdef LEO_USE_VECTOR4_OPT
-    if (count >= 4)
-    {
-        int i_end = count - 4;
-#pragma omp parallel for
-        for (int i = 0; i <= i_end; i += 4)
-        {
-            xor_mem4(
-                x[i + 0], y[i + 0],
-                x[i + 1], y[i + 1],
-                x[i + 2], y[i + 2],
-                x[i + 3], y[i + 3],
-                bytes);
-        }
-        count %= 4;
-        i_end -= count;
-        x += i_end;
-        y += i_end;
-    }
-#endif // LEO_USE_VECTOR4_OPT
-
-    for (unsigned i = 0; i < count; ++i)
-        xor_mem(x[i], y[i], bytes);
-}
-void VectorXOR(
-    const uint64_t bytes,
-    unsigned count,
-    void** x,
-    void** y)
-{
-#ifdef LEO_USE_VECTOR4_OPT
-    if (count >= 4)
-    {
-        int i_end = count - 4;
-        for (int i = 0; i <= i_end; i += 4)
-        {
-            xor_mem4(
-                x[i + 0], y[i + 0],
-                x[i + 1], y[i + 1],
-                x[i + 2], y[i + 2],
-                x[i + 3], y[i + 3],
-                bytes);
-        }
-        count %= 4;
-        i_end -= count;
-        x += i_end;
-        y += i_end;
-    }
-#endif // LEO_USE_VECTOR4_OPT
-
-    for (unsigned i = 0; i < count; ++i)
-        xor_mem(x[i], y[i], bytes);
-}
-
-
-} // namespace leopard
diff --git a/windows/src/leopard/LeopardCommon.h b/windows/src/leopard/LeopardCommon.h
deleted file mode 100644
index 45bac3d..0000000
--- a/windows/src/leopard/LeopardCommon.h
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#pragma once
-
-/*
-    TODO:
-
-    Mid-term:
-    + Add compile-time selectable XOR-only rowops instead of MULADD
-    + Look into 12-bit fields as a performance optimization
-
-    Long-term:
-    + Evaluate the error locator polynomial based on fast polynomial interpolations in O(k log^2 k)
-    + Look into getting EncodeL working so we can support larger recovery sets
-    + Implement the decoder algorithm from {3} based on the Forney algorithm
-*/
-
-/*
-    FFT Data Layout:
-
-    We pack the data into memory in this order:
-
-    [Recovery Data (Power of Two = M)] [Original Data] [Zero Padding out to 65536]
-
-    For encoding, the placement is implied instead of actual memory layout.
-    For decoding, the layout is explicitly used.
-*/
-
-/*
-    Encoder algorithm:
-
-    The encoder is described in {3}.  Operations are done O(K Log M),
-    where K is the original data size, and M is up to twice the
-    size of the recovery set.
-
-    Roughly in brief:
-
-        Recovery = FFT( IFFT(Data_0) xor IFFT(Data_1) xor ... )
-
-    It walks the original data M chunks at a time performing the IFFT.
-    Each IFFT intermediate result is XORed together into the first M chunks of
-    the data layout.  Finally the FFT is performed.
-
-    Encoder optimizations:
-    * The first IFFT can be performed directly in the first M chunks.
-    * The zero padding can be skipped while performing the final IFFT.
-    Unrolling is used in the code to accomplish both these optimizations.
-    * The final FFT can be truncated also if recovery set is not a power of 2.
-    It is easy to truncate the FFT by ending the inner loop early.
-    * The FFT operations can be unrolled two layers at a time so that instead
-    of writing the result of the first layer out and reading it back in for
-    the second layer, those interactions can happen in registers immediately.
-*/
-
-/*
-    Decoder algorithm:
-
-    The decoder is described in {1}.  Operations are done O(N Log N), where N is up
-    to twice the size of the original data as described below.
-
-    Roughly in brief:
-
-        Original = -ErrLocator * FFT( Derivative( IFFT( ErrLocator * ReceivedData ) ) )
-
-
-    Precalculations:
-    ---------------
-
-    At startup initialization, FFTInitialize() precalculates FWT(L) as
-    described by equation (92) in {1}, where L = Log[i] for i = 0..Order,
-    Order = 256 or 65536 for FF8/16.  This is stored in the LogWalsh vector.
-
-    It also precalculates the FFT skew factors (s_i) as described by
-    equation (28).  This is stored in the FFTSkew vector.
-
-    For memory workspace N data chunks are needed, where N is a power of two
-    at or above M + K.  K is the original data size and M is the next power
-    of two above the recovery data size.  For example for K = 200 pieces of
-    data and 10% redundancy, there are 20 redundant pieces, which rounds up
-    to 32 = M.  M + K = 232 pieces, so N rounds up to 256.
-
-
-    Online calculations:
-    -------------------
-
-    At runtime, the error locator polynomial is evaluated using the
-    Fast Walsh-Hadamard transform as described in {1} equation (92).
-
-    At runtime the data is explicit laid out in workspace memory like this:
-    [Recovery Data (Power of Two = M)] [Original Data (K)] [Zero Padding out to N]
-
-    Data that was lost is replaced with zeroes.
-    Data that was received, including recovery data, is multiplied by the error
-    locator polynomial as it is copied into the workspace.
-
-    The IFFT is applied to the entire workspace of N chunks.
-    Since the IFFT starts with pairs of inputs and doubles in width at each
-    iteration, the IFFT is optimized by skipping zero padding at the end until
-    it starts mixing with non-zero data.
-
-    The formal derivative is applied to the entire workspace of N chunks.
-    This is a massive XOR loop that runs 4 columns in parallel for speed.
-
-    The FFT is applied to the entire workspace of N chunks.
-    The FFT is optimized by only performing intermediate calculations required
-    to recover lost data.  Since it starts wide and ends up working on adjacent
-    pairs, at some point the intermediate results are not needed for data that
-    will not be read by the application.  This optimization is implemented by
-    the ErrorBitfield class.
-
-    Finally, only recovered data is multiplied by the negative of the
-    error locator polynomial as it is copied into the front of the
-    workspace for the application to retrieve.
-*/
-
-/*
-    Finite field arithmetic optimizations:
-
-    For faster finite field multiplication, large tables are precomputed and
-    applied during encoding/decoding on 64 bytes of data at a time using
-    SSSE3 or AVX2 vector instructions and the ALTMAP approach from Jerasure.
-
-    Addition in this finite field is XOR, and a vectorized memory XOR routine
-    is also used.
-*/
-
-#include "leopard.h"
-
-#include <stdint.h>
-#ifdef _WIN32
-#include <malloc.h>
-#endif //_WIN32
-#include <vector>
-#include <atomic>
-#include <memory>
-#include <mutex>
-#include <condition_variable>
-
-
-//------------------------------------------------------------------------------
-// Constants
-
-// Enable 8-bit or 16-bit fields
-#define LEO_HAS_FF8
-#define LEO_HAS_FF16
-
-// Enable using SIMD instructions
-#define LEO_USE_SSSE3_OPT
-#define LEO_USE_AVX2_OPT
-
-// Avoid calculating final FFT values in decoder using bitfield
-#define LEO_ERROR_BITFIELD_OPT
-
-// Interleave butterfly operations between layer pairs in FFT
-#define LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-// Optimize M=1 case
-#define LEO_M1_OPT
-
-// Unroll inner loops 4 times
-#define LEO_USE_VECTOR4_OPT
-
-// MacOS M1
-#if defined(__aarch64__)
-  #define LEO_USE_SSE2NEON
-  #define LEO_TARGET_MOBILE
-#endif
-
-//------------------------------------------------------------------------------
-// Debug
-
-// Some bugs only repro in release mode, so this can be helpful
-//#define LEO_DEBUG_IN_RELEASE
-
-#if defined(_DEBUG) || defined(DEBUG) || defined(LEO_DEBUG_IN_RELEASE)
-    #define LEO_DEBUG
-    #ifdef _WIN32
-        #define LEO_DEBUG_BREAK __debugbreak()
-    #else
-        #define LEO_DEBUG_BREAK __builtin_trap()
-    #endif
-    #define LEO_DEBUG_ASSERT(cond) { if (!(cond)) { LEO_DEBUG_BREAK; } }
-#else
-    #define LEO_DEBUG_BREAK ;
-    #define LEO_DEBUG_ASSERT(cond) ;
-#endif
-
-
-//------------------------------------------------------------------------------
-// Windows Header
-
-#ifdef _WIN32
-    #define WIN32_LEAN_AND_MEAN
-
-    #ifndef _WINSOCKAPI_
-        #define DID_DEFINE_WINSOCKAPI
-        #define _WINSOCKAPI_
-    #endif
-    #ifndef NOMINMAX
-        #define NOMINMAX
-    #endif
-    #ifndef _WIN32_WINNT
-        #define _WIN32_WINNT 0x0601 /* Windows 7+ */
-    #endif
-
-    #include <windows.h>
-#endif
-
-#ifdef DID_DEFINE_WINSOCKAPI
-    #undef _WINSOCKAPI_
-    #undef DID_DEFINE_WINSOCKAPI
-#endif
-
-
-//------------------------------------------------------------------------------
-// Platform/Architecture
-
-#ifdef _MSC_VER
-    #include <intrin.h>
-#endif
-
-#if defined(ANDROID) || defined(IOS)
-    #define LEO_TARGET_MOBILE
-#endif // ANDROID
-
-#if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900)
-    #define LEO_TRY_AVX2 /* 256-bit */
-    #include <immintrin.h>
-    #define LEO_ALIGN_BYTES 32
-#else // __AVX2__
-    #define LEO_ALIGN_BYTES 16
-#endif // __AVX2__
-
-#if !defined(LEO_TARGET_MOBILE)
-    // Note: MSVC currently only supports SSSE3 but not AVX2
-    #include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
-    #include <emmintrin.h> // SSE2
-#elif defined(LEO_USE_SSE2NEON)
-    #include "sse2neon/sse2neon.h"
-#endif // LEO_TARGET_MOBILE
-
-#if defined(HAVE_ARM_NEON_H)
-    #include <arm_neon.h>
-#endif // HAVE_ARM_NEON_H
-
-#if defined(LEO_TARGET_MOBILE)
-
-    #define LEO_ALIGNED_ACCESSES /* Inputs must be aligned to LEO_ALIGN_BYTES */
-
-# if defined(HAVE_ARM_NEON_H)
-    // Compiler-specific 128-bit SIMD register keyword
-    #define LEO_M128 uint8x16_t
-    #define LEO_TRY_NEON
-#elif defined(LEO_USE_SSE2NEON)
-    #define LEO_M128 __m128i
-#else
-    #define LEO_M128 uint64_t
-# endif
-
-#else // LEO_TARGET_MOBILE
-
-    // Compiler-specific 128-bit SIMD register keyword
-    #define LEO_M128 __m128i
-
-#endif // LEO_TARGET_MOBILE
-
-#ifdef LEO_TRY_AVX2
-    // Compiler-specific 256-bit SIMD register keyword
-    #define LEO_M256 __m256i
-#endif
-
-// Compiler-specific C++11 restrict keyword
-#define LEO_RESTRICT __restrict
-
-// Compiler-specific force inline keyword
-#ifdef _MSC_VER
-    #define LEO_FORCE_INLINE inline __forceinline
-#else
-    #define LEO_FORCE_INLINE inline __attribute__((always_inline))
-#endif
-
-// Compiler-specific alignment keyword
-// Note: Alignment only matters for ARM NEON where it should be 16
-#ifdef _MSC_VER
-    #define LEO_ALIGNED __declspec(align(LEO_ALIGN_BYTES))
-#else // _MSC_VER
-    #define LEO_ALIGNED __attribute__((aligned(LEO_ALIGN_BYTES)))
-#endif // _MSC_VER
-
-
-namespace leopard {
-
-
-//------------------------------------------------------------------------------
-// Runtime CPU Architecture Check
-
-// Initialize CPU architecture flags
-void InitializeCPUArch();
-
-
-#if defined(LEO_TRY_NEON)
-# if defined(IOS) && defined(__ARM_NEON__)
-    // Does device support NEON?
-    static const bool CpuHasNeon = true;
-    static const bool CpuHasNeon64 = true;
-# else
-    // Does device support NEON?
-    // Remember to add LOCAL_STATIC_LIBRARIES := cpufeatures
-    extern bool CpuHasNeon; // V6 / V7
-    extern bool CpuHasNeon64; // 64-bit
-# endif
-#endif
-
-#if !defined(LEO_TARGET_MOBILE)
-# if defined(LEO_TRY_AVX2)
-    // Does CPU support AVX2?
-    extern bool CpuHasAVX2;
-# endif
-    // Does CPU support SSSE3?
-    extern bool CpuHasSSSE3;
-#elif defined(LEO_USE_SSE2NEON)
-    extern bool CpuHasSSSE3;
-#endif // LEO_TARGET_MOBILE
-
-
-//------------------------------------------------------------------------------
-// Portable Intrinsics
-
-// Returns highest bit index 0..31 where the first non-zero bit is found
-// Precondition: x != 0
-LEO_FORCE_INLINE unsigned LastNonzeroBit32(unsigned x)
-{
-#ifdef _MSC_VER
-    unsigned long index;
-    // Note: Ignoring result because x != 0
-    _BitScanReverse(&index, (uint32_t)x);
-    return (unsigned)index;
-#else
-    // Note: Ignoring return value of 0 because x != 0
-    static_assert(sizeof(unsigned) == 4, "Assuming 32 bit unsigneds in LastNonzeroBit32");
-    return 31 - (unsigned)__builtin_clz(x);
-#endif
-}
-
-// Returns next power of two at or above given value
-LEO_FORCE_INLINE unsigned NextPow2(unsigned n)
-{
-    return 2UL << LastNonzeroBit32(n - 1);
-}
-
-
-//------------------------------------------------------------------------------
-// XOR Memory
-//
-// This works for both 8-bit and 16-bit finite fields
-
-// x[] ^= y[]
-void xor_mem(
-    void * LEO_RESTRICT x, const void * LEO_RESTRICT y,
-    uint64_t bytes);
-
-#ifdef LEO_M1_OPT
-
-// x[] ^= y[] ^ z[]
-void xor_mem_2to1(
-    void * LEO_RESTRICT x,
-    const void * LEO_RESTRICT y,
-    const void * LEO_RESTRICT z,
-    uint64_t bytes);
-
-#endif // LEO_M1_OPT
-
-#ifdef LEO_USE_VECTOR4_OPT
-
-// For i = {0, 1, 2, 3}: x_i[] ^= x_i[]
-void xor_mem4(
-    void * LEO_RESTRICT x_0, const void * LEO_RESTRICT y_0,
-    void * LEO_RESTRICT x_1, const void * LEO_RESTRICT y_1,
-    void * LEO_RESTRICT x_2, const void * LEO_RESTRICT y_2,
-    void * LEO_RESTRICT x_3, const void * LEO_RESTRICT y_3,
-    uint64_t bytes);
-
-#endif // LEO_USE_VECTOR4_OPT
-
-// x[] ^= y[]
-void VectorXOR(
-    const uint64_t bytes,
-    unsigned count,
-    void** x,
-    void** y);
-
-// x[] ^= y[] (Multithreaded)
-void VectorXOR_Threads(
-    const uint64_t bytes,
-    unsigned count,
-    void** x,
-    void** y);
-
-
-//------------------------------------------------------------------------------
-// XORSummer
-
-class XORSummer
-{
-public:
-    // Set the addition destination and byte count
-    LEO_FORCE_INLINE void Initialize(void* dest)
-    {
-        DestBuffer = dest;
-        Waiting = nullptr;
-    }
-
-    // Accumulate some source data
-    LEO_FORCE_INLINE void Add(const void* src, const uint64_t bytes)
-    {
-#ifdef LEO_M1_OPT
-        if (Waiting)
-        {
-            xor_mem_2to1(DestBuffer, src, Waiting, bytes);
-            Waiting = nullptr;
-        }
-        else
-            Waiting = src;
-#else // LEO_M1_OPT
-        xor_mem(DestBuffer, src, bytes);
-#endif // LEO_M1_OPT
-    }
-
-    // Finalize in the destination buffer
-    LEO_FORCE_INLINE void Finalize(const uint64_t bytes)
-    {
-#ifdef LEO_M1_OPT
-        if (Waiting)
-            xor_mem(DestBuffer, Waiting, bytes);
-#endif // LEO_M1_OPT
-    }
-
-protected:
-    void* DestBuffer;
-    const void* Waiting;
-};
-
-
-//------------------------------------------------------------------------------
-// SIMD-Safe Aligned Memory Allocations
-
-static const unsigned kAlignmentBytes = LEO_ALIGN_BYTES;
-
-static LEO_FORCE_INLINE uint8_t* SIMDSafeAllocate(size_t size)
-{
-    uint8_t* data = (uint8_t*)calloc(1, kAlignmentBytes + size);
-    if (!data)
-        return nullptr;
-    unsigned offset = (unsigned)((uintptr_t)data % kAlignmentBytes);
-    data += kAlignmentBytes - offset;
-    data[-1] = (uint8_t)offset;
-    return data;
-}
-
-static LEO_FORCE_INLINE void SIMDSafeFree(void* ptr)
-{
-    if (!ptr)
-        return;
-    uint8_t* data = (uint8_t*)ptr;
-    unsigned offset = data[-1];
-    if (offset >= kAlignmentBytes)
-    {
-        LEO_DEBUG_BREAK; // Should never happen
-        return;
-    }
-    data -= kAlignmentBytes - offset;
-    free(data);
-}
-
-
-} // namespace leopard
diff --git a/windows/src/leopard/LeopardFF16.cpp b/windows/src/leopard/LeopardFF16.cpp
deleted file mode 100644
index 3241c66..0000000
--- a/windows/src/leopard/LeopardFF16.cpp
+++ /dev/null
@@ -1,1799 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "LeopardFF16.h"
-
-#ifdef LEO_HAS_FF16
-
-#include <string.h>
-
-#ifdef _MSC_VER
-    #pragma warning(disable: 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
-#endif
-
-namespace leopard { namespace ff16 {
-
-
-//------------------------------------------------------------------------------
-// Datatypes and Constants
-
-// Basis used for generating logarithm tables
-static const ffe_t kCantorBasis[kBits] = {
-    0x0001, 0xACCA, 0x3C0E, 0x163E,
-    0xC582, 0xED2E, 0x914C, 0x4012,
-    0x6C98, 0x10D8, 0x6A72, 0xB900,
-    0xFDB8, 0xFB34, 0xFF38, 0x991E
-};
-
-// Using the Cantor basis here enables us to avoid a lot of extra calculations
-// when applying the formal derivative in decoding.
-
-
-//------------------------------------------------------------------------------
-// Field Operations
-
-// z = x + y (mod kModulus)
-static inline ffe_t AddMod(const ffe_t a, const ffe_t b)
-{
-    const unsigned sum = (unsigned)a + b;
-
-    // Partial reduction step, allowing for kModulus to be returned
-    return static_cast<ffe_t>(sum + (sum >> kBits));
-}
-
-// z = x - y (mod kModulus)
-static inline ffe_t SubMod(const ffe_t a, const ffe_t b)
-{
-    const unsigned dif = (unsigned)a - b;
-
-    // Partial reduction step, allowing for kModulus to be returned
-    return static_cast<ffe_t>(dif + (dif >> kBits));
-}
-
-
-//------------------------------------------------------------------------------
-// Fast Walsh-Hadamard Transform (FWHT) (mod kModulus)
-
-// {a, b} = {a + b, a - b} (Mod Q)
-static LEO_FORCE_INLINE void FWHT_2(ffe_t& LEO_RESTRICT a, ffe_t& LEO_RESTRICT b)
-{
-    const ffe_t sum = AddMod(a, b);
-    const ffe_t dif = SubMod(a, b);
-    a = sum;
-    b = dif;
-}
-
-static LEO_FORCE_INLINE void FWHT_4(ffe_t* data, unsigned s)
-{
-    const unsigned s2 = s << 1;
-
-    ffe_t t0 = data[0];
-    ffe_t t1 = data[s];
-    ffe_t t2 = data[s2];
-    ffe_t t3 = data[s2 + s];
-
-    FWHT_2(t0, t1);
-    FWHT_2(t2, t3);
-    FWHT_2(t0, t2);
-    FWHT_2(t1, t3);
-
-    data[0] = t0;
-    data[s] = t1;
-    data[s2] = t2;
-    data[s2 + s] = t3;
-}
-
-// Decimation in time (DIT) Fast Walsh-Hadamard Transform
-// Unrolls pairs of layers to perform cross-layer operations in registers
-// m_truncated: Number of elements that are non-zero at the front of data
-static void FWHT(ffe_t* data, const unsigned m, const unsigned m_truncated)
-{
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist = 1, dist4 = 4;
-    for (; dist4 <= m; dist = dist4, dist4 <<= 2)
-    {
-        // For each set of dist*4 elements:
-#pragma omp parallel for
-        for (int r = 0; r < (int)m_truncated; r += dist4)
-        {
-            // For each set of dist elements:
-            const int i_end = r + dist;
-            for (int i = r; i < i_end; ++i)
-                FWHT_4(data + i, dist);
-        }
-    }
-
-    // If there is one layer left:
-    if (dist < m)
-#pragma omp parallel for
-        for (int i = 0; i < (int)dist; ++i)
-            FWHT_2(data[i], data[i + dist]);
-}
-
-
-//------------------------------------------------------------------------------
-// Logarithm Tables
-
-static ffe_t LogLUT[kOrder];
-static ffe_t ExpLUT[kOrder];
-
-
-// Returns a * Log(b)
-static ffe_t MultiplyLog(ffe_t a, ffe_t log_b)
-{
-    /*
-        Note that this operation is not a normal multiplication in a finite
-        field because the right operand is already a logarithm.  This is done
-        because it moves K table lookups from the Decode() method into the
-        initialization step that is less performance critical.  The LogWalsh[]
-        table below contains precalculated logarithms so it is easier to do
-        all the other multiplies in that form as well.
-    */
-    if (a == 0)
-        return 0;
-    return ExpLUT[AddMod(LogLUT[a], log_b)];
-}
-
-
-// Initialize LogLUT[], ExpLUT[]
-static void InitializeLogarithmTables()
-{
-    // LFSR table generation:
-
-    unsigned state = 1;
-    for (unsigned i = 0; i < kModulus; ++i)
-    {
-        ExpLUT[state] = static_cast<ffe_t>(i);
-        state <<= 1;
-        if (state >= kOrder)
-            state ^= kPolynomial;
-    }
-    ExpLUT[0] = kModulus;
-
-    // Conversion to Cantor basis:
-
-    LogLUT[0] = 0;
-    for (unsigned i = 0; i < kBits; ++i)
-    {
-        const ffe_t basis = kCantorBasis[i];
-        const unsigned width = static_cast<unsigned>(1UL << i);
-
-        for (unsigned j = 0; j < width; ++j)
-            LogLUT[j + width] = LogLUT[j] ^ basis;
-    }
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        LogLUT[i] = ExpLUT[LogLUT[i]];
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        ExpLUT[LogLUT[i]] = i;
-
-    ExpLUT[kModulus] = ExpLUT[0];
-}
-
-
-//------------------------------------------------------------------------------
-// Multiplies
-
-/*
-    The multiplication algorithm used follows the approach outlined in {4}.
-    Specifically section 7 outlines the algorithm used here for 16-bit fields.
-    The ALTMAP memory layout is used since there is no need to convert in/out.
-*/
-
-struct Multiply128LUT_t
-{
-    LEO_M128 Lo[4];
-    LEO_M128 Hi[4];
-};
-
-static const Multiply128LUT_t* Multiply128LUT = nullptr;
-
-#define LEO_MUL_TABLES_128(table, log_m) \
-    const LEO_M128 T0_lo_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Lo[0]); \
-    const LEO_M128 T1_lo_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Lo[1]); \
-    const LEO_M128 T2_lo_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Lo[2]); \
-    const LEO_M128 T3_lo_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Lo[3]); \
-    const LEO_M128 T0_hi_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Hi[0]); \
-    const LEO_M128 T1_hi_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Hi[1]); \
-    const LEO_M128 T2_hi_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Hi[2]); \
-    const LEO_M128 T3_hi_##table = _mm_loadu_si128(&Multiply128LUT[log_m].Hi[3]);
-
-// 128-bit {prod_lo, prod_hi} = {value_lo, value_hi} * log_m
-#define LEO_MUL_128(value_lo, value_hi, table) { \
-            LEO_M128 data_1 = _mm_srli_epi64(value_lo, 4); \
-            LEO_M128 data_0 = _mm_and_si128(value_lo, clr_mask); \
-            data_1 = _mm_and_si128(data_1, clr_mask); \
-            prod_lo = _mm_shuffle_epi8(T0_lo_##table, data_0); \
-            prod_hi = _mm_shuffle_epi8(T0_hi_##table, data_0); \
-            prod_lo = _mm_xor_si128(prod_lo, _mm_shuffle_epi8(T1_lo_##table, data_1)); \
-            prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T1_hi_##table, data_1)); \
-            data_0 = _mm_and_si128(value_hi, clr_mask); \
-            data_1 = _mm_srli_epi64(value_hi, 4); \
-            data_1 = _mm_and_si128(data_1, clr_mask); \
-            prod_lo = _mm_xor_si128(prod_lo, _mm_shuffle_epi8(T2_lo_##table, data_0)); \
-            prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T2_hi_##table, data_0)); \
-            prod_lo = _mm_xor_si128(prod_lo, _mm_shuffle_epi8(T3_lo_##table, data_1)); \
-            prod_hi = _mm_xor_si128(prod_hi, _mm_shuffle_epi8(T3_hi_##table, data_1)); }
-
-// {x_lo, x_hi} ^= {y_lo, y_hi} * log_m
-#define LEO_MULADD_128(x_lo, x_hi, y_lo, y_hi, table) { \
-            LEO_M128 prod_lo, prod_hi; \
-            LEO_MUL_128(y_lo, y_hi, table); \
-            x_lo = _mm_xor_si128(x_lo, prod_lo); \
-            x_hi = _mm_xor_si128(x_hi, prod_hi); }
-
-
-#if defined(LEO_TRY_AVX2)
-
-struct Multiply256LUT_t
-{
-    LEO_M256 Lo[4];
-    LEO_M256 Hi[4];
-};
-
-static const Multiply256LUT_t* Multiply256LUT = nullptr;
-
-#define LEO_MUL_TABLES_256(table, log_m) \
-        const LEO_M256 T0_lo_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Lo[0]); \
-        const LEO_M256 T1_lo_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Lo[1]); \
-        const LEO_M256 T2_lo_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Lo[2]); \
-        const LEO_M256 T3_lo_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Lo[3]); \
-        const LEO_M256 T0_hi_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Hi[0]); \
-        const LEO_M256 T1_hi_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Hi[1]); \
-        const LEO_M256 T2_hi_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Hi[2]); \
-        const LEO_M256 T3_hi_##table = _mm256_loadu_si256(&Multiply256LUT[log_m].Hi[3]);
-
-// 256-bit {prod_lo, prod_hi} = {value_lo, value_hi} * log_m
-#define LEO_MUL_256(value_lo, value_hi, table) { \
-            LEO_M256 data_1 = _mm256_srli_epi64(value_lo, 4); \
-            LEO_M256 data_0 = _mm256_and_si256(value_lo, clr_mask); \
-            data_1 = _mm256_and_si256(data_1, clr_mask); \
-            prod_lo = _mm256_shuffle_epi8(T0_lo_##table, data_0); \
-            prod_hi = _mm256_shuffle_epi8(T0_hi_##table, data_0); \
-            prod_lo = _mm256_xor_si256(prod_lo, _mm256_shuffle_epi8(T1_lo_##table, data_1)); \
-            prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T1_hi_##table, data_1)); \
-            data_0 = _mm256_and_si256(value_hi, clr_mask); \
-            data_1 = _mm256_srli_epi64(value_hi, 4); \
-            data_1 = _mm256_and_si256(data_1, clr_mask); \
-            prod_lo = _mm256_xor_si256(prod_lo, _mm256_shuffle_epi8(T2_lo_##table, data_0)); \
-            prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T2_hi_##table, data_0)); \
-            prod_lo = _mm256_xor_si256(prod_lo, _mm256_shuffle_epi8(T3_lo_##table, data_1)); \
-            prod_hi = _mm256_xor_si256(prod_hi, _mm256_shuffle_epi8(T3_hi_##table, data_1)); }
-
-// {x_lo, x_hi} ^= {y_lo, y_hi} * log_m
-#define LEO_MULADD_256(x_lo, x_hi, y_lo, y_hi, table) { \
-            LEO_M256 prod_lo, prod_hi; \
-            LEO_MUL_256(y_lo, y_hi, table); \
-            x_lo = _mm256_xor_si256(x_lo, prod_lo); \
-            x_hi = _mm256_xor_si256(x_hi, prod_hi); }
-
-#endif // LEO_TRY_AVX2
-
-// Stores the partial products of x * y at offset x + y * 65536
-// Repeated accesses from the same y value are faster
-struct Product16Table
-{
-    ffe_t LUT[4 * 16];
-};
-static const Product16Table* Multiply16LUT = nullptr;
-
-
-// Reference version of muladd: x[] ^= y[] * log_m
-static LEO_FORCE_INLINE void RefMulAdd(
-    void* LEO_RESTRICT x,
-    const void* LEO_RESTRICT y,
-    ffe_t log_m,
-    uint64_t bytes)
-{
-    const ffe_t* LEO_RESTRICT lut = Multiply16LUT[log_m].LUT;
-    const uint8_t * LEO_RESTRICT y1 = reinterpret_cast<const uint8_t *>(y);
-    uint8_t * LEO_RESTRICT x1 = reinterpret_cast<uint8_t *>(x);
-
-    do
-    {
-        for (unsigned i = 0; i < 32; ++i)
-        {
-            const unsigned lo = y1[i];
-            const unsigned hi = y1[i + 32];
-
-            const ffe_t prod = \
-                lut[(lo & 15)] ^ \
-                lut[(lo >> 4) + 16] ^ \
-                lut[(hi & 15) + 32] ^ \
-                lut[(hi >> 4) + 48];
-
-            x1[i] ^= (uint8_t)prod;
-            x1[i + 32] ^= (uint8_t)(prod >> 8);
-        }
-
-        x1 += 64, y1 += 64;
-        bytes -= 64;
-    } while (bytes > 0);
-
-}
-
-// Reference version of mul: x[] = y[] * log_m
-static LEO_FORCE_INLINE void RefMul(
-    void* LEO_RESTRICT x,
-    const void* LEO_RESTRICT y,
-    ffe_t log_m,
-    uint64_t bytes)
-{
-    const ffe_t* LEO_RESTRICT lut = Multiply16LUT[log_m].LUT;
-    const uint8_t * LEO_RESTRICT y1 = reinterpret_cast<const uint8_t *>(y);
-    uint8_t * LEO_RESTRICT x1 = reinterpret_cast<uint8_t *>(x);
-
-    do
-    {
-        for (unsigned i = 0; i < 32; ++i)
-        {
-            const unsigned lo = y1[i];
-            const unsigned hi = y1[i + 32];
-
-            const ffe_t prod = \
-                lut[(lo & 15)] ^ \
-                lut[(lo >> 4) + 16] ^ \
-                lut[(hi & 15) + 32] ^ \
-                lut[(hi >> 4) + 48];
-
-            x1[i] = (uint8_t)prod;
-            x1[i + 32] = (uint8_t)(prod >> 8);
-        }
-
-        x1 += 64, y1 += 64;
-        bytes -= 64;
-    } while (bytes > 0);
-}
-
-
-static void InitializeMultiplyTables()
-{
-    // If we cannot use the PSHUFB instruction, generate Multiply8LUT:
-    if (!CpuHasSSSE3)
-    {
-        Multiply16LUT = new Product16Table[65536];
-
-        // For each log_m multiplicand:
-#pragma omp parallel for
-        for (int log_m = 0; log_m < (int)kOrder; ++log_m)
-        {
-            const Product16Table& lut = Multiply16LUT[log_m];
-
-            for (unsigned nibble = 0, shift = 0; nibble < 4; ++nibble, shift += 4)
-            {
-                ffe_t* nibble_lut = (ffe_t*)&lut.LUT[nibble * 16];
-
-                for (unsigned x_nibble = 0; x_nibble < 16; ++x_nibble)
-                {
-                    const ffe_t prod = MultiplyLog(x_nibble << shift, static_cast<ffe_t>(log_m));
-                    nibble_lut[x_nibble] = prod;
-                }
-            }
-        }
-
-        return;
-    }
-
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-        Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
-    else
-#endif // LEO_TRY_AVX2
-        Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
-
-    // For each value we could multiply by:
-#pragma omp parallel for
-    for (int log_m = 0; log_m < (int)kOrder; ++log_m)
-    {
-        // For each 4 bits of the finite field width in bits:
-        for (unsigned i = 0, shift = 0; i < 4; ++i, shift += 4)
-        {
-            // Construct 16 entry LUT for PSHUFB
-            uint8_t prod_lo[16], prod_hi[16];
-            for (ffe_t x = 0; x < 16; ++x)
-            {
-                const ffe_t prod = MultiplyLog(x << shift, static_cast<ffe_t>(log_m));
-                prod_lo[x] = static_cast<uint8_t>(prod);
-                prod_hi[x] = static_cast<uint8_t>(prod >> 8);
-            }
-
-            const LEO_M128 value_lo = _mm_loadu_si128((LEO_M128*)prod_lo);
-            const LEO_M128 value_hi = _mm_loadu_si128((LEO_M128*)prod_hi);
-
-            // Store in 128-bit wide table
-#if defined(LEO_TRY_AVX2)
-            if (!CpuHasAVX2)
-#endif // LEO_TRY_AVX2
-            {
-                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Lo[i], value_lo);
-                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Hi[i], value_hi);
-            }
-
-            // Store in 256-bit wide table
-#if defined(LEO_TRY_AVX2)
-            if (CpuHasAVX2)
-            {
-                _mm256_storeu_si256((LEO_M256*)&Multiply256LUT[log_m].Lo[i],
-                    _mm256_broadcastsi128_si256(value_lo));
-                _mm256_storeu_si256((LEO_M256*)&Multiply256LUT[log_m].Hi[i],
-                    _mm256_broadcastsi128_si256(value_hi));
-            }
-#endif // LEO_TRY_AVX2
-        }
-    }
-}
-
-
-static void mul_mem(
-    void * LEO_RESTRICT x, const void * LEO_RESTRICT y,
-    ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        LEO_MUL_TABLES_256(0, log_m);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        const LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<const LEO_M256 *>(y);
-
-        do
-        {
-#define LEO_MUL_256_LS(x_ptr, y_ptr) { \
-            const LEO_M256 data_lo = _mm256_loadu_si256(y_ptr); \
-            const LEO_M256 data_hi = _mm256_loadu_si256(y_ptr + 1); \
-            LEO_M256 prod_lo, prod_hi; \
-            LEO_MUL_256(data_lo, data_hi, 0); \
-            _mm256_storeu_si256(x_ptr, prod_lo); \
-            _mm256_storeu_si256(x_ptr + 1, prod_hi); }
-
-            LEO_MUL_256_LS(x32, y32);
-            y32 += 2, x32 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        LEO_MUL_TABLES_128(0, log_m);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-        const LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<const LEO_M128 *>(y);
-
-        do
-        {
-#define LEO_MUL_128_LS(x_ptr, y_ptr) { \
-                const LEO_M128 data_lo = _mm_loadu_si128(y_ptr); \
-                const LEO_M128 data_hi = _mm_loadu_si128(y_ptr + 2); \
-                LEO_M128 prod_lo, prod_hi; \
-                LEO_MUL_128(data_lo, data_hi, 0); \
-                _mm_storeu_si128(x_ptr, prod_lo); \
-                _mm_storeu_si128(x_ptr + 2, prod_hi); }
-
-            LEO_MUL_128_LS(x16 + 1, y16 + 1);
-            LEO_MUL_128_LS(x16, y16);
-            x16 += 4, y16 += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    RefMul(x, y, log_m, bytes);
-}
-
-
-//------------------------------------------------------------------------------
-// FFT
-
-// Twisted factors used in FFT
-static ffe_t FFTSkew[kModulus];
-
-// Factors used in the evaluation of the error locator polynomial
-static ffe_t LogWalsh[kOrder];
-
-
-static void FFTInitialize()
-{
-    ffe_t temp[kBits - 1];
-
-    // Generate FFT skew vector {1}:
-
-    for (unsigned i = 1; i < kBits; ++i)
-        temp[i - 1] = static_cast<ffe_t>(1UL << i);
-
-    for (unsigned m = 0; m < (kBits - 1); ++m)
-    {
-        const unsigned step = 1UL << (m + 1);
-
-        FFTSkew[(1UL << m) - 1] = 0;
-
-        for (unsigned i = m; i < (kBits - 1); ++i)
-        {
-            const unsigned s = (1UL << (i + 1));
-
-            for (unsigned j = (1UL << m) - 1; j < s; j += step)
-                FFTSkew[j + s] = FFTSkew[j] ^ temp[i];
-        }
-
-        temp[m] = kModulus - LogLUT[MultiplyLog(temp[m], LogLUT[temp[m] ^ 1])];
-
-        for (unsigned i = m + 1; i < (kBits - 1); ++i)
-        {
-            const ffe_t sum = AddMod(LogLUT[temp[i] ^ 1], temp[m]);
-            temp[i] = MultiplyLog(temp[i], sum);
-        }
-    }
-
-    for (unsigned i = 0; i < kModulus; ++i)
-        FFTSkew[i] = LogLUT[FFTSkew[i]];
-
-    // Precalculate FWHT(Log[i]):
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        LogWalsh[i] = LogLUT[i];
-    LogWalsh[0] = 0;
-
-    FWHT(LogWalsh, kOrder, kOrder);
-}
-
-/*
-    Decimation in time IFFT:
-
-    The decimation in time IFFT algorithm allows us to unroll 2 layers at a time,
-    performing calculations on local registers and faster cache memory.
-
-    Each ^___^ below indicates a butterfly between the associated indices.
-
-    The ifft_butterfly(x, y) operation:
-
-        y[] ^= x[]
-        if (log_m != kModulus)
-            x[] ^= exp(log(y[]) + log_m)
-
-    Layer 0:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^
-
-    Layer 1:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^___^   ^___^   ^___^   ^___^
-          ^___^   ^___^   ^___^   ^___^
-  
-    Layer 2:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 
-        ^_______^       ^_______^
-          ^_______^       ^_______^
-            ^_______^       ^_______^
-              ^_______^       ^_______^
-
-    Layer 3:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_______________^
-          ^_______________^
-            ^_______________^
-              ^_______________^
-                ^_______________^
-                  ^_______________^
-                    ^_______________^
-                      ^_______________^
-
-    DIT layer 0-1 operations, grouped 4 at a time:
-        {0-1, 2-3, 0-2, 1-3},
-        {4-5, 6-7, 4-6, 5-7},
-
-    DIT layer 1-2 operations, grouped 4 at a time:
-        {0-2, 4-6, 0-4, 2-6},
-        {1-3, 5-7, 1-5, 3-7},
-
-    DIT layer 2-3 operations, grouped 4 at a time:
-        {0-4, 0'-4', 0-0', 4-4'},
-        {1-5, 1'-5', 1-1', 5-5'},
-*/
-
-// 2-way butterfly
-static void IFFT_DIT2(
-    void * LEO_RESTRICT x, void * LEO_RESTRICT y,
-    ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        LEO_MUL_TABLES_256(0, log_m);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<LEO_M256 *>(y);
-
-        do
-        {
-#define LEO_IFFTB_256(x_ptr, y_ptr) { \
-            LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
-            LEO_M256 x_hi = _mm256_loadu_si256(x_ptr + 1); \
-            LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
-            LEO_M256 y_hi = _mm256_loadu_si256(y_ptr + 1); \
-            y_lo = _mm256_xor_si256(y_lo, x_lo); \
-            y_hi = _mm256_xor_si256(y_hi, x_hi); \
-            _mm256_storeu_si256(y_ptr, y_lo); \
-            _mm256_storeu_si256(y_ptr + 1, y_hi); \
-            LEO_MULADD_256(x_lo, x_hi, y_lo, y_hi, 0); \
-            _mm256_storeu_si256(x_ptr, x_lo); \
-            _mm256_storeu_si256(x_ptr + 1, x_hi); }
-
-            LEO_IFFTB_256(x32, y32);
-            y32 += 2, x32 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        LEO_MUL_TABLES_128(0, log_m);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-        LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<LEO_M128 *>(y);
-
-        do
-        {
-#define LEO_IFFTB_128(x_ptr, y_ptr) { \
-                LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
-                LEO_M128 x_hi = _mm_loadu_si128(x_ptr + 2); \
-                LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
-                LEO_M128 y_hi = _mm_loadu_si128(y_ptr + 2); \
-                y_lo = _mm_xor_si128(y_lo, x_lo); \
-                y_hi = _mm_xor_si128(y_hi, x_hi); \
-                _mm_storeu_si128(y_ptr, y_lo); \
-                _mm_storeu_si128(y_ptr + 2, y_hi); \
-                LEO_MULADD_128(x_lo, x_hi, y_lo, y_hi, 0); \
-                _mm_storeu_si128(x_ptr, x_lo); \
-                _mm_storeu_si128(x_ptr + 2, x_hi); }
-
-            LEO_IFFTB_128(x16 + 1, y16 + 1);
-            LEO_IFFTB_128(x16, y16);
-            x16 += 4, y16 += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    // Reference version:
-    xor_mem(y, x, bytes);
-    RefMulAdd(x, y, log_m, bytes);
-}
-
-
-// 4-way butterfly
-static void IFFT_DIT4(
-    uint64_t bytes,
-    void** work,
-    unsigned dist,
-    const ffe_t log_m01,
-    const ffe_t log_m23,
-    const ffe_t log_m02)
-{
-#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-#if defined(LEO_TRY_AVX2)
-
-    if (CpuHasAVX2)
-    {
-        LEO_MUL_TABLES_256(01, log_m01);
-        LEO_MUL_TABLES_256(23, log_m23);
-        LEO_MUL_TABLES_256(02, log_m02);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M256 *>(work[0]);
-        LEO_M256 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M256 *>(work[dist]);
-        LEO_M256 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M256 *>(work[dist * 2]);
-        LEO_M256 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M256 *>(work[dist * 3]);
-
-        do
-        {
-            LEO_M256 work_reg_lo_0 = _mm256_loadu_si256(work0);
-            LEO_M256 work_reg_hi_0 = _mm256_loadu_si256(work0 + 1);
-            LEO_M256 work_reg_lo_1 = _mm256_loadu_si256(work1);
-            LEO_M256 work_reg_hi_1 = _mm256_loadu_si256(work1 + 1);
-
-            // First layer:
-            work_reg_lo_1 = _mm256_xor_si256(work_reg_lo_0, work_reg_lo_1);
-            work_reg_hi_1 = _mm256_xor_si256(work_reg_hi_0, work_reg_hi_1);
-            if (log_m01 != kModulus)
-                LEO_MULADD_256(work_reg_lo_0, work_reg_hi_0, work_reg_lo_1, work_reg_hi_1, 01);
-
-            LEO_M256 work_reg_lo_2 = _mm256_loadu_si256(work2);
-            LEO_M256 work_reg_hi_2 = _mm256_loadu_si256(work2 + 1);
-            LEO_M256 work_reg_lo_3 = _mm256_loadu_si256(work3);
-            LEO_M256 work_reg_hi_3 = _mm256_loadu_si256(work3 + 1);
-
-            work_reg_lo_3 = _mm256_xor_si256(work_reg_lo_2, work_reg_lo_3);
-            work_reg_hi_3 = _mm256_xor_si256(work_reg_hi_2, work_reg_hi_3);
-            if (log_m23 != kModulus)
-                LEO_MULADD_256(work_reg_lo_2, work_reg_hi_2, work_reg_lo_3, work_reg_hi_3, 23);
-
-            // Second layer:
-            work_reg_lo_2 = _mm256_xor_si256(work_reg_lo_0, work_reg_lo_2);
-            work_reg_hi_2 = _mm256_xor_si256(work_reg_hi_0, work_reg_hi_2);
-            work_reg_lo_3 = _mm256_xor_si256(work_reg_lo_1, work_reg_lo_3);
-            work_reg_hi_3 = _mm256_xor_si256(work_reg_hi_1, work_reg_hi_3);
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_256(work_reg_lo_0, work_reg_hi_0, work_reg_lo_2, work_reg_hi_2, 02);
-                LEO_MULADD_256(work_reg_lo_1, work_reg_hi_1, work_reg_lo_3, work_reg_hi_3, 02);
-            }
-
-            _mm256_storeu_si256(work0, work_reg_lo_0);
-            _mm256_storeu_si256(work0 + 1, work_reg_hi_0);
-            _mm256_storeu_si256(work1, work_reg_lo_1);
-            _mm256_storeu_si256(work1 + 1, work_reg_hi_1);
-            _mm256_storeu_si256(work2, work_reg_lo_2);
-            _mm256_storeu_si256(work2 + 1, work_reg_hi_2);
-            _mm256_storeu_si256(work3, work_reg_lo_3);
-            _mm256_storeu_si256(work3 + 1, work_reg_hi_3);
-
-            work0 += 2, work1 += 2, work2 += 2, work3 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        LEO_MUL_TABLES_128(01, log_m01);
-        LEO_MUL_TABLES_128(23, log_m23);
-        LEO_MUL_TABLES_128(02, log_m02);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M128 *>(work[0]);
-        LEO_M128 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M128 *>(work[dist]);
-        LEO_M128 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M128 *>(work[dist * 2]);
-        LEO_M128 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M128 *>(work[dist * 3]);
-
-        do
-        {
-            for (unsigned i = 0; i < 2; ++i)
-            {
-                LEO_M128 work_reg_lo_0 = _mm_loadu_si128(work0);
-                LEO_M128 work_reg_hi_0 = _mm_loadu_si128(work0 + 2);
-                LEO_M128 work_reg_lo_1 = _mm_loadu_si128(work1);
-                LEO_M128 work_reg_hi_1 = _mm_loadu_si128(work1 + 2);
-
-                // First layer:
-                work_reg_lo_1 = _mm_xor_si128(work_reg_lo_0, work_reg_lo_1);
-                work_reg_hi_1 = _mm_xor_si128(work_reg_hi_0, work_reg_hi_1);
-                if (log_m01 != kModulus)
-                    LEO_MULADD_128(work_reg_lo_0, work_reg_hi_0, work_reg_lo_1, work_reg_hi_1, 01);
-
-                LEO_M128 work_reg_lo_2 = _mm_loadu_si128(work2);
-                LEO_M128 work_reg_hi_2 = _mm_loadu_si128(work2 + 2);
-                LEO_M128 work_reg_lo_3 = _mm_loadu_si128(work3);
-                LEO_M128 work_reg_hi_3 = _mm_loadu_si128(work3 + 2);
-
-                work_reg_lo_3 = _mm_xor_si128(work_reg_lo_2, work_reg_lo_3);
-                work_reg_hi_3 = _mm_xor_si128(work_reg_hi_2, work_reg_hi_3);
-                if (log_m23 != kModulus)
-                    LEO_MULADD_128(work_reg_lo_2, work_reg_hi_2, work_reg_lo_3, work_reg_hi_3, 23);
-
-                // Second layer:
-                work_reg_lo_2 = _mm_xor_si128(work_reg_lo_0, work_reg_lo_2);
-                work_reg_hi_2 = _mm_xor_si128(work_reg_hi_0, work_reg_hi_2);
-                work_reg_lo_3 = _mm_xor_si128(work_reg_lo_1, work_reg_lo_3);
-                work_reg_hi_3 = _mm_xor_si128(work_reg_hi_1, work_reg_hi_3);
-                if (log_m02 != kModulus)
-                {
-                    LEO_MULADD_128(work_reg_lo_0, work_reg_hi_0, work_reg_lo_2, work_reg_hi_2, 02);
-                    LEO_MULADD_128(work_reg_lo_1, work_reg_hi_1, work_reg_lo_3, work_reg_hi_3, 02);
-                }
-
-                _mm_storeu_si128(work0, work_reg_lo_0);
-                _mm_storeu_si128(work0 + 2, work_reg_hi_0);
-                _mm_storeu_si128(work1, work_reg_lo_1);
-                _mm_storeu_si128(work1 + 2, work_reg_hi_1);
-                _mm_storeu_si128(work2, work_reg_lo_2);
-                _mm_storeu_si128(work2 + 2, work_reg_hi_2);
-                _mm_storeu_si128(work3, work_reg_lo_3);
-                _mm_storeu_si128(work3 + 2, work_reg_hi_3);
-
-                work0++, work1++, work2++, work3++;
-            }
-
-            work0 += 2, work1 += 2, work2 += 2, work3 += 2;
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-    // First layer:
-    if (log_m01 == kModulus)
-        xor_mem(work[dist], work[0], bytes);
-    else
-        IFFT_DIT2(work[0], work[dist], log_m01, bytes);
-
-    if (log_m23 == kModulus)
-        xor_mem(work[dist * 3], work[dist * 2], bytes);
-    else
-        IFFT_DIT2(work[dist * 2], work[dist * 3], log_m23, bytes);
-
-    // Second layer:
-    if (log_m02 == kModulus)
-    {
-        xor_mem(work[dist * 2], work[0], bytes);
-        xor_mem(work[dist * 3], work[dist], bytes);
-    }
-    else
-    {
-        IFFT_DIT2(work[0], work[dist * 2], log_m02, bytes);
-        IFFT_DIT2(work[dist], work[dist * 3], log_m02, bytes);
-    }
-}
-
-
-// Unrolled IFFT for encoder
-static void IFFT_DIT_Encoder(
-    const uint64_t bytes,
-    const void* const* data,
-    const unsigned m_truncated,
-    void** work,
-    void** xor_result,
-    const unsigned m,
-    const ffe_t* skewLUT)
-{
-    // I tried rolling the memcpy/memset into the first layer of the FFT and
-    // found that it only yields a 4% performance improvement, which is not
-    // worth the extra complexity.
-#pragma omp parallel for
-    for (int i = 0; i < (int)m_truncated; ++i)
-        memcpy(work[i], data[i], bytes);
-#pragma omp parallel for
-    for (int i = m_truncated; i < (int)m; ++i)
-        memset(work[i], 0, bytes);
-
-    // I tried splitting up the first few layers into L3-cache sized blocks but
-    // found that it only provides about 5% performance boost, which is not
-    // worth the extra complexity.
-
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist = 1, dist4 = 4;
-    for (; dist4 <= m; dist = dist4, dist4 <<= 2)
-    {
-        // For each set of dist*4 elements:
-#pragma omp parallel for
-        for (int r = 0; r < (int)m_truncated; r += dist4)
-        {
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            // For each set of dist elements:
-            for (int i = r; i < (int)i_end; ++i)
-            {
-                IFFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-
-        // I tried alternating sweeps left->right and right->left to reduce cache misses.
-        // It provides about 1% performance boost when done for both FFT and IFFT, so it
-        // does not seem to be worth the extra complexity.
-    }
-
-    // If there is one layer left:
-    if (dist < m)
-    {
-        // Assuming that dist = m / 2
-        LEO_DEBUG_ASSERT(dist * 2 == m);
-
-        const ffe_t log_m = skewLUT[dist];
-
-        if (log_m == kModulus)
-            VectorXOR_Threads(bytes, dist, work + dist, work);
-        else
-        {
-#pragma omp parallel for
-            for (int i = 0; i < (int)dist; ++i)
-            {
-                IFFT_DIT2(
-                    work[i],
-                    work[i + dist],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-
-    // I tried unrolling this but it does not provide more than 5% performance
-    // improvement for 16-bit finite fields, so it's not worth the complexity.
-    if (xor_result)
-        VectorXOR_Threads(bytes, m, xor_result, work);
-}
-
-
-// Basic no-frills version for decoder
-static void IFFT_DIT_Decoder(
-    const uint64_t bytes,
-    const unsigned m_truncated,
-    void** work,
-    const unsigned m,
-    const ffe_t* skewLUT)
-{
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist = 1, dist4 = 4;
-    for (; dist4 <= m; dist = dist4, dist4 <<= 2)
-    {
-        // For each set of dist*4 elements:
-#pragma omp parallel for
-        for (int r = 0; r < (int)m_truncated; r += dist4)
-        {
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            // For each set of dist elements:
-            for (int i = r; i < (int)i_end; ++i)
-            {
-                IFFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-    }
-
-    // If there is one layer left:
-    if (dist < m)
-    {
-        // Assuming that dist = m / 2
-        LEO_DEBUG_ASSERT(dist * 2 == m);
-
-        const ffe_t log_m = skewLUT[dist];
-
-        if (log_m == kModulus)
-            VectorXOR_Threads(bytes, dist, work + dist, work);
-        else
-        {
-#pragma omp parallel for
-            for (int i = 0; i < (int)dist; ++i)
-            {
-                IFFT_DIT2(
-                    work[i],
-                    work[i + dist],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-}
-
-/*
-    Decimation in time FFT:
-
-    The decimation in time FFT algorithm allows us to unroll 2 layers at a time,
-    performing calculations on local registers and faster cache memory.
-
-    Each ^___^ below indicates a butterfly between the associated indices.
-
-    The fft_butterfly(x, y) operation:
-
-        if (log_m != kModulus)
-            x[] ^= exp(log(y[]) + log_m)
-        y[] ^= x[]
-
-    Layer 0:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_______________^
-          ^_______________^
-            ^_______________^
-              ^_______________^
-                ^_______________^
-                  ^_______________^
-                    ^_______________^
-                      ^_______________^
-
-    Layer 1:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 
-        ^_______^       ^_______^
-          ^_______^       ^_______^
-            ^_______^       ^_______^
-              ^_______^       ^_______^
-  
-    Layer 2:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^___^   ^___^   ^___^   ^___^
-          ^___^   ^___^   ^___^   ^___^
-
-    Layer 3:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^
-
-    DIT layer 0-1 operations, grouped 4 at a time:
-        {0-0', 4-4', 0-4, 0'-4'},
-        {1-1', 5-5', 1-5, 1'-5'},
-
-    DIT layer 1-2 operations, grouped 4 at a time:
-        {0-4, 2-6, 0-2, 4-6},
-        {1-5, 3-7, 1-3, 5-7},
-
-    DIT layer 2-3 operations, grouped 4 at a time:
-        {0-2, 1-3, 0-1, 2-3},
-        {4-6, 5-7, 4-5, 6-7},
-*/
-
-// 2-way butterfly
-static void FFT_DIT2(
-    void * LEO_RESTRICT x, void * LEO_RESTRICT y,
-    ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        LEO_MUL_TABLES_256(0, log_m);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<LEO_M256 *>(y);
-
-        do
-        {
-#define LEO_FFTB_256(x_ptr, y_ptr) { \
-            LEO_M256 x_lo = _mm256_loadu_si256(x_ptr); \
-            LEO_M256 x_hi = _mm256_loadu_si256(x_ptr + 1); \
-            LEO_M256 y_lo = _mm256_loadu_si256(y_ptr); \
-            LEO_M256 y_hi = _mm256_loadu_si256(y_ptr + 1); \
-            LEO_MULADD_256(x_lo, x_hi, y_lo, y_hi, 0); \
-            _mm256_storeu_si256(x_ptr, x_lo); \
-            _mm256_storeu_si256(x_ptr + 1, x_hi); \
-            y_lo = _mm256_xor_si256(y_lo, x_lo); \
-            y_hi = _mm256_xor_si256(y_hi, x_hi); \
-            _mm256_storeu_si256(y_ptr, y_lo); \
-            _mm256_storeu_si256(y_ptr + 1, y_hi); }
-
-            LEO_FFTB_256(x32, y32);
-            y32 += 2, x32 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        LEO_MUL_TABLES_128(0, log_m);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-        LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<LEO_M128 *>(y);
-
-        do
-        {
-#define LEO_FFTB_128(x_ptr, y_ptr) { \
-                LEO_M128 x_lo = _mm_loadu_si128(x_ptr); \
-                LEO_M128 x_hi = _mm_loadu_si128(x_ptr + 2); \
-                LEO_M128 y_lo = _mm_loadu_si128(y_ptr); \
-                LEO_M128 y_hi = _mm_loadu_si128(y_ptr + 2); \
-                LEO_MULADD_128(x_lo, x_hi, y_lo, y_hi, 0); \
-                _mm_storeu_si128(x_ptr, x_lo); \
-                _mm_storeu_si128(x_ptr + 2, x_hi); \
-                y_lo = _mm_xor_si128(y_lo, x_lo); \
-                y_hi = _mm_xor_si128(y_hi, x_hi); \
-                _mm_storeu_si128(y_ptr, y_lo); \
-                _mm_storeu_si128(y_ptr + 2, y_hi); }
-
-            LEO_FFTB_128(x16 + 1, y16 + 1);
-            LEO_FFTB_128(x16, y16);
-            x16 += 4, y16 += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    // Reference version:
-    RefMulAdd(x, y, log_m, bytes);
-    xor_mem(y, x, bytes);
-}
-
-
-// 4-way butterfly
-static void FFT_DIT4(
-    uint64_t bytes,
-    void** work,
-    unsigned dist,
-    const ffe_t log_m01,
-    const ffe_t log_m23,
-    const ffe_t log_m02)
-{
-#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-#if defined(LEO_TRY_AVX2)
-
-    if (CpuHasAVX2)
-    {
-        LEO_MUL_TABLES_256(01, log_m01);
-        LEO_MUL_TABLES_256(23, log_m23);
-        LEO_MUL_TABLES_256(02, log_m02);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M256 *>(work[0]);
-        LEO_M256 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M256 *>(work[dist]);
-        LEO_M256 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M256 *>(work[dist * 2]);
-        LEO_M256 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M256 *>(work[dist * 3]);
-
-        do
-        {
-            LEO_M256 work_reg_lo_0 = _mm256_loadu_si256(work0);
-            LEO_M256 work_reg_hi_0 = _mm256_loadu_si256(work0 + 1);
-            LEO_M256 work_reg_lo_1 = _mm256_loadu_si256(work1);
-            LEO_M256 work_reg_hi_1 = _mm256_loadu_si256(work1 + 1);
-            LEO_M256 work_reg_lo_2 = _mm256_loadu_si256(work2);
-            LEO_M256 work_reg_hi_2 = _mm256_loadu_si256(work2 + 1);
-            LEO_M256 work_reg_lo_3 = _mm256_loadu_si256(work3);
-            LEO_M256 work_reg_hi_3 = _mm256_loadu_si256(work3 + 1);
-
-            // First layer:
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_256(work_reg_lo_0, work_reg_hi_0, work_reg_lo_2, work_reg_hi_2, 02);
-                LEO_MULADD_256(work_reg_lo_1, work_reg_hi_1, work_reg_lo_3, work_reg_hi_3, 02);
-            }
-            work_reg_lo_2 = _mm256_xor_si256(work_reg_lo_0, work_reg_lo_2);
-            work_reg_hi_2 = _mm256_xor_si256(work_reg_hi_0, work_reg_hi_2);
-            work_reg_lo_3 = _mm256_xor_si256(work_reg_lo_1, work_reg_lo_3);
-            work_reg_hi_3 = _mm256_xor_si256(work_reg_hi_1, work_reg_hi_3);
-
-            // Second layer:
-            if (log_m01 != kModulus)
-                LEO_MULADD_256(work_reg_lo_0, work_reg_hi_0, work_reg_lo_1, work_reg_hi_1, 01);
-            work_reg_lo_1 = _mm256_xor_si256(work_reg_lo_0, work_reg_lo_1);
-            work_reg_hi_1 = _mm256_xor_si256(work_reg_hi_0, work_reg_hi_1);
-
-            _mm256_storeu_si256(work0, work_reg_lo_0);
-            _mm256_storeu_si256(work0 + 1, work_reg_hi_0);
-            _mm256_storeu_si256(work1, work_reg_lo_1);
-            _mm256_storeu_si256(work1 + 1, work_reg_hi_1);
-
-            if (log_m23 != kModulus)
-                LEO_MULADD_256(work_reg_lo_2, work_reg_hi_2, work_reg_lo_3, work_reg_hi_3, 23);
-            work_reg_lo_3 = _mm256_xor_si256(work_reg_lo_2, work_reg_lo_3);
-            work_reg_hi_3 = _mm256_xor_si256(work_reg_hi_2, work_reg_hi_3);
-
-            _mm256_storeu_si256(work2, work_reg_lo_2);
-            _mm256_storeu_si256(work2 + 1, work_reg_hi_2);
-            _mm256_storeu_si256(work3, work_reg_lo_3);
-            _mm256_storeu_si256(work3 + 1, work_reg_hi_3);
-
-            work0 += 2, work1 += 2, work2 += 2, work3 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        LEO_MUL_TABLES_128(01, log_m01);
-        LEO_MUL_TABLES_128(23, log_m23);
-        LEO_MUL_TABLES_128(02, log_m02);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M128 *>(work[0]);
-        LEO_M128 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M128 *>(work[dist]);
-        LEO_M128 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M128 *>(work[dist * 2]);
-        LEO_M128 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M128 *>(work[dist * 3]);
-
-        do
-        {
-            for (unsigned i = 0; i < 2; ++i)
-            {
-                LEO_M128 work_reg_lo_0 = _mm_loadu_si128(work0);
-                LEO_M128 work_reg_hi_0 = _mm_loadu_si128(work0 + 2);
-                LEO_M128 work_reg_lo_1 = _mm_loadu_si128(work1);
-                LEO_M128 work_reg_hi_1 = _mm_loadu_si128(work1 + 2);
-                LEO_M128 work_reg_lo_2 = _mm_loadu_si128(work2);
-                LEO_M128 work_reg_hi_2 = _mm_loadu_si128(work2 + 2);
-                LEO_M128 work_reg_lo_3 = _mm_loadu_si128(work3);
-                LEO_M128 work_reg_hi_3 = _mm_loadu_si128(work3 + 2);
-
-                // First layer:
-                if (log_m02 != kModulus)
-                {
-                    LEO_MULADD_128(work_reg_lo_0, work_reg_hi_0, work_reg_lo_2, work_reg_hi_2, 02);
-                    LEO_MULADD_128(work_reg_lo_1, work_reg_hi_1, work_reg_lo_3, work_reg_hi_3, 02);
-                }
-                work_reg_lo_2 = _mm_xor_si128(work_reg_lo_0, work_reg_lo_2);
-                work_reg_hi_2 = _mm_xor_si128(work_reg_hi_0, work_reg_hi_2);
-                work_reg_lo_3 = _mm_xor_si128(work_reg_lo_1, work_reg_lo_3);
-                work_reg_hi_3 = _mm_xor_si128(work_reg_hi_1, work_reg_hi_3);
-
-                // Second layer:
-                if (log_m01 != kModulus)
-                    LEO_MULADD_128(work_reg_lo_0, work_reg_hi_0, work_reg_lo_1, work_reg_hi_1, 01);
-                work_reg_lo_1 = _mm_xor_si128(work_reg_lo_0, work_reg_lo_1);
-                work_reg_hi_1 = _mm_xor_si128(work_reg_hi_0, work_reg_hi_1);
-
-                _mm_storeu_si128(work0, work_reg_lo_0);
-                _mm_storeu_si128(work0 + 2, work_reg_hi_0);
-                _mm_storeu_si128(work1, work_reg_lo_1);
-                _mm_storeu_si128(work1 + 2, work_reg_hi_1);
-
-                if (log_m23 != kModulus)
-                    LEO_MULADD_128(work_reg_lo_2, work_reg_hi_2, work_reg_lo_3, work_reg_hi_3, 23);
-                work_reg_lo_3 = _mm_xor_si128(work_reg_lo_2, work_reg_lo_3);
-                work_reg_hi_3 = _mm_xor_si128(work_reg_hi_2, work_reg_hi_3);
-
-                _mm_storeu_si128(work2, work_reg_lo_2);
-                _mm_storeu_si128(work2 + 2, work_reg_hi_2);
-                _mm_storeu_si128(work3, work_reg_lo_3);
-                _mm_storeu_si128(work3 + 2, work_reg_hi_3);
-
-                work0++, work1++, work2++, work3++;
-            }
-
-            work0 += 2, work1 += 2, work2 += 2, work3 += 2;
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-    // First layer:
-    if (log_m02 == kModulus)
-    {
-        xor_mem(work[dist * 2], work[0], bytes);
-        xor_mem(work[dist * 3], work[dist], bytes);
-    }
-    else
-    {
-        FFT_DIT2(work[0], work[dist * 2], log_m02, bytes);
-        FFT_DIT2(work[dist], work[dist * 3], log_m02, bytes);
-    }
-
-    // Second layer:
-    if (log_m01 == kModulus)
-        xor_mem(work[dist], work[0], bytes);
-    else
-        FFT_DIT2(work[0], work[dist], log_m01, bytes);
-
-    if (log_m23 == kModulus)
-        xor_mem(work[dist * 3], work[dist * 2], bytes);
-    else
-        FFT_DIT2(work[dist * 2], work[dist * 3], log_m23, bytes);
-}
-
-
-// In-place FFT for encoder and decoder
-static void FFT_DIT(
-    const uint64_t bytes,
-    void** work,
-    const unsigned m_truncated,
-    const unsigned m,
-    const ffe_t* skewLUT)
-{
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist4 = m, dist = m >> 2;
-    for (; dist != 0; dist4 = dist, dist >>= 2)
-    {
-        // For each set of dist*4 elements:
-#pragma omp parallel for
-        for (int r = 0; r < (int)m_truncated; r += dist4)
-        {
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            // For each set of dist elements:
-            for (int i = r; i < (int)i_end; ++i)
-            {
-                FFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-    }
-
-    // If there is one layer left:
-    if (dist4 == 2)
-    {
-#pragma omp parallel for
-        for (int r = 0; r < (int)m_truncated; r += 2)
-        {
-            const ffe_t log_m = skewLUT[r + 1];
-
-            if (log_m == kModulus)
-                xor_mem(work[r + 1], work[r], bytes);
-            else
-            {
-                FFT_DIT2(
-                    work[r],
-                    work[r + 1],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-}
-
-
-//------------------------------------------------------------------------------
-// Reed-Solomon Encode
-
-void ReedSolomonEncode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m,
-    const void* const * data,
-    void** work)
-{
-    // work <- IFFT(data, m, m)
-
-    const ffe_t* skewLUT = FFTSkew + m - 1;
-
-    IFFT_DIT_Encoder(
-        buffer_bytes,
-        data,
-        original_count < m ? original_count : m,
-        work,
-        nullptr, // No xor output
-        m,
-        skewLUT);
-
-    const unsigned last_count = original_count % m;
-    if (m >= original_count)
-        goto skip_body;
-
-    // For sets of m data pieces:
-    for (unsigned i = m; i + m <= original_count; i += m)
-    {
-        data += m;
-        skewLUT += m;
-
-        // work <- work xor IFFT(data + i, m, m + i)
-
-        IFFT_DIT_Encoder(
-            buffer_bytes,
-            data, // data source
-            m,
-            work + m, // temporary workspace
-            work, // xor destination
-            m,
-            skewLUT);
-    }
-
-    // Handle final partial set of m pieces:
-    if (last_count != 0)
-    {
-        data += m;
-        skewLUT += m;
-
-        // work <- work xor IFFT(data + i, m, m + i)
-
-        IFFT_DIT_Encoder(
-            buffer_bytes,
-            data, // data source
-            last_count,
-            work + m, // temporary workspace
-            work, // xor destination
-            m,
-            skewLUT);
-    }
-
-skip_body:
-
-    // work <- FFT(work, m, 0)
-    FFT_DIT(
-        buffer_bytes,
-        work,
-        recovery_count,
-        m,
-        FFTSkew - 1);
-}
-
-
-//------------------------------------------------------------------------------
-// ErrorBitfield
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-
-// Used in decoding to decide which final FFT operations to perform
-class ErrorBitfield
-{
-    static const unsigned kWordMips = 5;
-    static const unsigned kWords = kOrder / 64;
-    uint64_t Words[kWordMips][kWords] = {};
-
-    static const unsigned kBigMips = 6;
-    static const unsigned kBigWords = (kWords + 63) / 64;
-    uint64_t BigWords[kBigMips][kBigWords] = {};
-
-    static const unsigned kBiggestMips = 4;
-    uint64_t BiggestWords[kBiggestMips] = {};
-
-public:
-    LEO_FORCE_INLINE void Set(unsigned i)
-    {
-        Words[0][i / 64] |= (uint64_t)1 << (i % 64);
-    }
-
-    void Prepare();
-
-    LEO_FORCE_INLINE bool IsNeeded(unsigned mip_level, unsigned bit) const
-    {
-        if (mip_level >= 16)
-            return true;
-        if (mip_level >= 12)
-        {
-            bit /= 4096;
-            return 0 != (BiggestWords[mip_level - 12] & ((uint64_t)1 << bit));
-        }
-        if (mip_level >= 6)
-        {
-            bit /= 64;
-            return 0 != (BigWords[mip_level - 6][bit / 64] & ((uint64_t)1 << (bit % 64)));
-        }
-        return 0 != (Words[mip_level - 1][bit / 64] & ((uint64_t)1 << (bit % 64)));
-    }
-};
-
-static const uint64_t kHiMasks[5] = {
-    0xAAAAAAAAAAAAAAAAULL,
-    0xCCCCCCCCCCCCCCCCULL,
-    0xF0F0F0F0F0F0F0F0ULL,
-    0xFF00FF00FF00FF00ULL,
-    0xFFFF0000FFFF0000ULL,
-};
-
-void ErrorBitfield::Prepare()
-{
-    // First mip level is for final layer of FFT: pairs of data
-    for (unsigned i = 0; i < kWords; ++i)
-    {
-        uint64_t w_i = Words[0][i];
-        const uint64_t hi2lo0 = w_i | ((w_i & kHiMasks[0]) >> 1);
-        const uint64_t lo2hi0 = ((w_i & (kHiMasks[0] >> 1)) << 1);
-        Words[0][i] = w_i = hi2lo0 | lo2hi0;
-
-        for (unsigned j = 1, bits = 2; j < kWordMips; ++j, bits <<= 1)
-        {
-            const uint64_t hi2lo_j = w_i | ((w_i & kHiMasks[j]) >> bits);
-            const uint64_t lo2hi_j = ((w_i & (kHiMasks[j] >> bits)) << bits);
-            Words[j][i] = w_i = hi2lo_j | lo2hi_j;
-        }
-    }
-
-    for (unsigned i = 0; i < kBigWords; ++i)
-    {
-        uint64_t w_i = 0;
-        uint64_t bit = 1;
-        const uint64_t* src = &Words[kWordMips - 1][i * 64];
-        for (unsigned j = 0; j < 64; ++j, bit <<= 1)
-        {
-            const uint64_t w = src[j];
-            w_i |= (w | (w >> 32) | (w << 32)) & bit;
-        }
-        BigWords[0][i] = w_i;
-
-        for (unsigned j = 1, bits = 1; j < kBigMips; ++j, bits <<= 1)
-        {
-            const uint64_t hi2lo_j = w_i | ((w_i & kHiMasks[j - 1]) >> bits);
-            const uint64_t lo2hi_j = ((w_i & (kHiMasks[j - 1] >> bits)) << bits);
-            BigWords[j][i] = w_i = hi2lo_j | lo2hi_j;
-        }
-    }
-
-    uint64_t w_i = 0;
-    uint64_t bit = 1;
-    const uint64_t* src = &BigWords[kBigMips - 1][0];
-    for (unsigned j = 0; j < kBigWords; ++j, bit <<= 1)
-    {
-        const uint64_t w = src[j];
-        w_i |= (w | (w >> 32) | (w << 32)) & bit;
-    }
-    BiggestWords[0] = w_i;
-
-    for (unsigned j = 1, bits = 1; j < kBiggestMips; ++j, bits <<= 1)
-    {
-        const uint64_t hi2lo_j = w_i | ((w_i & kHiMasks[j - 1]) >> bits);
-        const uint64_t lo2hi_j = ((w_i & (kHiMasks[j - 1] >> bits)) << bits);
-        BiggestWords[j] = w_i = hi2lo_j | lo2hi_j;
-    }
-}
-
-
-static void FFT_DIT_ErrorBits(
-    const uint64_t bytes,
-    void** work,
-    const unsigned n_truncated,
-    const unsigned n,
-    const ffe_t* skewLUT,
-    const ErrorBitfield& error_bits)
-{
-    unsigned mip_level = LastNonzeroBit32(n);
-
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist4 = n, dist = n >> 2;
-    for (; dist != 0; dist4 = dist, dist >>= 2, mip_level -=2)
-    {
-        // For each set of dist*4 elements:
-#pragma omp parallel for
-        for (int r = 0; r < (int)n_truncated; r += dist4)
-        {
-            if (!error_bits.IsNeeded(mip_level, r))
-                continue;
-
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            // For each set of dist elements:
-#pragma omp parallel for
-            for (int i = r; i < (int)i_end; ++i)
-            {
-                FFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-    }
-
-    // If there is one layer left:
-    if (dist4 == 2)
-    {
-#pragma omp parallel for
-        for (int r = 0; r < (int)n_truncated; r += 2)
-        {
-            if (!error_bits.IsNeeded(mip_level, r))
-                continue;
-
-            const ffe_t log_m = skewLUT[r + 1];
-
-            if (log_m == kModulus)
-                xor_mem(work[r + 1], work[r], bytes);
-            else
-            {
-                FFT_DIT2(
-                    work[r],
-                    work[r + 1],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-}
-
-#endif // LEO_ERROR_BITFIELD_OPT
-
-
-//------------------------------------------------------------------------------
-// Reed-Solomon Decode
-
-void ReedSolomonDecode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m, // NextPow2(recovery_count)
-    unsigned n, // NextPow2(m + original_count) = work_count
-    const void* const * const original, // original_count entries
-    const void* const * const recovery, // recovery_count entries
-    void** work) // n entries
-{
-    // Fill in error locations
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-    ErrorBitfield error_bits;
-#endif // LEO_ERROR_BITFIELD_OPT
-
-    ffe_t error_locations[kOrder] = {};
-    for (unsigned i = 0; i < recovery_count; ++i)
-        if (!recovery[i])
-            error_locations[i] = 1;
-    for (unsigned i = recovery_count; i < m; ++i)
-        error_locations[i] = 1;
-    for (unsigned i = 0; i < original_count; ++i)
-    {
-        if (!original[i])
-        {
-            error_locations[i + m] = 1;
-#ifdef LEO_ERROR_BITFIELD_OPT
-            error_bits.Set(i + m);
-#endif // LEO_ERROR_BITFIELD_OPT
-        }
-    }
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-    error_bits.Prepare();
-#endif // LEO_ERROR_BITFIELD_OPT
-
-    // Evaluate error locator polynomial
-
-    FWHT(error_locations, kOrder, m + original_count);
-
-#pragma omp parallel for
-    for (int i = 0; i < (int)kOrder; ++i)
-        error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
-
-    FWHT(error_locations, kOrder, kOrder);
-
-    // work <- recovery data
-
-#pragma omp parallel for
-    for (int i = 0; i < (int)recovery_count; ++i)
-    {
-        if (recovery[i])
-            mul_mem(work[i], recovery[i], error_locations[i], buffer_bytes);
-        else
-            memset(work[i], 0, buffer_bytes);
-    }
-#pragma omp parallel for
-    for (int i = recovery_count; i < (int)m; ++i)
-        memset(work[i], 0, buffer_bytes);
-
-    // work <- original data
-
-#pragma omp parallel for
-    for (int i = 0; i < (int)original_count; ++i)
-    {
-        if (original[i])
-            mul_mem(work[m + i], original[i], error_locations[m + i], buffer_bytes);
-        else
-            memset(work[m + i], 0, buffer_bytes);
-    }
-#pragma omp parallel for
-    for (int i = m + original_count; i < (int)n; ++i)
-        memset(work[i], 0, buffer_bytes);
-
-    // work <- IFFT(work, n, 0)
-
-    IFFT_DIT_Decoder(
-        buffer_bytes,
-        m + original_count,
-        work,
-        n,
-        FFTSkew - 1);
-
-    // work <- FormalDerivative(work, n)
-
-    for (unsigned i = 1; i < n; ++i)
-    {
-        const unsigned width = ((i ^ (i - 1)) + 1) >> 1;
-
-        if (width < 8)
-        {
-            VectorXOR(
-                buffer_bytes,
-                width,
-                work + i - width,
-                work + i);
-        }
-        else
-        {
-            VectorXOR_Threads(
-                buffer_bytes,
-                width,
-                work + i - width,
-                work + i);
-        }
-    }
-
-    // work <- FFT(work, n, 0) truncated to m + original_count
-
-    const unsigned output_count = m + original_count;
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-    FFT_DIT_ErrorBits(buffer_bytes, work, output_count, n, FFTSkew - 1, error_bits);
-#else
-    FFT_DIT(buffer_bytes, work, output_count, n, FFTSkew - 1);
-#endif
-
-    // Reveal erasures
-
-    for (unsigned i = 0; i < original_count; ++i)
-        if (!original[i])
-            mul_mem(work[i], work[i + m], kModulus - error_locations[i + m], buffer_bytes);
-}
-
-
-//------------------------------------------------------------------------------
-// API
-
-static bool IsInitialized = false;
-
-bool Initialize()
-{
-    if (IsInitialized)
-        return true;
-
-    InitializeLogarithmTables();
-    InitializeMultiplyTables();
-    FFTInitialize();
-
-    IsInitialized = true;
-    return true;
-}
-
-
-}} // namespace leopard::ff16
-
-#endif // LEO_HAS_FF16
diff --git a/windows/src/leopard/LeopardFF16.h b/windows/src/leopard/LeopardFF16.h
deleted file mode 100644
index 6f8ba6c..0000000
--- a/windows/src/leopard/LeopardFF16.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#pragma once
-
-#include "LeopardCommon.h"
-
-#ifdef LEO_HAS_FF16
-
-/*
-    16-bit Finite Field Math
-
-    This finite field contains 65536 elements and so each element is one byte.
-    This library is designed for data that is a multiple of 64 bytes in size.
-
-    Algorithms are described in LeopardCommon.h
-*/
-
-namespace leopard { namespace ff16 {
-
-
-//------------------------------------------------------------------------------
-// Datatypes and Constants
-
-// Finite field element type
-typedef uint16_t ffe_t;
-
-// Number of bits per element
-static const unsigned kBits = 16;
-
-// Finite field order: Number of elements in the field
-static const unsigned kOrder = 65536;
-
-// Modulus for field operations
-static const ffe_t kModulus = 65535;
-
-// LFSR Polynomial that generates the field elements
-static const unsigned kPolynomial = 0x1002D;
-
-
-//------------------------------------------------------------------------------
-// API
-
-// Returns false if the self-test fails
-bool Initialize();
-
-void ReedSolomonEncode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m, // = NextPow2(recovery_count)
-    const void* const * const data,
-    void** work); // m * 2 elements
-
-void ReedSolomonDecode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m, // = NextPow2(recovery_count)
-    unsigned n, // = NextPow2(m + original_count)
-    const void* const * const original, // original_count elements
-    const void* const * const recovery, // recovery_count elements
-    void** work); // n elements
-
-
-}} // namespace leopard::ff16
-
-#endif // LEO_HAS_FF16
diff --git a/windows/src/leopard/LeopardFF8.cpp b/windows/src/leopard/LeopardFF8.cpp
deleted file mode 100644
index 2f941f8..0000000
--- a/windows/src/leopard/LeopardFF8.cpp
+++ /dev/null
@@ -1,1940 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "LeopardFF8.h"
-
-#ifdef LEO_HAS_FF8
-
-#include <string.h>
-
-#ifdef _MSC_VER
-    #pragma warning(disable: 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
-#endif
-
-namespace leopard { namespace ff8 {
-
-
-//------------------------------------------------------------------------------
-// Datatypes and Constants
-
-// Basis used for generating logarithm tables
-static const ffe_t kCantorBasis[kBits] = {
-    1, 214, 152, 146, 86, 200, 88, 230
-};
-
-// Using the Cantor basis {2} here enables us to avoid a lot of extra calculations
-// when applying the formal derivative in decoding.
-
-
-//------------------------------------------------------------------------------
-// Field Operations
-
-// z = x + y (mod kModulus)
-static inline ffe_t AddMod(const ffe_t a, const ffe_t b)
-{
-    const unsigned sum = static_cast<unsigned>(a) + b;
-
-    // Partial reduction step, allowing for kModulus to be returned
-    return static_cast<ffe_t>(sum + (sum >> kBits));
-}
-
-// z = x - y (mod kModulus)
-static inline ffe_t SubMod(const ffe_t a, const ffe_t b)
-{
-    const unsigned dif = static_cast<unsigned>(a) - b;
-
-    // Partial reduction step, allowing for kModulus to be returned
-    return static_cast<ffe_t>(dif + (dif >> kBits));
-}
-
-
-//------------------------------------------------------------------------------
-// Fast Walsh-Hadamard Transform (FWHT) (mod kModulus)
-
-// {a, b} = {a + b, a - b} (Mod Q)
-static LEO_FORCE_INLINE void FWHT_2(ffe_t& LEO_RESTRICT a, ffe_t& LEO_RESTRICT b)
-{
-    const ffe_t sum = AddMod(a, b);
-    const ffe_t dif = SubMod(a, b);
-    a = sum;
-    b = dif;
-}
-
-static LEO_FORCE_INLINE void FWHT_4(ffe_t* data, unsigned s)
-{
-    const unsigned s2 = s << 1;
-
-    ffe_t t0 = data[0];
-    ffe_t t1 = data[s];
-    ffe_t t2 = data[s2];
-    ffe_t t3 = data[s2 + s];
-
-    FWHT_2(t0, t1);
-    FWHT_2(t2, t3);
-    FWHT_2(t0, t2);
-    FWHT_2(t1, t3);
-
-    data[0] = t0;
-    data[s] = t1;
-    data[s2] = t2;
-    data[s2 + s] = t3;
-}
-
-// Decimation in time (DIT) Fast Walsh-Hadamard Transform
-// Unrolls pairs of layers to perform cross-layer operations in registers
-// m_truncated: Number of elements that are non-zero at the front of data
-static void FWHT(ffe_t* data, const unsigned m, const unsigned m_truncated)
-{
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist = 1, dist4 = 4;
-    for (; dist4 <= m; dist = dist4, dist4 <<= 2)
-    {
-        // For each set of dist*4 elements:
-        for (unsigned r = 0; r < m_truncated; r += dist4)
-        {
-            // For each set of dist elements:
-            for (unsigned i = r; i < r + dist; ++i)
-                FWHT_4(data + i, dist);
-        }
-    }
-
-    // If there is one layer left:
-    if (dist < m)
-        for (unsigned i = 0; i < dist; ++i)
-            FWHT_2(data[i], data[i + dist]);
-}
-
-
-//------------------------------------------------------------------------------
-// Logarithm Tables
-
-static ffe_t LogLUT[kOrder];
-static ffe_t ExpLUT[kOrder];
-
-
-// Returns a * Log(b)
-static ffe_t MultiplyLog(ffe_t a, ffe_t log_b)
-{
-    /*
-        Note that this operation is not a normal multiplication in a finite
-        field because the right operand is already a logarithm.  This is done
-        because it moves K table lookups from the Decode() method into the
-        initialization step that is less performance critical.  The LogWalsh[]
-        table below contains precalculated logarithms so it is easier to do
-        all the other multiplies in that form as well.
-    */
-    if (a == 0)
-        return 0;
-    return ExpLUT[AddMod(LogLUT[a], log_b)];
-}
-
-
-// Initialize LogLUT[], ExpLUT[]
-static void InitializeLogarithmTables()
-{
-    // LFSR table generation:
-
-    unsigned state = 1;
-    for (unsigned i = 0; i < kModulus; ++i)
-    {
-        ExpLUT[state] = static_cast<ffe_t>(i);
-        state <<= 1;
-        if (state >= kOrder)
-            state ^= kPolynomial;
-    }
-    ExpLUT[0] = kModulus;
-
-    // Conversion to Cantor basis {2}:
-
-    LogLUT[0] = 0;
-    for (unsigned i = 0; i < kBits; ++i)
-    {
-        const ffe_t basis = kCantorBasis[i];
-        const unsigned width = static_cast<unsigned>(1UL << i);
-
-        for (unsigned j = 0; j < width; ++j)
-            LogLUT[j + width] = LogLUT[j] ^ basis;
-    }
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        LogLUT[i] = ExpLUT[LogLUT[i]];
-
-    // Generate Exp table from Log table:
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        ExpLUT[LogLUT[i]] = i;
-
-    // Note: Handles modulus wrap around with LUT
-    ExpLUT[kModulus] = ExpLUT[0];
-}
-
-
-//------------------------------------------------------------------------------
-// Multiplies
-
-/*
-    The multiplication algorithm used follows the approach outlined in {4}.
-    Specifically section 6 outlines the algorithm used here for 8-bit fields.
-*/
-
-struct Multiply128LUT_t
-{
-    LEO_M128 Value[2];
-};
-
-static const Multiply128LUT_t* Multiply128LUT = nullptr;
-
-// 128-bit x_reg ^= y_reg * log_m
-#define LEO_MULADD_128(x_reg, y_reg, table_lo, table_hi) { \
-                LEO_M128 lo = _mm_and_si128(y_reg, clr_mask); \
-                lo = _mm_shuffle_epi8(table_lo, lo); \
-                LEO_M128 hi = _mm_srli_epi64(y_reg, 4); \
-                hi = _mm_and_si128(hi, clr_mask); \
-                hi = _mm_shuffle_epi8(table_hi, hi); \
-                x_reg = _mm_xor_si128(x_reg, _mm_xor_si128(lo, hi)); }
-
-#if defined(LEO_TRY_AVX2)
-
-struct Multiply256LUT_t
-{
-    LEO_M256 Value[2];
-};
-
-static const Multiply256LUT_t* Multiply256LUT = nullptr;
-
-// 256-bit x_reg ^= y_reg * log_m
-#define LEO_MULADD_256(x_reg, y_reg, table_lo, table_hi) { \
-                LEO_M256 lo = _mm256_and_si256(y_reg, clr_mask); \
-                lo = _mm256_shuffle_epi8(table_lo, lo); \
-                LEO_M256 hi = _mm256_srli_epi64(y_reg, 4); \
-                hi = _mm256_and_si256(hi, clr_mask); \
-                hi = _mm256_shuffle_epi8(table_hi, hi); \
-                x_reg = _mm256_xor_si256(x_reg, _mm256_xor_si256(lo, hi)); }
-
-#endif // LEO_TRY_AVX2
-
-// Stores the product of x * y at offset x + y * 256
-// Repeated accesses from the same y value are faster
-static const ffe_t* Multiply8LUT = nullptr;
-
-
-// Reference version of muladd: x[] ^= y[] * log_m
-static LEO_FORCE_INLINE void RefMulAdd(
-    void* LEO_RESTRICT x,
-    const void* LEO_RESTRICT y,
-    ffe_t log_m,
-    uint64_t bytes)
-{
-    const ffe_t* LEO_RESTRICT lut = Multiply8LUT + (unsigned)log_m * 256;
-    const ffe_t * LEO_RESTRICT y1 = reinterpret_cast<const ffe_t *>(y);
-
-#ifdef LEO_TARGET_MOBILE
-    ffe_t * LEO_RESTRICT x1 = reinterpret_cast<ffe_t *>(x);
-
-    do
-    {
-        for (unsigned j = 0; j < 64; ++j)
-            x1[j] ^= lut[y1[j]];
-
-        x1 += 64, y1 += 64;
-        bytes -= 64;
-} while (bytes > 0);
-#else
-    uint64_t * LEO_RESTRICT x8 = reinterpret_cast<uint64_t *>(x);
-
-    do
-    {
-        for (unsigned j = 0; j < 8; ++j)
-        {
-            uint64_t x_0 = x8[j];
-            x_0 ^= (uint64_t)lut[y1[0]];
-            x_0 ^= (uint64_t)lut[y1[1]] << 8;
-            x_0 ^= (uint64_t)lut[y1[2]] << 16;
-            x_0 ^= (uint64_t)lut[y1[3]] << 24;
-            x_0 ^= (uint64_t)lut[y1[4]] << 32;
-            x_0 ^= (uint64_t)lut[y1[5]] << 40;
-            x_0 ^= (uint64_t)lut[y1[6]] << 48;
-            x_0 ^= (uint64_t)lut[y1[7]] << 56;
-            x8[j] = x_0;
-            y1 += 8;
-        }
-
-        x8 += 8;
-        bytes -= 64;
-    } while (bytes > 0);
-#endif
-}
-
-// Reference version of mul: x[] = y[] * log_m
-static LEO_FORCE_INLINE void RefMul(
-    void* LEO_RESTRICT x,
-    const void* LEO_RESTRICT y,
-    ffe_t log_m,
-    uint64_t bytes)
-{
-    const ffe_t* LEO_RESTRICT lut = Multiply8LUT + (unsigned)log_m * 256;
-    const ffe_t * LEO_RESTRICT y1 = reinterpret_cast<const ffe_t *>(y);
-
-#ifdef LEO_TARGET_MOBILE
-    ffe_t * LEO_RESTRICT x1 = reinterpret_cast<ffe_t *>(x);
-
-    do
-    {
-        for (unsigned j = 0; j < 64; ++j)
-            x1[j] ^= lut[y1[j]];
-
-        x1 += 64, y1 += 64;
-        bytes -= 64;
-    } while (bytes > 0);
-#else
-    uint64_t * LEO_RESTRICT x8 = reinterpret_cast<uint64_t *>(x);
-
-    do
-    {
-        for (unsigned j = 0; j < 8; ++j)
-        {
-            uint64_t x_0 = (uint64_t)lut[y1[0]];
-            x_0 ^= (uint64_t)lut[y1[1]] << 8;
-            x_0 ^= (uint64_t)lut[y1[2]] << 16;
-            x_0 ^= (uint64_t)lut[y1[3]] << 24;
-            x_0 ^= (uint64_t)lut[y1[4]] << 32;
-            x_0 ^= (uint64_t)lut[y1[5]] << 40;
-            x_0 ^= (uint64_t)lut[y1[6]] << 48;
-            x_0 ^= (uint64_t)lut[y1[7]] << 56;
-            x8[j] = x_0;
-            y1 += 8;
-        }
-
-        x8 += 8;
-        bytes -= 64;
-    } while (bytes > 0);
-#endif
-}
-
-static void InitializeMultiplyTables()
-{
-    // If we cannot use the PSHUFB instruction, generate Multiply8LUT:
-    if (!CpuHasSSSE3)
-    {
-        Multiply8LUT = new ffe_t[256 * 256];
-
-        // For each left-multiplicand:
-        for (unsigned x = 0; x < 256; ++x)
-        {
-            ffe_t* lut = (ffe_t*)Multiply8LUT + x;
-
-            if (x == 0)
-            {
-                for (unsigned log_y = 0; log_y < 256; ++log_y, lut += 256)
-                    *lut = 0;
-            }
-            else
-            {
-                const ffe_t log_x = LogLUT[x];
-
-                for (unsigned log_y = 0; log_y < 256; ++log_y, lut += 256)
-                {
-                    const ffe_t prod = ExpLUT[AddMod(log_x, log_y)];
-                    *lut = prod;
-                }
-            }
-        }
-
-        return;
-    }
-
-#ifdef LEO_TRY_AVX2
-    if (CpuHasAVX2)
-        Multiply256LUT = reinterpret_cast<const Multiply256LUT_t*>(SIMDSafeAllocate(sizeof(Multiply256LUT_t) * kOrder));
-    else
-#endif // LEO_TRY_AVX2
-        Multiply128LUT = reinterpret_cast<const Multiply128LUT_t*>(SIMDSafeAllocate(sizeof(Multiply128LUT_t) * kOrder));
-
-    // For each value we could multiply by:
-    for (unsigned log_m = 0; log_m < kOrder; ++log_m)
-    {
-        // For each 4 bits of the finite field width in bits:
-        for (unsigned i = 0, shift = 0; i < 2; ++i, shift += 4)
-        {
-            // Construct 16 entry LUT for PSHUFB
-            uint8_t lut[16];
-            for (ffe_t x = 0; x < 16; ++x)
-                lut[x] = MultiplyLog(x << shift, static_cast<ffe_t>(log_m));
-
-            const LEO_M128 *v_ptr = reinterpret_cast<const LEO_M128 *>(&lut[0]);
-            const LEO_M128 value = _mm_loadu_si128(v_ptr);
-
-            // Store in 128-bit wide table
-#if defined(LEO_TRY_AVX2)
-            if (!CpuHasAVX2)
-#endif // LEO_TRY_AVX2
-                _mm_storeu_si128((LEO_M128*)&Multiply128LUT[log_m].Value[i], value);
-
-            // Store in 256-bit wide table
-#if defined(LEO_TRY_AVX2)
-            if (CpuHasAVX2)
-            {
-                _mm256_storeu_si256((LEO_M256*)&Multiply256LUT[log_m].Value[i],
-                    _mm256_broadcastsi128_si256(value));
-            }
-#endif // LEO_TRY_AVX2
-        }
-    }
-}
-
-
-static void mul_mem(
-    void * LEO_RESTRICT x, const void * LEO_RESTRICT y,
-    ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 table_lo_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[0]);
-        const LEO_M256 table_hi_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        const LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<const LEO_M256 *>(y);
-
-        do
-        {
-#define LEO_MUL_256(x_ptr, y_ptr) { \
-            LEO_M256 data = _mm256_loadu_si256(y_ptr); \
-            LEO_M256 lo = _mm256_and_si256(data, clr_mask); \
-            lo = _mm256_shuffle_epi8(table_lo_y, lo); \
-            LEO_M256 hi = _mm256_srli_epi64(data, 4); \
-            hi = _mm256_and_si256(hi, clr_mask); \
-            hi = _mm256_shuffle_epi8(table_hi_y, hi); \
-            _mm256_storeu_si256(x_ptr, _mm256_xor_si256(lo, hi)); }
-
-            LEO_MUL_256(x32 + 1, y32 + 1);
-            LEO_MUL_256(x32, y32);
-            y32 += 2, x32 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 table_lo_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[0]);
-        const LEO_M128 table_hi_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-        const LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<const LEO_M128 *>(y);
-
-        do
-        {
-#define LEO_MUL_128(x_ptr, y_ptr) { \
-                LEO_M128 data = _mm_loadu_si128(y_ptr); \
-                LEO_M128 lo = _mm_and_si128(data, clr_mask); \
-                lo = _mm_shuffle_epi8(table_lo_y, lo); \
-                LEO_M128 hi = _mm_srli_epi64(data, 4); \
-                hi = _mm_and_si128(hi, clr_mask); \
-                hi = _mm_shuffle_epi8(table_hi_y, hi); \
-                _mm_storeu_si128(x_ptr, _mm_xor_si128(lo, hi)); }
-
-            LEO_MUL_128(x16 + 3, y16 + 3);
-            LEO_MUL_128(x16 + 2, y16 + 2);
-            LEO_MUL_128(x16 + 1, y16 + 1);
-            LEO_MUL_128(x16, y16);
-            x16 += 4, y16 += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    // Reference version:
-    RefMul(x, y, log_m, bytes);
-}
-
-
-//------------------------------------------------------------------------------
-// FFT
-
-// Twisted factors used in FFT
-static ffe_t FFTSkew[kModulus];
-
-// Factors used in the evaluation of the error locator polynomial
-static ffe_t LogWalsh[kOrder];
-
-
-static void FFTInitialize()
-{
-    ffe_t temp[kBits - 1];
-
-    // Generate FFT skew vector {1}:
-
-    for (unsigned i = 1; i < kBits; ++i)
-        temp[i - 1] = static_cast<ffe_t>(1UL << i);
-
-    for (unsigned m = 0; m < (kBits - 1); ++m)
-    {
-        const unsigned step = 1UL << (m + 1);
-
-        FFTSkew[(1UL << m) - 1] = 0;
-
-        for (unsigned i = m; i < (kBits - 1); ++i)
-        {
-            const unsigned s = (1UL << (i + 1));
-
-            for (unsigned j = (1UL << m) - 1; j < s; j += step)
-                FFTSkew[j + s] = FFTSkew[j] ^ temp[i];
-        }
-
-        temp[m] = kModulus - LogLUT[MultiplyLog(temp[m], LogLUT[temp[m] ^ 1])];
-
-        for (unsigned i = m + 1; i < (kBits - 1); ++i)
-        {
-            const ffe_t sum = AddMod(LogLUT[temp[i] ^ 1], temp[m]);
-            temp[i] = MultiplyLog(temp[i], sum);
-        }
-    }
-
-    for (unsigned i = 0; i < kModulus; ++i)
-        FFTSkew[i] = LogLUT[FFTSkew[i]];
-
-    // Precalculate FWHT(Log[i]):
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        LogWalsh[i] = LogLUT[i];
-    LogWalsh[0] = 0;
-
-    FWHT(LogWalsh, kOrder, kOrder);
-}
-
-/*
-    Decimation in time IFFT:
-
-    The decimation in time IFFT algorithm allows us to unroll 2 layers at a time,
-    performing calculations on local registers and faster cache memory.
-
-    Each ^___^ below indicates a butterfly between the associated indices.
-
-    The ifft_butterfly(x, y) operation:
-
-        y[] ^= x[]
-        if (log_m != kModulus)
-            x[] ^= exp(log(y[]) + log_m)
-
-    Layer 0:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^
-
-    Layer 1:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^___^   ^___^   ^___^   ^___^
-          ^___^   ^___^   ^___^   ^___^
-  
-    Layer 2:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 
-        ^_______^       ^_______^
-          ^_______^       ^_______^
-            ^_______^       ^_______^
-              ^_______^       ^_______^
-
-    Layer 3:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_______________^
-          ^_______________^
-            ^_______________^
-              ^_______________^
-                ^_______________^
-                  ^_______________^
-                    ^_______________^
-                      ^_______________^
-
-    DIT layer 0-1 operations, grouped 4 at a time:
-        {0-1, 2-3, 0-2, 1-3},
-        {4-5, 6-7, 4-6, 5-7},
-
-    DIT layer 1-2 operations, grouped 4 at a time:
-        {0-2, 4-6, 0-4, 2-6},
-        {1-3, 5-7, 1-5, 3-7},
-
-    DIT layer 2-3 operations, grouped 4 at a time:
-        {0-4, 0'-4', 0-0', 4-4'},
-        {1-5, 1'-5', 1-1', 5-5'},
-*/
-
-// 2-way butterfly
-static void IFFT_DIT2(
-    void * LEO_RESTRICT x, void * LEO_RESTRICT y,
-    ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 table_lo_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[0]);
-        const LEO_M256 table_hi_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<LEO_M256 *>(y);
-
-        do
-        {
-#define LEO_IFFTB_256(x_ptr, y_ptr) { \
-            LEO_M256 x_data = _mm256_loadu_si256(x_ptr); \
-            LEO_M256 y_data = _mm256_loadu_si256(y_ptr); \
-            y_data = _mm256_xor_si256(y_data, x_data); \
-            _mm256_storeu_si256(y_ptr, y_data); \
-            LEO_MULADD_256(x_data, y_data, table_lo_y, table_hi_y); \
-            _mm256_storeu_si256(x_ptr, x_data); }
-
-            LEO_IFFTB_256(x32 + 1, y32 + 1);
-            LEO_IFFTB_256(x32, y32);
-            y32 += 2, x32 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 table_lo_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[0]);
-        const LEO_M128 table_hi_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-        LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<LEO_M128 *>(y);
-
-        do
-        {
-#define LEO_IFFTB_128(x_ptr, y_ptr) { \
-            LEO_M128 x_data = _mm_loadu_si128(x_ptr); \
-            LEO_M128 y_data = _mm_loadu_si128(y_ptr); \
-            y_data = _mm_xor_si128(y_data, x_data); \
-            _mm_storeu_si128(y_ptr, y_data); \
-            LEO_MULADD_128(x_data, y_data, table_lo_y, table_hi_y); \
-            _mm_storeu_si128(x_ptr, x_data); }
-
-            LEO_IFFTB_128(x16 + 3, y16 + 3);
-            LEO_IFFTB_128(x16 + 2, y16 + 2);
-            LEO_IFFTB_128(x16 + 1, y16 + 1);
-            LEO_IFFTB_128(x16, y16);
-            x16 += 4, y16 += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    // Reference version:
-    xor_mem(y, x, bytes);
-    RefMulAdd(x, y, log_m, bytes);
-}
-
-
-// 4-way butterfly
-static void IFFT_DIT4(
-    uint64_t bytes,
-    void** work,
-    unsigned dist,
-    const ffe_t log_m01,
-    const ffe_t log_m23,
-    const ffe_t log_m02)
-{
-#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-#if defined(LEO_TRY_AVX2)
-
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
-        const LEO_M256 t01_hi = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[1]);
-        const LEO_M256 t23_lo = _mm256_loadu_si256(&Multiply256LUT[log_m23].Value[0]);
-        const LEO_M256 t23_hi = _mm256_loadu_si256(&Multiply256LUT[log_m23].Value[1]);
-        const LEO_M256 t02_lo = _mm256_loadu_si256(&Multiply256LUT[log_m02].Value[0]);
-        const LEO_M256 t02_hi = _mm256_loadu_si256(&Multiply256LUT[log_m02].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M256 *>(work[0]);
-        LEO_M256 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M256 *>(work[dist]);
-        LEO_M256 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M256 *>(work[dist * 2]);
-        LEO_M256 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M256 *>(work[dist * 3]);
-
-        do
-        {
-            // First layer:
-            LEO_M256 work0_reg = _mm256_loadu_si256(work0);
-            LEO_M256 work1_reg = _mm256_loadu_si256(work1);
-
-            work1_reg = _mm256_xor_si256(work0_reg, work1_reg);
-            if (log_m01 != kModulus)
-                LEO_MULADD_256(work0_reg, work1_reg, t01_lo, t01_hi);
-
-            LEO_M256 work2_reg = _mm256_loadu_si256(work2);
-            LEO_M256 work3_reg = _mm256_loadu_si256(work3);
-
-            work3_reg = _mm256_xor_si256(work2_reg, work3_reg);
-            if (log_m23 != kModulus)
-                LEO_MULADD_256(work2_reg, work3_reg, t23_lo, t23_hi);
-
-            // Second layer:
-            work2_reg = _mm256_xor_si256(work0_reg, work2_reg);
-            work3_reg = _mm256_xor_si256(work1_reg, work3_reg);
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_256(work0_reg, work2_reg, t02_lo, t02_hi);
-                LEO_MULADD_256(work1_reg, work3_reg, t02_lo, t02_hi);
-            }
-
-            _mm256_storeu_si256(work0, work0_reg);
-            _mm256_storeu_si256(work1, work1_reg);
-            _mm256_storeu_si256(work2, work2_reg);
-            _mm256_storeu_si256(work3, work3_reg);
-            work0++, work1++, work2++, work3++;
-
-            bytes -= 32;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 t01_lo = _mm_loadu_si128(&Multiply128LUT[log_m01].Value[0]);
-        const LEO_M128 t01_hi = _mm_loadu_si128(&Multiply128LUT[log_m01].Value[1]);
-        const LEO_M128 t23_lo = _mm_loadu_si128(&Multiply128LUT[log_m23].Value[0]);
-        const LEO_M128 t23_hi = _mm_loadu_si128(&Multiply128LUT[log_m23].Value[1]);
-        const LEO_M128 t02_lo = _mm_loadu_si128(&Multiply128LUT[log_m02].Value[0]);
-        const LEO_M128 t02_hi = _mm_loadu_si128(&Multiply128LUT[log_m02].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M128 *>(work[0]);
-        LEO_M128 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M128 *>(work[dist]);
-        LEO_M128 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M128 *>(work[dist * 2]);
-        LEO_M128 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M128 *>(work[dist * 3]);
-
-        do
-        {
-            // First layer:
-            LEO_M128 work0_reg = _mm_loadu_si128(work0);
-            LEO_M128 work1_reg = _mm_loadu_si128(work1);
-
-            work1_reg = _mm_xor_si128(work0_reg, work1_reg);
-            if (log_m01 != kModulus)
-                LEO_MULADD_128(work0_reg, work1_reg, t01_lo, t01_hi);
-
-            LEO_M128 work2_reg = _mm_loadu_si128(work2);
-            LEO_M128 work3_reg = _mm_loadu_si128(work3);
-
-            work3_reg = _mm_xor_si128(work2_reg, work3_reg);
-            if (log_m23 != kModulus)
-                LEO_MULADD_128(work2_reg, work3_reg, t23_lo, t23_hi);
-
-            // Second layer:
-            work2_reg = _mm_xor_si128(work0_reg, work2_reg);
-            work3_reg = _mm_xor_si128(work1_reg, work3_reg);
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_128(work0_reg, work2_reg, t02_lo, t02_hi);
-                LEO_MULADD_128(work1_reg, work3_reg, t02_lo, t02_hi);
-            }
-
-            _mm_storeu_si128(work0, work0_reg);
-            _mm_storeu_si128(work1, work1_reg);
-            _mm_storeu_si128(work2, work2_reg);
-            _mm_storeu_si128(work3, work3_reg);
-            work0++, work1++, work2++, work3++;
-
-            bytes -= 16;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-    // First layer:
-    if (log_m01 == kModulus)
-        xor_mem(work[dist], work[0], bytes);
-    else
-        IFFT_DIT2(work[0], work[dist], log_m01, bytes);
-
-    if (log_m23 == kModulus)
-        xor_mem(work[dist * 3], work[dist * 2], bytes);
-    else
-        IFFT_DIT2(work[dist * 2], work[dist * 3], log_m23, bytes);
-
-    // Second layer:
-    if (log_m02 == kModulus)
-    {
-        xor_mem(work[dist * 2], work[0], bytes);
-        xor_mem(work[dist * 3], work[dist], bytes);
-    }
-    else
-    {
-        IFFT_DIT2(work[0], work[dist * 2], log_m02, bytes);
-        IFFT_DIT2(work[dist], work[dist * 3], log_m02, bytes);
-    }
-}
-
-
-// {x_out, y_out} ^= IFFT_DIT2( {x_in, y_in} )
-static void IFFT_DIT2_xor(
-    void * LEO_RESTRICT x_in, void * LEO_RESTRICT y_in,
-    void * LEO_RESTRICT x_out, void * LEO_RESTRICT y_out,
-    const ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 table_lo_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[0]);
-        const LEO_M256 table_hi_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        const LEO_M256 * LEO_RESTRICT x32_in = reinterpret_cast<const LEO_M256 *>(x_in);
-        const LEO_M256 * LEO_RESTRICT y32_in = reinterpret_cast<const LEO_M256 *>(y_in);
-        LEO_M256 * LEO_RESTRICT x32_out = reinterpret_cast<LEO_M256 *>(x_out);
-        LEO_M256 * LEO_RESTRICT y32_out = reinterpret_cast<LEO_M256 *>(y_out);
-
-        do
-        {
-#define LEO_IFFTB_256_XOR(x_ptr_in, y_ptr_in, x_ptr_out, y_ptr_out) { \
-            LEO_M256 x_data_out = _mm256_loadu_si256(x_ptr_out); \
-            LEO_M256 y_data_out = _mm256_loadu_si256(y_ptr_out); \
-            LEO_M256 x_data_in = _mm256_loadu_si256(x_ptr_in); \
-            LEO_M256 y_data_in = _mm256_loadu_si256(y_ptr_in); \
-            y_data_in = _mm256_xor_si256(y_data_in, x_data_in); \
-            y_data_out = _mm256_xor_si256(y_data_out, y_data_in); \
-            _mm256_storeu_si256(y_ptr_out, y_data_out); \
-            LEO_MULADD_256(x_data_in, y_data_in, table_lo_y, table_hi_y); \
-            x_data_out = _mm256_xor_si256(x_data_out, x_data_in); \
-            _mm256_storeu_si256(x_ptr_out, x_data_out); }
-
-            LEO_IFFTB_256_XOR(x32_in + 1, y32_in + 1, x32_out + 1, y32_out + 1);
-            LEO_IFFTB_256_XOR(x32_in, y32_in, x32_out, y32_out);
-            y32_in += 2, x32_in += 2, y32_out += 2, x32_out += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 table_lo_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[0]);
-        const LEO_M128 table_hi_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        const LEO_M128 * LEO_RESTRICT x16_in = reinterpret_cast<const LEO_M128 *>(x_in);
-        const LEO_M128 * LEO_RESTRICT y16_in = reinterpret_cast<const LEO_M128 *>(y_in);
-        LEO_M128 * LEO_RESTRICT x16_out = reinterpret_cast<LEO_M128 *>(x_out);
-        LEO_M128 * LEO_RESTRICT y16_out = reinterpret_cast<LEO_M128 *>(y_out);
-
-        do
-        {
-#define LEO_IFFTB_128_XOR(x_ptr_in, y_ptr_in, x_ptr_out, y_ptr_out) { \
-            LEO_M128 x_data_out = _mm_loadu_si128(x_ptr_out); \
-            LEO_M128 y_data_out = _mm_loadu_si128(y_ptr_out); \
-            LEO_M128 x_data_in = _mm_loadu_si128(x_ptr_in); \
-            LEO_M128 y_data_in = _mm_loadu_si128(y_ptr_in); \
-            y_data_in = _mm_xor_si128(y_data_in, x_data_in); \
-            y_data_out = _mm_xor_si128(y_data_out, y_data_in); \
-            _mm_storeu_si128(y_ptr_out, y_data_out); \
-            LEO_MULADD_128(x_data_in, y_data_in, table_lo_y, table_hi_y); \
-            x_data_out = _mm_xor_si128(x_data_out, x_data_in); \
-            _mm_storeu_si128(x_ptr_out, x_data_out); }
-
-            LEO_IFFTB_128_XOR(x16_in + 3, y16_in + 3, x16_out + 3, y16_out + 3);
-            LEO_IFFTB_128_XOR(x16_in + 2, y16_in + 2, x16_out + 2, y16_out + 2);
-            LEO_IFFTB_128_XOR(x16_in + 1, y16_in + 1, x16_out + 1, y16_out + 1);
-            LEO_IFFTB_128_XOR(x16_in, y16_in, x16_out, y16_out);
-            y16_in += 4, x16_in += 4, y16_out += 4, x16_out += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    // Reference version:
-    xor_mem(y_in, x_in, bytes);
-    RefMulAdd(x_in, y_in, log_m, bytes);
-    xor_mem(y_out, y_in, bytes);
-    xor_mem(x_out, x_in, bytes);
-}
-
-
-// xor_result ^= IFFT_DIT4(work)
-static void IFFT_DIT4_xor(
-    uint64_t bytes,
-    void** work_in,
-    void** xor_out,
-    unsigned dist,
-    const ffe_t log_m01,
-    const ffe_t log_m23,
-    const ffe_t log_m02)
-{
-#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-#if defined(LEO_TRY_AVX2)
-
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
-        const LEO_M256 t01_hi = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[1]);
-        const LEO_M256 t23_lo = _mm256_loadu_si256(&Multiply256LUT[log_m23].Value[0]);
-        const LEO_M256 t23_hi = _mm256_loadu_si256(&Multiply256LUT[log_m23].Value[1]);
-        const LEO_M256 t02_lo = _mm256_loadu_si256(&Multiply256LUT[log_m02].Value[0]);
-        const LEO_M256 t02_hi = _mm256_loadu_si256(&Multiply256LUT[log_m02].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        const LEO_M256 * LEO_RESTRICT work0 = reinterpret_cast<const LEO_M256 *>(work_in[0]);
-        const LEO_M256 * LEO_RESTRICT work1 = reinterpret_cast<const LEO_M256 *>(work_in[dist]);
-        const LEO_M256 * LEO_RESTRICT work2 = reinterpret_cast<const LEO_M256 *>(work_in[dist * 2]);
-        const LEO_M256 * LEO_RESTRICT work3 = reinterpret_cast<const LEO_M256 *>(work_in[dist * 3]);
-        LEO_M256 * LEO_RESTRICT xor0 = reinterpret_cast<LEO_M256 *>(xor_out[0]);
-        LEO_M256 * LEO_RESTRICT xor1 = reinterpret_cast<LEO_M256 *>(xor_out[dist]);
-        LEO_M256 * LEO_RESTRICT xor2 = reinterpret_cast<LEO_M256 *>(xor_out[dist * 2]);
-        LEO_M256 * LEO_RESTRICT xor3 = reinterpret_cast<LEO_M256 *>(xor_out[dist * 3]);
-
-        do
-        {
-            // First layer:
-            LEO_M256 work0_reg = _mm256_loadu_si256(work0);
-            LEO_M256 work1_reg = _mm256_loadu_si256(work1);
-            work0++, work1++;
-
-            work1_reg = _mm256_xor_si256(work0_reg, work1_reg);
-            if (log_m01 != kModulus)
-                LEO_MULADD_256(work0_reg, work1_reg, t01_lo, t01_hi);
-
-            LEO_M256 work2_reg = _mm256_loadu_si256(work2);
-            LEO_M256 work3_reg = _mm256_loadu_si256(work3);
-            work2++, work3++;
-
-            work3_reg = _mm256_xor_si256(work2_reg, work3_reg);
-            if (log_m23 != kModulus)
-                LEO_MULADD_256(work2_reg, work3_reg, t23_lo, t23_hi);
-
-            // Second layer:
-            work2_reg = _mm256_xor_si256(work0_reg, work2_reg);
-            work3_reg = _mm256_xor_si256(work1_reg, work3_reg);
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_256(work0_reg, work2_reg, t02_lo, t02_hi);
-                LEO_MULADD_256(work1_reg, work3_reg, t02_lo, t02_hi);
-            }
-
-            work0_reg = _mm256_xor_si256(work0_reg, _mm256_loadu_si256(xor0));
-            work1_reg = _mm256_xor_si256(work1_reg, _mm256_loadu_si256(xor1));
-            work2_reg = _mm256_xor_si256(work2_reg, _mm256_loadu_si256(xor2));
-            work3_reg = _mm256_xor_si256(work3_reg, _mm256_loadu_si256(xor3));
-
-            _mm256_storeu_si256(xor0, work0_reg);
-            _mm256_storeu_si256(xor1, work1_reg);
-            _mm256_storeu_si256(xor2, work2_reg);
-            _mm256_storeu_si256(xor3, work3_reg);
-            xor0++, xor1++, xor2++, xor3++;
-
-            bytes -= 32;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 t01_lo = _mm_loadu_si128(&Multiply128LUT[log_m01].Value[0]);
-        const LEO_M128 t01_hi = _mm_loadu_si128(&Multiply128LUT[log_m01].Value[1]);
-        const LEO_M128 t23_lo = _mm_loadu_si128(&Multiply128LUT[log_m23].Value[0]);
-        const LEO_M128 t23_hi = _mm_loadu_si128(&Multiply128LUT[log_m23].Value[1]);
-        const LEO_M128 t02_lo = _mm_loadu_si128(&Multiply128LUT[log_m02].Value[0]);
-        const LEO_M128 t02_hi = _mm_loadu_si128(&Multiply128LUT[log_m02].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        const LEO_M128 * LEO_RESTRICT work0 = reinterpret_cast<const LEO_M128 *>(work_in[0]);
-        const LEO_M128 * LEO_RESTRICT work1 = reinterpret_cast<const LEO_M128 *>(work_in[dist]);
-        const LEO_M128 * LEO_RESTRICT work2 = reinterpret_cast<const LEO_M128 *>(work_in[dist * 2]);
-        const LEO_M128 * LEO_RESTRICT work3 = reinterpret_cast<const LEO_M128 *>(work_in[dist * 3]);
-        LEO_M128 * LEO_RESTRICT xor0 = reinterpret_cast<LEO_M128 *>(xor_out[0]);
-        LEO_M128 * LEO_RESTRICT xor1 = reinterpret_cast<LEO_M128 *>(xor_out[dist]);
-        LEO_M128 * LEO_RESTRICT xor2 = reinterpret_cast<LEO_M128 *>(xor_out[dist * 2]);
-        LEO_M128 * LEO_RESTRICT xor3 = reinterpret_cast<LEO_M128 *>(xor_out[dist * 3]);
-
-        do
-        {
-            // First layer:
-            LEO_M128 work0_reg = _mm_loadu_si128(work0);
-            LEO_M128 work1_reg = _mm_loadu_si128(work1);
-            work0++, work1++;
-
-            work1_reg = _mm_xor_si128(work0_reg, work1_reg);
-            if (log_m01 != kModulus)
-                LEO_MULADD_128(work0_reg, work1_reg, t01_lo, t01_hi);
-
-            LEO_M128 work2_reg = _mm_loadu_si128(work2);
-            LEO_M128 work3_reg = _mm_loadu_si128(work3);
-            work2++, work3++;
-
-            work3_reg = _mm_xor_si128(work2_reg, work3_reg);
-            if (log_m23 != kModulus)
-                LEO_MULADD_128(work2_reg, work3_reg, t23_lo, t23_hi);
-
-            // Second layer:
-            work2_reg = _mm_xor_si128(work0_reg, work2_reg);
-            work3_reg = _mm_xor_si128(work1_reg, work3_reg);
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_128(work0_reg, work2_reg, t02_lo, t02_hi);
-                LEO_MULADD_128(work1_reg, work3_reg, t02_lo, t02_hi);
-            }
-
-            work0_reg = _mm_xor_si128(work0_reg, _mm_loadu_si128(xor0));
-            work1_reg = _mm_xor_si128(work1_reg, _mm_loadu_si128(xor1));
-            work2_reg = _mm_xor_si128(work2_reg, _mm_loadu_si128(xor2));
-            work3_reg = _mm_xor_si128(work3_reg, _mm_loadu_si128(xor3));
-
-            _mm_storeu_si128(xor0, work0_reg);
-            _mm_storeu_si128(xor1, work1_reg);
-            _mm_storeu_si128(xor2, work2_reg);
-            _mm_storeu_si128(xor3, work3_reg);
-            xor0++, xor1++, xor2++, xor3++;
-
-            bytes -= 16;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-    // First layer:
-    if (log_m01 == kModulus)
-        xor_mem(work_in[dist], work_in[0], bytes);
-    else
-        IFFT_DIT2(work_in[0], work_in[dist], log_m01, bytes);
-
-    if (log_m23 == kModulus)
-        xor_mem(work_in[dist * 3], work_in[dist * 2], bytes);
-    else
-        IFFT_DIT2(work_in[dist * 2], work_in[dist * 3], log_m23, bytes);
-
-    // Second layer:
-    if (log_m02 == kModulus)
-    {
-        xor_mem(work_in[dist * 2], work_in[0], bytes);
-        xor_mem(work_in[dist * 3], work_in[dist], bytes);
-    }
-    else
-    {
-        IFFT_DIT2(work_in[0], work_in[dist * 2], log_m02, bytes);
-        IFFT_DIT2(work_in[dist], work_in[dist * 3], log_m02, bytes);
-    }
-
-    xor_mem(xor_out[0], work_in[0], bytes);
-    xor_mem(xor_out[dist], work_in[dist], bytes);
-    xor_mem(xor_out[dist * 2], work_in[dist * 2], bytes);
-    xor_mem(xor_out[dist * 3], work_in[dist * 3], bytes);
-}
-
-
-// Unrolled IFFT for encoder
-static void IFFT_DIT_Encoder(
-    const uint64_t bytes,
-    const void* const* data,
-    const unsigned m_truncated,
-    void** work,
-    void** xor_result,
-    const unsigned m,
-    const ffe_t* skewLUT)
-{
-    // I tried rolling the memcpy/memset into the first layer of the FFT and
-    // found that it only yields a 4% performance improvement, which is not
-    // worth the extra complexity.
-    for (unsigned i = 0; i < m_truncated; ++i)
-        memcpy(work[i], data[i], bytes);
-    for (unsigned i = m_truncated; i < m; ++i)
-        memset(work[i], 0, bytes);
-
-    // I tried splitting up the first few layers into L3-cache sized blocks but
-    // found that it only provides about 5% performance boost, which is not
-    // worth the extra complexity.
-
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist = 1, dist4 = 4;
-    for (; dist4 <= m; dist = dist4, dist4 <<= 2)
-    {
-        // For each set of dist*4 elements:
-        for (unsigned r = 0; r < m_truncated; r += dist4)
-        {
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            if (dist4 == m && xor_result)
-            {
-                // For each set of dist elements:
-                for (unsigned i = r; i < i_end; ++i)
-                {
-                    IFFT_DIT4_xor(
-                        bytes,
-                        work + i,
-                        xor_result + i,
-                        dist,
-                        log_m01,
-                        log_m23,
-                        log_m02);
-                }
-            }
-            else
-            {
-                // For each set of dist elements:
-                for (unsigned i = r; i < i_end; ++i)
-                {
-                    IFFT_DIT4(
-                        bytes,
-                        work + i,
-                        dist,
-                        log_m01,
-                        log_m23,
-                        log_m02);
-                }
-            }
-        }
-
-        // I tried alternating sweeps left->right and right->left to reduce cache misses.
-        // It provides about 1% performance boost when done for both FFT and IFFT, so it
-        // does not seem to be worth the extra complexity.
-    }
-
-    // If there is one layer left:
-    if (dist < m)
-    {
-        // Assuming that dist = m / 2
-        LEO_DEBUG_ASSERT(dist * 2 == m);
-
-        const ffe_t log_m = skewLUT[dist];
-
-        if (xor_result)
-        {
-            if (log_m == kModulus)
-            {
-                for (unsigned i = 0; i < dist; ++i)
-                    xor_mem_2to1(xor_result[i], work[i], work[i + dist], bytes);
-            }
-            else
-            {
-                for (unsigned i = 0; i < dist; ++i)
-                {
-                    IFFT_DIT2_xor(
-                        work[i],
-                        work[i + dist],
-                        xor_result[i],
-                        xor_result[i + dist],
-                        log_m,
-                        bytes);
-                }
-            }
-        }
-        else
-        {
-            if (log_m == kModulus)
-                VectorXOR(bytes, dist, work + dist, work);
-            else
-            {
-                for (unsigned i = 0; i < dist; ++i)
-                {
-                    IFFT_DIT2(
-                        work[i],
-                        work[i + dist],
-                        log_m,
-                        bytes);
-                }
-            }
-        }
-    }
-}
-
-
-// Basic no-frills version for decoder
-static void IFFT_DIT_Decoder(
-    const uint64_t bytes,
-    const unsigned m_truncated,
-    void** work,
-    const unsigned m,
-    const ffe_t* skewLUT)
-{
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist = 1, dist4 = 4;
-    for (; dist4 <= m; dist = dist4, dist4 <<= 2)
-    {
-        // For each set of dist*4 elements:
-        for (unsigned r = 0; r < m_truncated; r += dist4)
-        {
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            // For each set of dist elements:
-            for (unsigned i = r; i < i_end; ++i)
-            {
-                IFFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-    }
-
-    // If there is one layer left:
-    if (dist < m)
-    {
-        // Assuming that dist = m / 2
-        LEO_DEBUG_ASSERT(dist * 2 == m);
-
-        const ffe_t log_m = skewLUT[dist];
-
-        if (log_m == kModulus)
-            VectorXOR(bytes, dist, work + dist, work);
-        else
-        {
-            for (unsigned i = 0; i < dist; ++i)
-            {
-                IFFT_DIT2(
-                    work[i],
-                    work[i + dist],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-}
-
-/*
-    Decimation in time FFT:
-
-    The decimation in time FFT algorithm allows us to unroll 2 layers at a time,
-    performing calculations on local registers and faster cache memory.
-
-    Each ^___^ below indicates a butterfly between the associated indices.
-
-    The fft_butterfly(x, y) operation:
-
-        if (log_m != kModulus)
-            x[] ^= exp(log(y[]) + log_m)
-        y[] ^= x[]
-
-    Layer 0:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_______________^
-          ^_______________^
-            ^_______________^
-              ^_______________^
-                ^_______________^
-                  ^_______________^
-                    ^_______________^
-                      ^_______________^
-
-    Layer 1:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 
-        ^_______^       ^_______^
-          ^_______^       ^_______^
-            ^_______^       ^_______^
-              ^_______^       ^_______^
-  
-    Layer 2:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^___^   ^___^   ^___^   ^___^
-          ^___^   ^___^   ^___^   ^___^
-
-    Layer 3:
-        0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
-        ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^ ^_^
-
-    DIT layer 0-1 operations, grouped 4 at a time:
-        {0-0', 4-4', 0-4, 0'-4'},
-        {1-1', 5-5', 1-5, 1'-5'},
-
-    DIT layer 1-2 operations, grouped 4 at a time:
-        {0-4, 2-6, 0-2, 4-6},
-        {1-5, 3-7, 1-3, 5-7},
-
-    DIT layer 2-3 operations, grouped 4 at a time:
-        {0-2, 1-3, 0-1, 2-3},
-        {4-6, 5-7, 4-5, 6-7},
-*/
-
-// 2-way butterfly
-static void FFT_DIT2(
-    void * LEO_RESTRICT x, void * LEO_RESTRICT y,
-    ffe_t log_m, uint64_t bytes)
-{
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 table_lo_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[0]);
-        const LEO_M256 table_hi_y = _mm256_loadu_si256(&Multiply256LUT[log_m].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT x32 = reinterpret_cast<LEO_M256 *>(x);
-        LEO_M256 * LEO_RESTRICT y32 = reinterpret_cast<LEO_M256 *>(y);
-
-        do
-        {
-#define LEO_FFTB_256(x_ptr, y_ptr) { \
-            LEO_M256 y_data = _mm256_loadu_si256(y_ptr); \
-            LEO_M256 x_data = _mm256_loadu_si256(x_ptr); \
-            LEO_MULADD_256(x_data, y_data, table_lo_y, table_hi_y); \
-            y_data = _mm256_xor_si256(y_data, x_data); \
-            _mm256_storeu_si256(x_ptr, x_data); \
-            _mm256_storeu_si256(y_ptr, y_data); }
-
-            LEO_FFTB_256(x32 + 1, y32 + 1);
-            LEO_FFTB_256(x32, y32);
-            y32 += 2, x32 += 2;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 table_lo_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[0]);
-        const LEO_M128 table_hi_y = _mm_loadu_si128(&Multiply128LUT[log_m].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT x16 = reinterpret_cast<LEO_M128 *>(x);
-        LEO_M128 * LEO_RESTRICT y16 = reinterpret_cast<LEO_M128 *>(y);
-
-        do
-        {
-#define LEO_FFTB_128(x_ptr, y_ptr) { \
-            LEO_M128 y_data = _mm_loadu_si128(y_ptr); \
-            LEO_M128 x_data = _mm_loadu_si128(x_ptr); \
-            LEO_MULADD_128(x_data, y_data, table_lo_y, table_hi_y); \
-            y_data = _mm_xor_si128(y_data, x_data); \
-            _mm_storeu_si128(x_ptr, x_data); \
-            _mm_storeu_si128(y_ptr, y_data); }
-
-            LEO_FFTB_128(x16 + 3, y16 + 3);
-            LEO_FFTB_128(x16 + 2, y16 + 2);
-            LEO_FFTB_128(x16 + 1, y16 + 1);
-            LEO_FFTB_128(x16, y16);
-            x16 += 4, y16 += 4;
-
-            bytes -= 64;
-        } while (bytes > 0);
-
-        return;
-    }
-
-    // Reference version:
-    RefMulAdd(x, y, log_m, bytes);
-    xor_mem(y, x, bytes);
-}
-
-
-// 4-way butterfly
-static void FFT_DIT4(
-    uint64_t bytes,
-    void** work,
-    unsigned dist,
-    const ffe_t log_m01,
-    const ffe_t log_m23,
-    const ffe_t log_m02)
-{
-#ifdef LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-#if defined(LEO_TRY_AVX2)
-    if (CpuHasAVX2)
-    {
-        const LEO_M256 t01_lo = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[0]);
-        const LEO_M256 t01_hi = _mm256_loadu_si256(&Multiply256LUT[log_m01].Value[1]);
-        const LEO_M256 t23_lo = _mm256_loadu_si256(&Multiply256LUT[log_m23].Value[0]);
-        const LEO_M256 t23_hi = _mm256_loadu_si256(&Multiply256LUT[log_m23].Value[1]);
-        const LEO_M256 t02_lo = _mm256_loadu_si256(&Multiply256LUT[log_m02].Value[0]);
-        const LEO_M256 t02_hi = _mm256_loadu_si256(&Multiply256LUT[log_m02].Value[1]);
-
-        const LEO_M256 clr_mask = _mm256_set1_epi8(0x0f);
-
-        LEO_M256 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M256 *>(work[0]);
-        LEO_M256 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M256 *>(work[dist]);
-        LEO_M256 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M256 *>(work[dist * 2]);
-        LEO_M256 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M256 *>(work[dist * 3]);
-
-        do
-        {
-            LEO_M256 work0_reg = _mm256_loadu_si256(work0);
-            LEO_M256 work2_reg = _mm256_loadu_si256(work2);
-            LEO_M256 work1_reg = _mm256_loadu_si256(work1);
-            LEO_M256 work3_reg = _mm256_loadu_si256(work3);
-
-            // First layer:
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_256(work0_reg, work2_reg, t02_lo, t02_hi);
-                LEO_MULADD_256(work1_reg, work3_reg, t02_lo, t02_hi);
-            }
-            work2_reg = _mm256_xor_si256(work0_reg, work2_reg);
-            work3_reg = _mm256_xor_si256(work1_reg, work3_reg);
-
-            // Second layer:
-            if (log_m01 != kModulus)
-                LEO_MULADD_256(work0_reg, work1_reg, t01_lo, t01_hi);
-            work1_reg = _mm256_xor_si256(work0_reg, work1_reg);
-
-            _mm256_storeu_si256(work0, work0_reg);
-            _mm256_storeu_si256(work1, work1_reg);
-            work0++, work1++;
-
-            if (log_m23 != kModulus)
-                LEO_MULADD_256(work2_reg, work3_reg, t23_lo, t23_hi);
-            work3_reg = _mm256_xor_si256(work2_reg, work3_reg);
-
-            _mm256_storeu_si256(work2, work2_reg);
-            _mm256_storeu_si256(work3, work3_reg);
-            work2++, work3++;
-
-            bytes -= 32;
-        } while (bytes > 0);
-
-        return;
-    }
-#endif // LEO_TRY_AVX2
-
-    if (CpuHasSSSE3)
-    {
-        const LEO_M128 t01_lo = _mm_loadu_si128(&Multiply128LUT[log_m01].Value[0]);
-        const LEO_M128 t01_hi = _mm_loadu_si128(&Multiply128LUT[log_m01].Value[1]);
-        const LEO_M128 t23_lo = _mm_loadu_si128(&Multiply128LUT[log_m23].Value[0]);
-        const LEO_M128 t23_hi = _mm_loadu_si128(&Multiply128LUT[log_m23].Value[1]);
-        const LEO_M128 t02_lo = _mm_loadu_si128(&Multiply128LUT[log_m02].Value[0]);
-        const LEO_M128 t02_hi = _mm_loadu_si128(&Multiply128LUT[log_m02].Value[1]);
-
-        const LEO_M128 clr_mask = _mm_set1_epi8(0x0f);
-
-        LEO_M128 * LEO_RESTRICT work0 = reinterpret_cast<LEO_M128 *>(work[0]);
-        LEO_M128 * LEO_RESTRICT work1 = reinterpret_cast<LEO_M128 *>(work[dist]);
-        LEO_M128 * LEO_RESTRICT work2 = reinterpret_cast<LEO_M128 *>(work[dist * 2]);
-        LEO_M128 * LEO_RESTRICT work3 = reinterpret_cast<LEO_M128 *>(work[dist * 3]);
-
-        do
-        {
-            LEO_M128 work0_reg = _mm_loadu_si128(work0);
-            LEO_M128 work2_reg = _mm_loadu_si128(work2);
-            LEO_M128 work1_reg = _mm_loadu_si128(work1);
-            LEO_M128 work3_reg = _mm_loadu_si128(work3);
-
-            // First layer:
-            if (log_m02 != kModulus)
-            {
-                LEO_MULADD_128(work0_reg, work2_reg, t02_lo, t02_hi);
-                LEO_MULADD_128(work1_reg, work3_reg, t02_lo, t02_hi);
-            }
-            work2_reg = _mm_xor_si128(work0_reg, work2_reg);
-            work3_reg = _mm_xor_si128(work1_reg, work3_reg);
-
-            // Second layer:
-            if (log_m01 != kModulus)
-                LEO_MULADD_128(work0_reg, work1_reg, t01_lo, t01_hi);
-            work1_reg = _mm_xor_si128(work0_reg, work1_reg);
-
-            _mm_storeu_si128(work0, work0_reg);
-            _mm_storeu_si128(work1, work1_reg);
-            work0++, work1++;
-
-            if (log_m23 != kModulus)
-                LEO_MULADD_128(work2_reg, work3_reg, t23_lo, t23_hi);
-            work3_reg = _mm_xor_si128(work2_reg, work3_reg);
-
-            _mm_storeu_si128(work2, work2_reg);
-            _mm_storeu_si128(work3, work3_reg);
-            work2++, work3++;
-
-            bytes -= 16;
-        } while (bytes > 0);
-
-        return;
-    }
-
-#endif // LEO_INTERLEAVE_BUTTERFLY4_OPT
-
-    // First layer:
-    if (log_m02 == kModulus)
-    {
-        xor_mem(work[dist * 2], work[0], bytes);
-        xor_mem(work[dist * 3], work[dist], bytes);
-    }
-    else
-    {
-        FFT_DIT2(work[0], work[dist * 2], log_m02, bytes);
-        FFT_DIT2(work[dist], work[dist * 3], log_m02, bytes);
-    }
-
-    // Second layer:
-    if (log_m01 == kModulus)
-        xor_mem(work[dist], work[0], bytes);
-    else
-        FFT_DIT2(work[0], work[dist], log_m01, bytes);
-
-    if (log_m23 == kModulus)
-        xor_mem(work[dist * 3], work[dist * 2], bytes);
-    else
-        FFT_DIT2(work[dist * 2], work[dist * 3], log_m23, bytes);
-}
-
-
-// In-place FFT for encoder and decoder
-static void FFT_DIT(
-    const uint64_t bytes,
-    void** work,
-    const unsigned m_truncated,
-    const unsigned m,
-    const ffe_t* skewLUT)
-{
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist4 = m, dist = m >> 2;
-    for (; dist != 0; dist4 = dist, dist >>= 2)
-    {
-        // For each set of dist*4 elements:
-        for (unsigned r = 0; r < m_truncated; r += dist4)
-        {
-            const unsigned i_end = r + dist;
-            const ffe_t log_m01 = skewLUT[i_end];
-            const ffe_t log_m02 = skewLUT[i_end + dist];
-            const ffe_t log_m23 = skewLUT[i_end + dist * 2];
-
-            // For each set of dist elements:
-            for (unsigned i = r; i < i_end; ++i)
-            {
-                FFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-    }
-
-    // If there is one layer left:
-    if (dist4 == 2)
-    {
-        for (unsigned r = 0; r < m_truncated; r += 2)
-        {
-            const ffe_t log_m = skewLUT[r + 1];
-
-            if (log_m == kModulus)
-                xor_mem(work[r + 1], work[r], bytes);
-            else
-            {
-                FFT_DIT2(
-                    work[r],
-                    work[r + 1],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-}
-
-
-//------------------------------------------------------------------------------
-// Reed-Solomon Encode
-
-void ReedSolomonEncode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m,
-    const void* const* data,
-    void** work)
-{
-    // work <- IFFT(data, m, m)
-
-    const ffe_t* skewLUT = FFTSkew + m - 1;
-
-    IFFT_DIT_Encoder(
-        buffer_bytes,
-        data,
-        original_count < m ? original_count : m,
-        work,
-        nullptr, // No xor output
-        m,
-        skewLUT);
-
-    const unsigned last_count = original_count % m;
-    if (m >= original_count)
-        goto skip_body;
-
-    // For sets of m data pieces:
-    for (unsigned i = m; i + m <= original_count; i += m)
-    {
-        data += m;
-        skewLUT += m;
-
-        // work <- work xor IFFT(data + i, m, m + i)
-
-        IFFT_DIT_Encoder(
-            buffer_bytes,
-            data, // data source
-            m,
-            work + m, // temporary workspace
-            work, // xor destination
-            m,
-            skewLUT);
-    }
-
-    // Handle final partial set of m pieces:
-    if (last_count != 0)
-    {
-        data += m;
-        skewLUT += m;
-
-        // work <- work xor IFFT(data + i, m, m + i)
-
-        IFFT_DIT_Encoder(
-            buffer_bytes,
-            data, // data source
-            last_count,
-            work + m, // temporary workspace
-            work, // xor destination
-            m,
-            skewLUT);
-    }
-
-skip_body:
-
-    // work <- FFT(work, m, 0)
-    FFT_DIT(
-        buffer_bytes,
-        work,
-        recovery_count,
-        m,
-        FFTSkew - 1);
-}
-
-
-//------------------------------------------------------------------------------
-// ErrorBitfield
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-
-// Used in decoding to decide which final FFT operations to perform
-class ErrorBitfield
-{
-    static const unsigned kWords = kOrder / 64;
-    uint64_t Words[7][kWords] = {};
-
-public:
-    LEO_FORCE_INLINE void Set(unsigned i)
-    {
-        Words[0][i / 64] |= (uint64_t)1 << (i % 64);
-    }
-
-    void Prepare();
-
-    LEO_FORCE_INLINE bool IsNeeded(unsigned mip_level, unsigned bit) const
-    {
-        if (mip_level >= 8)
-            return true;
-        return 0 != (Words[mip_level - 1][bit / 64] & ((uint64_t)1 << (bit % 64)));
-    }
-};
-
-static const uint64_t kHiMasks[5] = {
-    0xAAAAAAAAAAAAAAAAULL,
-    0xCCCCCCCCCCCCCCCCULL,
-    0xF0F0F0F0F0F0F0F0ULL,
-    0xFF00FF00FF00FF00ULL,
-    0xFFFF0000FFFF0000ULL,
-};
-
-void ErrorBitfield::Prepare()
-{
-    // First mip level is for final layer of FFT: pairs of data
-    for (unsigned i = 0; i < kWords; ++i)
-    {
-        uint64_t w_i = Words[0][i];
-        const uint64_t hi2lo0 = w_i | ((w_i & kHiMasks[0]) >> 1);
-        const uint64_t lo2hi0 = ((w_i & (kHiMasks[0] >> 1)) << 1);
-        Words[0][i] = w_i = hi2lo0 | lo2hi0;
-
-        for (unsigned j = 1, bits = 2; j < 5; ++j, bits <<= 1)
-        {
-            const uint64_t hi2lo_j = w_i | ((w_i & kHiMasks[j]) >> bits);
-            const uint64_t lo2hi_j = ((w_i & (kHiMasks[j] >> bits)) << bits);
-            Words[j][i] = w_i = hi2lo_j | lo2hi_j;
-        }
-    }
-
-    for (unsigned i = 0; i < kWords; ++i)
-    {
-        uint64_t w = Words[4][i];
-        w |= w >> 32;
-        w |= w << 32;
-        Words[5][i] = w;
-    }
-
-    for (unsigned i = 0; i < kWords; i += 2)
-        Words[6][i] = Words[6][i + 1] = Words[5][i] | Words[5][i + 1];
-}
-
-
-static void FFT_DIT_ErrorBits(
-    const uint64_t bytes,
-    void** work,
-    const unsigned n_truncated,
-    const unsigned n,
-    const ffe_t* skewLUT,
-    const ErrorBitfield& error_bits)
-{
-    unsigned mip_level = LastNonzeroBit32(n);
-
-    // Decimation in time: Unroll 2 layers at a time
-    unsigned dist4 = n, dist = n >> 2;
-    for (; dist != 0; dist4 = dist, dist >>= 2, mip_level -=2)
-    {
-        // For each set of dist*4 elements:
-        for (unsigned r = 0; r < n_truncated; r += dist4)
-        {
-            if (!error_bits.IsNeeded(mip_level, r))
-                continue;
-
-            const ffe_t log_m01 = skewLUT[r + dist];
-            const ffe_t log_m23 = skewLUT[r + dist * 3];
-            const ffe_t log_m02 = skewLUT[r + dist * 2];
-
-            // For each set of dist elements:
-            for (unsigned i = r; i < r + dist; ++i)
-            {
-                FFT_DIT4(
-                    bytes,
-                    work + i,
-                    dist,
-                    log_m01,
-                    log_m23,
-                    log_m02);
-            }
-        }
-    }
-
-    // If there is one layer left:
-    if (dist4 == 2)
-    {
-        for (unsigned r = 0; r < n_truncated; r += 2)
-        {
-            if (!error_bits.IsNeeded(mip_level, r))
-                continue;
-
-            const ffe_t log_m = skewLUT[r + 1];
-
-            if (log_m == kModulus)
-                xor_mem(work[r + 1], work[r], bytes);
-            else
-            {
-                FFT_DIT2(
-                    work[r],
-                    work[r + 1],
-                    log_m,
-                    bytes);
-            }
-        }
-    }
-}
-
-#endif // LEO_ERROR_BITFIELD_OPT
-
-
-//------------------------------------------------------------------------------
-// Reed-Solomon Decode
-
-void ReedSolomonDecode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m, // NextPow2(recovery_count)
-    unsigned n, // NextPow2(m + original_count) = work_count
-    const void* const * const original, // original_count entries
-    const void* const * const recovery, // recovery_count entries
-    void** work) // n entries
-{
-    // Fill in error locations
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-    ErrorBitfield error_bits;
-#endif // LEO_ERROR_BITFIELD_OPT
-
-    ffe_t error_locations[kOrder] = {};
-    for (unsigned i = 0; i < recovery_count; ++i)
-        if (!recovery[i])
-            error_locations[i] = 1;
-    for (unsigned i = recovery_count; i < m; ++i)
-        error_locations[i] = 1;
-    for (unsigned i = 0; i < original_count; ++i)
-    {
-        if (!original[i])
-        {
-            error_locations[i + m] = 1;
-#ifdef LEO_ERROR_BITFIELD_OPT
-            error_bits.Set(i + m);
-#endif // LEO_ERROR_BITFIELD_OPT
-        }
-    }
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-    error_bits.Prepare();
-#endif // LEO_ERROR_BITFIELD_OPT
-
-    // Evaluate error locator polynomial
-
-    FWHT(error_locations, kOrder, m + original_count);
-
-    for (unsigned i = 0; i < kOrder; ++i)
-        error_locations[i] = ((unsigned)error_locations[i] * (unsigned)LogWalsh[i]) % kModulus;
-
-    FWHT(error_locations, kOrder, kOrder);
-
-    // work <- recovery data
-
-    for (unsigned i = 0; i < recovery_count; ++i)
-    {
-        if (recovery[i])
-            mul_mem(work[i], recovery[i], error_locations[i], buffer_bytes);
-        else
-            memset(work[i], 0, buffer_bytes);
-    }
-    for (unsigned i = recovery_count; i < m; ++i)
-        memset(work[i], 0, buffer_bytes);
-
-    // work <- original data
-
-    for (unsigned i = 0; i < original_count; ++i)
-    {
-        if (original[i])
-            mul_mem(work[m + i], original[i], error_locations[m + i], buffer_bytes);
-        else
-            memset(work[m + i], 0, buffer_bytes);
-    }
-    for (unsigned i = m + original_count; i < n; ++i)
-        memset(work[i], 0, buffer_bytes);
-
-    // work <- IFFT(work, n, 0)
-
-    IFFT_DIT_Decoder(
-        buffer_bytes,
-        m + original_count,
-        work,
-        n,
-        FFTSkew - 1);
-
-    // work <- FormalDerivative(work, n)
-
-    for (unsigned i = 1; i < n; ++i)
-    {
-        const unsigned width = ((i ^ (i - 1)) + 1) >> 1;
-
-        VectorXOR(
-            buffer_bytes,
-            width,
-            work + i - width,
-            work + i);
-    }
-
-    // work <- FFT(work, n, 0) truncated to m + original_count
-
-    const unsigned output_count = m + original_count;
-
-#ifdef LEO_ERROR_BITFIELD_OPT
-    FFT_DIT_ErrorBits(buffer_bytes, work, output_count, n, FFTSkew - 1, error_bits);
-#else
-    FFT_DIT(buffer_bytes, work, output_count, n, FFTSkew - 1);
-#endif
-
-    // Reveal erasures
-
-    for (unsigned i = 0; i < original_count; ++i)
-        if (!original[i])
-            mul_mem(work[i], work[i + m], kModulus - error_locations[i + m], buffer_bytes);
-}
-
-
-//------------------------------------------------------------------------------
-// API
-
-static bool IsInitialized = false;
-
-bool Initialize()
-{
-    if (IsInitialized)
-        return true;
-
-    InitializeLogarithmTables();
-    InitializeMultiplyTables();
-    FFTInitialize();
-
-    IsInitialized = true;
-    return true;
-}
-
-
-}} // namespace leopard::ff8
-
-#endif // LEO_HAS_FF8
diff --git a/windows/src/leopard/LeopardFF8.h b/windows/src/leopard/LeopardFF8.h
deleted file mode 100644
index fe19ed5..0000000
--- a/windows/src/leopard/LeopardFF8.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#pragma once
-
-#include "LeopardCommon.h"
-
-#ifdef LEO_HAS_FF8
-
-/*
-    8-bit Finite Field Math
-
-    This finite field contains 256 elements and so each element is one byte.
-    This library is designed for data that is a multiple of 64 bytes in size.
-
-    Algorithms are described in LeopardCommon.h
-*/
-
-namespace leopard { namespace ff8 {
-
-
-//------------------------------------------------------------------------------
-// Datatypes and Constants
-
-// Finite field element type
-typedef uint8_t ffe_t;
-
-// Number of bits per element
-static const unsigned kBits = 8;
-
-// Finite field order: Number of elements in the field
-static const unsigned kOrder = 256;
-
-// Modulus for field operations
-static const ffe_t kModulus = 255;
-
-// LFSR Polynomial that generates the field elements
-static const unsigned kPolynomial = 0x11D;
-
-
-//------------------------------------------------------------------------------
-// API
-
-// Returns false if the self-test fails
-bool Initialize();
-
-void ReedSolomonEncode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m, // = NextPow2(recovery_count)
-    const void* const * const data,
-    void** work); // m * 2 elements
-
-void ReedSolomonDecode(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    unsigned recovery_count,
-    unsigned m, // = NextPow2(recovery_count)
-    unsigned n, // = NextPow2(m + original_count)
-    const void* const * const original, // original_count elements
-    const void* const * const recovery, // recovery_count elements
-    void** work); // n elements
-
-
-}} // namespace leopard::ff8
-
-#endif // LEO_HAS_FF8
diff --git a/windows/src/leopard/leopard.cpp b/windows/src/leopard/leopard.cpp
deleted file mode 100644
index 5d8f2f1..0000000
--- a/windows/src/leopard/leopard.cpp
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "leopard.h"
-#include "LeopardCommon.h"
-
-#ifdef LEO_HAS_FF8
-    #include "LeopardFF8.h"
-#endif // LEO_HAS_FF8
-#ifdef LEO_HAS_FF16
-    #include "LeopardFF16.h"
-#endif // LEO_HAS_FF16
-
-#include <string.h>
-
-extern "C" {
-
-
-//------------------------------------------------------------------------------
-// Initialization API
-
-static bool m_Initialized = false;
-
-LEO_EXPORT int leo_init_(int version)
-{
-    if (version != LEO_VERSION)
-        return Leopard_InvalidInput;
-
-    leopard::InitializeCPUArch();
-
-#ifdef LEO_HAS_FF8
-    if (!leopard::ff8::Initialize())
-        return Leopard_Platform;
-#endif // LEO_HAS_FF8
-
-#ifdef LEO_HAS_FF16
-    if (!leopard::ff16::Initialize())
-        return Leopard_Platform;
-#endif // LEO_HAS_FF16
-
-
-    m_Initialized = true;
-    return Leopard_Success;
-}
-
-//------------------------------------------------------------------------------
-// Result
-
-LEO_EXPORT const char* leo_result_string(LeopardResult result)
-{
-    switch (result)
-    {
-    case Leopard_Success: return "Operation succeeded";
-    case Leopard_NeedMoreData: return "Not enough recovery data received";
-    case Leopard_TooMuchData: return "Buffer counts are too high";
-    case Leopard_InvalidSize: return "Buffer size must be a multiple of 64 bytes";
-    case Leopard_InvalidCounts: return "Invalid counts provided";
-    case Leopard_InvalidInput: return "A function parameter was invalid";
-    case Leopard_Platform: return "Platform is unsupported";
-    case Leopard_CallInitialize: return "Call leo_init() first";
-    }
-    return "Unknown";
-}
-
-
-//------------------------------------------------------------------------------
-// Encoder API
-
-LEO_EXPORT unsigned leo_encode_work_count(
-    unsigned original_count,
-    unsigned recovery_count)
-{
-    if (original_count == 1)
-        return recovery_count;
-    if (recovery_count == 1)
-        return 1;
-    return leopard::NextPow2(recovery_count) * 2;
-}
-
-// recovery_data = parity of original_data (xor sum)
-static void EncodeM1(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    const void* const * const original_data,
-    void* recovery_data)
-{
-    memcpy(recovery_data, original_data[0], buffer_bytes);
-
-    leopard::XORSummer summer;
-    summer.Initialize(recovery_data);
-
-    for (unsigned i = 1; i < original_count; ++i)
-        summer.Add(original_data[i], buffer_bytes);
-
-    summer.Finalize(buffer_bytes);
-}
-
-LEO_EXPORT LeopardResult leo_encode(
-    uint64_t buffer_bytes,                    // Number of bytes in each data buffer
-    unsigned original_count,                  // Number of original_data[] buffer pointers
-    unsigned recovery_count,                  // Number of recovery_data[] buffer pointers
-    unsigned work_count,                      // Number of work_data[] buffer pointers, from leo_encode_work_count()
-    const void* const * const original_data,  // Array of pointers to original data buffers
-    void** work_data)                         // Array of work buffers
-{
-    if (buffer_bytes <= 0 || buffer_bytes % 64 != 0)
-        return Leopard_InvalidSize;
-
-    if (recovery_count <= 0)
-        return Leopard_InvalidCounts;
-
-    if (!original_data || !work_data)
-        return Leopard_InvalidInput;
-
-    if (!m_Initialized)
-        return Leopard_CallInitialize;
-
-    // Handle k = 1 case
-    if (original_count == 1)
-    {
-        for (unsigned i = 0; i < recovery_count; ++i)
-            memcpy(work_data[i], original_data[i], buffer_bytes);
-        return Leopard_Success;
-    }
-
-    // Handle m = 1 case
-    if (recovery_count == 1)
-    {
-        EncodeM1(
-            buffer_bytes,
-            original_count,
-            original_data,
-            work_data[0]);
-        return Leopard_Success;
-    }
-
-    const unsigned m = leopard::NextPow2(recovery_count);
-    const unsigned n = leopard::NextPow2(m + original_count);
-
-    if (work_count != m * 2)
-        return Leopard_InvalidCounts;
-
-#ifdef LEO_HAS_FF8
-    if (n <= leopard::ff8::kOrder)
-    {
-        leopard::ff8::ReedSolomonEncode(
-            buffer_bytes,
-            original_count,
-            recovery_count,
-            m,
-            original_data,
-            work_data);
-    }
-    else
-#endif // LEO_HAS_FF8
-#ifdef LEO_HAS_FF16
-    if (n <= leopard::ff16::kOrder)
-    {
-        leopard::ff16::ReedSolomonEncode(
-            buffer_bytes,
-            original_count,
-            recovery_count,
-            m,
-            original_data,
-            work_data);
-    }
-    else
-#endif // LEO_HAS_FF16
-        return Leopard_TooMuchData;
-
-    return Leopard_Success;
-}
-
-
-//------------------------------------------------------------------------------
-// Decoder API
-
-LEO_EXPORT unsigned leo_decode_work_count(
-    unsigned original_count,
-    unsigned recovery_count)
-{
-    if (original_count == 1 || recovery_count == 1)
-        return original_count;
-    const unsigned m = leopard::NextPow2(recovery_count);
-    const unsigned n = leopard::NextPow2(m + original_count);
-    return n;
-}
-
-static void DecodeM1(
-    uint64_t buffer_bytes,
-    unsigned original_count,
-    const void* const * original_data,
-    const void* recovery_data,
-    void* work_data)
-{
-    memcpy(work_data, recovery_data, buffer_bytes);
-
-    leopard::XORSummer summer;
-    summer.Initialize(work_data);
-
-    for (unsigned i = 0; i < original_count; ++i)
-        if (original_data[i])
-            summer.Add(original_data[i], buffer_bytes);
-
-    summer.Finalize(buffer_bytes);
-}
-
-LEO_EXPORT LeopardResult leo_decode(
-    uint64_t buffer_bytes,                    // Number of bytes in each data buffer
-    unsigned original_count,                  // Number of original_data[] buffer pointers
-    unsigned recovery_count,                  // Number of recovery_data[] buffer pointers
-    unsigned work_count,                      // Number of buffer pointers in work_data[]
-    const void* const * const original_data,  // Array of original data buffers
-    const void* const * const recovery_data,  // Array of recovery data buffers
-    void** work_data)                         // Array of work data buffers
-{
-    if (buffer_bytes <= 0 || buffer_bytes % 64 != 0)
-        return Leopard_InvalidSize;
-
-    if (recovery_count <= 0)
-        return Leopard_InvalidCounts;
-
-    if (!original_data || !recovery_data || !work_data)
-        return Leopard_InvalidInput;
-
-    if (!m_Initialized)
-        return Leopard_CallInitialize;
-
-    // Check if not enough recovery data arrived
-    unsigned original_loss_count = 0;
-    unsigned original_loss_i = 0;
-    for (unsigned i = 0; i < original_count; ++i)
-    {
-        if (!original_data[i])
-        {
-            ++original_loss_count;
-            original_loss_i = i;
-        }
-    }
-    unsigned recovery_got_count = 0;
-    unsigned recovery_got_i = 0;
-    for (unsigned i = 0; i < recovery_count; ++i)
-    {
-        if (recovery_data[i])
-        {
-            ++recovery_got_count;
-            recovery_got_i = i;
-        }
-    }
-    if (recovery_got_count < original_loss_count)
-        return Leopard_NeedMoreData;
-
-    // Handle k = 1 case
-    if (original_count == 1)
-    {
-        memcpy(work_data[0], recovery_data[recovery_got_i], buffer_bytes);
-        return Leopard_Success;
-    }
-
-    // Handle m = 1 case
-    if (recovery_count == 1)
-    {
-        DecodeM1(
-            buffer_bytes,
-            original_count,
-            original_data,
-            recovery_data[0],
-            work_data[original_loss_i]);
-        return Leopard_Success;
-    }
-
-    const unsigned m = leopard::NextPow2(recovery_count);
-    const unsigned n = leopard::NextPow2(m + original_count);
-
-    if (work_count != n)
-        return Leopard_InvalidCounts;
-
-#ifdef LEO_HAS_FF8
-    if (n <= leopard::ff8::kOrder)
-    {
-        leopard::ff8::ReedSolomonDecode(
-            buffer_bytes,
-            original_count,
-            recovery_count,
-            m,
-            n,
-            original_data,
-            recovery_data,
-            work_data);
-    }
-    else
-#endif // LEO_HAS_FF8
-#ifdef LEO_HAS_FF16
-    if (n <= leopard::ff16::kOrder)
-    {
-        leopard::ff16::ReedSolomonDecode(
-            buffer_bytes,
-            original_count,
-            recovery_count,
-            m,
-            n,
-            original_data,
-            recovery_data,
-            work_data);
-    }
-    else
-#endif // LEO_HAS_FF16
-        return Leopard_TooMuchData;
-
-    return Leopard_Success;
-}
-
-
-} // extern "C"
diff --git a/windows/src/leopard/leopard.h b/windows/src/leopard/leopard.h
deleted file mode 100644
index 98dd182..0000000
--- a/windows/src/leopard/leopard.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
-    Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of Leopard-RS nor the names of its contributors may be
-      used to endorse or promote products derived from this software without
-      specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-    POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef CAT_LEOPARD_RS_H
-#define CAT_LEOPARD_RS_H
-
-/*
-    Leopard-RS
-    MDS Reed-Solomon Erasure Correction Codes for Large Data in C
-
-    Algorithms are described in LeopardCommon.h
-
-
-    Inspired by discussion with:
-
-    Sian-Jhen Lin <sjhenglin@gmail.com> : Author of {1} {3}, basis for Leopard
-    Bulat Ziganshin <bulat.ziganshin@gmail.com> : Author of FastECC
-    Yutaka Sawada <tenfon@outlook.jp> : Author of MultiPar
-
-
-    References:
-
-    {1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung,
-    "Novel Polynomial Basis with Fast Fourier Transform
-    and Its Application to Reed-Solomon Erasure Codes"
-    IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016.
-
-    {2} D. G. Cantor, "On arithmetical algorithms over finite fields",
-    Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989.
-
-    {3} Sian-Jheng Lin, Wei-Ho Chung, "An Efficient (n, k) Information
-    Dispersal Algorithm for High Code Rate System over Fermat Fields,"
-    IEEE Commun. Lett., vol.16, no.12, pp. 2036-2039, Dec. 2012.
-
-    {4} Plank, J. S., Greenan, K. M., Miller, E. L., "Screaming fast Galois Field
-    arithmetic using Intel SIMD instructions."  In: FAST-2013: 11th Usenix
-    Conference on File and Storage Technologies, San Jose, 2013
-*/
-
-// Library version
-#define LEO_VERSION 2
-
-// Tweak if the functions are exported or statically linked
-//#define LEO_DLL /* Defined when building/linking as DLL */
-//#define LEO_BUILDING /* Defined by the library makefile */
-
-#if defined(LEO_BUILDING)
-# if defined(LEO_DLL)
-    #define LEO_EXPORT __declspec(dllexport)
-# else
-    #define LEO_EXPORT
-# endif
-#else
-# if defined(LEO_DLL)
-    #define LEO_EXPORT __declspec(dllimport)
-# else
-    #define LEO_EXPORT extern
-# endif
-#endif
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-//------------------------------------------------------------------------------
-// Initialization API
-
-/*
-    leo_init()
-
-    Perform static initialization for the library, verifying that the platform
-    is supported.
-
-    Returns 0 on success and other values on failure.
-*/
-
-LEO_EXPORT int leo_init_(int version);
-#define leo_init() leo_init_(LEO_VERSION)
-
-
-//------------------------------------------------------------------------------
-// Shared Constants / Datatypes
-
-// Results
-typedef enum LeopardResultT
-{
-    Leopard_Success           =  0, // Operation succeeded
-
-    Leopard_NeedMoreData      = -1, // Not enough recovery data received
-    Leopard_TooMuchData       = -2, // Buffer counts are too high
-    Leopard_InvalidSize       = -3, // Buffer size must be a multiple of 64 bytes
-    Leopard_InvalidCounts     = -4, // Invalid counts provided
-    Leopard_InvalidInput      = -5, // A function parameter was invalid
-    Leopard_Platform          = -6, // Platform is unsupported
-    Leopard_CallInitialize    = -7, // Call leo_init() first
-} LeopardResult;
-
-// Convert Leopard result to string
-LEO_EXPORT const char* leo_result_string(LeopardResult result);
-
-
-//------------------------------------------------------------------------------
-// Encoder API
-
-/*
-    leo_encode_work_count()
-
-    Calculate the number of work_data buffers to provide to leo_encode().
-
-    The sum of original_count + recovery_count must not exceed 65536.
-
-    Returns the work_count value to pass into leo_encode().
-    Returns 0 on invalid input.
-*/
-LEO_EXPORT unsigned leo_encode_work_count(
-    unsigned original_count,
-    unsigned recovery_count);
-
-/*
-    leo_encode()
-
-    Generate recovery data.
-
-    original_count: Number of original_data[] buffers provided.
-    recovery_count: Number of desired recovery data buffers.
-    buffer_bytes:   Number of bytes in each data buffer.
-    original_data:  Array of pointers to original data buffers.
-    work_count:     Number of work_data[] buffers, from leo_encode_work_count().
-    work_data:      Array of pointers to work data buffers.
-
-    The sum of original_count + recovery_count must not exceed 65536.
-    The recovery_count <= original_count.
-
-    The buffer_bytes must be a multiple of 64.
-    Each buffer should have the same number of bytes.
-    Even the last piece must be rounded up to the block size.
-
-    Let buffer_bytes = The number of bytes in each buffer:
-
-        original_count = static_cast<unsigned>(
-            ((uint64_t)total_bytes + buffer_bytes - 1) / buffer_bytes);
-
-    Or if the number of pieces is known:
-
-        buffer_bytes = static_cast<unsigned>(
-            ((uint64_t)total_bytes + original_count - 1) / original_count);
-
-    Returns Leopard_Success on success.
-    * The first set of recovery_count buffers in work_data will be the result.
-    Returns other values on errors.
-*/
-LEO_EXPORT LeopardResult leo_encode(
-    uint64_t buffer_bytes,                    // Number of bytes in each data buffer
-    unsigned original_count,                  // Number of original_data[] buffer pointers
-    unsigned recovery_count,                  // Number of recovery_data[] buffer pointers
-    unsigned work_count,                      // Number of work_data[] buffer pointers, from leo_encode_work_count()
-    const void* const * const original_data,  // Array of pointers to original data buffers
-    void** work_data);                        // Array of work buffers
-
-
-//------------------------------------------------------------------------------
-// Decoder API
-
-/*
-    leo_decode_work_count()
-
-    Calculate the number of work_data buffers to provide to leo_decode().
-
-    The sum of original_count + recovery_count must not exceed 65536.
-
-    Returns the work_count value to pass into leo_encode().
-    Returns 0 on invalid input.
-*/
-LEO_EXPORT unsigned leo_decode_work_count(
-    unsigned original_count,
-    unsigned recovery_count);
-
-/*
-    leo_decode()
-
-    Decode original data from recovery data.
-
-    buffer_bytes:   Number of bytes in each data buffer.
-    original_count: Number of original_data[] buffers provided.
-    original_data:  Array of pointers to original data buffers.
-    recovery_count: Number of recovery_data[] buffers provided.
-    recovery_data:  Array of pointers to recovery data buffers.
-    work_count:     Number of work_data[] buffers, from leo_decode_work_count().
-    work_data:      Array of pointers to work data buffers.
-
-    Lost original/recovery data should be set to NULL.
-
-    The sum of recovery_count + the number of non-NULL original data must be at
-    least original_count in order to perform recovery.
-
-    Returns Leopard_Success on success.
-    Returns other values on errors.
-*/
-LEO_EXPORT LeopardResult leo_decode(
-    uint64_t buffer_bytes,                    // Number of bytes in each data buffer
-    unsigned original_count,                  // Number of original_data[] buffer pointers
-    unsigned recovery_count,                  // Number of recovery_data[] buffer pointers
-    unsigned work_count,                      // Number of buffer pointers in work_data[]
-    const void* const * const original_data,  // Array of original data buffers
-    const void* const * const recovery_data,  // Array of recovery data buffers
-    void** work_data);                        // Array of work data buffers
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif // CAT_LEOPARD_RS_H
diff --git a/windows/src/libpar3.c b/windows/src/libpar3.c
deleted file mode 100644
index ceeb620..0000000
--- a/windows/src/libpar3.c
+++ /dev/null
@@ -1,1244 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _stat64 stat
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#ifdef __linux__
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <strings.h>
-#define _strnicmp strncasecmp
-#define _stricmp strcasecmp
-
-#elif _WIN32
-
-// MSVC headers
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <direct.h>
-#include <io.h>
-#endif
-
-#include "libpar3.h"
-#include "common.h"
-
-
-#ifdef __linux__
-#elif _WIN32
-
-// recursive search into sub-directories
-static int path_search_recursive(PAR3_CTX *par3_ctx, char *sub_dir)
-{
-	char new_dir[_MAX_PATH * 2];
-	int ret;
-	size_t dir_len;
-
-	// MSVC
-	struct _finddatai64_t c_file;
-	intptr_t handle;
-
-	//printf("recursive search \"%s\"\n", sub_dir);
-	dir_len = strlen(sub_dir);
-	memcpy(new_dir, sub_dir, dir_len);
-	new_dir[dir_len] = '/';
-	dir_len++;
-	new_dir[dir_len] = 0;
-
-	handle = _findfirst64("*", &c_file);
-	if (handle != (intptr_t) -1){
-		do {
-			// ignore "." or ".."
-			if ( (strcmp(c_file.name, ".") == 0) || (strcmp(c_file.name, "..") == 0) )
-				continue;
-			// ignore hidden or system files
-			if ( ((c_file.attrib & _A_HIDDEN) != 0) || ((c_file.attrib & _A_SYSTEM) != 0) )
-				continue;
-
-			// add relative path to the found filename
-			strcpy(new_dir + dir_len, c_file.name);
-			//printf("found = \"%s\"\n", new_dir);
-			if (strlen(new_dir) >= _MAX_PATH){
-				printf("Found file path is too long \"%s\"\n", new_dir);
-				_findclose(handle);
-				return RET_FILE_IO_ERROR;
-			}
-
-			if ((c_file.attrib & _A_SUBDIR) == 0){	// when the name is a file
-
-				// check name in list, and ignore if exist
-				if (namez_search(par3_ctx->input_file_name, par3_ctx->input_file_name_len, new_dir) != NULL)
-					continue;
-
-				// add found filename with relative path
-				if ( namez_add(&(par3_ctx->input_file_name), &(par3_ctx->input_file_name_len), &(par3_ctx->input_file_name_max), new_dir) != 0){
-					_findclose(handle);
-					return RET_MEMORY_ERROR;
-				}
-
-			} else {	// recursive search is enabled
-
-				// check name in list, and ignore if exist
-				if (namez_search(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len, new_dir) != NULL)
-					continue;
-
-				// add found filename with relative path
-				if ( namez_add(&(par3_ctx->input_dir_name), &(par3_ctx->input_dir_name_len), &(par3_ctx->input_dir_name_max), new_dir) != 0){
-					_findclose(handle);
-					return RET_MEMORY_ERROR;
-				}
-
-				// goto inner directory
-				if (_chdir(c_file.name) != 0){
-					perror("Failed to go sub directory");
-					return RET_FILE_IO_ERROR;
-				}
-
-				// try to search inner directory
-				ret = path_search_recursive(par3_ctx, new_dir);
-				if (ret != 0){
-					_findclose(handle);
-					return ret;
-				}
-
-				// return to parent (this) directory
-				if (_chdir("..") != 0){
-					perror("Failed to return parent directory");
-					return RET_FILE_IO_ERROR;
-				}
-			}
-
-		} while( _findnext64( handle, &c_file ) == 0 );
-
-		_findclose(handle);
-	}
-
-	return 0;
-}
-
-// match_path may be relative path from current working directory
-int path_search(PAR3_CTX *par3_ctx, char *match_path, int flag_recursive)
-{
-	char *tmp_p, *match_name;
-	char cur_dir[_MAX_PATH], new_dir[_MAX_PATH * 2];
-	int ret;
-	size_t dir_len, len, base_len;
-
-	// MSVC
-	struct _finddatai64_t c_file;
-	intptr_t handle;
-
-	// when match_path includes directory, change to the directory at first
-	tmp_p = strrchr(match_path, '/');
-	if (tmp_p != NULL){
-		match_name = tmp_p + 1;
-
-		// store current working directory, and will resume later
-		tmp_p = _getcwd(cur_dir, _MAX_PATH);
-		if (tmp_p == NULL){
-			perror("Failed to get current working directory");
-			return RET_FILE_IO_ERROR;
-		}
-		//printf("cur_dir = \"%s\"\n", cur_dir);
-
-		// directory may be an absolute path belong to current working directory
-		ret = get_absolute_path(new_dir, cur_dir, _MAX_PATH);
-		if (ret != 0){
-			printf("Failed to convert current working directory to absolute path\n");
-			return RET_FILE_IO_ERROR;
-		}
-		//printf("absolute = \"%s\"\n", new_dir);
-		dir_len = strlen(new_dir);
-		if (_strnicmp(new_dir, match_path, dir_len) == 0){
-			len = (size_t)(match_name - 1 - match_path - dir_len);
-			//printf("dir_len = %zd, len = %zd\n", dir_len, len);
-			if (len <= 1){
-				len = 0;
-			} else {	// copy sub-directries
-				len--;
-				memcpy(new_dir + 2, match_path + dir_len + 1, len);
-			}
-		} else {
-			// directory may be a relative path from base-path
-			len = (size_t)(match_name - 1 - match_path);
-			memcpy(new_dir + 2, match_path, len);
-		}
-		new_dir[0] = '.';
-		new_dir[1] = '/';
-		new_dir[2 + len] = 0;
-		//printf("new_dir = \"%s\"\n", new_dir);
-
-		// check the sub-directory was stored already
-		if (len > 0){
-			ret = path_search(par3_ctx, new_dir + 2, 0);
-			if (ret != 0){
-				printf("Failed to test sub-directories\n");
-				return RET_FILE_IO_ERROR;
-			}
-		}
-
-		// move to the sub directory
-		if (_chdir(new_dir) != 0){
-			perror("Failed to change working directory");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// get the new working directory
-		tmp_p = _getcwd(new_dir, _MAX_PATH);
-		if (tmp_p == NULL){
-			perror("Failed to get new working directory");
-			return RET_FILE_IO_ERROR;
-		}
-		//printf("new_dir = \"%s\"\n", new_dir);
-
-		// check the directory is a child
-		base_len = strlen(cur_dir);
-		if (memcmp(cur_dir, new_dir, base_len) != 0){	// relative path is out side
-			// return to original working directory
-			if (_chdir(cur_dir) != 0){
-				perror("Failed to resume working directory");
-				return RET_FILE_IO_ERROR;
-			}
-			printf("Ignoring out of base-path input file: %s\n", match_path);
-			return RET_FILE_IO_ERROR;
-		}
-		base_len++;	// add the last "/"
-
-		// replace directory mark from Windows OS style "\" to UNIX style "/"
-		tmp_p = strchr(new_dir + base_len, '\\');
-		while (tmp_p != NULL){
-			tmp_p[0] = '/';
-			tmp_p = strchr(tmp_p, '\\');
-		}
-		strcat(new_dir, "/");
-		//printf("dir path = \"%s\"\n", new_dir);
-
-		// check case for sensitive system
-		tmp_p = strchr(new_dir + base_len, '/');
-		while (tmp_p != NULL){
-			tmp_p[0] = 0;
-			//printf("path component = \"%s\"\n", new_dir);
-			handle = _findfirst64(new_dir, &c_file);
-			if (handle != (intptr_t) -1){
-				// If case is different, use the original case.
-				//printf("found component = \"%s\"\n", c_file.name);
-				len = strlen(c_file.name);
-				if (strcmp(tmp_p - len, c_file.name) != 0)
-					strcpy(tmp_p - len, c_file.name);
-				_findclose(handle);
-			}
-			tmp_p[0] = '/';
-			tmp_p = strchr(tmp_p + 1, '/');
-		}
-
-		// get the relative path
-		dir_len = strlen(new_dir) - base_len;
-		memmove(new_dir, new_dir + base_len, dir_len + 1);	// copy path, which inlcudes the last null string
-		//printf("relative path = \"%s\"\n", new_dir);
-		//printf("finding name  = \"%s\"\n", match_name);
-
-	} else {
-		match_name = match_path;
-		dir_len = 0;
-	}
-
-	handle = _findfirst64(match_name, &c_file);
-	if (handle != (intptr_t) -1){
-		do {
-			// ignore "." or ".."
-			if ( (strcmp(c_file.name, ".") == 0) || (strcmp(c_file.name, "..") == 0) )
-				continue;
-			// ignore hidden or system files
-			if ( ((c_file.attrib & _A_HIDDEN) != 0) || ((c_file.attrib & _A_SYSTEM) != 0) )
-				continue;
-
-			// found filename may different case from the specified name
-			// add relative path to the found filename
-			strcpy(new_dir + dir_len, c_file.name);
-			//printf("found = \"%s\"\n", new_dir);
-			if (strlen(new_dir) >= _MAX_PATH){
-				printf("Found file path is too long \"%s\"\n", new_dir);
-				_findclose(handle);
-				return RET_FILE_IO_ERROR;
-			}
-
-			if ((c_file.attrib & _A_SUBDIR) == 0){	// When the name is file
-
-				// check name in list, and ignore if exist
-				if (namez_search(par3_ctx->input_file_name, par3_ctx->input_file_name_len, new_dir) != NULL)
-					continue;
-
-				// add found filename with relative path
-				if ( namez_add(&(par3_ctx->input_file_name), &(par3_ctx->input_file_name_len), &(par3_ctx->input_file_name_max), new_dir) != 0){
-					_findclose(handle);
-					return RET_MEMORY_ERROR;
-				}
-
-			} else if (flag_recursive == 'R'){	// When the name is a directory and recursive search is enabled
-
-				// check name in list, and ignore if exist
-				if (namez_search(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len, new_dir) != NULL)
-					continue;
-
-				// add found filename with relative path
-				if ( namez_add(&(par3_ctx->input_dir_name), &(par3_ctx->input_dir_name_len), &(par3_ctx->input_dir_name_max), new_dir) != 0){
-					_findclose(handle);
-					return RET_MEMORY_ERROR;
-				}
-
-				// goto inner directory
-				if (_chdir(c_file.name) != 0){
-					perror("Failed to go sub directory");
-					return RET_FILE_IO_ERROR;
-				}
-
-				// try to search inner directory
-				ret = path_search_recursive(par3_ctx, new_dir);
-				if (ret != 0){
-					_findclose(handle);
-					return ret;
-				}
-
-				// return to parent (this) directory
-				if (_chdir("..") != 0){
-					perror("Failed to return parent directory");
-					return RET_FILE_IO_ERROR;
-				}
-
-			} else {	// When the name is just a directory
-
-				// check name in list, and ignore if exist
-				if (namez_search(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len, new_dir) != NULL)
-					continue;
-
-				// add found filename with relative path
-				if ( namez_add(&(par3_ctx->input_dir_name), &(par3_ctx->input_dir_name_len), &(par3_ctx->input_dir_name_max), new_dir) != 0){
-					_findclose(handle);
-					return RET_MEMORY_ERROR;
-				}
-
-			}
-
-		} while( _findnext64( handle, &c_file ) == 0 );
-
-		_findclose(handle);
-	}
-
-	// resume original working directory
-	if (match_name != match_path){
-		if (_chdir(cur_dir) != 0){
-			perror("Failed to resume working directory");
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	return 0;
-}
-
-// Searching extra files are file only.
-// match_path may be relative path from current working directory
-int extra_search(PAR3_CTX *par3_ctx, char *match_path)
-{
-	char *tmp_p, *match_name;
-	char cur_dir[_MAX_PATH], new_dir[_MAX_PATH * 2];
-	size_t dir_len, len, base_len;
-
-	// MSVC
-	struct _finddatai64_t c_file;
-	intptr_t handle;
-
-	// when match_path includes directory, change to the directory at first
-	tmp_p = strrchr(match_path, '/');
-	if (tmp_p != NULL){
-		match_name = tmp_p + 1;
-
-		// directory may be a relative path from base-path
-		len = (size_t)(tmp_p - match_path);
-		memcpy(new_dir + 2, match_path, len);
-		new_dir[0] = '.';
-		new_dir[1] = '/';
-		new_dir[2 + len] = 0;
-		//printf("new_dir = \"%s\"\n", new_dir);
-
-		// store current working directory, and will resume later
-		tmp_p = _getcwd(cur_dir, _MAX_PATH);
-		if (tmp_p == NULL){
-			perror("Failed to get current working directory");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// move to the sub directory
-		if (_chdir(new_dir) != 0){
-			perror("Failed to change working directory");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// get the new working directory
-		tmp_p = _getcwd(new_dir, _MAX_PATH);
-		if (tmp_p == NULL){
-			perror("Failed to get new working directory");
-			return RET_FILE_IO_ERROR;
-		}
-		//printf("new_dir = \"%s\"\n", new_dir);
-
-		// check the directory is a child
-		base_len = strlen(cur_dir);
-		if (memcmp(cur_dir, new_dir, base_len) != 0){	// relative path is out side
-			// return to original working directory
-			if (_chdir(cur_dir) != 0){
-				perror("Failed to resume working directory");
-				return 6;
-			}
-			printf("Ignoring out of base-path extra file: %s\n", match_path);
-			return RET_FILE_IO_ERROR;
-		}
-		base_len++;	// add the last "/"
-
-		// replace directory mark from Windows OS style "\" to UNIX style "/"
-		tmp_p = strchr(new_dir, '\\');
-		while (tmp_p != NULL){
-			tmp_p[0] = '/';
-			tmp_p = strchr(tmp_p, '\\');
-		}
-		strcat(new_dir, "/");
-		//printf("dir path = \"%s\"\n", new_dir);
-
-		// check case for sensitive system
-		tmp_p = strchr(new_dir + base_len, '/');
-		while (tmp_p != NULL){
-			tmp_p[0] = 0;
-			//printf("path component = \"%s\"\n", new_dir);
-			handle = _findfirst64(new_dir, &c_file);
-			if (handle != (intptr_t) -1){
-				// If case is different, use the original case.
-				//printf("found component = \"%s\"\n", c_file.name);
-				len = strlen(c_file.name);
-				if (strcmp(tmp_p - len, c_file.name) != 0)
-					strcpy(tmp_p - len, c_file.name);
-				_findclose(handle);
-			}
-			tmp_p[0] = '/';
-			tmp_p = strchr(tmp_p + 1, '/');
-		}
-
-		// Extra files use absolute path, too.
-		if (par3_ctx->absolute_path != 0){
-			dir_len = strlen(new_dir);
-
-			// Options -abs and -ABS are different.
-			if (par3_ctx->absolute_path != 'A'){
-				// Remove drive letter
-				if ( (new_dir[1] == ':') && (new_dir[0] >= 'A') && (new_dir[0] <= 'Z') ){
-					dir_len -= 2;
-					memmove(new_dir, new_dir + 2, dir_len + 1);
-				}
-			}
-			//printf("asolute path = \"%s\"\n", new_dir);
-
-		} else {
-			// get the relative path
-			dir_len = strlen(new_dir) - base_len;
-			memmove(new_dir, new_dir + base_len, dir_len + 1);	// copy path, which inlcudes the last null string
-			//printf("relative path = \"%s\"\n", new_dir);
-		}
-
-	} else {
-		match_name = match_path;
-		dir_len = 0;
-
-		// Extra files use absolute path, too.
-		if (par3_ctx->absolute_path != 0){
-			// Use current directory as base path.
-			tmp_p = _getcwd(new_dir, _MAX_PATH);
-			if (tmp_p == NULL){
-				perror("Failed to get current working directory");
-				return RET_FILE_IO_ERROR;
-			}
-
-			// replace directory mark from Windows OS style "\" to UNIX style "/"
-			tmp_p = strchr(new_dir, '\\');
-			while (tmp_p != NULL){
-				tmp_p[0] = '/';
-				tmp_p = strchr(tmp_p, '\\');
-			}
-			strcat(new_dir, "/");
-			dir_len = strlen(new_dir);
-
-			// Options -abs and -ABS are different.
-			if (par3_ctx->absolute_path != 'A'){
-				// Remove drive letter
-				if ( (new_dir[1] == ':') && (new_dir[0] >= 'A') && (new_dir[0] <= 'Z') ){
-					dir_len -= 2;
-					memmove(new_dir, new_dir + 2, dir_len + 1);
-				}
-			}
-			//printf("asolute path = \"%s\"\n", new_dir);
-		}
-	}
-
-	//printf("extra file search \"%s\"\n", match_name);
-	handle = _findfirst64(match_name, &c_file);
-	if (handle != (intptr_t) -1){
-		do {
-			// ignore "." or ".."
-			if ( (strcmp(c_file.name, ".") == 0) || (strcmp(c_file.name, "..") == 0) )
-				continue;
-			// ignore hidden or system files
-			if ( ((c_file.attrib & _A_HIDDEN) != 0) || ((c_file.attrib & _A_SYSTEM) != 0) || ((c_file.attrib & _A_SUBDIR) != 0) )
-				continue;
-
-			// found filename may different case from the specified name
-			// add relative path to the found filename
-			strcpy(new_dir + dir_len, c_file.name);
-			//printf("found = \"%s\"\n", new_dir);
-			if (strlen(new_dir) >= _MAX_PATH){
-				printf("Found file path is too long \"%s\"\n", new_dir);
-				_findclose(handle);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// check name in list, and ignore if exist
-			if (namez_search(par3_ctx->extra_file_name, par3_ctx->extra_file_name_len, new_dir) != NULL)
-				continue;
-
-			// add found filename with relative path
-			if ( namez_add(&(par3_ctx->extra_file_name), &(par3_ctx->extra_file_name_len), &(par3_ctx->extra_file_name_max), new_dir) != 0){
-				_findclose(handle);
-				return RET_MEMORY_ERROR;
-			}
-
-		} while( _findnext64( handle, &c_file ) == 0 );
-
-		_findclose(handle);
-	}
-
-	// resume original working directory
-	if (match_name != match_path){
-		if (_chdir(cur_dir) != 0){
-			perror("Failed to resume working directory");
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	return 0;
-}
-
-#endif
-
-
-// get information of input files
-int get_file_status(PAR3_CTX *par3_ctx)
-{
-	char *list_name;
-	int ret;
-	uint32_t num;
-	size_t len;
-	uint64_t file_size;
-	struct _stat64 stat_buf;
-	PAR3_FILE_CTX *file_p;
-
-	// Decrease memory for file and directory names.
-	if (par3_ctx->input_file_name_len < par3_ctx->input_file_name_max){
-		//printf("input_file_name_len = %zu, input_file_name_max = %zu\n", par3_ctx->input_file_name_len, par3_ctx->input_file_name_max);
-		list_name = realloc(par3_ctx->input_file_name, par3_ctx->input_file_name_len);
-		if (list_name == NULL){
-			perror("Failed to allocate memory for file name");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->input_file_name = list_name;
-		par3_ctx->input_file_name_max = par3_ctx->input_file_name_len;
-	}
-	if (par3_ctx->input_dir_name_len < par3_ctx->input_dir_name_max){
-		//printf("input_dir_name_len = %zu, input_dir_name_max = %zu\n", par3_ctx->input_dir_name_len, par3_ctx->input_dir_name_max);
-		list_name = realloc(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len);
-		if (list_name == NULL){
-			perror("Failed to allocate memory for directory name");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->input_dir_name = list_name;
-		par3_ctx->input_dir_name_max = par3_ctx->input_dir_name_len;
-	}
-
-	// Allocate directory list at here.
-	num = par3_ctx->input_dir_count;
-	if (num > 0){
-		PAR3_DIR_CTX *dir_p;
-
-		dir_p = malloc(sizeof(PAR3_DIR_CTX) * num);
-		if (dir_p == NULL){
-			perror("Failed to allocate memory for input directory");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->input_dir_list = dir_p;
-
-		list_name = par3_ctx->input_dir_name;
-		while (num > 0){
-			dir_p->name = list_name;	// pointer to the directory name
-
-			len = strlen(list_name);
-			list_name += len + 1;
-
-			dir_p++;
-			num--;
-		}
-	}
-
-	num = par3_ctx->input_file_count;
-	if (num == 0)
-		return 0;
-
-	file_p = malloc(sizeof(PAR3_FILE_CTX) * num);
-	if (file_p == NULL){
-		perror("Failed to allocate memory for input file");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->input_file_list = file_p;
-
-	list_name = par3_ctx->input_file_name;
-	par3_ctx->total_file_size = 0;
-	par3_ctx->max_file_size = 0;
-	while (num > 0){
-		ret = _stat64(list_name, &stat_buf);
-		if (ret != 0){
-			printf("Failed to get status information of \"%s\"\n", list_name);
-			return RET_FILE_IO_ERROR;
-		}
-		file_size = stat_buf.st_size;
-		//printf("st_mode = %04x \"%s\"\n", stat_buf.st_mode, list_name);
-
-		file_p->name = list_name;	// pointer to the file name
-		file_p->size = file_size;	// 64-bit unsigned integer
-		file_p->crc = 0;
-		file_p->state = 0;
-
-		par3_ctx->total_file_size += file_size;
-		if (par3_ctx->max_file_size < file_size)
-			par3_ctx->max_file_size = file_size;
-
-		len = strlen(list_name);
-		list_name += len + 1;
-		file_p++;
-		num--;
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		printf("Total file size = %"PRIu64"\n", par3_ctx->total_file_size);
-		printf("Max file size = %"PRIu64"\n", par3_ctx->max_file_size);
-	}
-
-	return 0;
-}
-
-// suggest a block size for given input files
-// block count = block size * 1%
-uint64_t suggest_block_size(PAR3_CTX *par3_ctx)
-{
-	uint64_t block_size, block_count;
-	long double f;
-
-	// If every input files are smaller than 40 bytes, block size will be 40.
-	if (par3_ctx->max_file_size <= 40)
-		return 40;
-
-	// Let block count to be 1% of block size.
-	// total file size = block size * block count
-	// total file size = block size * block size * 1%
-	// total file size * 100 = block size * block size
-	// block size = root(total file size) * 10
-	f = (long double)(par3_ctx->total_file_size);
-	f = sqrtl(f) * 10;
-	block_size = (uint64_t)f;
-
-	// Block size should not become larger than the max input file.
-	if (block_size > par3_ctx->max_file_size)
-		block_size = par3_ctx->max_file_size;
-
-	// Block size should be larger than 8 bytes.
-	if (block_size < 8)
-		block_size = 8;
-
-	// Block size is good to be power of 2.
-	block_count = 8;
-	while (block_count * 2 <= block_size)
-		block_count *= 2;
-	//printf("block_size = %"PRIu64", power of 2 = %"PRIu64"\n", block_size, block_count);
-	block_size = block_count;
-
-	// test possible number of blocks
-	block_count = calculate_block_count(par3_ctx, block_size);
-	//printf("1st block_count = %"PRIu64"\n", block_count);
-	if (block_count > 128){	// If range is 16-bit Reed Solomon Codes, more block count is possible.
-		if (block_count <= 1000){	// When there are too few blocks
-			block_size /= 2;
-			if (block_size < 40)
-				block_size = 40;	// The miminum block size will be 40 bytes.
-		}
-	}
-	while (block_count > 32768){	// When there are too many blocks
-		block_size *= 2;
-		block_count = calculate_block_count(par3_ctx, block_size);
-	}
-	//printf("2nd block_count = %"PRIu64"\n", block_count);
-
-	// If total number of input blocks is equal or less than 128,
-	// PAR3 uses 8-bit Reed-Solomon Codes.
-	// Or else, PAR3 uses 16-bit Reed-Solomon Codes for 129 or more input blocks.
-	if ((block_count > 128) && (block_size & 1)){
-		// Block size must be multiple of 2 for 16-bit Reed-Solomon Codes.
-		block_size += 1;
-	}
-
-	return block_size;
-}
-
-// try to calculate number of blocks for given input file and block size
-uint64_t calculate_block_count(PAR3_CTX *par3_ctx, uint64_t block_size)
-{
-	uint32_t num;
-	uint64_t file_size, block_count, full_count, tail_size;
-	PAR3_FILE_CTX *file_p;
-
-	num = par3_ctx->input_file_count;
-	if (num <= 0)
-		return 0;
-
-	file_p = par3_ctx->input_file_list;
-
-	block_count = 0;
-	while (num > 0){
-		file_size = file_p->size;
-		if (file_size > 0){
-			// how many full size block in a file
-			full_count = file_size / block_size;
-			block_count += full_count;
-
-			// if tail chunk size is equal or larger than 40 bytes, it will make block
-			tail_size = file_size - (block_size * full_count);
-			if (tail_size >= 40)
-				block_count++;
-		}
-
-		file_p++;
-		num--;
-	}
-
-	return block_count;
-}
-
-// Comparison functions
- // Sort files by tail size for tail packing.
-static int compare_tail_size( const void *arg1, const void *arg2 )
-{
-	PAR3_FILE_CTX *file1_p, *file2_p;
-
-	file1_p = ( PAR3_FILE_CTX * ) arg1;
-	file2_p = ( PAR3_FILE_CTX * ) arg2;
-
-	// Move long tail size to the former.
-	if (file1_p->chk[0] < file2_p->chk[0])
-		return 1;
-	if (file1_p->chk[0] > file2_p->chk[0])
-		return -1;
-
-	// Move long file size to the former.
-	if (file1_p->size < file2_p->size)
-		return 1;
-	if (file1_p->size > file2_p->size)
-		return -1;
-
-	return strcmp( file1_p->name, file2_p->name );
-}
-
-// Move directory with children to the former.
-// Move file or directory without children to the latter.
-static int compare_directory( const void *arg1, const void *arg2 )
-{
-	PAR3_DIR_CTX *dir1_p, *dir2_p;
-	char *str1, *str2, *dir1, *dir2;
-	int ret;
-
-	dir1_p = ( PAR3_DIR_CTX * ) arg1;
-	dir2_p = ( PAR3_DIR_CTX * ) arg2;
-	str1 = dir1_p->name;
-	str2 = dir2_p->name;
-
-	while(1) {
-		// check directory
-		dir1 = strchr(str1, '/');
-		dir2 = strchr(str2, '/');
-
-		if (dir1 == NULL){
-			if (dir2 == NULL){	// when both names don't have directory
-				// just compare as name
-				return strcmp(str1, str2);
-			} else {
-				// when 2nd name has directory and 1st name has not
-				dir2[0] = 0;
-				ret = strcmp(str1, str2);
-				dir2[0] = '/';
-				if (ret != 0)
-					return ret;
-				return 1;
-			}
-		} else {
-			if (dir2 == NULL){
-				// when 1st name has directory and 2nd name has not
-				dir1[0] = 0;
-				ret = strcmp(str1, str2);
-				dir1[0] = '/';
-				if (ret != 0)
-					return ret;
-				return -1;
-			} else {	// when both names have directory
-				// compare sub-directory
-				dir1[0] = 0;
-				dir2[0] = 0;
-				ret = strcmp(str1, str2);
-				dir1[0] = '/';
-				dir2[0] = '/';
-				if (ret != 0)
-					return ret;
-
-				// goto child directory
-				str1 = dir1 + 1;
-				str2 = dir2 + 1;
-			}
-		}
-	}
-
-	return ret;
-}
-
-// sort input files for efficient tail packing.
-int sort_input_set(PAR3_CTX *par3_ctx)
-{
-	uint32_t num;
-
-	num = par3_ctx->input_file_count;
-	if (num > 0){
-		PAR3_FILE_CTX *file_p;
-		uint64_t block_size, file_size, tail_size;
-
-		// set tail size of each file temporary
-		block_size = par3_ctx->block_size;
-		file_p = par3_ctx->input_file_list;
-		while (num > 0){
-			file_size = file_p->size;
-			if (file_size > 0){
-				tail_size = file_size % block_size;
-				file_p->chk[0] = tail_size;
-			} else {
-				file_p->chk[0] = 0;
-			}
-			//printf("file size = %"PRIu64", tail size = %"PRIu64"\n", file_size, tail_size);
-
-			file_p++;
-			num--;
-		}
-
-		num = par3_ctx->input_file_count;
-		file_p = par3_ctx->input_file_list;
-
-		if (num > 1){
-			// quick sort
-			qsort( (void *)file_p, (size_t)num, sizeof(PAR3_FILE_CTX), compare_tail_size );
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			while (num > 0){
-				printf("input file = \"%s\" %"PRIu64" / %"PRIu64"\n", file_p->name, file_p->chk[0], file_p->size);
-
-				file_p++;
-				num--;
-			}
-			printf("\n");
-		}
-	}
-
-	num = par3_ctx->input_dir_count;
-	if (num > 0){
-		PAR3_DIR_CTX *dir_p;
-
-		dir_p = par3_ctx->input_dir_list;
-
-		if (num > 1){
-			// quick sort
-			qsort( (void *)dir_p, (size_t)num, sizeof(PAR3_DIR_CTX), compare_directory );
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			while (num > 0){
-				printf("input dir  = \"%s\"\n", dir_p->name);
-
-				dir_p++;
-				num--;
-			}
-			printf("\n");
-		}
-	}
-
-	return 0;
-}
-
-
-#ifdef __linux__
-#elif _WIN32
-
-// search other par files from base filename
-int par_search(PAR3_CTX *par3_ctx, char *base_name, int flag_other)
-{
-	char find_path[_MAX_PATH], *list_name;
-	int file_count;
-	size_t dir_len, len, off, list_len;
-	uint64_t max_file_size;
-
-	// MSVC
-	struct _finddatai64_t c_file;
-	intptr_t handle;
-
-	file_count = 0;
-	max_file_size = 0;
-	strcpy(find_path, base_name);
-
-	// get length of directory part
-	dir_len = offset_file_name(find_path) - find_path;
-	//printf("dir_len = %zu\n", dir_len);
-
-	handle = _findfirst64(find_path, &c_file);
-	if (handle != (intptr_t) -1){
-		strcpy(find_path + dir_len, c_file.name);
-		//printf("found = \"%s\", size = %"PRId64"\n", find_path, c_file.size);
-		if (max_file_size < (uint64_t)c_file.size)
-			max_file_size = c_file.size;
-		file_count++;
-
-		// add found filename with absolute path
-		if ( namez_add(&(par3_ctx->par_file_name), &(par3_ctx->par_file_name_len), &(par3_ctx->par_file_name_max), find_path) != 0){
-			_findclose(handle);
-			return RET_MEMORY_ERROR;
-		}
-
-		_findclose(handle);
-	}
-
-	if (flag_other != 0){	// search other files
-		// "something.*.par3" cannot find "something.par3".
-		// "something*.par3" may find "something_different.par3".
-		// So, I use "something.*par3" for matching.
-
-		// something.par3 -> something.*par3
-		len = strlen(find_path);
-		// remove file extension
-		if (_stricmp(find_path + len - 5, ".par3") == 0){
-			find_path[len - 5] = 0;
-			len -= 5;
-		}
-		// remove ".vol#+#" or ".part#+#"
-		while (len > 0){
-			if (find_path[len] == '.'){
-				if ( (_strnicmp(find_path + len, ".vol", 4) == 0) || (_strnicmp(find_path + len, ".part", 5) == 0) )
-					find_path[len] = 0;
-				break;
-			}
-			len--;
-		}
-		// add matching words
-		strcat(find_path + len, ".*par3");
-		//printf("find path = \"%s\"\n", find_path);
-
-		handle = _findfirst64(find_path, &c_file);
-		if (handle != (intptr_t) -1){
-			do {
-				// ignore hidden or system files or directory
-				if ( ((c_file.attrib & _A_HIDDEN) != 0) || ((c_file.attrib & _A_SYSTEM) != 0) || ((c_file.attrib & _A_SUBDIR) != 0) )
-					continue;
-
-				// add absolute path to the found filename
-				if (dir_len + strlen(c_file.name) >= _MAX_PATH){
-					printf("Found file path is too long \"%s\"\n", c_file.name);
-					_findclose(handle);
-					return RET_FILE_IO_ERROR;
-				}
-				strcpy(find_path + dir_len, c_file.name);
-
-				// check name in list, and ignore if exist
-				if (namez_search(par3_ctx->par_file_name, par3_ctx->par_file_name_len, find_path) != NULL)
-					continue;
-
-				//printf("found = \"%s\", size = %"PRId64"\n", find_path, c_file.size);
-				if (max_file_size < (uint64_t)c_file.size)
-					max_file_size = c_file.size;
-				file_count++;
-
-				// add found filename with absolute path
-				if ( namez_add(&(par3_ctx->par_file_name), &(par3_ctx->par_file_name_len), &(par3_ctx->par_file_name_max), find_path) != 0){
-					_findclose(handle);
-					return RET_MEMORY_ERROR;
-				}
-
-			} while( _findnext64( handle, &c_file ) == 0 );
-
-			_findclose(handle);
-		}
-
-		// bring par files from extra files
-		if (par3_ctx->extra_file_name_len > 0){
-			list_name = par3_ctx->extra_file_name;
-			list_len = par3_ctx->extra_file_name_len;
-			off = 0;
-			while (off < list_len){
-				//printf("extra file = \"%s\"\n", list_name + off);
-				len = strlen(list_name + off);
-				if (_stricmp(list_name + off + len - 5, ".par3") == 0){	// move this file to list of par files
-					// Filename may be relative path.
-					if (par3_ctx->base_path[0] != 0){
-						int ret = get_absolute_path(find_path, list_name + off, _MAX_PATH - 8);
-						//printf("file = \"%s\"\n", find_path);
-						if (ret != 0){
-							off += len + 1;
-							continue;
-						}
-					} else {
-						strcpy(find_path, list_name + off);
-					}
-
-					// check name in list, and ignore if exist
-					if (namez_search(par3_ctx->par_file_name, par3_ctx->par_file_name_len, find_path) == NULL){
-						struct _stat64 stat_buf;
-						if (_stat64(find_path, &stat_buf) == 0){
-							//printf("found = \"%s\", size = %"PRId64"\n", find_path, stat_buf.st_size);
-							if (max_file_size < (uint64_t)stat_buf.st_size)
-								max_file_size = stat_buf.st_size;
-							file_count++;
-						}
-
-						// add found filename
-						if ( namez_add(&(par3_ctx->par_file_name), &(par3_ctx->par_file_name_len), &(par3_ctx->par_file_name_max), find_path) != 0){
-							return RET_MEMORY_ERROR;
-						}
-					} else if (par3_ctx->noise_level >= 2){
-						printf("extra file = \"%s\" is listed already.\n", list_name + off);
-					}
-
-					// remove from list of extra files
-					len += 1;	// add the last null string
-					memmove(list_name + off, list_name + off + len, list_len - off - len);
-					list_len -= len;
-
-				} else {	// goto next filename
-					off += len + 1;
-				}
-			}
-			par3_ctx->extra_file_name_len = list_len;
-
-/*
-			// debug output to see extra files after remove
-			off = 0;
-			while (off < list_len){
-				printf("after file = \"%s\"\n", list_name + off);
-				len = strlen(list_name + off);
-				off += len + 1;
-			}
-*/
-			if (list_len == 0){	// When all extra files were par files
-				free(par3_ctx->extra_file_name);
-				par3_ctx->extra_file_name = NULL;
-				par3_ctx->extra_file_name_max = 0;
-			}
-		}
-	}
-
-	// Decrease memory for par files.
-	if (par3_ctx->par_file_name_len < par3_ctx->par_file_name_max){
-		//printf("par_file_name_len = %zu, par_file_name_max = %zu\n", par3_ctx->par_file_name_len, par3_ctx->par_file_name_max);
-		list_name = realloc(par3_ctx->par_file_name, par3_ctx->par_file_name_len);
-		if (list_name == NULL){
-			perror("Failed to allocate memory for file name");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->par_file_name = list_name;
-		par3_ctx->par_file_name_max = par3_ctx->par_file_name_len;
-	}
-
-	// If no file found, error exit.
-	if (par3_ctx->par_file_name_len == 0){
-		printf("PAR file is not found\n");
-		return RET_FILE_IO_ERROR;
-	}
-
-	par3_ctx->max_file_size = max_file_size;
-	if (par3_ctx->noise_level >= 1){
-		printf("Number of PAR file = %d\n", file_count);
-		printf("Max par file size = %"PRIu64"\n", par3_ctx->max_file_size);
-	}
-
-	return 0;
-}
-#endif
-
-
-// This function releases all allocated memory.
-void par3_release(PAR3_CTX *par3_ctx)
-{
-	if (par3_ctx->input_file_name){
-		free(par3_ctx->input_file_name);
-		par3_ctx->input_file_name = NULL;
-		par3_ctx->input_file_name_len = 0;
-		par3_ctx->input_file_name_max = 0;
-	}
-	if (par3_ctx->input_file_list){
-		free(par3_ctx->input_file_list);
-		par3_ctx->input_file_list = NULL;
-		par3_ctx->input_file_count = 0;
-	}
-	if (par3_ctx->input_dir_name){
-		free(par3_ctx->input_dir_name);
-		par3_ctx->input_dir_name = NULL;
-		par3_ctx->input_dir_name_len = 0;
-		par3_ctx->input_dir_name_max = 0;
-	}
-	if (par3_ctx->input_dir_list){
-		free(par3_ctx->input_dir_list);
-		par3_ctx->input_dir_list = NULL;
-		par3_ctx->input_dir_count = 0;
-	}
-	if (par3_ctx->par_file_name){
-		free(par3_ctx->par_file_name);
-		par3_ctx->par_file_name = NULL;
-		par3_ctx->par_file_name_len = 0;
-		par3_ctx->par_file_name_max = 0;
-	}
-
-	if (par3_ctx->chunk_list){
-		free(par3_ctx->chunk_list);
-		par3_ctx->chunk_list = NULL;
-		par3_ctx->chunk_count = 0;
-	}
-	if (par3_ctx->slice_list){
-		free(par3_ctx->slice_list);
-		par3_ctx->slice_list = NULL;
-		par3_ctx->slice_count = 0;
-	}
-	if (par3_ctx->block_list){
-		free(par3_ctx->block_list);
-		par3_ctx->block_list = NULL;
-	}
-
-	if (par3_ctx->block_data){
-		free(par3_ctx->block_data);
-		par3_ctx->block_data = NULL;
-	}
-	if (par3_ctx->work_buf){
-		free(par3_ctx->work_buf);
-		par3_ctx->work_buf = NULL;
-	}
-	if (par3_ctx->crc_list){
-		free(par3_ctx->crc_list);
-		par3_ctx->crc_list = NULL;
-	}
-
-	if (par3_ctx->creator_packet){
-		free(par3_ctx->creator_packet);
-		par3_ctx->creator_packet = NULL;
-		par3_ctx->creator_packet_size = 0;
-		par3_ctx->creator_packet_count = 0;
-	}
-	if (par3_ctx->comment_packet){
-		free(par3_ctx->comment_packet);
-		par3_ctx->comment_packet = NULL;
-		par3_ctx->comment_packet_size = 0;
-		par3_ctx->comment_packet_count = 0;
-	}
-	if (par3_ctx->start_packet){
-		free(par3_ctx->start_packet);
-		par3_ctx->start_packet = NULL;
-		par3_ctx->start_packet_size = 0;
-		par3_ctx->start_packet_count = 0;
-	}
-	if (par3_ctx->matrix_packet){
-		free(par3_ctx->matrix_packet);
-		par3_ctx->matrix_packet = NULL;
-		par3_ctx->matrix_packet_size = 0;
-		par3_ctx->matrix_packet_count = 0;
-	}
-	if (par3_ctx->file_packet){
-		free(par3_ctx->file_packet);
-		par3_ctx->file_packet = NULL;
-		par3_ctx->file_packet_size = 0;
-		par3_ctx->file_packet_count = 0;
-	}
-	if (par3_ctx->dir_packet){
-		free(par3_ctx->dir_packet);
-		par3_ctx->dir_packet = NULL;
-		par3_ctx->dir_packet_size = 0;
-		par3_ctx->dir_packet_count = 0;
-	}
-	if (par3_ctx->root_packet){
-		free(par3_ctx->root_packet);
-		par3_ctx->root_packet = NULL;
-		par3_ctx->root_packet_size = 0;
-		par3_ctx->root_packet_count = 0;
-	}
-	if (par3_ctx->ext_data_packet){
-		free(par3_ctx->ext_data_packet);
-		par3_ctx->ext_data_packet = NULL;
-		par3_ctx->ext_data_packet_size = 0;
-		par3_ctx->ext_data_packet_count = 0;
-	}
-	if (par3_ctx->file_system_packet){
-		free(par3_ctx->file_system_packet);
-		par3_ctx->file_system_packet = NULL;
-		par3_ctx->file_system_packet_size = 0;
-		par3_ctx->file_system_packet_count = 0;
-	}
-	if (par3_ctx->common_packet){
-		free(par3_ctx->common_packet);
-		par3_ctx->common_packet = NULL;
-		par3_ctx->common_packet_size = 0;
-		par3_ctx->common_packet_count = 0;
-	}
-
-	if (par3_ctx->position_list){
-		free(par3_ctx->position_list);
-		par3_ctx->position_list = NULL;
-	}
-	if (par3_ctx->data_packet_list){
-		free(par3_ctx->data_packet_list);
-		par3_ctx->data_packet_list = NULL;
-		par3_ctx->data_packet_count = 0;
-	}
-	if (par3_ctx->recv_packet_list){
-		free(par3_ctx->recv_packet_list);
-		par3_ctx->recv_packet_list = NULL;
-		par3_ctx->recv_packet_count = 0;
-	}
-
-	if (par3_ctx->galois_table){
-		free(par3_ctx->galois_table);
-		par3_ctx->galois_table = NULL;
-	}
-	if (par3_ctx->recv_id_list){
-		free(par3_ctx->recv_id_list);
-		par3_ctx->recv_id_list = NULL;
-	}
-	if (par3_ctx->matrix){
-		free(par3_ctx->matrix);
-		par3_ctx->matrix = NULL;
-	}
-	if (par3_ctx->lost_list){
-		free(par3_ctx->lost_list);
-		par3_ctx->lost_list = NULL;
-	}
-}
-
diff --git a/windows/src/libpar3.h b/windows/src/libpar3.h
deleted file mode 100644
index 516a1d2..0000000
--- a/windows/src/libpar3.h
+++ /dev/null
@@ -1,301 +0,0 @@
-#ifndef __LIBPAR3_H__
-#define __LIBPAR3_H__
-
-
-#if __linux__
-
-#include <linux/limits.h>
-#define _MAX_PATH PATH_MAX
-
-#elif _WIN32
-
-// no windows-specific includes here
-
-#endif
-
-// Return value of par3cmdline (same as par2cmdline)
-#define RET_SUCCESS             0
-
-#define RET_REPAIR_POSSIBLE     1	// Data files are damaged and there is
-									// enough recovery data available to repair them.
-
-#define RET_REPAIR_NOT_POSSIBLE 2	// Data files are damaged and there is insufficient
-									// recovery data available to be able to repair them.
-
-#define RET_INVALID_COMMAND     3	// There was something wrong with the command line arguments
-
-#define RET_INSUFFICIENT_DATA   4	// The PAR3 files did not contain sufficient information
-									// about the data files to be able to verify them.
-
-#define RET_REPAIR_FAILED       5	// Repair completed but the data files still appear to be damaged.
-
-#define RET_FILE_IO_ERROR       6	// An error occurred when accessing files
-#define RET_LOGIC_ERROR         7	// In internal error occurred
-#define RET_MEMORY_ERROR        8	// Out of memory
-
-
-typedef struct {
-	uint64_t size;		// total size of chunk
-	uint64_t block;		// index of first input block holding chunk
-
-	uint64_t tail_crc;		// CRC-64 of first 40 bytes of tail
-	uint8_t tail_hash[16];	// hash of all bytes of tail
-	uint64_t tail_block;	// index of block holding tail
-	uint64_t tail_offset;	// offset of tail inside block
-} PAR3_CHUNK_CTX;
-
-typedef struct {
-	char *name;			// file name
-	uint64_t size;		// file size
-	uint64_t crc;		// CRC-64 of the first 16 KB
-	uint8_t hash[16];	// BLAKE3 hash of the protected data
-
-	uint32_t chunk;		// index of the first chunk
-	uint32_t chunk_num;	// number of chunk descriptions
-	uint64_t slice;		// index of the first slice
-
-	uint64_t chk[2];	// checksum of File Packet
-	int64_t offset;		// offset bytes of this File Packet
-
-	uint32_t state;		// Result of verification (bit flag)
-						// 1 = missing, 2 = damaged
-						// 4 = misnamed, higher bit is (extra_id << 3).
-						// 0x0100 = repaired, 0x0200 = repairable
-						// 0x8000 = not file
-						// 0x10000 = different timestamp
-						// 0x20000 = different permissions
-						// 0x80000000 = Unprotected Chunk Description
-} PAR3_FILE_CTX;
-
-typedef struct {
-	char *name;			// directory name
-	uint64_t chk[2];	// checksum of Directory Packet
-	int64_t offset;		// offset bytes of this Directory Packet
-} PAR3_DIR_CTX;
-
-typedef struct {
-	uint32_t chunk;		// index of belong chunk description
-	uint32_t file;		// index of belong input file
-	int64_t offset;		// offset bytes of slice in belong input file
-	uint64_t size;		// size of slice
-
-	uint64_t block;			// index of input block holding this slice
-	uint64_t tail_offset;	// offset bytes of the tail slice in belong block
-	int64_t next;			// index of next slice in a same block
-
-							// Result of verification
-	char *find_name;		// filename of belong found file
-	int64_t find_offset;	// offset bytes of found slice
-} PAR3_SLICE_CTX;
-
-typedef struct {
-	int64_t slice;		// index of the first slice (in multiple slices)
-	uint64_t size;		// data size in the block
-
-	uint64_t crc;		// CRC-64-ISO
-	uint8_t hash[16];	// BLAKE3 hash
-
-	uint32_t state;	// bit flag: 1 = including full size data, 2 = including tail data
-					// 64 = calculated CRC-64 of used area
-					// Result of verification
-					// 4 = found full data, 8 = found tail data, 16 = found all tails
-					// 64 = found checksum on External Data Packet
-} PAR3_BLOCK_CTX;
-
-typedef struct {
-	uint64_t index;	// index of block
-	uint64_t crc;	// CRC-64 of block
-} PAR3_CMP_CTX;
-
-typedef struct {
-	uint64_t id;		// InputSetID
-	uint8_t root[16];	// checksum from Root packet
-	uint8_t matrix[16];	// checksum from Matrix packet
-
-	uint64_t index;		// index of block
-	char *name;			// name of belong file
-	int64_t offset;		// offset bytes of packet
-} PAR3_PKT_CTX;
-
-typedef struct {
-	uint64_t crc;		// CRC-64 of packet
-	char *name;			// name of belong file
-	int64_t offset;		// offset bytes of packet
-} PAR3_POS_CTX;
-
-typedef struct {
-	// Command-line options
-	int noise_level;
-	int64_t recovery_file_scheme;	// -1= Uniform, -2= Limit, 1~= Limit size
-	char deduplication;
-	char data_packet;
-	char absolute_path;
-	uint32_t file_system;	// Bit flag to store/recover in File System Specific Packets
-							// UNIX Permissions Packet: 1 = mtime, 2 = i_mode
-							// FAT Permissions Packet: 0x10000 = LastWriteTimestamp
-	uint32_t search_limit;	// how long time to slide search (milli second)
-	uint64_t memory_limit;	// how much memory to use (byte)
-	int repetition_limit;	// max repetition of packets in each file
-
-	// For CRC-64 as rolling hash
-	uint64_t window_table[256];		// slide window search for block size
-	uint64_t window_mask;
-	uint64_t window_table40[256];	// slide window search for the first 40-bytes of chunk tails
-	uint64_t window_mask40;
-
-	uint8_t *work_buf;		// Working buffer for temporary usage
-	PAR3_CMP_CTX *crc_list;	// List of CRC-64 for slide window search
-	uint64_t crc_count;		// Number of CRC-64 in the list
-	PAR3_CMP_CTX *tail_list;
-	uint64_t tail_count;
-
-	uint8_t set_id[8];	// InputSetID
-	uint8_t attribute;	// attributes in Root Packet
-	uint8_t gf_size;	// The size of the Galois field in bytes
-
-	int galois_poly;		// The generator polynomial of the Galois field
-	void *galois_table;		// Pointer of tables for (finite) galois field arithmetic
-	uint32_t ecc_method;	// Bit flag: 1 = Reed-Solomon Erasure Codes with Cauchy Matrix
-							//           2 = Erasure Codes with Sparse Random Matrix (no support yet)
-							//           4 = LDPC (no support yet)
-							//           8 = FFT based Reed-Solomon Codes
-							//      0x8000 = Keep all recovery blocks or lost blocks on memory
-
-	uint32_t interleave;	// Number of interleaving (Number of cohorts = this value + 1)
-	uint32_t *lost_list;	// List for lost blocks and recovery blocks for every cohorts
-
-	int *recv_id_list;		// List for index of using recovery blocks
-	void *matrix;
-
-	uint64_t block_size;
-	uint64_t block_count;		// This may be max or possible value at creating.
-	PAR3_BLOCK_CTX *block_list;	// List of block information
-	uint8_t *block_data;
-
-	uint64_t first_recovery_block;
-	uint64_t max_recovery_block;
-	uint64_t recovery_block_count;
-	uint32_t recovery_file_count;
-	uint32_t redundancy_size;	// Lower 8-bit (0~250) is percent, or 251=KB, 252=MB, 253=GB.
-	uint32_t max_redundancy_size;
-
-	char base_path[_MAX_PATH];
-	char par_filename[_MAX_PATH];
-
-	uint64_t total_file_size;
-	uint64_t max_file_size;
-
-	uint32_t input_file_count;
-	PAR3_FILE_CTX *input_file_list;	// List of file information
-	char *input_file_name;			// List of file names
-	size_t input_file_name_len;		// current used size
-	size_t input_file_name_max;		// allocated size on memory
-
-	uint32_t input_dir_count;
-	PAR3_DIR_CTX *input_dir_list;	// List of directory information
-	char *input_dir_name;			// List of directory names
-	size_t input_dir_name_len;		// current used size
-	size_t input_dir_name_max;		// allocated size on memory
-
-	char *par_file_name;			// List of PAR3 file names
-	size_t par_file_name_len;		// current used size
-	size_t par_file_name_max;		// allocated size on memory
-
-//	uint32_t extra_file_count;
-	char *extra_file_name;			// List of extra file names
-	size_t extra_file_name_len;		// current used size
-	size_t extra_file_name_max;		// allocated size on memory
-
-	uint32_t chunk_count;
-	PAR3_CHUNK_CTX *chunk_list;		// List of chunk description
-	uint64_t slice_count;
-	PAR3_SLICE_CTX *slice_list;		// List of input file slice
-
-	uint8_t *creator_packet;		// pointer to Creator Packet
-	size_t creator_packet_size;		// size of Creator Packet
-	uint32_t creator_packet_count;
-	uint8_t *comment_packet;		// pointer to Comment Packet
-	size_t comment_packet_size;		// size of Comment Packet
-	uint32_t comment_packet_count;
-
-	uint8_t *start_packet;			// pointer to Start Packet
-	size_t start_packet_size;		// size of Start Packet
-	uint32_t start_packet_count;
-	uint8_t *matrix_packet;			// pointer to Matrix Packets
-	size_t matrix_packet_size;		// total size of Matrix Packets
-	uint32_t matrix_packet_count;
-	size_t matrix_packet_offset;	// offset of using Matrix Packet for recovery
-
-	uint8_t *file_packet;			// pointer to File Packets
-	size_t file_packet_size;		// total size of File Packets
-	uint32_t file_packet_count;
-	uint8_t *dir_packet;			// pointer to Directory Packets
-	size_t dir_packet_size;			// total size of Directory Packets
-	uint32_t dir_packet_count;
-	uint8_t *root_packet;			// pointer to Root Packet
-	size_t root_packet_size;		// size of Root Packet
-	uint32_t root_packet_count;
-
-	uint8_t *ext_data_packet;		// pointer to External Data Packets
-	size_t ext_data_packet_size;	// total size of External Data Packets
-	uint32_t ext_data_packet_count;
-
-	uint8_t *file_system_packet;	// pointer to File System Specific Packets
-	size_t file_system_packet_size;	// total size of File System Specific Packets
-	uint32_t file_system_packet_count;
-
-	uint8_t *common_packet;			// pointer to duplicated common packets
-	size_t common_packet_size;		// total size of duplicated common packets
-	size_t common_packet_count;
-
-	PAR3_POS_CTX *position_list;	// List of packet position
-	PAR3_PKT_CTX *data_packet_list;	// List of Data Packets
-	uint64_t data_packet_count;
-	PAR3_PKT_CTX *recv_packet_list;	// List of Recovery Data Packets
-	uint64_t recv_packet_count;
-
-} PAR3_CTX;
-
-
-
-// About input files
-int path_search(PAR3_CTX *par3_ctx, char *match_path, int flag_recursive);
-int get_file_status(PAR3_CTX *par3_ctx);
-uint64_t suggest_block_size(PAR3_CTX *par3_ctx);
-uint64_t calculate_block_count(PAR3_CTX *par3_ctx, uint64_t block_size);
-int sort_input_set(PAR3_CTX *par3_ctx);
-
-
-// Add text in Creator Packet or Comment Packet
-int add_creator_text(PAR3_CTX *par3_ctx, char *text);
-int add_comment_text(PAR3_CTX *par3_ctx, char *text);
-
-// For creation
-int par3_trial(PAR3_CTX *par3_ctx, char *temp_path);
-int par3_create(PAR3_CTX *par3_ctx, char *temp_path);
-
-
-// About par files
-int par_search(PAR3_CTX *par3_ctx, char *base_name, int flag_other);
-int extra_search(PAR3_CTX *par3_ctx, char *match_path);
-
-// For verification and repair
-int par3_list(PAR3_CTX *par3_ctx);
-int par3_verify(PAR3_CTX *par3_ctx);
-int par3_repair(PAR3_CTX *par3_ctx, char *temp_path);
-
-
-// For creation after verification
-int par3_extend(PAR3_CTX *par3_ctx, char command_trial, char *temp_path);
-
-
-// Release internal allocated memory
-void par3_release(PAR3_CTX *par3_ctx);
-
-
-// For PAR inside ZIP
-int par3_insert_zip(PAR3_CTX *par3_ctx, char command_trial);
-int par3_delete_zip(PAR3_CTX *par3_ctx);
-
-
-#endif // __LIBPAR3_H__
diff --git a/windows/src/libpar3_create.c b/windows/src/libpar3_create.c
deleted file mode 100644
index a159839..0000000
--- a/windows/src/libpar3_create.c
+++ /dev/null
@@ -1,315 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-#include "map.h"
-#include "packet.h"
-#include "write.h"
-#include "block.h"
-
-
-// add text in Creator Packet
-int add_creator_text(PAR3_CTX *par3_ctx, char *text)
-{
-	uint8_t *tmp_p;
-	size_t len, alloc_size;
-
-	len = strlen(text);
-	if (len == 0)
-		return 0;
-
-	if (par3_ctx->creator_packet == NULL){	// When there is no packet yet, allocate now.
-		alloc_size = 48 + len;
-		par3_ctx->creator_packet = malloc(alloc_size);
-		if (par3_ctx->creator_packet == NULL){
-			perror("Failed to allocate memory for Creator Packet");
-			return RET_MEMORY_ERROR;
-		}
-	} else {	// When there is packet already, add new text to previous text.
-		alloc_size = par3_ctx->creator_packet_size + len;
-		tmp_p = realloc(par3_ctx->creator_packet, alloc_size);
-		if (tmp_p == NULL){
-			perror("Failed to re-allocate memory for Creator Packet");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->creator_packet = tmp_p;
-	}
-	par3_ctx->creator_packet_size = alloc_size;
-	memcpy(par3_ctx->creator_packet + alloc_size - len, text, len);
-	par3_ctx->creator_packet_count = 1;
-
-	return 0;
-}
-
-// add text in Comment Packet
-int add_comment_text(PAR3_CTX *par3_ctx, char *text)
-{
-	uint8_t *tmp_p;
-	size_t len, alloc_size;
-
-	// If text is covered by ", remove them.
-	len = strlen(text);
-	if ( (len > 2) && (text[0] == '"') && (text[len - 1] == '"') ){
-		text++;
-		len -= 2;
-	}
-	if (len == 0)
-		return 0;
-
-	if (par3_ctx->comment_packet == NULL){	// When there is no packet yet, allocate now.
-		alloc_size = 48 + len;
-		par3_ctx->comment_packet = malloc(alloc_size);
-		if (par3_ctx->comment_packet == NULL){
-			perror("Failed to allocate memory for Comment Packet");
-			return RET_MEMORY_ERROR;
-		}
-	} else {	// When there is packet already, add new comment to previous comment.
-		alloc_size = par3_ctx->comment_packet_size + 1 + len;
-		tmp_p = realloc(par3_ctx->comment_packet, alloc_size);
-		if (tmp_p == NULL){
-			perror("Failed to re-allocate memory for Comment Packet");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->comment_packet = tmp_p;
-		tmp_p += par3_ctx->comment_packet_size;
-		tmp_p[0] = '\n';	// Put "\n" between comments.
-	}
-	par3_ctx->comment_packet_size = alloc_size;
-	memcpy(par3_ctx->comment_packet + alloc_size - len, text, len);
-	par3_ctx->comment_packet_count = 1;
-
-	return 0;
-}
-
-
-int par3_trial(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	int ret;
-	uint64_t total_par_size;	// Total size of Index File, Archive Files, and Recovery Files.
-
-	// Load input blocks on memory.
-	if (par3_ctx->block_count == 0){
-		ret = map_chunk_tail(par3_ctx);
-	} else if (par3_ctx->deduplication == '1'){	// Simple deduplication
-		ret = map_input_block(par3_ctx);
-	} else if (par3_ctx->deduplication == '2'){	// Deduplication with slide search
-		ret = map_input_block_slide(par3_ctx);
-	} else {
-		// Because this doesn't read file data, InputSetID will differ.
-		ret = map_input_block_trial(par3_ctx);
-
-		// This is for debug.
-		// When no deduplication, no need to read input files in trial.
-//		ret = map_input_block_simple(par3_ctx);
-	}
-	if (ret != 0)
-		return ret;
-
-	// Call this function before creating Start Packet.
-	ret = calculate_recovery_count(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Creator Packet, Comment Packet, Start Packet
-	ret = make_start_packet(par3_ctx, 1);
-	if (ret != 0)
-		return ret;
-
-	// Only when recovery blocks will be created, make Matrix Packet.
-	if (par3_ctx->recovery_block_count > 0){
-		ret = make_matrix_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	// File Packet, Directory Packet, Root Packet
-	ret = make_file_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// External Data Packet
-	ret = make_ext_data_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Try Index File
-	total_par_size = try_index_file(par3_ctx);
-
-	// Try other PAR3 files
-	if ( (par3_ctx->block_count > 0) && ( (par3_ctx->data_packet != 0) || (par3_ctx->recovery_block_count > 0) ) ){
-		ret = duplicate_common_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Write PAR3 files with input blocks
-		if (par3_ctx->data_packet != 0){
-			ret = try_archive_file(par3_ctx, temp_path, &total_par_size);
-			if (ret != 0)
-				return ret;
-		}
-
-		// Write PAR3 files with recovery blocks
-		if (par3_ctx->recovery_block_count > 0){
-			ret = try_recovery_file(par3_ctx, temp_path, &total_par_size);
-			if (ret != 0)
-				return ret;
-		}
-	}
-
-	// Show efficiency rate
-	if (par3_ctx->noise_level >= -1){
-		double rate1, rate2;
-		// rate1 "File data in Source blocks" = "total size of input file data" / "total size of source blocks"
-		// rate2 "Recovery data in PAR files" = "total size of recovery blocks" / "total size of PAR files"
-		// rate of "Efficiency of PAR files" = rate1 * rate2
-		printf("\nTotal size of PAR files = %"PRIu64"\n", total_par_size);
-		if ( (par3_ctx->block_count == 0) || (total_par_size == 0) ){
-			rate1 = 0;
-			rate2 = 0;
-		} else {
-			// Tiny chunk tails (1~39 bytes) don't consume blocks.
-			// Duplicate data reuses same blocks.
-			// Sum using bytes in every blocks to calculate total file data size.
-			uint64_t block_count, total_data_size;
-			PAR3_BLOCK_CTX *block_p;
-
-			block_count = par3_ctx->block_count;
-			block_p = par3_ctx->block_list;
-			total_data_size = 0;
-			while (block_count > 0){
-				total_data_size += block_p->size;
-				block_p++;
-				block_count--;
-			}
-			//printf("Total file data in input blocks = %"PRIu64"\n", total_data_size);
-
-			rate1 = (double)total_data_size / (double)(par3_ctx->block_size * par3_ctx->block_count);
-			if (par3_ctx->data_packet != 0){	// Archive Files are same as 100% redundancy.
-				rate2 = (double)(total_data_size + par3_ctx->block_size * par3_ctx->recovery_block_count) / (double)total_par_size;
-			} else {
-				rate2 = (double)(par3_ctx->block_size * par3_ctx->recovery_block_count) / (double)total_par_size;
-			}
-		}
-		// Truncate two decimal places (use integer instead of showing double directly)
-		//printf("rate1 = %f, rate2 = %f\n", rate1, rate2);
-		ret = (int)(rate1 * 1000);
-		printf("File data in Source blocks = %d.%d%%\n", ret / 10, ret % 10);
-		ret = (int)(rate2 * 1000);
-		printf("Recovery data in PAR files = %d.%d%%\n", ret / 10, ret % 10);
-		ret = (int)(rate1 * rate2 * 1000);
-		printf("Efficiency of PAR files    = %d.%d%%\n", ret / 10, ret % 10);
-	}
-
-	return 0;
-}
-
-int par3_create(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	int ret;
-
-	// Map input file slices into input blocks.
-	if (par3_ctx->block_count == 0){
-		ret = map_chunk_tail(par3_ctx);
-	} else if (par3_ctx->deduplication == '1'){	// Simple deduplication
-		ret = map_input_block(par3_ctx);
-	} else if (par3_ctx->deduplication == '2'){	// Deduplication with slide search
-		ret = map_input_block_slide(par3_ctx);
-	} else {
-		ret = map_input_block_simple(par3_ctx);
-	}
-	if (ret != 0)
-		return ret;
-
-	// Call this function before creating Start Packet.
-	ret = calculate_recovery_count(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Creator Packet, Comment Packet, Start Packet
-	ret = make_start_packet(par3_ctx, 0);
-	if (ret != 0)
-		return ret;
-
-	// Only when recovery blocks will be created, make Matrix Packet.
-	if (par3_ctx->recovery_block_count > 0){
-		ret = make_matrix_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	// File Packet, Directory Packet, Root Packet
-	ret = make_file_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// External Data Packet
-	ret = make_ext_data_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Write Index File
-	ret = write_index_file(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Write other PAR3 files
-	if ( (par3_ctx->block_count > 0) && ( (par3_ctx->data_packet != 0) || (par3_ctx->recovery_block_count > 0) ) ){
-		ret = duplicate_common_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// When it uses Reed-Solomon Erasure Codes, it tries to keep all recovery blocks on memory.
-		if (par3_ctx->ecc_method & 1){
-			ret = allocate_recovery_block(par3_ctx);
-			if (ret != 0)
-				return ret;
-		}
-
-		// Write PAR3 files with input blocks
-		if (par3_ctx->data_packet != 0){
-			ret = write_archive_file(par3_ctx, temp_path);
-			if (ret != 0)
-				return ret;
-		}
-
-		// If there are enough memory to keep all recovery blocks,
-		// it calculates recovery blocks before writing Recovery Data Packets.
-		if (par3_ctx->ecc_method & 0x8000){
-			ret = create_recovery_block(par3_ctx);
-			if (ret < 0){
-				par3_ctx->ecc_method &= ~0x8000;
-			} else if (ret > 0){
-				return ret;
-			}
-		}
-
-		// Write PAR3 files with recovery blocks
-		if (par3_ctx->recovery_block_count > 0){
-			ret = write_recovery_file(par3_ctx, temp_path);
-			if (ret != 0){
-				//remove_recovery_file(par3_ctx);	// Remove partially created files
-				return ret;
-			}
-		}
-
-		// When recovery blocks were not created yet, calculate and write at here.
-		if ((par3_ctx->ecc_method & 0x8000) == 0){
-			if ( (par3_ctx->ecc_method & 8) && (par3_ctx->interleave > 0) ){
-				// Interleaving is adapted only for FFT based Reed-Solomon Codes.
-				ret = create_recovery_block_cohort(par3_ctx);
-			} else {
-				ret = create_recovery_block_split(par3_ctx);
-			}
-			if (ret != 0){
-				//remove_recovery_file(par3_ctx);	// Remove partially created files
-				return ret;
-			}
-		}
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/libpar3_extra.c b/windows/src/libpar3_extra.c
deleted file mode 100644
index 182e3c4..0000000
--- a/windows/src/libpar3_extra.c
+++ /dev/null
@@ -1,464 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-#include "block.h"
-#include "packet.h"
-#include "read.h"
-#include "verify.h"
-#include "write.h"
-
-
-// Check Matrix Packet in refered PAR file.
-static int check_matrix_packet(PAR3_CTX *par3_ctx)
-{
-	uint8_t *packet_type, *buf;
-	size_t offset, total_size;
-	uint64_t packet_size;
-
-	printf("\n");
-	if (par3_ctx->matrix_packet_count == 0)
-		return 0;
-
-	buf = par3_ctx->matrix_packet;
-	total_size = par3_ctx->matrix_packet_size;
-
-	offset = 0;
-	while (offset + 48 < total_size){
-		memcpy(&packet_size, buf + offset + 24, 8);
-		packet_type = buf + offset + 40;
-
-		// At this time, this supports only one Error Correction Codes at a time.
-		// Return the first found one.
-
-		if (memcmp(packet_type, "PAR CAU\0", 8) == 0){	// Cauchy Matrix Packet
-			uint64_t first_num, last_num, hint_num;
-
-			// Read numbers
-			memcpy(&first_num, buf + offset + 48, 8);
-			memcpy(&last_num, buf + offset + 56, 8);
-			memcpy(&hint_num, buf + offset + 64, 8);
-			if (par3_ctx->noise_level >= 1){
-				printf("Cauchy Matrix Packet:\n");
-				printf("Index of first input block         = %"PRIu64"\n", first_num);
-				printf("Index of last input block plus 1   = %"PRIu64"\n", last_num);
-				printf("hint for number of recovery blocks = %"PRIu64"\n", hint_num);
-				printf("\n");
-			}
-
-			// Return error, if par3cmdline doesn't support the given number.
-			if (first_num != 0){
-				printf("Compatibility issue: Index of first input block\n");
-				return RET_LOGIC_ERROR;
-			}
-			if (last_num != 0){
-				printf("Compatibility issue: Index of last input block\n");
-				return RET_LOGIC_ERROR;
-			}
-
-			// Return error, if read number is different from specified option.
-			if ( (par3_ctx->ecc_method != 0) && (par3_ctx->ecc_method != 1) ){
-				printf("Compatibility issue: Error Correction Codes is different.\n");
-				return RET_INVALID_COMMAND;
-			}
-			if ( (par3_ctx->max_recovery_block != 0) || (par3_ctx->max_redundancy_size != 0) ){
-				printf("Compatibility issue: Max number of recovery blocks.\n");
-				return RET_INVALID_COMMAND;
-			}
-
-			par3_ctx->ecc_method = 1;
-			par3_ctx->max_recovery_block = hint_num;
-			par3_ctx->matrix_packet_offset = offset;
-			return -1;
-
-		} else if (memcmp(packet_type, "PAR FFT\0", 8) == 0){	// FFT Matrix Packet
-			int8_t shift_num;
-			uint32_t extra_num;
-			uint64_t first_num, last_num, max_num;
-
-			// Read numbers
-			memcpy(&first_num, buf + offset + 48, 8);
-			memcpy(&last_num, buf + offset + 56, 8);
-			shift_num = buf[offset + 64];	// convert to signed integer
-			if ( (shift_num >= 0) && (shift_num <= 15) ){
-				max_num = (uint64_t)1 << shift_num;
-			} else {
-				max_num = 32768;
-			}
-			extra_num = 0;
-			if ((packet_size > 65) && (packet_size <= 69)){	// Read 1 ~ 4 bytes of the last field
-				memcpy(&extra_num, buf + offset + 65, packet_size - 65);
-			}
-			if (par3_ctx->noise_level >= 1){
-				printf("FFT Matrix Packet:\n");
-				printf("Index of first input block       = %"PRIu64"\n", first_num);
-				printf("Index of last input block plus 1 = %"PRIu64"\n", last_num);
-				printf("Max number of recovery blocks    = %"PRIu64"\n", max_num);
-				printf("Number of interleaving blocks    = %u\n", extra_num);
-				printf("\n");
-			}
-
-			// Return error, if par3cmdline doesn't support the given number.
-			if (first_num != 0){
-				printf("Compatibility issue: Index of first input block\n");
-				return RET_LOGIC_ERROR;
-			}
-			if (last_num != 0){
-				printf("Compatibility issue: Index of last input block\n");
-				return RET_LOGIC_ERROR;
-			}
-
-			// Return error, if read number is different from specified option.
-			if ( (par3_ctx->ecc_method != 0) && (par3_ctx->ecc_method != 8) ){
-				printf("Compatibility issue: Error Correction Codes is different.\n");
-				return RET_INVALID_COMMAND;
-			}
-			if ( (par3_ctx->max_recovery_block != 0) || (par3_ctx->max_redundancy_size != 0) ){
-				printf("Compatibility issue: Max number of recovery blocks.\n");
-				return RET_INVALID_COMMAND;
-			}
-			if ( (par3_ctx->interleave != 0) && (par3_ctx->interleave != extra_num) ){
-				printf("Compatibility issue: Number of interleaving is different.\n");
-				return RET_INVALID_COMMAND;
-			}
-
-			par3_ctx->ecc_method = 8;
-			par3_ctx->interleave = extra_num;
-			par3_ctx->max_recovery_block = max_num * (extra_num + 1);	// When interleaving, max count is multiplied by number of cohorts.
-			par3_ctx->matrix_packet_offset = offset;
-			return -8;
-
-		}
-
-		offset += packet_size;
-	}
-
-	return 0;
-}
-
-// Calculate extending amount of recovery blocks from given redundancy
-static int calculate_extra_count(PAR3_CTX *par3_ctx)
-{
-	uint64_t total_count;
-
-	if (par3_ctx->block_count == 0){
-		par3_ctx->ecc_method = 0;
-		par3_ctx->redundancy_size = 0;
-		par3_ctx->recovery_block_count = 0;
-		return 0;	// There is no input block.
-	}
-	if ( (par3_ctx->recovery_block_count == 0) && (par3_ctx->redundancy_size == 0) )
-		return 0;	// Not specified
-
-	// When number of recovery blocks was not specified, set by redundancy.
-	if ( (par3_ctx->recovery_block_count == 0) && (par3_ctx->redundancy_size > 0) ){
-		// If redundancy_size is in range (0 ~ 250), it's a percent rate value.
-		if (par3_ctx->redundancy_size <= 250){
-			// When there is remainder at division, round up the quotient.
-			par3_ctx->recovery_block_count = (par3_ctx->block_count * par3_ctx->redundancy_size + 99) / 100;
-		}
-	}
-	if ( (par3_ctx->max_recovery_block == 0) && (par3_ctx->max_redundancy_size > 0) ){
-		if (par3_ctx->max_redundancy_size <= 250){
-			par3_ctx->max_recovery_block = (par3_ctx->block_count * par3_ctx->max_redundancy_size + 99) / 100;
-		}
-	}
-
-	// Test number of blocks
-	if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Codes
-		if (par3_ctx->noise_level >= 0){
-			printf("Cauchy Reed-Solomon Codes\n");
-		}
-
-		// When max recovery block count is set, it must be equal or larger than creating recovery blocks.
-		if ((par3_ctx->max_recovery_block > 0) && (par3_ctx->max_recovery_block < par3_ctx->recovery_block_count))
-			par3_ctx->max_recovery_block = par3_ctx->recovery_block_count;
-
-		// Check total number of blocks
-		total_count = par3_ctx->block_count + par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		if (total_count < par3_ctx->block_count + par3_ctx->max_recovery_block)
-			total_count = par3_ctx->block_count + par3_ctx->max_recovery_block;
-		if (total_count > 65536){
-			printf("Total block count %"PRIu64" are too many.\n", total_count);
-			return RET_LOGIC_ERROR;
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			printf("Recovery block count = %"PRIu64"\n", par3_ctx->recovery_block_count);
-			if (par3_ctx->max_recovery_block > 0){
-				printf("Max recovery block count = %"PRIu64"\n", par3_ctx->max_recovery_block);
-			}
-			printf("\n");
-		}
-
-	} else if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		uint64_t cohort_count, i;
-
-		if (par3_ctx->noise_level >= 0){
-			printf("FFT based Reed-Solomon Codes\n");
-		}
-
-		// Reuse previous settings
-		cohort_count = par3_ctx->interleave + 1; // Minimum value is 1.
-		if (cohort_count > 1){
-			if (par3_ctx->noise_level >= 0){
-				printf("Number of cohort = %"PRIu64" (Interleaving time = %u)\n", cohort_count, par3_ctx->interleave);
-				i = (par3_ctx->block_count + cohort_count - 1) / cohort_count;	// round up
-				printf("Input block count = %"PRIu64" (%"PRIu64" per cohort)\n", par3_ctx->block_count, i);
-			}
-		}
-
-		// Number of recovery block will be multiple of number of cohorts.
-		i = par3_ctx->recovery_block_count % cohort_count;
-		if (i > 0){
-			if (par3_ctx->noise_level >= 1){
-				printf("Recovery block count is increased from %"PRIu64" to %"PRIu64"\n", par3_ctx->recovery_block_count, par3_ctx->recovery_block_count + cohort_count - i);
-			}
-			par3_ctx->recovery_block_count += cohort_count - i;	// add to the remainder
-		}
-		// First recovery block will be lower.
-		i = par3_ctx->first_recovery_block % cohort_count;
-		if (i > 0){
-			if (par3_ctx->noise_level >= 1){
-				printf("First recovery block is decreased from %"PRIu64" to %"PRIu64"\n", par3_ctx->first_recovery_block, par3_ctx->first_recovery_block - i);
-			}
-			par3_ctx->first_recovery_block -= i;	// erase the remainder
-		}
-
-		// Check total number of blocks
-		total_count = par3_ctx->block_count + par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		if (total_count < par3_ctx->block_count + par3_ctx->max_recovery_block)
-			total_count = par3_ctx->block_count + par3_ctx->max_recovery_block;
-		if (total_count > 65536 * cohort_count){
-			if (cohort_count == 1){
-				printf("Total block count %"PRIu64" are too many.\n", total_count);
-			} else {
-				i = (total_count + cohort_count - 1) / cohort_count;	// round up
-				printf("Total block count %"PRIu64" (%"PRIu64" per cohort) are too many.\n", total_count, i);
-			}
-			return RET_LOGIC_ERROR;
-		}
-		// Leopard-RS library has a restriction; recovery_count <= 32768
-		// Though it's possible to solve this problem, I don't try at this time.
-		total_count = par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		if (total_count < par3_ctx->max_recovery_block)
-			total_count = par3_ctx->max_recovery_block;
-		if (total_count > 32768 * cohort_count){
-			if (cohort_count == 1){
-				printf("Recovery block count %"PRIu64" are too many.\n", total_count);
-			} else {
-				printf("Recovery block count %"PRIu64" (%"PRIu64" per cohort) are too many.\n", total_count, total_count / cohort_count);
-			}
-			return RET_LOGIC_ERROR;
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			if (cohort_count == 1){
-				printf("Recovery block count = %"PRIu64"\n", par3_ctx->recovery_block_count);
-			} else {
-				printf("Recovery block count = %"PRIu64" (%"PRIu64" per cohort)\n", par3_ctx->recovery_block_count, par3_ctx->recovery_block_count / cohort_count);
-			}
-			if (par3_ctx->max_recovery_block > 0){
-				if (cohort_count == 1){
-					printf("Max recovery block count = %"PRIu64"\n", par3_ctx->max_recovery_block);
-				} else {
-					printf("Max recovery block count = %"PRIu64" (%"PRIu64" per cohort)\n", par3_ctx->max_recovery_block, par3_ctx->max_recovery_block / cohort_count);
-				}
-			}
-			printf("\n");
-		}
-
-	} else {
-		printf("The specified Error Correction Codes (%u) isn't implemented yet.\n", par3_ctx->ecc_method);
-		return RET_LOGIC_ERROR;
-	}
-
-	return 0;
-}
-
-
-int par3_extend(PAR3_CTX *par3_ctx, char command_trial, char *temp_path)
-{
-	int ret;
-	uint32_t missing_file_count, damaged_file_count, bad_file_count;
-
-	ret = read_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	ret = parse_vital_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Show archived file data.
-	if (par3_ctx->noise_level >= 0)
-		show_data_size(par3_ctx);
-	if (par3_ctx->noise_level == 1){
-		show_read_result(par3_ctx, 1);
-	} else if (par3_ctx->noise_level >= 2){
-		show_read_result(par3_ctx, 2);
-	}
-
-	// Map input file slices into blocks
-	if (par3_ctx->block_count > 0){
-		ret = count_slice_info(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		ret = set_slice_info(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		ret = parse_external_data_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	// Check input file
-	missing_file_count = 0;
-	damaged_file_count = 0;
-	bad_file_count = 0;
-	ret = verify_input_file(par3_ctx, &missing_file_count, &damaged_file_count, &bad_file_count);
-	if (ret != 0)
-		return ret;
-
-	// It's possible to create recovery blocks, only when all input files are complete.
-	// Ignore different timestamp or property.
-	if (missing_file_count + damaged_file_count > 0){
-		printf("\n");
-		printf("%u files are missing or damaged.\n", missing_file_count + damaged_file_count);
-		return RET_REPAIR_NOT_POSSIBLE;
-	}
-
-	// Check Matrix Packet to create compatible recovery blocks.
-	ret = check_matrix_packet(par3_ctx);
-	if (ret < 0){	// Use previous settings in found Matrix Packet
-		ret = calculate_extra_count(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-	} else if (ret == 0){	// Adopt options in command-line
-		ret = calculate_recovery_count(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Make new Matrix Packet
-		if (par3_ctx->recovery_block_count > 0){
-			ret = make_matrix_packet(par3_ctx);
-			if (ret != 0)
-				return ret;
-		}
-		/*
-		{	// Debug output to compare result
-			FILE *fp;
-			fp = fopen("debug.txt", "wb");
-			if (fp != NULL){
-				fwrite(par3_ctx->matrix_packet, 1, par3_ctx->matrix_packet_size, fp);
-				fclose(fp);
-			}
-		}
-		*/
-
-	} else {
-		return ret;
-	}
-
-	if (command_trial != 0){
-		uint64_t total_par_size;	// This is a dummy item, when it doesn't show efficiency rate.
-
-		// Try Index File
-		total_par_size = try_index_file(par3_ctx);
-
-		// Try other PAR3 files
-		if ( (par3_ctx->block_count > 0) && ( (par3_ctx->data_packet != 0) || (par3_ctx->recovery_block_count > 0) ) ){
-			ret = duplicate_common_packet(par3_ctx);
-			if (ret != 0)
-				return ret;
-
-			// Write PAR3 files with input blocks
-			if (par3_ctx->data_packet != 0){
-				ret = try_archive_file(par3_ctx, temp_path, &total_par_size);
-				if (ret != 0)
-					return ret;
-			}
-
-			// Write PAR3 files with recovery blocks
-			if (par3_ctx->recovery_block_count > 0){
-				ret = try_recovery_file(par3_ctx, temp_path, &total_par_size);
-				if (ret != 0)
-					return ret;
-			}
-		}
-
-		// Because a user cannot change setting to create extra recovery blocks,
-		// showing efficiency rate will be useless.
-		//printf("\nTotal size of PAR files = %"PRIu64"\n", total_par_size);
-
-	} else {
-		// Write Index File
-		ret = write_index_file(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Write other PAR3 files
-		if ( (par3_ctx->block_count > 0) && ( (par3_ctx->data_packet != 0) || (par3_ctx->recovery_block_count > 0) ) ){
-			ret = duplicate_common_packet(par3_ctx);
-			if (ret != 0)
-				return ret;
-
-			// When it uses Reed-Solomon Erasure Codes, it tries to keep all recovery blocks on memory.
-			if (par3_ctx->ecc_method & 1){
-				ret = allocate_recovery_block(par3_ctx);
-				if (ret != 0)
-					return ret;
-			}
-
-			// Write PAR3 files with input blocks
-			if (par3_ctx->data_packet != 0){
-				ret = write_archive_file(par3_ctx, temp_path);
-				if (ret != 0)
-					return ret;
-			}
-
-			// If there are enough memory to keep all recovery blocks,
-			// it calculates recovery blocks before writing Recovery Data Packets.
-			if (par3_ctx->ecc_method & 0x8000){
-				ret = create_recovery_block(par3_ctx);
-				if (ret < 0){
-					par3_ctx->ecc_method &= ~0x8000;
-				} else if (ret > 0){
-					return ret;
-				}
-			}
-
-			// Write PAR3 files with recovery blocks
-			if (par3_ctx->recovery_block_count > 0){
-				ret = write_recovery_file(par3_ctx, temp_path);
-				if (ret != 0){
-					//remove_recovery_file(par3_ctx);	// Remove partially created files
-					return ret;
-				}
-			}
-
-			// When recovery blocks were not created yet, calculate and write at here.
-			if ((par3_ctx->ecc_method & 0x8000) == 0){
-				if ( (par3_ctx->ecc_method & 8) && (par3_ctx->interleave > 0) ){
-					// Interleaving is adapted only for FFT based Reed-Solomon Codes.
-					ret = create_recovery_block_cohort(par3_ctx);
-				} else {
-					ret = create_recovery_block_split(par3_ctx);
-				}
-				if (ret != 0){
-					//remove_recovery_file(par3_ctx);	// Remove partially created files
-					return ret;
-				}
-			}
-		}
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/libpar3_inside.c b/windows/src/libpar3_inside.c
deleted file mode 100644
index 0f3c9c5..0000000
--- a/windows/src/libpar3_inside.c
+++ /dev/null
@@ -1,224 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#include "libpar3.h"
-#include "inside.h"
-#include "common.h"
-#include "map.h"
-#include "packet.h"
-#include "block.h"
-#include "write.h"
-
-
-static uint64_t initial_block_size(uint64_t data_size)
-{
-	uint64_t block_size;
-	long double f;
-
-	if (data_size <= 40)
-		return 40;
-
-	// Let block count to be 1% of block size.
-	f = (long double)data_size;
-	f = sqrtl(f) * 10;
-	block_size = (uint64_t)f;
-
-	// If number of blocks is very few, return the minimum size.
-	// Because it creates at least 1 recovery block, number of input blocks should be more than 100.
-	if (data_size / block_size < 128)
-		block_size = next_pow2(data_size / 256);
-	if (block_size <= 40)
-		return 40;
-
-	// Block size is good to be power of 2.
-	block_size = next_pow2(block_size);
-
-	// 1000 blocks would be enough to protect a file.
-	while (data_size / block_size > 2048){
-		block_size *= 2;
-	}
-
-	return block_size;
-}
-
-// Insert PAR3 packets in ZIP file
-int par3_insert_zip(PAR3_CTX *par3_ctx, char command_trial)
-{
-	int ret, format_type, copy_size;
-	int repeat_count, best_repeat_count;
-	uint64_t original_file_size;
-	uint64_t block_size, best_block_size;
-	uint64_t block_count, best_block_count;
-	uint64_t recv_block_count, best_recv_block_count;
-	uint64_t total_packet_size, best_total_size;
-
-	ret = check_outside_format(par3_ctx, &format_type, &copy_size);
-	if (ret != 0)
-		return ret;
-
-	//printf("ecc_method = %x\n", par3_ctx->ecc_method);
-	par3_ctx->ecc_method = 1;	// At this time, select Cauchy Reed-Solomon Codes by default.
-
-	original_file_size = par3_ctx->total_file_size;
-	block_size = initial_block_size(original_file_size);
-/*
-	// to test rare case for debug
-	block_size = 150;
-	copy_size = 350;
-*/
-	if (par3_ctx->noise_level >= 1){
-		printf("\nTesting block size from %"PRId64":\n\n", block_size);
-	}
-	best_block_size = block_size;
-	best_total_size = inside_zip_size(par3_ctx, block_size, copy_size, &best_block_count, &best_recv_block_count, &best_repeat_count);
-	if (block_size == 40){
-		block_size = 64;
-	} else {
-		block_size *= 2;
-	}
-	while (block_size * 2 <= original_file_size){	// Try to find better block size
-		total_packet_size = inside_zip_size(par3_ctx, block_size, copy_size, &block_count, &recv_block_count, &repeat_count);
-		// When the difference is very small (like 1.6%), selecting more blocks would be safe.
-		// (original_file_size + total_packet_size) < (original_file_size + best_total_size) * 63 / 64
-		if ((original_file_size + total_packet_size) * 64 < (original_file_size + best_total_size) * 63){
-		//if (total_packet_size < best_total_size){
-			best_total_size = total_packet_size;
-			best_block_size = block_size;
-			best_block_count = block_count;
-			best_recv_block_count = recv_block_count;
-			best_repeat_count = repeat_count;
-		} else {
-			break;
-		}
-		block_size *= 2;
-	}
-	par3_ctx->block_size = best_block_size;
-	par3_ctx->block_count = best_block_count;
-	par3_ctx->recovery_block_count = best_recv_block_count;
-	par3_ctx->max_recovery_block = best_recv_block_count;
-	repeat_count = best_repeat_count;
-	if (par3_ctx->noise_level >= 1){
-		printf("Best block size = %"PRId64"\nblock count = %"PRId64", recvory block count = %"PRId64", repeat count = %d\n",
-				best_block_size, best_block_count, best_recv_block_count, repeat_count);
-	}
-
-	if (command_trial != 0){
-		// Show efficiency rate
-		if (par3_ctx->noise_level >= -1){
-			double rate;
-			// rate of "Additional PAR data" = "additional data size" / "original file size"
-			// rate of "Redundancy in blocks" = "number of recovery blocks" / "number of input blocks"
-			// rate of "Efficiency of PAR data" = "total size of recovery blocks" / "additional data size"
-			printf("\nSize of Outside file = %"PRIu64"\n", original_file_size + best_total_size + copy_size);
-			// Truncate two decimal places (use integer instead of showing double directly)
-			//printf("rate1 = %f, rate2 = %f\n", rate1, rate2);
-			rate = (double)(best_total_size + copy_size) / (double)original_file_size;
-			ret = (int)(rate * 1000);
-			printf("Additional PAR data    = %d.%d%%\n", ret / 10, ret % 10);
-			rate = (double)best_recv_block_count / (double)best_block_count;
-			ret = (int)(rate * 1000);
-			printf("Redundancy of blocks   = %d.%d%%\n", ret / 10, ret % 10);
-			rate = (double)(best_block_size * best_recv_block_count) / (double)(best_total_size + copy_size);
-			ret = (int)(rate * 1000);
-			printf("Efficiency of PAR data = %d.%d%%\n", ret / 10, ret % 10);
-		}
-		return 0;
-	}
-
-	// Map input file slices into input blocks.
-	if (format_type == 2){	// ZIP (.zip)
-		// It splits original file into 2 chunks and appends 2 chunks.
-		// [ data chunk ] [ footer chunk ] [ unprotected chunk ] [ duplicated footer chunk ]
-		if (par3_ctx->noise_level >= 2){
-			printf("ZIP file format (.zip)\n");
-		}
-		// Special funtion is required for additonal chunks.
-		ret = map_input_block_zip(par3_ctx, copy_size, best_total_size);
-
-	} else if (format_type == 3){	// 7-Zip (.7z)
-		// It appends 1 chunk.
-		// [ protected chunk ] [ unprotected chunk ]
-		if (par3_ctx->noise_level >= 2){
-			printf("7-Zip file format (.7z)\n");
-		}
-		ret = map_input_block_zip(par3_ctx, 0, best_total_size);
-
-	} else {
-		ret = RET_LOGIC_ERROR;
-	}
-	if (ret != 0)
-		return ret;
-
-	// Creator Packet, Comment Packet, Start Packet
-	ret = make_start_packet(par3_ctx, 0);
-	if (ret != 0)
-		return ret;
-
-	// Matrix Packet
-	ret = make_matrix_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// File Packet, Directory Packet, Root Packet
-	ret = make_file_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// External Data Packet
-	ret = make_ext_data_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	ret = duplicate_common_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// When it uses Reed-Solomon Erasure Codes, it tries to keep all recovery blocks on memory.
-	if (par3_ctx->ecc_method & 1){
-		ret = allocate_recovery_block(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	// If there are enough memory to keep all recovery blocks,
-	// it calculates recovery blocks before writing Recovery Data Packets.
-	if (par3_ctx->ecc_method & 0x8000){
-		ret = create_recovery_block(par3_ctx);
-		if (ret < 0){
-			par3_ctx->ecc_method &= ~0x8000;
-		} else if (ret > 0){
-			return ret;
-		}
-	}
-
-	// Insert space (unprotected chunks) into outside file
-	ret = insert_space_zip(par3_ctx, copy_size, repeat_count);
-	if (ret != 0)
-		return ret;
-
-	// When recovery blocks were not created yet, calculate and write at here.
-	if ((par3_ctx->ecc_method & 0x8000) == 0){
-		ret = create_recovery_block_split(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	return 0;
-}
-
-// Delete PAR3 packets from ZIP file
-int par3_delete_zip(PAR3_CTX *par3_ctx)
-{
-	int ret;
-
-	ret = delete_inside_data(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	return 0;
-}
-
diff --git a/windows/src/libpar3_verify.c b/windows/src/libpar3_verify.c
deleted file mode 100644
index c932ff6..0000000
--- a/windows/src/libpar3_verify.c
+++ /dev/null
@@ -1,488 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-#include "block.h"
-#include "packet.h"
-#include "read.h"
-#include "repair.h"
-#include "verify.h"
-#include "reedsolomon.h"
-
-
-int par3_list(PAR3_CTX *par3_ctx)
-{
-	int ret;
-
-	ret = read_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	ret = parse_vital_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Show archived file data.
-	if (par3_ctx->noise_level >= 0)
-		show_data_size(par3_ctx);
-	if (par3_ctx->noise_level == -1){
-		show_read_result(par3_ctx, 0);
-	} else if (par3_ctx->noise_level == 0){
-		show_read_result(par3_ctx, 1);
-	} else if (par3_ctx->noise_level >= 1){
-		show_read_result(par3_ctx, 2);
-	}
-
-	return 0;
-}
-
-int par3_verify(PAR3_CTX *par3_ctx)
-{
-	int ret;
-	uint32_t missing_dir_count, bad_dir_count;
-	uint32_t missing_file_count, damaged_file_count, misnamed_file_count, bad_file_count;
-	uint32_t possible_count, lack_count_cohort;
-	uint64_t block_count, block_available;
-	uint64_t recovery_block_available, recovery_block_lack;
-
-	ret = read_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	ret = parse_vital_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Show archived file data.
-	if (par3_ctx->noise_level >= 0)
-		show_data_size(par3_ctx);
-	if (par3_ctx->noise_level == 1){
-		show_read_result(par3_ctx, 1);
-	} else if (par3_ctx->noise_level >= 2){
-		show_read_result(par3_ctx, 2);
-	}
-
-	// Map input file slices into blocks
-	block_count = par3_ctx->block_count;
-	if (block_count > 0){
-		ret = count_slice_info(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		ret = set_slice_info(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		ret = parse_external_data_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	// Check input file and directory.
-	missing_dir_count = 0;
-	bad_dir_count = 0;
-	check_input_directory(par3_ctx, &missing_dir_count, &bad_dir_count);
-	missing_file_count = 0;
-	damaged_file_count = 0;
-	misnamed_file_count = 0;
-	bad_file_count = 0;
-	ret = verify_input_file(par3_ctx, &missing_file_count, &damaged_file_count, &bad_file_count);
-	if (ret != 0)
-		return ret;
-
-	if (missing_file_count + damaged_file_count > 0){
-		// Data Packets substitute for lost input blocks.
-		ret = substitute_input_block(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Find identical input blocks
-		ret = find_identical_block(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Aggregate verified result of available input blocks
-		block_available = aggregate_input_block(par3_ctx);
-
-		// When blocks are not enough, check extra files next.
-		if (block_available < block_count){
-			// Check extra files and misnamed files.
-			ret = verify_extra_file(par3_ctx, &missing_file_count, &damaged_file_count, &misnamed_file_count);
-			if (ret != 0)
-				return ret;
-
-			// Aggregate again
-			block_available = aggregate_input_block(par3_ctx);
-		}
-
-	} else {	// When all input files are complete.
-		block_available = block_count;
-	}
-
-	if (missing_dir_count + bad_dir_count + missing_file_count + damaged_file_count + misnamed_file_count + bad_file_count == 0){
-		// There is no damaged or missing files.
-		if (par3_ctx->noise_level >= -1){
-			printf("\n");
-			printf("All files are correct, repair is not required.\n");
-		}
-		return 0;
-	}
-
-	// There are damaged or missing files.
-	if (par3_ctx->noise_level >= -1){
-		printf("\nRepair is required.\n");
-	}
-	if (par3_ctx->noise_level >= 0){
-		if (missing_dir_count > 0){
-			printf("%u directories are missing.\n", missing_dir_count);
-		}
-		if (bad_dir_count > 0){
-			printf("%u directories are different.\n", bad_dir_count);
-		}
-		if (par3_ctx->input_dir_count - missing_dir_count - bad_dir_count > 0){
-			printf("%u directories are ok.\n", par3_ctx->input_dir_count - missing_dir_count - bad_dir_count);
-		}
-		if (misnamed_file_count > 0){
-			printf("%u files have the wrong name.\n", misnamed_file_count);
-		}
-		if (missing_file_count > 0){
-			printf("%u files are missing.\n", missing_file_count);
-		}
-		if (damaged_file_count > 0){
-			printf("%u files exist but are damaged.\n", damaged_file_count);
-		}
-		if (bad_file_count > 0){
-			printf("%u files are different.\n", bad_file_count);
-		}
-		if (par3_ctx->input_file_count - missing_file_count - damaged_file_count - misnamed_file_count - bad_file_count > 0){
-			printf("%u files are ok.\n", par3_ctx->input_file_count - missing_file_count - damaged_file_count - misnamed_file_count - bad_file_count);
-		}
-		if (missing_file_count + damaged_file_count > 0){
-			printf("You have %"PRIu64" out of %"PRIu64" input blocks available.\n", block_available, block_count);
-		}
-	}
-
-	// Aggregate recovery blocks of each Matrix Packet
-	recovery_block_available = aggregate_recovery_block(par3_ctx);
-	if (par3_ctx->interleave == 0){
-		if (block_available + recovery_block_available >= block_count){
-			recovery_block_lack = 0;
-		} else {
-			recovery_block_lack = block_count - block_available - recovery_block_available;
-		}
-	} else {
-		recovery_block_lack = aggregate_block_cohort(par3_ctx, NULL, &lack_count_cohort);
-	}
-	if (recovery_block_lack == 0){
-		if (par3_ctx->noise_level >= -1){
-			printf("Repair is possible.\n");
-		}
-		if (par3_ctx->noise_level >= 0){
-			if (block_available >= block_count){	// Found enough input blocks.
-				printf("None of the recovery blocks will be used for the repair.\n");
-			} else {
-				if (block_available + recovery_block_available > block_count){
-					printf("You have an excess of %"PRIu64" recovery blocks.\n", block_available + recovery_block_available - block_count);
-				}
-				printf("%"PRIu64" recovery blocks will be used to repair.\n", block_count - block_available);
-			}
-		}
-		return RET_REPAIR_POSSIBLE;
-
-	} else {	// Need more blocks to repair.
-		possible_count = check_possible_restore(par3_ctx);
-		if (par3_ctx->noise_level >= -1){
-			if (missing_dir_count + bad_dir_count + possible_count > 0){
-				printf("Repair is possible partially.\n");
-			} else {
-				printf("Repair is not possible.\n");
-			}
-			if (par3_ctx->interleave == 0){
-				printf("You need %"PRIu64" more recovery blocks to be able to repair.\n", recovery_block_lack);
-			} else {
-				printf("You need %"PRIu64" more recovery blocks (%u volumes) to be able to repair.\n", recovery_block_lack, lack_count_cohort);
-			}
-		}
-		return RET_REPAIR_NOT_POSSIBLE;
-	}
-}
-
-int par3_repair(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	int ret;
-	uint32_t missing_dir_count, bad_dir_count;
-	uint32_t missing_file_count, damaged_file_count, misnamed_file_count, bad_file_count;
-	uint32_t possible_count, lost_count_cohort, lack_count_cohort;
-	uint64_t block_count, block_available;
-	uint64_t recovery_block_available, recovery_block_lack;
-
-	ret = read_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	ret = parse_vital_packet(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	// Show archived file data.
-	if (par3_ctx->noise_level >= 0)
-		show_data_size(par3_ctx);
-	if (par3_ctx->noise_level == 1){
-		show_read_result(par3_ctx, 1);
-	} else if (par3_ctx->noise_level >= 2){
-		show_read_result(par3_ctx, 2);
-	}
-
-	// Map input file slices into blocks
-	block_count = par3_ctx->block_count;
-	if (block_count > 0){
-		ret = count_slice_info(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		ret = set_slice_info(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		ret = parse_external_data_packet(par3_ctx);
-		if (ret != 0)
-			return ret;
-	}
-
-	// Check input file and directory.
-	missing_dir_count = 0;
-	bad_dir_count = 0;
-	check_input_directory(par3_ctx, &missing_dir_count, &bad_dir_count);
-	missing_file_count = 0;
-	damaged_file_count = 0;
-	misnamed_file_count = 0;
-	bad_file_count = 0;
-	ret = verify_input_file(par3_ctx, &missing_file_count, &damaged_file_count, &bad_file_count);
-	if (ret != 0)
-		return ret;
-
-	if (missing_file_count + damaged_file_count > 0){
-		// Data Packets substitute for lost input blocks.
-		ret = substitute_input_block(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Find identical input blocks
-		ret = find_identical_block(par3_ctx);
-		if (ret != 0)
-			return ret;
-
-		// Aggregate verified result of available input blocks
-		block_available = aggregate_input_block(par3_ctx);
-
-		// When blocks are not enough, check extra files next.
-		if (block_available < block_count){
-			// Check extra files and misnamed files.
-			ret = verify_extra_file(par3_ctx, &missing_file_count, &damaged_file_count, &misnamed_file_count);
-			if (ret != 0)
-				return ret;
-
-			// Aggregate again
-			block_available = aggregate_input_block(par3_ctx);
-		}
-
-	} else {	// When all input files are complete.
-		block_available = block_count;
-	}
-
-	if (missing_dir_count + bad_dir_count + missing_file_count + damaged_file_count + misnamed_file_count + bad_file_count == 0){
-		// There is no damaged or missing files.
-		if (par3_ctx->noise_level >= -1){
-			printf("\n");
-			printf("All files are correct, repair is not required.\n");
-		}
-		return 0;
-	}
-
-	// There are damaged or missing files.
-	if (par3_ctx->noise_level >= -1){
-		printf("\nRepair is required.\n");
-	}
-	if (par3_ctx->noise_level >= 0){
-		if (missing_dir_count > 0){
-			printf("%u directories are missing.\n", missing_dir_count);
-		}
-		if (bad_dir_count > 0){
-			printf("%u directories are different.\n", bad_dir_count);
-		}
-		if (par3_ctx->input_dir_count - missing_dir_count - bad_dir_count > 0){
-			printf("%u directories are ok.\n", par3_ctx->input_dir_count - missing_dir_count - bad_dir_count);
-		}
-		if (misnamed_file_count > 0){
-			printf("%u files have the wrong name.\n", misnamed_file_count);
-		}
-		if (missing_file_count > 0){
-			printf("%u files are missing.\n", missing_file_count);
-		}
-		if (damaged_file_count > 0){
-			printf("%u files exist but are damaged.\n", damaged_file_count);
-		}
-		if (bad_file_count > 0){
-			printf("%u files are different.\n", bad_file_count);
-		}
-		if (par3_ctx->input_file_count - missing_file_count - damaged_file_count - misnamed_file_count - bad_file_count > 0){
-			printf("%u files are ok.\n", par3_ctx->input_file_count - missing_file_count - damaged_file_count - misnamed_file_count - bad_file_count);
-		}
-		if (missing_file_count + damaged_file_count > 0){
-			printf("You have %"PRIu64" out of %"PRIu64" input blocks available.\n", block_available, block_count);
-		}
-	}
-
-	// Aggregate recovery blocks of each Matrix Packet
-	recovery_block_available = aggregate_recovery_block(par3_ctx);
-	if (par3_ctx->interleave == 0){
-		if (block_available + recovery_block_available >= block_count){
-			recovery_block_lack = 0;
-		} else {
-			recovery_block_lack = block_count - block_available - recovery_block_available;
-		}
-		lost_count_cohort = (uint32_t)(block_count - block_available);
-	} else {
-		recovery_block_lack = aggregate_block_cohort(par3_ctx, &lost_count_cohort, &lack_count_cohort);
-	}
-	if (recovery_block_lack == 0){
-		if (par3_ctx->noise_level >= -1){
-			printf("Repair is possible.\n");
-		}
-		if (par3_ctx->noise_level >= 0){
-			if (block_available >= block_count){	// Found enough input blocks.
-				printf("None of the recovery blocks will be used for the repair.\n");
-			} else {
-				if (block_available + recovery_block_available > block_count){
-					printf("You have an excess of %"PRIu64" recovery blocks.\n", block_available + recovery_block_available - block_count);
-				}
-				printf("%"PRIu64" recovery blocks will be used to repair.\n", block_count - block_available);
-			}
-		}
-
-	} else {	// Need more blocks to repair.
-		possible_count = check_possible_restore(par3_ctx);
-		if (par3_ctx->noise_level >= -1){
-			if (missing_dir_count + bad_dir_count + possible_count > 0){
-				printf("Repair is possible partially.\n");
-			} else {
-				printf("Repair is not possible.\n");
-			}
-			if (par3_ctx->interleave == 0){
-				printf("You need %"PRIu64" more recovery blocks to be able to repair.\n", recovery_block_lack);
-			} else {
-				printf("You need %"PRIu64" more recovery blocks (%u volumes) to be able to repair.\n", recovery_block_lack, lack_count_cohort);
-			}
-		}
-		if (missing_dir_count + bad_dir_count + possible_count == 0){
-			// When repair is impossible at all, end here.
-			return RET_REPAIR_NOT_POSSIBLE;
-		}
-		// Even when complete repair is impossible, try to repair as possible as it can.
-	}
-	par3_ctx->recovery_block_count = recovery_block_available;
-	possible_count = missing_dir_count + bad_dir_count + missing_file_count + damaged_file_count + misnamed_file_count + bad_file_count;
-
-	// When some directories are missing.
-	if (missing_dir_count > 0){
-		// Reconstruct directory tree
-		missing_dir_count = reconstruct_directory_tree(par3_ctx);
-		// If all directories become ok, return zero.
-	}
-
-	// When some files are missing or damaged.
-	if (missing_file_count + damaged_file_count + misnamed_file_count > 0){
-
-		// When input blocks are enough, restore missing and damaged file.
-		if (block_available >= block_count){
-
-			// Create temporary files for lost input files
-			ret = create_temp_file(par3_ctx, temp_path);
-			if (ret != 0)
-				return ret;
-
-			// Restore content of input files
-			ret = restore_input_file(par3_ctx, temp_path);
-			if (ret != 0)
-				return ret;
-
-		// When recovery blocks are enough, recover lost input blocks.
-		} else if (recovery_block_lack == 0){
-
-			// Make list of index for lost input blocks and using recovery blocks.
-			ret =  make_block_list(par3_ctx, block_count - block_available, lost_count_cohort);
-			if (ret != 0)
-				return ret;
-
-			if (par3_ctx->ecc_method & 1){	// Cauchy Reed-Solomon Erasure Codes
-				// Construct matrix for Reed-Solomon Codes, and solve linear equation.
-				ret = rs_compute_matrix(par3_ctx, block_count - block_available);
-				if (ret != 0)
-					return ret;
-			}
-
-			// Create temporary files for lost input files
-			ret = create_temp_file(par3_ctx, temp_path);
-			if (ret != 0)
-				return ret;
-
-			// If there are enough memory to keep all lost blocks
-			if (par3_ctx->ecc_method & 0x8000){
-				// Recover lost input blocks at reading each input block.
-				ret = recover_lost_block(par3_ctx, temp_path, (int)(block_count - block_available));
-				if (ret != 0)
-					return ret;
-
-			} else {
-				// Recover lost input blocks by spliting every block.
-				if ( (par3_ctx->ecc_method & 8) && (par3_ctx->interleave > 0) ){
-					// Interleaving is adapted only for FFT based Reed-Solomon Codes.
-					ret = recover_lost_block_cohort(par3_ctx, temp_path);
-				} else {
-					ret = recover_lost_block_split(par3_ctx, temp_path, block_count - block_available);
-				}
-				if (ret != 0)
-					return ret;
-			}
-
-		// Even when blocks are not enough, this tries to repair as possible as it can.
-		} else {
-			// Try to restore content of input files
-			ret = try_restore_input_file(par3_ctx, temp_path);
-			if (ret != 0)
-				return ret;
-		}
-	}
-
-	// Verify repaired file and rename to original name
-	ret = verify_repaired_file(par3_ctx, temp_path, &missing_file_count, &damaged_file_count, &misnamed_file_count, &bad_file_count);
-	if (ret != 0)
-		return ret;
-
-	// When property of some directories are different.
-	if (bad_dir_count > 0){
-		// Reset options of directories
-		bad_dir_count = reset_directory_option(par3_ctx);
-		// If all directories become ok, return zero.
-	}
-
-	if (missing_dir_count + bad_dir_count + missing_file_count + damaged_file_count + misnamed_file_count + bad_file_count == 0){
-		// When it repaired all input set
-		printf("\nRepair complete.\n");
-		return 0;
-
-	} else if (missing_dir_count + bad_dir_count + missing_file_count + damaged_file_count + misnamed_file_count + bad_file_count < possible_count){
-		// Though it repaired some files, others are damaged or missing still.
-		printf("\nRepair partially.\n");
-		return RET_REPAIR_FAILED;
-
-	} else {
-		// There are damaged or missing files still.
-		printf("\nRepair failed.\n");
-		return RET_REPAIR_FAILED;
-	}
-}
-
diff --git a/windows/src/main.c b/windows/src/main.c
deleted file mode 100644
index 2947b2e..0000000
--- a/windows/src/main.c
+++ /dev/null
@@ -1,1202 +0,0 @@
-#ifdef _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <locale.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <unistd.h> 
-#define _chdir  chdir
-#define _getcwd getcwd
-
-#include <strings.h>
-#define _strnicmp strncasecmp
-#define _stricmp strcasecmp
-
-/* This definition of _MAX_FNAME works for GCC on POSIX systems */
-#include <limits.h>
-#define _MAX_FNAME NAME_MAX
-
-/* Not sure if this is the right definition, but it seems like a safe one.*/
-#define _MAX_DIR PATH_MAX
-
-#elif _WIN32
-// MSVC headers
-#include <direct.h>
-#endif
-
-#include "libpar3.h"
-#include "common.h"
-
-
-// This application name and version
-#define PACKAGE "par3cmdline"
-#define VERSION "0.0.1"
-
-static void print_version(int show_copyright)
-{
-	printf(PACKAGE " version " VERSION "\n");
-
-	if (show_copyright){
-		printf(
-"\nCopyright (C) 2025 Yutaka Sawada.\n\n"
-"par3cmdline comes with ABSOLUTELY NO WARRANTY.\n\n"
-"This is free software; you can redistribute it and/or modify it\n"
-"under the terms of the GNU Lesser General Public License as published\n"
-"by the Free Software Foundation; either version 2.1 of the License,\n"
-"or (at your option) any later version.\n"
-		);
-	}
-}
-
-static void print_help(void)
-{
-	printf(
-"Usage:\n"
-"  par3 -h  : show this help\n"
-"  par3 -V  : show version\n"
-"  par3 -VV : show version and copyright\n\n"
-"  par3 tc       [options] <PAR3 file> [files] : Try to create PAR3 files\n"
-"  par3 te       [options] <PAR3 file> [file]  : Try to extend PAR3 files\n"
-"  par3 c(reate) [options] <PAR3 file> [files] : Create PAR3 files\n"
-"  par3 e(xtend) [options] <PAR3 file> [file]  : Extend PAR3 files\n"
-"  par3 v(erify) [options] <PAR3 file> [files] : Verify files using PAR3 file\n"
-"  par3 r(epair) [options] <PAR3 file> [files] : Repair files using PAR3 files\n"
-"  par3 l(ist)   [options] <PAR3 file>         : List files in PAR3 file\n"
-"  par3 ti       [options] <ZIP file>          : Try to insert PAR in ZIP file\n"
-"  par3 i(nsert) [options] <ZIP file>          : Insert PAR in ZIP file\n"
-"  par3 d(elete) [options] <ZIP file>          : Delete PAR from ZIP file\n"
-"  par3 vs       [options] <ZIP file>  [files] : Verify itself\n"
-"  par3 rs       [options] <ZIP file>  [files] : Repair itself\n"
-"\n"
-"Options: (all uses)\n"
-"  -B<path> : Set the base-path to use as reference for the datafiles\n"
-"  -v [-v]  : Be more verbose\n"
-"  -q [-q]  : Be more quiet (-q -q gives silence)\n"
-"  -m<n>    : Memory to use\n"
-"  --       : Treat all following arguments as filenames\n"
-"  -abs     : Enable absolute path\n"
-"Options: (verify or repair)\n"
-"  -S<n>    : Searching time limit (milli second)\n"
-"Options: (create)\n"
-"  -b<n>    : Set the Block-Count\n"
-"  -s<n>    : Set the Block-Size (don't use both -b and -s)\n"
-"  -r<n>    : Level of redundancy (%%)\n"
-"  -rm<n>   : Maximum redundancy (%%)\n"
-"  -c<n>    : Recovery Block-Count (don't use both -r and -c)\n"
-"  -cf<n>   : First Recovery-Block-Number\n"
-"  -cm<n>   : Maximum Recovery Block-Count\n"
-"  -u       : Uniform recovery file sizes\n"
-"  -l       : Limit size of recovery files (don't use both -u and -l)\n"
-"  -n<n>    : Number of recovery files (don't use both -n and -l)\n"
-"  -R       : Recurse into subdirectories\n"
-"  -D       : Store Data packets\n"
-"  -d<n>    : Enable deduplication of input blocks\n"
-"  -e<n>    : Set using Error Correction Codes\n"
-"  -i<n>    : Number of interleaving"
-"  -fu<n>   : Use UNIX Permissions Packet\n"
-"  -ff      : Use FAT Permissions Packet\n"
-"  -lp<n>   : Limit repetition of packets in each file\n"
-"  -C<text> : Set comment\n"
-	);
-}
-
-int main(int argc, char *argv[])
-{
-	char **utf8_argv = NULL, *utf8_argv_buf = NULL;
-	char *tmp_p, file_name[_MAX_PATH];
-	int argi, ret;
-	size_t len;
-	PAR3_CTX *par3_ctx = NULL;
-
-	// command-line options
-	char command_operation = 0;
-	char command_trial = 0;
-	char command_option = 0;
-
-	// For non UTF-8 code page system
-	ret = 1;
-	tmp_p = setlocale(LC_ALL, "");
-	if ( (argc > 2) && (tmp_p != NULL) && (strstr(tmp_p, "utf8") == NULL) ){
-		wchar_t *w_argv_buf;
-
-		//printf("default locale = %s\n", tmp_p);
-		len = 0;
-		for (argi = 2; argi < argc; argi++){
-			//printf("argv[%d] = %s\n", argi, argv[argi]);
-			len += strlen(argv[argi]) + 1;
-		}
-		len++;
-		//printf("total length of argv = %zu\n", len);
-		utf8_argv_buf = malloc(len * 4 + sizeof(wchar_t) * len * 2);
-		utf8_argv = malloc(sizeof(char *) * argc);
-		if ( (utf8_argv != NULL) && (utf8_argv_buf != NULL) ){
-			w_argv_buf = (wchar_t *)(utf8_argv_buf + len * 4);
-			tmp_p = utf8_argv_buf;
-			for (argi = 2; argi < argc; argi++){
-				len = strlen(argv[argi]);
-				memcpy(tmp_p, argv[argi], len);
-				tmp_p += len;
-				tmp_p[0] = '\n';
-				tmp_p++;
-			}
-			tmp_p[0] = 0;
-			tmp_p++;
-			len = tmp_p - utf8_argv_buf;
-			//printf("total length of argv = %zu\n", len);
-			//printf("total argv =\n%s\n", utf8_argv_buf);
-			mbstowcs(w_argv_buf, utf8_argv_buf, len);
-
-			// change to UTF-8
-			if (setlocale(LC_ALL, ".UTF-8") == NULL){	// could not change locale
-				printf("Failed to set UTF-8.\nUnicode filename won't be supported.\n");
-				free(utf8_argv);
-				utf8_argv = NULL;
-				free(utf8_argv_buf);
-				utf8_argv_buf = NULL;
-			} else{	// convert each argv to UTF-8 text.
-				wcstombs(utf8_argv_buf, w_argv_buf, len * 4);
-
-				utf8_argv[0] = argv[0];
-				utf8_argv[1] = argv[1];
-				tmp_p = utf8_argv_buf;
-				for (argi = 2; argi < argc; argi++){
-					utf8_argv[argi] = tmp_p;
-					tmp_p = strchr(tmp_p, '\n');
-					tmp_p[0] = 0;
-					tmp_p++;
-					//printf("utf8_argv[%d] = %s\n", argi, utf8_argv[argi]);
-				}
-			}
-			ret = 0;
-		}
-	}
-
-	if (ret){	// change locale's code page to use UTF-8
-		tmp_p = setlocale(LC_ALL, ".UTF-8");
-		if (tmp_p == NULL){
-			printf("Failed to set UTF-8.\nUnicode filename won't be supported.\n");
-		}
-	}
-
-	// After here, use "ret = *" and "goto prepare_return;" to release memory before return.
-
-	if (argc < 3){
-		if (argc == 2){
-			if (strcmp(argv[1], "-h") == 0){
-				print_help();
-				ret = 0;
-				goto prepare_return;
-			} else if (strcmp(argv[1], "-V") == 0){
-				print_version(0);
-				ret = 0;
-				goto prepare_return;
-			} else if (strcmp(argv[1], "-VV") == 0){
-				print_version(1);
-				ret = 0;
-				goto prepare_return;
-			}
-		}
-		printf("Not enough command line arguments.\n");
-		printf("To show help, type: par3 -h\n");
-		ret = RET_INVALID_COMMAND;
-		goto prepare_return;
-	}
-
-	// check command
-	if ( (strcmp(argv[1], "c") == 0) || (strcmp(argv[1], "create") == 0) ){
-		command_operation = 'c';	// create
-	} else if ( (strcmp(argv[1], "v") == 0) || (strcmp(argv[1], "verify") == 0) ){
-		command_operation = 'v';	// verify
-	} else if ( (strcmp(argv[1], "r") == 0) || (strcmp(argv[1], "repair") == 0) ){
-		command_operation = 'r';	// repair
-	} else if ( (strcmp(argv[1], "l") == 0) || (strcmp(argv[1], "list") == 0) ){
-		command_operation = 'l';	// list
-	} else if ( (strcmp(argv[1], "e") == 0) || (strcmp(argv[1], "extend") == 0) ){
-		command_operation = 'e';	// extend
-
-	} else if (strcmp(argv[1], "tc") == 0){
-		command_operation = 'c';	// try to create
-		command_trial = 't';
-	} else if (strcmp(argv[1], "te") == 0){
-		command_operation = 'e';	// try to extend
-		command_trial = 't';
-
-	} else if ( (strcmp(argv[1], "i") == 0) || (strcmp(argv[1], "insert") == 0) ){
-		command_operation = 'i';	// insert PAR in ZIP
-	} else if (strcmp(argv[1], "ti") == 0){
-		command_operation = 'i';	// try to insert PAR ito ZIP
-		command_trial = 't';
-	} else if ( (strcmp(argv[1], "d") == 0) || (strcmp(argv[1], "delete") == 0) ){
-		command_operation = 'd';	// delete PAR from ZIP
-
-	} else if (strcmp(argv[1], "vs") == 0){
-		command_operation = 'v';	// verify itself
-		command_option = 's';
-	} else if (strcmp(argv[1], "rs") == 0){
-		command_operation = 'r';	// repair itself
-		command_option = 's';
-
-	} else {
-		print_help();
-		ret = RET_INVALID_COMMAND;
-		goto prepare_return;
-	}
-
-	// Init context.
-	par3_ctx = malloc(sizeof(PAR3_CTX));
-	if (par3_ctx == NULL){
-		perror("Failed to allocate memory\n");
-		ret = RET_MEMORY_ERROR;
-		goto prepare_return;
-	}
-	memset(par3_ctx, 0, sizeof(PAR3_CTX));
-
-	if ( (command_operation == 'c') || (command_operation == 'i') ){
-		// add text in Creator Packet
-		ret = add_creator_text(par3_ctx, PACKAGE " version " VERSION
-					"\n(https://github.com/Parchive/par3cmdline)\n");
-		if (ret != 0){
-			ret = RET_MEMORY_ERROR;
-			goto prepare_return;
-		}
-	}
-
-	// read options
-	for (argi = 2; argi < argc; argi++){
-		if (utf8_argv != NULL){
-			tmp_p = utf8_argv[argi];
-		} else {
-			tmp_p = argv[argi];
-		}
-		if (tmp_p[0] == '-'){
-			tmp_p++;	// skip the first "-" in front of an option
-			if (strcmp(tmp_p, "-") == 0){	// End of options
-				break;
-
-			} else if (strcmp(tmp_p, "v") == 0){
-				par3_ctx->noise_level++;
-			} else if (strcmp(tmp_p, "vv") == 0){
-				par3_ctx->noise_level += 2;
-			} else if (strcmp(tmp_p, "vvv") == 0){
-				par3_ctx->noise_level += 3;
-			} else if (strcmp(tmp_p, "q") == 0){
-				par3_ctx->noise_level--;
-			} else if (strcmp(tmp_p, "qq") == 0){
-				par3_ctx->noise_level -= 2;
-
-			} else if ( (tmp_p[0] == 'm') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Set the memory limit
-				if (par3_ctx->memory_limit > 0){
-					printf("Cannot specify memory limit twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					char *end_p;
-					// Get the character that stops the scan
-					par3_ctx->memory_limit = strtoull(tmp_p + 1, &end_p, 10);
-					//printf("end char = %s\n", end_p);
-					if ( (_stricmp(end_p, "g") == 0) || (_stricmp(end_p, "gb") == 0) ){
-						par3_ctx->memory_limit <<= 30;	// GB
-					} else if ( (_stricmp(end_p, "m") == 0) || (_stricmp(end_p, "mb") == 0) ){
-						par3_ctx->memory_limit <<= 20;	// MB
-					} else if ( (_stricmp(end_p, "k") == 0) || (_stricmp(end_p, "kb") == 0) ){
-						par3_ctx->memory_limit <<= 10;	// KB
-					}
-				}
-
-			} else if ( (tmp_p[0] == 'S') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Set searching time limit
-				if ( (command_operation != 'v') && (command_operation != 'r') ){
-					printf("Cannot specify searching time limit unless reparing or verifying.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->search_limit > 0){
-					printf("Cannot specify searching time limit twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->search_limit = strtoul(tmp_p + 1, NULL, 10);
-				}
-
-			} else if ( (tmp_p[0] == 'B') && (tmp_p[1] != 0) ){	// Set the base-path manually
-				if (command_operation == 'l'){
-					printf("Cannot specify base-path for listing.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if ( (command_operation == 'i') || (command_operation == 'd') ){
-					printf("Cannot specify base-path for PAR inside.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->base_path[0] != 0){
-					printf("Cannot specify base-path twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					path_copy(par3_ctx->base_path, tmp_p + 1, _MAX_DIR - 32);
-				}
-
-			} else if ( (tmp_p[0] == 'b') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Set the block count
-				if (command_operation != 'c'){
-					printf("Cannot specify block count unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->block_count > 0){
-					printf("Cannot specify block count twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->block_size > 0){
-					printf("Cannot specify both block count and block size.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->block_count = strtoull(tmp_p + 1, NULL, 10);
-				}
-
-			} else if ( (tmp_p[0] == 's') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Set the block size
-				if (command_operation != 'c'){
-					printf("Cannot specify block size unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->block_size > 0){
-					printf("Cannot specify block size twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->block_count > 0){
-					printf("Cannot specify both block count and block size.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->block_size = strtoull(tmp_p + 1, NULL, 10);
-				}
-
-			} else if ( (tmp_p[0] == 'r') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Set the amount of redundancy required
-				if ( (command_operation != 'c') && (command_operation != 'e') && (command_operation != 'i') ){
-					printf("Cannot specify redundancy unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->redundancy_size > 0){
-					printf("Cannot specify redundancy twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->recovery_block_count > 0){
-					printf("Cannot specify both redundancy and recovery block count.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->redundancy_size = strtoul(tmp_p + 1, NULL, 10);
-					if (par3_ctx->redundancy_size > 250){
-						printf("Invalid redundancy option: %u\n", par3_ctx->redundancy_size);
-						par3_ctx->redundancy_size = 0;	// reset
-					}
-/*
-					// Store redundancy for "PAR inside"
-					if ( (command_operation == 'i') && (par3_ctx->redundancy_size > 0) ){
-						if (add_creator_text(par3_ctx, tmp_p - 1) != 0){
-							ret = RET_MEMORY_ERROR;
-							goto prepare_return;
-						}
-					}
-*/
-				}
-
-			} else if ( (tmp_p[0] == 'r') && (tmp_p[1] == 'm') && (tmp_p[2] >= '0') && (tmp_p[2] <= '9') ){	// Specify the Max redundancy
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify max redundancy unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->max_redundancy_size > 0){
-					printf("Cannot specify max redundancy twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->max_recovery_block > 0){
-					printf("Cannot specify both max redundancy and recovery block count.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->max_redundancy_size = strtoul(tmp_p + 2, NULL, 10);
-					if (par3_ctx->max_redundancy_size > 250){
-						printf("Invalid max redundancy option: %u\n", par3_ctx->max_redundancy_size);
-						par3_ctx->max_redundancy_size = 0;	// reset
-					}
-				}
-
-			} else if ( (tmp_p[0] == 'c') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Set the number of recovery blocks to create
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify recovery block count unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->recovery_block_count > 0){
-					printf("Cannot specify recovery block count twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->redundancy_size > 0){
-					printf("Cannot specify both recovery block count and redundancy.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->recovery_block_count = strtoull(tmp_p + 1, NULL, 10);
-				}
-
-			/*
-			This feature may require another command like append or extra.
-			It needs a parent PAR3 file instead of input files.
-			It needs to verify before creating recovery blocks.
-			*/
-			} else if ( (tmp_p[0] == 'c') && (tmp_p[1] == 'f') && (tmp_p[2] >= '0') && (tmp_p[2] <= '9') ){	// Specify the First recovery block number
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify first block number unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->first_recovery_block > 0){
-					printf("Cannot specify first block twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->first_recovery_block = strtoull(tmp_p + 2, NULL, 10);
-/*
-					if (par3_ctx->first_recovery_block > 0){
-						if (add_creator_text(par3_ctx, tmp_p - 1) != 0){
-							ret = RET_MEMORY_ERROR;
-							goto prepare_return;
-						}
-					}
-*/
-				}
-
-			} else if ( (tmp_p[0] == 'c') && (tmp_p[1] == 'm') && (tmp_p[2] >= '0') && (tmp_p[2] <= '9') ){	// Specify the Max recovery block count
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify max recovery block count unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->max_recovery_block > 0){
-					printf("Cannot specify max recovery block count twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->max_recovery_block = strtoull(tmp_p + 2, NULL, 10);
-				}
-
-			} else if (strcmp(tmp_p, "u") == 0){	// Specify uniformly sized recovery files
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify uniform files unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->recovery_file_scheme != 0){
-					printf("Cannot specify two recovery file size schemes.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->recovery_file_scheme = -1;
-/*
-					if (add_creator_text(par3_ctx, tmp_p - 1) != 0){
-						ret = RET_MEMORY_ERROR;
-						goto prepare_return;
-					}
-*/
-				}
-
-			} else if ( (tmp_p[0] == 'l') && ( (tmp_p[1] == 0) || ( (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ) ) ){	// Limit the size of the recovery files
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify limit files unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->recovery_file_scheme != 0){
-					printf("Cannot specify two recovery file size schemes.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->recovery_file_count > 0){
-					printf("Cannot specify limited size and number of files at the same time.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					if (tmp_p[1] == 0){
-						par3_ctx->recovery_file_scheme = -2;
-					} else {
-						par3_ctx->recovery_file_scheme = strtoll(tmp_p + 1, NULL, 10);
-					}
-				}
-
-			} else if ( (tmp_p[0] == 'n') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Specify the number of recovery files
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify recovery file count unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->recovery_file_count > 0){
-					printf("Cannot specify recovery file count twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if ( (par3_ctx->recovery_file_scheme == -2) || (par3_ctx->recovery_file_scheme > 0) ){
-					printf("Cannot specify limited size and number of files at the same time.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->recovery_file_count = strtoul(tmp_p + 1, NULL, 10);
-				}
-
-			} else if (strcmp(tmp_p, "R") == 0){	// Enable recursive search
-				if (command_operation != 'c'){
-					printf("Cannot specify Recursive unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					command_option = 'R';
-				}
-
-			} else if (strcmp(tmp_p, "D") == 0){	// Store Data packets
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify Data packet unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->data_packet = 'D';
-				}
-
-			} else if ( (tmp_p[0] == 'd') && (tmp_p[1] >= '0') && (tmp_p[1] <= '2') ){	// Enable deduplication
-				if (command_operation != 'c'){
-					printf("Cannot specify deduplication unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->deduplication != 0){
-					printf("Cannot specify deduplication twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->deduplication = tmp_p[1];
-					if (par3_ctx->deduplication != '0'){
-						if (add_creator_text(par3_ctx, tmp_p - 1) != 0){	// Store this option for debug
-							ret = RET_MEMORY_ERROR;
-							goto prepare_return;
-						}
-					}
-				}
-
-			} else if ( (tmp_p[0] == 'e') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Error Correction Codes
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify Error Correction Codes unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->ecc_method != 0){
-					printf("Cannot specify Error Correction Codes twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->ecc_method = strtoul(tmp_p + 1, NULL, 10);
-					if (popcount32(par3_ctx->ecc_method) > 1){
-						printf("Cannot specify multiple Error Correction Codes.\n");
-						par3_ctx->ecc_method = 0;
-					}
-				}
-
-			} else if ( (tmp_p[0] == 'i') && (tmp_p[1] >= '0') && (tmp_p[1] <= '9') ){	// Interleaving
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify interleaving unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->interleave != 0){
-					printf("Cannot specify interleaving twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->interleave = strtoul(tmp_p + 1, NULL, 10);
-/*
-					if (par3_ctx->interleave != 0){
-						if (add_creator_text(par3_ctx, tmp_p - 1) != 0){	// Store this option for debug
-							ret = RET_MEMORY_ERROR;
-							goto prepare_return;
-						}
-					}
-*/
-				}
-
-			} else if ( (tmp_p[0] == 'f') && (tmp_p[1] == 'u')
-					&& ( (tmp_p[2] == 0) || ( (tmp_p[2] >= '0') && (tmp_p[2] <= '9') ) ) ){	// UNIX Permissions Packet
-				if ((par3_ctx->file_system & 7) != 0){
-					printf("Cannot specify UNIX Permissions Packet twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					if (tmp_p[2] == 0){
-						ret = 7;	// 1 = mtime, 2 = i_mode, 4 = directory
-					} else {
-						ret = strtoul(tmp_p + 2, NULL, 10) & 7;
-					}
-					par3_ctx->file_system |= ret;
-					if (command_operation == 'c'){	// Only creating time
-						if (add_creator_text(par3_ctx, tmp_p - 1) != 0){	// Store this option for debug
-							ret = RET_MEMORY_ERROR;
-							goto prepare_return;
-						}
-					}
-				}
-
-			} else if (strcmp(tmp_p, "ff") == 0){	// FAT Permissions Packet
-				if ((par3_ctx->file_system & 0x10000) != 0){
-					printf("Cannot specify FAT Permissions Packet twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->file_system |= 0x10000;
-					if (command_operation == 'c'){	// Only creating time
-						if (add_creator_text(par3_ctx, tmp_p - 1) != 0){	// Store this option for debug
-							ret = RET_MEMORY_ERROR;
-							goto prepare_return;
-						}
-					}
-				}
-
-			} else if ( (tmp_p[0] == 'l') && (tmp_p[1] == 'p')
-					&& (tmp_p[2] >= '0') && (tmp_p[2] <= '9') ){	// Max repetition
-				if ( (command_operation != 'c') && (command_operation != 'e') ){
-					printf("Cannot specify max repetition unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else if (par3_ctx->repetition_limit != 0){
-					printf("Cannot specify max repetition twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					par3_ctx->repetition_limit = strtoul(tmp_p + 2, NULL, 10);
-				}
-
-			} else if ( (tmp_p[0] == 'C') && (tmp_p[1] != 0) ){	// Set comment
-				if (command_operation != 'c'){
-					printf("Cannot specify comment unless creating.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				}
-				if (add_comment_text(par3_ctx, tmp_p + 1) != 0){
-					ret = RET_MEMORY_ERROR;
-					goto prepare_return;
-				}
-
-			} else if ( (strcmp(tmp_p, "abs") == 0) || (strcmp(tmp_p, "ABS") == 0) ){	// Enable absolute path
-				if (par3_ctx->absolute_path != 0){
-					printf("Cannot enable absolute path twice.\n");
-					ret = RET_INVALID_COMMAND;
-					goto prepare_return;
-				} else {
-					if (tmp_p[0] == 'A'){
-						par3_ctx->absolute_path = 'A';
-					} else {
-						par3_ctx->absolute_path = 'a';
-					}
-				}
-
-			} else {
-				printf("Invalid option specified: %s\n", tmp_p - 1);
-				ret = RET_INVALID_COMMAND;
-				goto prepare_return;
-			}
-
-		} else {
-			break;
-		}
-	}
-
-	if (par3_ctx->creator_packet_size > 0){
-		// Erase return code at the end of Creator text
-		par3_ctx->creator_packet_size = trim_text(par3_ctx->creator_packet, par3_ctx->creator_packet_size);
-	}
-	if (par3_ctx->comment_packet_size > 0){
-		// Erase return code at the end of Comment text
-		par3_ctx->comment_packet_size = trim_text(par3_ctx->comment_packet, par3_ctx->comment_packet_size);
-	}
-
-	// read PAR filename
-	if (argi < argc){
-		if (utf8_argv != NULL){
-			tmp_p = utf8_argv[argi];
-		} else {
-			tmp_p = argv[argi];
-		}
-		argi++;
-
-		// PAR filename must not include wildcard (* or ?).
-		len = strcspn(tmp_p, "*?");
-		if (len < strlen(tmp_p)){
-			printf("Found wildcard in PAR filename, %s\n", tmp_p);
-			par3_ctx->par_filename[0] = 0;
-		} else {
-			// may add ".vol32768+32768.par3"
-			if (path_copy(par3_ctx->par_filename, tmp_p, _MAX_PATH - 20) == 0){
-				par3_ctx->par_filename[0] = 0;
-			} else {
-				ret = get_absolute_path(file_name, par3_ctx->par_filename, _MAX_PATH - 8);
-				if (ret != 0){
-					printf("Failed to convert PAR filename to absolute path\n");
-					ret = RET_FILE_IO_ERROR;
-					goto prepare_return;
-				}
-				// PAR filename may be an absolute path.
-				if (_stricmp(file_name, par3_ctx->par_filename) == 0){
-					// If base-path is empty, set parent of PAR file.
-					if (par3_ctx->base_path[0] == 0){
-						tmp_p = strrchr(file_name, '/');
-						if (tmp_p != NULL)
-							memcpy(par3_ctx->base_path, file_name, tmp_p - file_name);
-					}
-				// PAR filename may be a relative path from current working directory.
-				} else if (par3_ctx->base_path[0] != 0){
-					// If base-path isn't empty, it was relative from current working directory.
-					strcpy(par3_ctx->par_filename, file_name);
-				}
-			}
-		}
-	}
-	if (par3_ctx->par_filename[0] == 0){
-		printf("PAR filename is not specified\n");
-		ret = RET_INVALID_COMMAND;
-		goto prepare_return;
-	} else if ( (command_operation == 'i') || (command_operation == 'd') || (command_option == 's') ){
-		// It removes sub-directory from PAR filename when using "PAR inside" feature.
-		tmp_p = offset_file_name(par3_ctx->par_filename);
-		if (tmp_p > par3_ctx->par_filename){
-			strcpy(file_name, tmp_p);
-			tmp_p[-1] = 0;
-			strcpy(par3_ctx->base_path, par3_ctx->par_filename);
-			strcpy(par3_ctx->par_filename, file_name);
-		} else {
-			par3_ctx->base_path[0] = 0;	// clear base-path
-		}
-		if (command_option == 's'){	// Check file extension for "PAR inside ZIP"
-			tmp_p = par3_ctx->par_filename;
-			len = strlen(tmp_p);
-			if ( (_stricmp(tmp_p + len - 4, ".zip") != 0) && (_stricmp(tmp_p + len - 3, ".7z") != 0) ){
-				printf("File extension is different from ZIP.\n");
-				ret = RET_FILE_IO_ERROR;
-				goto prepare_return;
-			}
-		}
-	} else {
-		tmp_p = par3_ctx->par_filename;
-		len = strlen(tmp_p);
-		// add standard extension
-		if (_stricmp(tmp_p + len - 5, ".par3") != 0){
-			strcat(tmp_p, ".par3");
-		}
-	}
-
-	if (par3_ctx->base_path[0] != 0){
-		if (par3_ctx->absolute_path != 0){	// Convert base-path to absolute path.
-			ret = get_absolute_path(file_name, par3_ctx->base_path, _MAX_PATH - 4);
-			if (ret != 0){
-				printf("Failed to convert base-path to absolute path\n");
-				ret = RET_FILE_IO_ERROR;
-				goto prepare_return;
-			}
-			strcpy(par3_ctx->base_path, file_name);
-		}
-
-		// change current directory to the specified base-path
-		if (_chdir(par3_ctx->base_path) != 0){
-			perror("Failed to change working directory");
-			ret = RET_FILE_IO_ERROR;
-			goto prepare_return;
-		}
-	} else if ( (command_operation == 'c') && (par3_ctx->absolute_path != 0) ){
-		// If base-path is empty at creation, current working directory becomes the absolute path.
-		if (_getcwd(par3_ctx->base_path, _MAX_PATH - 4) == NULL){
-			perror("Failed to get current working directory\n");
-			ret = RET_FILE_IO_ERROR;
-			goto prepare_return;
-		}
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		if (par3_ctx->memory_limit != 0){
-			if ((par3_ctx->memory_limit & ((1 << 30) - 1)) == 0){
-				printf("memory_limit = %"PRIu64" GB\n", par3_ctx->memory_limit >> 30);
-			} else if ((par3_ctx->memory_limit & ((1 << 20) - 1)) == 0){
-				printf("memory_limit = %"PRIu64" MB\n", par3_ctx->memory_limit >> 20);
-			} else if ((par3_ctx->memory_limit & ((1 << 10) - 1)) == 0){
-				printf("memory_limit = %"PRIu64" KB\n", par3_ctx->memory_limit >> 10);
-			} else {
-				printf("memory_limit = %"PRIu64" Bytes\n", par3_ctx->memory_limit);
-			}
-		}
-		if (par3_ctx->search_limit != 0)
-			printf("search_limit = %d ms\n", par3_ctx->search_limit);
-		if (par3_ctx->block_count != 0)
-			printf("Specified block count = %"PRIu64"\n", par3_ctx->block_count);
-		if (par3_ctx->block_size != 0)
-			printf("Specified block size = %"PRIu64"\n", par3_ctx->block_size);
-		if (par3_ctx->redundancy_size != 0)
-			printf("Specified redundancy = %u %%\n", par3_ctx->redundancy_size);
-		if (par3_ctx->max_redundancy_size != 0)
-			printf("max_redundancy_size = %u\n", par3_ctx->max_redundancy_size);
-		if (par3_ctx->recovery_block_count != 0)
-			printf("recovery_block_count = %"PRIu64"\n", par3_ctx->recovery_block_count);
-		if (par3_ctx->first_recovery_block != 0)
-			printf("First recovery block number = %"PRIu64"\n", par3_ctx->first_recovery_block);
-		if (par3_ctx->max_recovery_block != 0)
-			printf("max_recovery_block = %"PRIu64"\n", par3_ctx->max_recovery_block);
-		if (par3_ctx->recovery_file_count != 0)
-			printf("Specified number of recovery files = %u\n", par3_ctx->recovery_file_count);
-		if (par3_ctx->recovery_file_scheme != 0){
-			if (par3_ctx->recovery_file_scheme == -1){
-				printf("Recovery file sizing = uniform\n");
-			} else if (par3_ctx->recovery_file_scheme == -2){
-				printf("Recovery file sizing = limit\n");
-			} else if (par3_ctx->recovery_file_scheme > 0){
-				printf("Recovery file sizing = limit to %"PRId64"\n", par3_ctx->recovery_file_scheme);
-			}
-		}
-		if (par3_ctx->ecc_method != 0)
-			printf("Error Correction Codes = %u\n", par3_ctx->ecc_method);
-		if (par3_ctx->interleave != 0){
-			if (par3_ctx->ecc_method == 8){	// FFT based Reed-Solomon Codes
-				printf("Specified interleaving times = %u\n", par3_ctx->interleave);
-			} else {	// Disabled at other Error Correction Codes.
-				par3_ctx->interleave = 0;
-			}
-		}
-		if (par3_ctx->file_system != 0)
-			printf("File System Packet = 0x%X\n", par3_ctx->file_system);
-		if (par3_ctx->deduplication != 0)
-			printf("deduplication = level %c\n", par3_ctx->deduplication);
-		if (command_option == 'R')
-			printf("recursive search = enable\n");
-		if (par3_ctx->absolute_path != 0)
-			printf("Absolute path = enable\n");
-		if (par3_ctx->data_packet != 0)
-			printf("Data packet = store\n");
-		if (par3_ctx->repetition_limit != 0)
-			printf("Max packet repetition = %u\n", par3_ctx->repetition_limit);
-		if (par3_ctx->base_path[0] != 0)
-			printf("Base path = \"%s\"\n", par3_ctx->base_path);
-		printf("PAR file = \"%s\"\n", par3_ctx->par_filename);
-		printf("\n");
-	}
-
-	if (command_operation == 'c'){	// Create
-
-		// When there is no argument for input file, return to the PAR file name.
-		if (argi == argc)
-			argi--;
-
-		// search input files
-		for (; argi < argc; argi++){
-			if (utf8_argv != NULL){
-				tmp_p = utf8_argv[argi];
-			} else {
-				tmp_p = argv[argi];
-			}
-
-			// read path of an input file
-			path_copy(file_name, tmp_p, _MAX_FNAME - 32);
-			if (file_name[0] == 0)
-				continue;
-			//if (par3_ctx->noise_level >= 2){
-			//	printf("argv[%d] = \"%s\"\n", argi, file_name);
-			//}
-
-			// search files by wild card matching
-			ret = path_search(par3_ctx, file_name, command_option);
-			if (ret != 0){
-				printf("Failed to search: %s\n", file_name);
-				goto prepare_return;
-			}
-		}
-
-		// release UTF-8 argv
-		if (utf8_argv != NULL){
-			free(utf8_argv);
-			utf8_argv = NULL;
-		}
-		if (utf8_argv_buf != NULL){
-			free(utf8_argv_buf);
-			utf8_argv_buf = NULL;
-		}
-
-		// Count number of found input files and directories.
-		par3_ctx->input_file_count = namez_count(par3_ctx->input_file_name, par3_ctx->input_file_name_len);
-		par3_ctx->input_dir_count = namez_count(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len);
-		if (par3_ctx->input_file_count + par3_ctx->input_dir_count == 0){
-			printf("You must specify a list of files when creating.\n");
-			ret = RET_INVALID_COMMAND;
-			goto prepare_return;
-		}
-		if (par3_ctx->noise_level >= 0){
-			printf("Number of input file = %u, directory = %u\n", par3_ctx->input_file_count, par3_ctx->input_dir_count);
-		}
-
-		// get information of input files
-		ret = get_file_status(par3_ctx);
-		if (ret != 0){
-			printf("Failed to check file status\n");
-			goto prepare_return;
-		}
-		if (par3_ctx->block_count > 0){
-			// It's difficult to predict arrangement of blocks.
-			// Calculate "Block size" from "Total data size" dividing "Block count" simply.
-			// The result may be different from the specified block count.
-			par3_ctx->block_size = (par3_ctx->total_file_size + par3_ctx->block_count - 1) / par3_ctx->block_count;
-			// Block size must be multiple of 2 for 16-bit Reed-Solomon Codes.
-			if (par3_ctx->block_size & 1)
-				par3_ctx->block_size += 1;
-			if (par3_ctx->noise_level >= 0){
-				printf("Suggested block size = %"PRIu64"\n", par3_ctx->block_size);
-			}
-		} else if (par3_ctx->block_size == 0){
-			par3_ctx->block_size = suggest_block_size(par3_ctx);
-			if (par3_ctx->noise_level >= 0){
-				printf("Suggested block size = %"PRIu64"\n", par3_ctx->block_size);
-			}
-		} else if (par3_ctx->block_size & 1){
-			// Always increasing to multiple of 2 is easier ?
-			//if ( (par3_ctx->recovery_block_count > 128) || (par3_ctx->max_recovery_block > 128)
-			//		|| (calculate_block_count(par3_ctx, par3_ctx->block_size) > 128) ){
-				// Block size must be multiple of 2 for 16-bit Reed-Solomon Codes.
-				par3_ctx->block_size += 1;
-				if (par3_ctx->noise_level >= 0){
-					printf("Suggested block size = %"PRIu64"\n", par3_ctx->block_size);
-				}
-			//}
-		}
-		par3_ctx->block_count = calculate_block_count(par3_ctx, par3_ctx->block_size);
-		if (par3_ctx->noise_level >= 0){
-			printf("Possible block count = %"PRIu64"\n", par3_ctx->block_count);
-			printf("\n");
-		}
-
-		// sort input files for efficient tail packing.
-		ret = sort_input_set(par3_ctx);
-		if (ret != 0){
-			printf("Failed to sort input sets\n");
-			goto prepare_return;
-		}
-
-		if (command_trial == 0){
-			// create recovery files
-			ret = par3_create(par3_ctx, file_name);
-		} else {
-			// try to create recovery files
-			ret = par3_trial(par3_ctx, file_name);
-		}
-		if (ret != 0){
-			printf("Failed to create PAR file\n");
-			goto prepare_return;
-		}
-		if (par3_ctx->noise_level >= -1)
-			printf("Done\n");
-
-	} else if ( (command_operation == 'v') || (command_operation == 'r') || (command_operation == 'l') ){	// Verify, Repair or List
-
-		if (command_operation != 'l'){	// Verify or Repair
-			// search extra files
-			for (; argi < argc; argi++){
-				if (utf8_argv != NULL){
-					tmp_p = utf8_argv[argi];
-				} else {
-					tmp_p = argv[argi];
-				}
-
-				// read relative path of an input file
-				path_copy(file_name, tmp_p, _MAX_FNAME - 32);
-				if (file_name[0] == 0)
-					continue;
-				//if (par3_ctx->noise_level >= 2){
-				//	printf("argv[%d] = \"%s\"\n", argi, file_name);
-				//}
-
-				// search files by wild card matching
-				ret = extra_search(par3_ctx, file_name);
-				if (ret != 0){
-					printf("Failed to search: %s\n", file_name);
-					goto prepare_return;
-				}
-			}
-		}
-
-		// release UTF-8 argv
-		if (utf8_argv != NULL){
-			free(utf8_argv);
-			utf8_argv = NULL;
-		}
-		if (utf8_argv_buf != NULL){
-			free(utf8_argv_buf);
-			utf8_argv_buf = NULL;
-		}
-
-		// search par files
-		if ( (command_operation == 'l') || (command_option == 's') ){	// List or Self
-			ret = par_search(par3_ctx, par3_ctx->par_filename, 0);	// Check the specified PAR3 file only.
-		} else {	// Verify or Repair
-			ret = par_search(par3_ctx, par3_ctx->par_filename, 1);	// Check other PAR3 files, too.
-		}
-		if (ret != 0){
-			printf("Failed to search PAR files\n");
-			goto prepare_return;
-		}
-
-		if (command_operation == 'l'){
-			ret = par3_list(par3_ctx);
-			if (ret != 0){
-				printf("Failed to list files in PAR file\n");
-				goto prepare_return;
-			}
-			if (par3_ctx->noise_level >= -1)
-				printf("Listed\n");
-
-		} else if (command_operation == 'v'){
-			ret = par3_verify(par3_ctx);
-			if ( (ret != 0) && (ret != RET_REPAIR_POSSIBLE) && (ret != RET_REPAIR_NOT_POSSIBLE) ){
-				printf("Failed to verify with PAR file\n");
-				goto prepare_return;
-			}
-
-		} else {
-			ret = par3_repair(par3_ctx, file_name);
-			if ( (ret != 0) && (ret != RET_REPAIR_FAILED) && (ret != RET_REPAIR_NOT_POSSIBLE) ){
-				printf("Failed to repair with PAR file\n");
-				goto prepare_return;
-			}
-		}
-
-	} else if (command_operation == 'e'){	// Extend
-
-		// Base name of reference files is same as creating PAR3 files.
-		if (argi == argc){
-			if (par3_ctx->noise_level >= 1){
-				printf("Reference file = \"%s\"\n", par3_ctx->par_filename);
-			}
-			ret = par_search(par3_ctx, par3_ctx->par_filename, 1);
-			if (ret != 0){
-				printf("Failed to search PAR files\n");
-				goto prepare_return;
-			}
-
-		// search reference files
-		} else {
-			if (utf8_argv != NULL){
-				tmp_p = utf8_argv[argi];
-			} else {
-				tmp_p = argv[argi];
-			}
-
-			// read relative path of a reference file
-			path_copy(file_name, tmp_p, _MAX_FNAME - 32);
-			if (file_name[0] == 0){
-				printf("PAR filename is not specified\n");
-				ret = RET_INVALID_COMMAND;
-				goto prepare_return;
-			}
-			// PAR filename must not include wildcard (* or ?).
-			len = strcspn(file_name, "*?");
-			if (len < strlen(file_name)){
-				printf("Found wildcard in PAR filename, %s\n", file_name);
-				ret = RET_INVALID_COMMAND;
-				goto prepare_return;
-			} else {
-				// PAR filename may be a relative path from current working directory.
-				if (par3_ctx->base_path[0] != 0){
-					char absolute_path[_MAX_PATH];
-					// if base-path isn't empty, relative from current working directory.
-					ret = get_absolute_path(absolute_path, file_name, _MAX_PATH - 8);
-					if (ret != 0){
-						printf("Failed to convert PAR filename to absolute path\n");
-						ret = RET_FILE_IO_ERROR;
-						goto prepare_return;
-					}
-					strcpy(file_name, absolute_path);
-				}
-			}
-			if (par3_ctx->noise_level >= 1){
-				printf("Reference file = \"%s\"\n", file_name);
-			}
-
-			// search par files
-			ret = par_search(par3_ctx, file_name, 1);
-			if (ret != 0){
-				printf("Failed to search: %s\n", file_name);
-				goto prepare_return;
-			}
-		}
-
-		// release UTF-8 argv
-		if (utf8_argv != NULL){
-			free(utf8_argv);
-			utf8_argv = NULL;
-		}
-		if (utf8_argv_buf != NULL){
-			free(utf8_argv_buf);
-			utf8_argv_buf = NULL;
-		}
-
-		ret = par3_extend(par3_ctx, command_trial, file_name);
-		if (ret != 0){
-			printf("Failed to extend PAR file\n");
-			goto prepare_return;
-		}
-		if (par3_ctx->noise_level >= -1)
-			printf("Done\n");
-
-	} else if ( (command_operation == 'i') || (command_operation == 'd') ){	// PAR inside
-
-		// Outside file = input file = PAR file
-		par3_ctx->input_file_name_len = strlen(par3_ctx->par_filename) + 1;
-		par3_ctx->input_file_name_max = par3_ctx->input_file_name_len;
-		par3_ctx->input_file_name = malloc(par3_ctx->input_file_name_max);
-		if (par3_ctx->input_file_name == NULL){
-			ret = RET_MEMORY_ERROR;
-			goto prepare_return;
-		}
-		strcpy(par3_ctx->input_file_name, par3_ctx->par_filename);
-		//printf("input file = \"%s\"\n", par3_ctx->input_file_name);
-		par3_ctx->input_file_count = 1;
-
-		// release UTF-8 argv
-		if (utf8_argv != NULL){
-			free(utf8_argv);
-			utf8_argv = NULL;
-		}
-		if (utf8_argv_buf != NULL){
-			free(utf8_argv_buf);
-			utf8_argv_buf = NULL;
-		}
-
-		// get information of input files
-		ret = get_file_status(par3_ctx);
-		if (ret != 0){
-			printf("Failed to check file status\n");
-			goto prepare_return;
-		}
-
-		if (command_operation == 'i'){
-			// insert PAR3 packets in ZIP file
-			ret = par3_insert_zip(par3_ctx, command_trial);
-		} else if (command_operation == 'd'){
-			// delete PAR3 packets from ZIP file
-			ret = par3_delete_zip(par3_ctx);
-		} else {
-			ret = RET_INVALID_COMMAND;
-		}
-		if (ret != 0){
-			printf("Failed to operate PAR inside ZIP\n");
-			goto prepare_return;
-		}
-		if (par3_ctx->noise_level >= -1)
-			printf("Done\n");
-	}
-
-	ret = 0;
-prepare_return:
-
-	// release memory
-	if (utf8_argv != NULL)
-		free(utf8_argv);
-	if (utf8_argv_buf != NULL)
-		free(utf8_argv_buf);
-	if (par3_ctx != NULL){
-		par3_release(par3_ctx);
-		free(par3_ctx);
-	}
-
-	return ret;
-}
diff --git a/windows/src/map.c b/windows/src/map.c
deleted file mode 100644
index e5b369f..0000000
--- a/windows/src/map.c
+++ /dev/null
@@ -1,570 +0,0 @@
-#ifdef _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "hash.h"
-
-
-// map input file slices into input blocks without slide search
-int map_input_block(PAR3_CTX *par3_ctx)
-{
-	uint8_t *work_buf, buf_tail[40], buf_hash[16];
-	int progress_old, progress_now;
-	uint32_t num, num_pack, input_file_count;
-	uint32_t chunk_count, chunk_index, chunk_num;
-	int64_t find_index, previous_index, tail_offset;
-	uint64_t block_size, tail_size, file_offset;
-	uint64_t block_count, block_index;
-	uint64_t slice_index, index, last_index;
-	uint64_t crc, num_dedup;
-	uint64_t progress_total, progress_step;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p, *chunk_list;
-	PAR3_SLICE_CTX *slice_p, *slice_list;
-	PAR3_BLOCK_CTX *block_p, *block_list;
-	PAR3_CMP_CTX *crc_list;
-	FILE *fp;
-	blake3_hasher hasher;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// Copy variables from context to local.
-	input_file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	if ( (input_file_count == 0) || (block_size == 0) || (block_count == 0) )
-		return RET_LOGIC_ERROR;
-
-	// For deduplication, allocate chunks description as 4 * number of input files.
-	// Note, empty file won't use Chunk Description.
-	chunk_count = input_file_count * 4;
-	if (par3_ctx->noise_level >= 2){
-		printf("Initial chunk count = %u (input file count = %u)\n", chunk_count, input_file_count);
-	}
-	chunk_p = malloc(sizeof(PAR3_CHUNK_CTX) * chunk_count);
-	if (chunk_p == NULL){
-		perror("Failed to allocate memory for chunk description");
-		return RET_MEMORY_ERROR;
-	}
-	chunk_list = chunk_p;
-	par3_ctx->chunk_list = chunk_p;
-
-	// When no slide search, number of input file slice is same as number of input blocks.
-	// Deduplication against full size blocks only.
-	slice_p = malloc(sizeof(PAR3_SLICE_CTX) * block_count);
-	if (slice_p == NULL){
-		perror("Failed to allocate memory for input file slices");
-		return RET_MEMORY_ERROR;
-	}
-	slice_list = slice_p;
-	par3_ctx->slice_list = slice_p;
-
-	// Allocate max number of input blocks at first.
-	block_p = malloc(sizeof(PAR3_BLOCK_CTX) * block_count);
-	if (block_p == NULL){
-		perror("Failed to allocate memory for input blocks");
-		return RET_MEMORY_ERROR;
-	}
-	block_list = block_p;
-	par3_ctx->block_list = block_p;
-
-	// Allocate list of CRC-64 for maximum items
-	crc_list = malloc(sizeof(PAR3_CMP_CTX) * block_count);
-	if (crc_list == NULL){
-		perror("Failed to allocate memory for comparison of CRC-64");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->crc_list = crc_list;
-	par3_ctx->crc_count = 0;	// There is no item yet.
-
-	// Allocate memory to store file data temporary.
-	work_buf = malloc(block_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input file data");
-		return RET_MEMORY_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing hash:\n");
-		progress_total = par3_ctx->total_file_size;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Read data of input files on memory
-	num_dedup = 0;
-	num_pack = 0;
-	chunk_index = 0;
-	block_index = 0;
-	slice_index = 0;
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < input_file_count; num++){
-		blake3_hasher_init(&hasher);
-		if (file_p->size == 0){	// Skip empty files.
-			blake3_hasher_finalize(&hasher, file_p->hash, 16);
-			file_p++;
-			continue;
-		}
-		if (par3_ctx->noise_level >= 2){
-			printf("file size = %"PRIu64" \"%s\"\n", file_p->size, file_p->name);
-		}
-
-		fp = fopen(file_p->name, "rb");
-		if (fp == NULL){
-			perror("Failed to open input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// First chunk in this file
-		previous_index = -4;
-		file_p->chunk = chunk_index;	// There is at least one chunk in each file.
-		chunk_p->size = 0;
-		chunk_p->block = 0;
-		chunk_num = 0;
-
-		// Read full size blocks
-		file_offset = 0;
-		while (file_offset + block_size <= file_p->size){
-			// read full block from input file
-			if (fread(work_buf, 1, (size_t)block_size, fp) != (size_t)block_size){
-				perror("Failed to read full size chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += block_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + block_size < 16384){
-				file_p->crc = crc64(work_buf, (size_t)block_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-			}
-			blake3_hasher_update(&hasher, work_buf, (size_t)block_size);
-
-			// Compare current CRC-64 with previous blocks.
-			crc = crc64(work_buf, (size_t)block_size, 0);
-			find_index = crc_list_compare(par3_ctx, crc, work_buf, buf_hash);
-			//printf("find_index = %"PRId64", previous_index = %"PRId64"\n", find_index, previous_index);
-			if (find_index < 0){	// No match
-				// Add full size block into list
-				crc_list_add(par3_ctx, crc, block_index);
-
-				// set block info
-				block_p->slice = slice_index;
-				block_p->size = block_size;
-				block_p->crc = crc;
-				if (find_index == -3){
-					memcpy(block_p->hash, buf_hash, 16);
-				} else {
-					blake3(work_buf, (size_t)block_size, block_p->hash);
-				}
-				block_p->state = 1 | 64;
-
-				// set chunk info
-				if ( (chunk_p->size > 0) && (previous_index >= 0) ){	// When there are old blocks already in the chunk.
-					// Close previous chunk.
-					chunk_num++;
-					chunk_index++;
-					if (chunk_index >= chunk_count){
-						chunk_count *= 2;
-						chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-						if (chunk_p == NULL){
-							perror("Failed to re-allocate memory for chunk description");
-							fclose(fp);
-							return RET_MEMORY_ERROR;
-						}
-						chunk_list = chunk_p;
-						par3_ctx->chunk_list = chunk_p;
-						chunk_p += chunk_index;
-					} else {
-						chunk_p++;
-					}
-					chunk_p->size = 0;
-				}
-				if (chunk_p->size == 0){	// When this is the first block in the chunk.
-					// Save index of starting block.
-					chunk_p->block = block_index;
-				}
-				chunk_p->size += block_size;
-				previous_index = -4;
-
-				// set slice info
-				slice_p->chunk = chunk_index;
-				slice_p->block = block_index;
-				if (par3_ctx->noise_level >= 3){
-					printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-							block_index, slice_index, chunk_index, num, file_offset);
-				}
-
-				block_p++;
-				block_index++;
-
-			} else {	// Match with a previous block
-				// update the last slice info of previous block
-				index = block_list[find_index].slice;
-				while (slice_list[index].next != -1){
-					index = slice_list[index].next;
-				}
-				//printf("first index = %"PRIu64", same = %"PRIu64", slice_index = %"PRIu64"\n", block_list[find_index].slice, index, slice_index);
-				slice_list[index].next = slice_index;
-
-				if ( (chunk_p->size > 0) &&	// When there are blocks already in the chunk.
-						(find_index != previous_index + 1) ){	// If found block isn't the next of previous block.
-
-					// Close previous chunk.
-					chunk_num++;
-					chunk_index++;
-					if (chunk_index >= chunk_count){
-						chunk_count *= 2;
-						chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-						if (chunk_p == NULL){
-							perror("Failed to re-allocate memory for chunk description");
-							fclose(fp);
-							return RET_MEMORY_ERROR;
-						}
-						chunk_list = chunk_p;
-						par3_ctx->chunk_list = chunk_p;
-						chunk_p += chunk_index;
-					} else {
-						chunk_p++;
-					}
-
-					// Start next chunk
-					chunk_p->size = 0;
-				}
-				if (chunk_p->size == 0){	// When this is the first block in the chunk.
-					// Save index of starting block.
-					chunk_p->block = find_index;
-				}
-
-				// set slice info
-				slice_p->chunk = chunk_index;
-				slice_p->block = find_index;
-				if (par3_ctx->noise_level >= 3){
-					printf("old block[%2"PRId64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-							find_index, slice_index, chunk_index, num, file_offset);
-				}
-
-				// set chunk info
-				chunk_p->size += block_size;
-				previous_index = find_index;
-				num_dedup++;
-			}
-
-			// set common slice info
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = block_size;
-			slice_p->tail_offset = 0;
-			slice_p->next = -1;
-
-			// prepare next slice info
-			slice_p++;
-			slice_index++;
-
-			file_offset += block_size;
-		}
-
-		// Calculate size of chunk tail, and read it.
-		tail_size = file_p->size - file_offset;
-		//printf("tail_size = %"PRIu64", file size = %"PRIu64", offset %"PRIu64"\n", tail_size, file_p->size, file_offset);
-		if (tail_size >= 40){
-			// read chunk tail from input file on temporary block
-			if (fread(work_buf, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate checksum of chunk tail
-			chunk_p->tail_crc = crc64(work_buf, 40, 0);
-			blake3(work_buf, (size_t)tail_size, chunk_p->tail_hash);
-
-			// search existing tails of same data
-			tail_offset = 0;
-			for (index = 0; index < slice_index; index++){
-				//printf("tail size = %"PRIu64"\n", slice_list[index].size);
-				if (slice_list[index].size == tail_size){	// same size tail
-					//printf("crc = 0x%016"PRIx64", 0x%016"PRIx64" chunk[%2u]\n", chunk_p->tail_crc, chunk_list[slice_list[index].chunk].tail_crc, slice_list[index].chunk);
-					if (chunk_p->tail_crc == chunk_list[slice_list[index].chunk].tail_crc){
-						if (memcmp(chunk_p->tail_hash, chunk_list[slice_list[index].chunk].tail_hash, 16) == 0){
-							tail_offset = -1;
-
-							// find the last slice info in the block
-							last_index = index;
-							while (slice_list[last_index].next != -1){
-								last_index = slice_list[last_index].next;
-							}
-							break;
-						}
-					}
-				}
-			}
-			if (tail_offset == 0){
-				// search existing blocks to check available space
-				for (index = 0; index < block_index; index++){
-					if (block_list[index].size + tail_size <= block_size){
-						// When tail can fit in the space, put the tail there.
-						tail_offset = block_list[index].size;
-
-						// find the last slice info in the block
-						last_index = block_list[index].slice;
-						while (slice_list[last_index].next != -1){
-							last_index = slice_list[last_index].next;
-						}
-						break;
-					}
-				}
-			}
-			//printf("tail_offset = %"PRId64"\n", tail_offset);
-
-			if (tail_offset < 0){	// Same data as previous tail
-				if (par3_ctx->noise_level >= 3){
-					printf("o t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRIu64"\n",
-							slice_list[index].block, slice_index, chunk_index, num, file_offset, tail_size, slice_list[index].tail_offset);
-				}
-				slice_list[last_index].next = slice_index;	// These same tails have same offset and size.
-
-				// set slice info
-				slice_p->block = slice_list[index].block;
-				slice_p->tail_offset = slice_list[index].tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = slice_p->block;
-				chunk_p->tail_offset = slice_p->tail_offset;
-				num_dedup++;
-
-			} else if (tail_offset == 0){	// Put tail in new block
-				if (par3_ctx->noise_level >= 3){
-					printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-							block_index, slice_index, chunk_index, num, file_offset, tail_size);
-				}
-
-				// set slice info
-				slice_p->block = block_index;
-				slice_p->tail_offset = 0;
-
-				// set chunk tail info
-				chunk_p->tail_block = block_index;
-				chunk_p->tail_offset = 0;
-
-				// set block info (block for tails don't store checksum)
-				block_p->slice = slice_index;
-				block_p->size = tail_size;
-				block_p->crc = crc64(work_buf, (size_t)tail_size, 0);
-				block_p->state = 2 | 64;
-				block_p++;
-				block_index++;
-
-			} else {	// Put tail after another tail
-				if (par3_ctx->noise_level >= 3){
-					printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-							index, slice_index, chunk_index, num, file_offset, tail_size, tail_offset);
-				}
-				slice_list[last_index].next = slice_index;	// update "next" item in the previous tail
-
-				// set slice info
-				slice_p->block = index;
-				slice_p->tail_offset = tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = index;
-				chunk_p->tail_offset = tail_offset;
-				num_pack++;
-
-				// update block info
-				block_list[slice_p->block].size = tail_offset + tail_size;
-				block_list[slice_p->block].crc = crc64(work_buf, (size_t)tail_size, block_list[slice_p->block].crc);
-			}
-
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + tail_size < 16384){
-				file_p->crc = crc64(work_buf, (size_t)tail_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-			}
-			blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-
-			// set common slice info
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = tail_size;
-			slice_p->chunk = chunk_index;
-			slice_p->next = -1;
-			slice_p++;
-			slice_index++;
-
-		} else if (tail_size > 0){
-			// When tail size is 1~39 bytes, it's saved in File Packet.
-			if (fread(buf_tail, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-			if (par3_ctx->noise_level >= 3){
-				printf("    block no  : slice no  chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-						chunk_index, num, file_offset, tail_size);
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + tail_size < 16384){
-				file_p->crc = crc64(buf_tail, (size_t)tail_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(buf_tail, (size_t)(16384 - file_offset), file_p->crc);
-			}
-			blake3_hasher_update(&hasher, buf_tail, (size_t)tail_size);
-
-			// copy 1 ~ 39 bytes
-			memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-			memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-			memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-			memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-		}
-		chunk_p->size += tail_size;
-
-		// Close chunk description
-		if (chunk_p->size > 0){
-			chunk_num++;
-			chunk_index++;
-			if (chunk_index >= chunk_count){
-				chunk_count *= 2;
-				chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-				if (chunk_p == NULL){
-					perror("Failed to re-allocate memory for chunk description");
-					fclose(fp);
-					return RET_MEMORY_ERROR;
-				}
-				chunk_list = chunk_p;
-				par3_ctx->chunk_list = chunk_p;
-				chunk_p += chunk_index;
-			} else {
-				chunk_p++;
-			}
-		}
-		file_p->chunk_num = chunk_num;
-
-		blake3_hasher_finalize(&hasher, file_p->hash, 16);
-		if (fclose(fp) != 0){
-			perror("Failed to close input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		file_p++;
-	}
-
-	// Release temporary buffer.
-	free(crc_list);
-	par3_ctx->crc_list = NULL;
-	par3_ctx->crc_count = 0;
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	// Re-allocate memory for actual number of chunk description
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of chunk description = %u (max %u)\n", chunk_index, chunk_count);
-	}
-	if (chunk_index < chunk_count){
-		if (chunk_index > 0){
-			chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_index);
-			if (chunk_p == NULL){
-				perror("Failed to re-allocate memory for chunk description");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->chunk_list = chunk_p;
-		} else {
-			free(par3_ctx->chunk_list);
-			par3_ctx->chunk_list = NULL;
-		}
-	}
-	par3_ctx->chunk_count = chunk_index;
-
-	// Check actual number of slice info
-	if (slice_index != block_count){
-		printf("Number of input file slice = %"PRIu64" (max %"PRIu64")\n", slice_index, block_count);
-		return RET_LOGIC_ERROR;
-	}
-	par3_ctx->slice_count = slice_index;
-
-	// Update actual number of input blocks
-	if (block_index < block_count){
-		block_count = block_index;
-		par3_ctx->block_count = block_count;
-
-		// realloc
-		block_p = realloc(par3_ctx->block_list, sizeof(PAR3_BLOCK_CTX) * block_count);
-		if (block_p == NULL){
-			perror("Failed to re-allocate memory for input blocks");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->block_list = block_p;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Actual block count = %"PRIu64", Tail packing = %u, Deduplication = %"PRIu64"\n", block_count, num_pack, num_dedup);
-	}
-
-	return 0;
-}
diff --git a/windows/src/map.h b/windows/src/map.h
deleted file mode 100644
index f15b704..0000000
--- a/windows/src/map.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-// map chunk tails, when there are no input blocks.
-int map_chunk_tail(PAR3_CTX *par3_ctx);
-
-// map input file slices into input blocks without deduplication
-int map_input_block_simple(PAR3_CTX *par3_ctx);
-int map_input_block_trial(PAR3_CTX *par3_ctx);
-
-// map input file slices into input blocks without slide search
-int map_input_block(PAR3_CTX *par3_ctx);
-
-// map input file slices into input blocks with slide search
-int map_input_block_slide(PAR3_CTX *par3_ctx);
-
-
-// Par inside ZIP
-int map_input_block_zip(PAR3_CTX *par3_ctx, int footer_size, uint64_t unprotected_size);
-
diff --git a/windows/src/map_inside.c b/windows/src/map_inside.c
deleted file mode 100644
index 5400951..0000000
--- a/windows/src/map_inside.c
+++ /dev/null
@@ -1,695 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "hash.h"
-
-
-// map input file slices into input blocks for outside ZIP file
-int map_input_block_zip(PAR3_CTX *par3_ctx, int footer_size, uint64_t unprotected_size)
-{
-	uint8_t *work_buf, buf_tail[40];
-	int progress_old, progress_now;
-	uint32_t num_pack, num_dedup;
-	uint32_t chunk_index, chunk_count;
-	uint64_t original_file_size, data_size, slice_count;
-	uint64_t block_size, tail_size, file_offset, tail_offset;
-	uint64_t block_count, block_index, slice_index, tail_index;
-	uint64_t progress_total, progress_step;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p, *chunk_list;
-	PAR3_SLICE_CTX *slice_p, *slice_list;
-	PAR3_BLOCK_CTX *block_p, *block_list;
-	FILE *fp;
-	blake3_hasher hasher;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// Copy variables from context to local.
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	if ( (par3_ctx->input_file_count != 1) || (block_size == 0) || (block_count == 0) )
-		return RET_LOGIC_ERROR;
-
-	original_file_size = par3_ctx->total_file_size;
-	if (footer_size > 0){
-		// It splits original file into 2 chunks and appends 2 chunks.
-		// [ data chunk ] [ footer chunk ] [ unprotected chunk ] [ duplicated footer chunk ]
-		data_size = original_file_size - footer_size;
-		chunk_count = 4;
-	} else {
-		// It appends 1 chunk.
-		// [ protected chunk ] [ unprotected chunk ]
-		data_size = original_file_size;
-		chunk_count = 2;
-	}
-
-	chunk_p = malloc(sizeof(PAR3_CHUNK_CTX) * chunk_count);
-	if (chunk_p == NULL){
-		perror("Failed to allocate memory for chunk description");
-		return RET_MEMORY_ERROR;
-	}
-	chunk_list = chunk_p;
-	par3_ctx->chunk_list = chunk_p;
-
-	// Number of input file slice may be more than number of input blocks.
-	slice_count = footer_size / block_size;
-	if (footer_size % block_size >= 40)
-		slice_count++;
-	slice_count *= 2;
-	slice_count += data_size / block_size;
-	if (data_size % block_size >= 40)
-		slice_count++;
-	slice_p = malloc(sizeof(PAR3_SLICE_CTX) * slice_count);
-	if (slice_p == NULL){
-		perror("Failed to allocate memory for input file slices");
-		return RET_MEMORY_ERROR;
-	}
-	slice_list = slice_p;
-	par3_ctx->slice_list = slice_p;
-
-	// Number of input blocks is calculated already.
-	block_p = malloc(sizeof(PAR3_BLOCK_CTX) * block_count);
-	if (block_p == NULL){
-		perror("Failed to allocate memory for input blocks");
-		return RET_MEMORY_ERROR;
-	}
-	block_list = block_p;
-	par3_ctx->block_list = block_p;
-
-	// Allocate memory to store file data temporary.
-	work_buf = malloc(block_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing hash:\n");
-		progress_total = original_file_size + footer_size;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Read data of input files on memory
-	num_dedup = 0;
-	num_pack = 0;
-	tail_offset = 0;
-	chunk_index = 0;
-	block_index = 0;
-	slice_index = 0;
-	file_p = par3_ctx->input_file_list;
-
-	blake3_hasher_init(&hasher);
-	fp = fopen(file_p->name, "rb");
-	if (fp == NULL){
-		perror("Failed to open input file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// 1st chunk is a protected chunk
-	file_p->chunk = 0;
-	file_p->chunk_num = chunk_count;
-	chunk_p->size = data_size;
-	chunk_p->block = 0;
-
-	// Read full size blocks
-	file_offset = 0;
-	while (file_offset + block_size <= data_size){
-		// read full block from input file
-		if (fread(work_buf, 1, (size_t)block_size, fp) != (size_t)block_size){
-			perror("Failed to read full size chunk on input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step += block_size;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (int)((progress_step * 1000) / progress_total);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-
-		if (original_file_size >= 16384){
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + block_size < 16384){
-				file_p->crc = crc64(work_buf, (size_t)block_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-			}
-		}
-		blake3_hasher_update(&hasher, work_buf, (size_t)block_size);
-
-		// set block info
-		block_p->slice = slice_index;
-		block_p->size = block_size;
-		block_p->crc = crc64(work_buf, (size_t)block_size, 0);
-		blake3(work_buf, (size_t)block_size, block_p->hash);
-		block_p->state = 1 | 64;
-
-		// set slice info
-		slice_p->chunk = chunk_index;
-		slice_p->file = 0;
-		slice_p->offset = file_offset;
-		slice_p->size = block_size;
-		slice_p->block = block_index;
-		slice_p->tail_offset = 0;
-		slice_p->next = -1;
-		if (par3_ctx->noise_level >= 3){
-			printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64"\n",
-					block_index, slice_index, chunk_index, file_offset);
-		}
-		slice_p++;
-		slice_index++;
-
-		file_offset += block_size;
-		block_p++;
-		block_index++;
-	}
-
-	// Calculate size of chunk tail, and read it.
-	tail_size = data_size - file_offset;
-	//printf("tail_size = %"PRIu64", file size = %"PRIu64", offset %"PRIu64"\n", tail_size, original_file_size, file_offset);
-	if (tail_size >= 40){
-		// read chunk tail from input file
-		if (fread(work_buf, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-			perror("Failed to read tail chunk on input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step += tail_size;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (int)((progress_step * 1000) / progress_total);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-
-		// calculate checksum of chunk tail
-		chunk_p->tail_crc = crc64(work_buf, 40, 0);
-		blake3(work_buf, (size_t)tail_size, chunk_p->tail_hash);
-
-		// Put tail in new block
-		if (par3_ctx->noise_level >= 3){
-			printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64", tail size %"PRIu64"\n",
-					block_index, slice_index, chunk_index, file_offset, tail_size);
-		}
-
-		// set slice info
-		slice_p->block = block_index;
-		slice_p->tail_offset = 0;
-
-		// set chunk tail info
-		tail_index = slice_index;
-		tail_offset = tail_size;
-		chunk_p->tail_block = block_index;
-		chunk_p->tail_offset = 0;
-
-		// set block info (block for tails don't store checksum)
-		block_p->slice = slice_index;
-		block_p->size = tail_size;
-		block_p->crc = crc64(work_buf, (size_t)tail_size, 0);
-		block_p->state = 2 | 64;
-		block_p++;
-		block_index++;
-
-		if (original_file_size >= 16384){
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + tail_size < 16384){
-				file_p->crc = crc64(work_buf, (size_t)tail_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-			}
-		}
-		blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-
-		// set common slice info
-		slice_p->file = 0;
-		slice_p->offset = file_offset;
-		slice_p->size = tail_size;
-		slice_p->chunk = chunk_index;
-		slice_p->next = -1;
-		slice_p++;
-		slice_index++;
-
-	} else if (tail_size > 0){
-		// When tail size is 1~39 bytes, it's saved in File Packet.
-		if (fread(buf_tail, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-			perror("Failed to read tail chunk on input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-		if (par3_ctx->noise_level >= 3){
-			printf("    block no  : slice no  chunk[%2u], offset %"PRIu64", tail size %"PRIu64"\n",
-					chunk_index, file_offset, tail_size);
-		}
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step += tail_size;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (int)((progress_step * 1000) / progress_total);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-
-		if (original_file_size >= 16384){
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + tail_size < 16384){
-				file_p->crc = crc64(buf_tail, (size_t)tail_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(buf_tail, (size_t)(16384 - file_offset), file_p->crc);
-			}
-		}
-		blake3_hasher_update(&hasher, buf_tail, (size_t)tail_size);
-
-		// copy 1 ~ 39 bytes
-		memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-		memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-		memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-		memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-	}
-	file_offset += tail_size;
-	chunk_p++;
-	chunk_index++;
-
-	// When there is footer, 2nd chunk is a protected chunk.
-	if (footer_size > 0){
-		//printf("file_offset = %"PRId64"\n", file_offset);
-		chunk_p->size = footer_size;
-		chunk_p->block = block_index;
-
-		// Read full size blocks
-		while (file_offset + block_size <= original_file_size){
-			// read full block from input file
-			if (fread(work_buf, 1, (size_t)block_size, fp) != (size_t)block_size){
-				perror("Failed to read full size chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += block_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			if (original_file_size >= 16384){
-				// calculate CRC-64 of the first 16 KB
-				if (file_offset + block_size < 16384){
-					file_p->crc = crc64(work_buf, (size_t)block_size, file_p->crc);
-				} else if (file_offset < 16384){
-					file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-				}
-			}
-			blake3_hasher_update(&hasher, work_buf, (size_t)block_size);
-
-			// set block info
-			block_p->slice = slice_index;
-			block_p->size = block_size;
-			block_p->crc = crc64(work_buf, (size_t)block_size, 0);
-			blake3(work_buf, (size_t)block_size, block_p->hash);
-			block_p->state = 1 | 64;
-
-			// set slice info
-			slice_p->chunk = chunk_index;
-			slice_p->file = 0;
-			slice_p->offset = file_offset;
-			slice_p->size = block_size;
-			slice_p->block = block_index;
-			slice_p->tail_offset = 0;
-			slice_p->next = -1;
-			if (par3_ctx->noise_level >= 3){
-				printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64"\n",
-						block_index, slice_index, chunk_index, file_offset);
-			}
-			slice_p++;
-			slice_index++;
-
-			file_offset += block_size;
-			block_p++;
-			block_index++;
-		}
-
-		// Calculate size of chunk tail, and read it.
-		tail_size = original_file_size - file_offset;
-		//printf("tail_size = %"PRIu64", file size = %"PRIu64", offset %"PRIu64"\n", tail_size, original_file_size, file_offset);
-		if (tail_size >= 40){
-			// read chunk tail from input file
-			if (fread(work_buf, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate checksum of chunk tail
-			chunk_p->tail_crc = crc64(work_buf, 40, 0);
-			blake3(work_buf, (size_t)tail_size, chunk_p->tail_hash);
-
-			if ( (tail_offset == 0) || (tail_offset + tail_size > block_size) ){	// Put tail in new block
-				if (par3_ctx->noise_level >= 3){
-					printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64", tail size %"PRIu64"\n",
-							block_index, slice_index, chunk_index, file_offset, tail_size);
-				}
-
-				// set slice info
-				slice_p->block = block_index;
-				slice_p->tail_offset = 0;
-
-				// set chunk tail info
-				chunk_p->tail_block = block_index;
-				chunk_p->tail_offset = 0;
-
-				// set block info (block for tails don't store checksum)
-				block_p->slice = slice_index;
-				block_p->size = tail_size;
-				block_p->crc = crc64(work_buf, (size_t)tail_size, 0);
-				block_p->state = 2 | 64;
-				block_p++;
-				block_index++;
-
-			} else {	// Put tail after another tail
-				if (par3_ctx->noise_level >= 3){
-					printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-							slice_list[tail_index].block, slice_index, chunk_index, file_offset, tail_size, tail_offset);
-				}
-				slice_list[tail_index].next = slice_index;	// update "next" item in the front tail
-
-				// set slice info
-				slice_p->block = slice_list[tail_index].block;
-				slice_p->tail_offset = tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = slice_list[tail_index].block;
-				chunk_p->tail_offset = tail_offset;
-				num_pack++;
-
-				// update block info
-				block_list[slice_p->block].size = tail_offset + tail_size;
-				block_list[slice_p->block].crc = crc64(work_buf, (size_t)tail_size, block_list[slice_p->block].crc);
-			}
-
-			if (original_file_size >= 16384){
-				// calculate CRC-64 of the first 16 KB
-				if (file_offset + tail_size < 16384){
-					file_p->crc = crc64(work_buf, (size_t)tail_size, file_p->crc);
-				} else if (file_offset < 16384){
-					file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-				}
-			}
-			blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-
-			// set common slice info
-			tail_index = slice_index;
-			slice_p->file = 0;
-			slice_p->offset = file_offset;
-			slice_p->size = tail_size;
-			slice_p->chunk = chunk_index;
-			slice_p->next = -1;
-			slice_p++;
-			slice_index++;
-
-		} else if (tail_size > 0){
-			// When tail size is 1~39 bytes, it's saved in File Packet.
-			if (fread(buf_tail, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-			if (par3_ctx->noise_level >= 3){
-				printf("    block no  : slice no  chunk[%2u], offset %"PRIu64", tail size %"PRIu64"\n",
-						chunk_index, file_offset, tail_size);
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			if (original_file_size >= 16384){
-				// calculate CRC-64 of the first 16 KB
-				if (file_offset + tail_size < 16384){
-					file_p->crc = crc64(buf_tail, (size_t)tail_size, file_p->crc);
-				} else if (file_offset < 16384){
-					file_p->crc = crc64(buf_tail, (size_t)(16384 - file_offset), file_p->crc);
-				}
-			}
-			blake3_hasher_update(&hasher, buf_tail, (size_t)tail_size);
-
-			// copy 1 ~ 39 bytes
-			memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-			memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-			memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-			memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-		}
-		file_offset += tail_size;
-		chunk_p++;
-		chunk_index++;
-
-		// 3rd chunk is an uprotected chunk.
-		chunk_p->size = 0;
-		chunk_p->block = unprotected_size;
-		if (par3_ctx->noise_level >= 3){
-			printf("    block no  : slice no  chunk[%2u], offset %"PRIu64", unprotected size %"PRIu64"\n",
-					chunk_index, file_offset, unprotected_size);
-		}
-		chunk_p++;
-		chunk_index++;
-
-		// 4th chunk is a protected chunk, which content is same as 2nd chunk.
-		chunk_p->size = footer_size;
-		chunk_p->block = chunk_list[1].block;
-		chunk_p->tail_crc = chunk_list[1].tail_crc;
-		memcpy(chunk_p->tail_hash, chunk_list[1].tail_hash, 16);
-		chunk_p->tail_block = chunk_list[1].tail_block;
-		chunk_p->tail_offset = chunk_list[1].tail_offset;
-
-		// Seek to the start of 2nd chunk
-		if (_fseeki64(fp, data_size, SEEK_SET) != 0){
-			perror("Failed to seek input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		file_offset = original_file_size + unprotected_size;
-		while (file_offset + block_size <= original_file_size + unprotected_size + footer_size){
-			// read full block from input file
-			if (fread(work_buf, 1, (size_t)block_size, fp) != (size_t)block_size){
-				perror("Failed to read full size chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += block_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			blake3_hasher_update(&hasher, work_buf, (size_t)block_size);
-
-			// set slice info
-			slice_p->chunk = chunk_index;
-			slice_p->file = 0;
-			slice_p->offset = file_offset;
-			slice_p->size = block_size;
-			slice_p->block = chunk_list[1].block + num_dedup;
-			slice_p->tail_offset = 0;
-			slice_p->next = -1;
-			if (par3_ctx->noise_level >= 3){
-				printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64"\n",
-						chunk_list[1].block + num_dedup, slice_index, chunk_index, file_offset);
-			}
-			slice_p++;
-			slice_index++;
-			file_offset += block_size;
-			num_dedup++;
-		}
-		tail_size = footer_size % block_size;
-		if (tail_size > 0){
-			// read chunk tail from input file
-			if (fread(work_buf, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-		}
-		if (tail_size >= 40){
-			slice_p->chunk = chunk_index;
-			slice_p->file = 0;
-			slice_p->offset = file_offset;
-			slice_p->size = tail_size;
-			slice_p->block = slice_list[tail_index].block;
-			slice_p->tail_offset = slice_list[tail_index].tail_offset;
-			slice_p->next = -1;
-			if (par3_ctx->noise_level >= 3){
-				if ( (tail_offset == 0) || (tail_offset + tail_size > block_size) ){	// Put tail in new block
-					printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64", tail size %"PRIu64"\n",
-							chunk_list[1].block + num_dedup, slice_index, chunk_index, file_offset, tail_size);
-				} else {
-					printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u], offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-							slice_list[tail_index].block, slice_index, chunk_index, file_offset, tail_size, tail_offset);
-				}
-			}
-			slice_p++;
-			slice_index++;
-			num_dedup++;
-		} else if (tail_size > 0){
-			if (par3_ctx->noise_level >= 3){
-				printf("    block no  : slice no  chunk[%2u], offset %"PRIu64", tail size %"PRIu64"\n",
-						chunk_index, file_offset, tail_size);
-			}
-		}
-		chunk_index++;
-
-	// When there is no footer, 2nd chunk is an unprotected chunk.
-	} else {
-		chunk_p->size = 0;
-		chunk_p->block = unprotected_size;
-		if (par3_ctx->noise_level >= 3){
-			printf("    block no  : slice no  chunk[%2u], offset %"PRIu64", unprotected size %"PRIu64"\n",
-					chunk_index, file_offset, unprotected_size);
-		}
-		chunk_index++;
-	}
-
-	blake3_hasher_finalize(&hasher, file_p->hash, 16);
-	if (fclose(fp) != 0){
-		perror("Failed to close input file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Release temporary buffer.
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	// Check actual number of chunk description
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of chunk description = %u (max %u)\n", chunk_index, chunk_count);
-	}
-	if (chunk_index != chunk_count)
-		return RET_LOGIC_ERROR;
-	par3_ctx->chunk_count = chunk_index;
-
-	// Check actual number of slice info
-	if (slice_index != slice_count){
-		printf("Number of input file slices = %"PRIu64" (max %"PRIu64")\n", slice_index, slice_count);
-		return RET_LOGIC_ERROR;
-	}
-	par3_ctx->slice_count = slice_index;
-
-	// Check actual number of input blocks
-	if (block_index != block_count){
-		printf("Number of input blocks = %"PRIu64" (max %"PRIu64")\n", block_index, block_count);
-		return RET_LOGIC_ERROR;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Actual block count = %"PRIu64", Tail packing = %u, Deduplication = %u\n", block_count, num_pack, num_dedup);
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/map_simple.c b/windows/src/map_simple.c
deleted file mode 100644
index e290a2d..0000000
--- a/windows/src/map_simple.c
+++ /dev/null
@@ -1,738 +0,0 @@
-#ifdef _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "hash.h"
-
-
-// map input file slices into input blocks without deduplication
-int map_input_block_simple(PAR3_CTX *par3_ctx)
-{
-	uint8_t *work_buf, buf_tail[40];
-	int progress_old, progress_now;
-	uint32_t num, num_pack;
-	uint32_t input_file_count, chunk_index;
-	uint64_t block_size, tail_size, file_offset, tail_offset;
-	uint64_t block_count, block_index, slice_index, index;
-	uint64_t progress_total, progress_step;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p;
-	PAR3_SLICE_CTX *slice_p, *slice_list;
-	PAR3_BLOCK_CTX *block_p, *block_list;
-	FILE *fp;
-	blake3_hasher hasher;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// Copy variables from context to local.
-	input_file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	if ( (input_file_count == 0) || (block_size == 0) || (block_count == 0) )
-		return RET_LOGIC_ERROR;
-
-	// When no deduplication, number of chunks may be same as number of input files.
-	// Note, empty file won't use Chunk Description.
-	chunk_p = malloc(sizeof(PAR3_CHUNK_CTX) * input_file_count);
-	if (chunk_p == NULL){
-		perror("Failed to allocate memory for chunk description");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->chunk_list = chunk_p;
-
-	// When no deduplication, number of input file slice is same as number of input blocks.
-	slice_p = malloc(sizeof(PAR3_SLICE_CTX) * block_count);
-	if (slice_p == NULL){
-		perror("Failed to allocate memory for input file slices");
-		return RET_MEMORY_ERROR;
-	}
-	slice_list = slice_p;
-	par3_ctx->slice_list = slice_p;
-
-	// When no deduplication, number of input blocks is calculable.
-	block_p = malloc(sizeof(PAR3_BLOCK_CTX) * block_count);
-	if (block_p == NULL){
-		perror("Failed to allocate memory for input blocks");
-		return RET_MEMORY_ERROR;
-	}
-	block_list = block_p;
-	par3_ctx->block_list = block_p;
-
-	// Allocate memory to store file data temporary.
-	work_buf = malloc(block_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing hash:\n");
-		progress_total = par3_ctx->total_file_size;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Read data of input files on memory
-	num_pack = 0;
-	chunk_index = 0;
-	block_index = 0;
-	slice_index = 0;
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < input_file_count; num++){
-		blake3_hasher_init(&hasher);
-		if (file_p->size == 0){	// Skip empty files.
-			blake3_hasher_finalize(&hasher, file_p->hash, 16);
-			file_p++;
-			continue;
-		}
-		if (par3_ctx->noise_level >= 2){
-			printf("file size = %"PRIu64" \"%s\"\n", file_p->size, file_p->name);
-		}
-
-		fp = fopen(file_p->name, "rb");
-		if (fp == NULL){
-			perror("Failed to open input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// When no deduplication, chunk's index is same as file's index.
-		file_p->chunk = chunk_index;	// single chunk in each file
-		file_p->chunk_num = 1;
-		chunk_p->size = file_p->size;	// file size = chunk size
-		chunk_p->block = block_index;
-
-		// Read full size blocks
-		file_offset = 0;
-		while (file_offset + block_size <= file_p->size){
-			// read full block from input file
-			if (fread(work_buf, 1, (size_t)block_size, fp) != (size_t)block_size){
-				perror("Failed to read full size chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += block_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + block_size < 16384){
-				file_p->crc = crc64(work_buf, (size_t)block_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-			}
-			blake3_hasher_update(&hasher, work_buf, (size_t)block_size);
-
-			// set block info
-			block_p->slice = slice_index;
-			block_p->size = block_size;
-			block_p->crc = crc64(work_buf, (size_t)block_size, 0);
-			blake3(work_buf, (size_t)block_size, block_p->hash);
-			block_p->state = 1 | 64;
-
-			// set slice info
-			slice_p->chunk = chunk_index;
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = block_size;
-			slice_p->block = block_index;
-			slice_p->tail_offset = 0;
-			slice_p->next = -1;
-			if (par3_ctx->noise_level >= 3){
-				printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-						block_index, slice_index, chunk_index, num, file_offset);
-			}
-			slice_p++;
-			slice_index++;
-
-			file_offset += block_size;
-			block_p++;
-			block_index++;
-		}
-
-		// Calculate size of chunk tail, and read it.
-		tail_size = file_p->size - file_offset;
-		//printf("tail_size = %"PRIu64", file size = %"PRIu64", offset %"PRIu64"\n", tail_size, file_p->size, file_offset);
-		if (tail_size >= 40){
-			// read chunk tail from input file
-			if (fread(work_buf, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate checksum of chunk tail
-			chunk_p->tail_crc = crc64(work_buf, 40, 0);
-			blake3(work_buf, (size_t)tail_size, chunk_p->tail_hash);
-
-			// search existing tails to check available space
-			tail_offset = 0;
-			for (index = 0; index < slice_index; index++){
-				if ( (slice_list[index].next == -1) && (slice_list[index].size < block_size) ){	// the last tail in the block
-					if (slice_list[index].tail_offset + slice_list[index].size + tail_size <= block_size){
-						// When tail can fit in the space, put the tail there.
-						tail_offset = slice_list[index].tail_offset + slice_list[index].size;
-						break;
-					}
-				}
-			}
-			//printf("tail_offset = %"PRId64"\n", tail_offset);
-
-			if (tail_offset == 0){	// Put tail in new block
-				if (par3_ctx->noise_level >= 3){
-					printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-							block_index, slice_index, chunk_index, num, file_offset, tail_size);
-				}
-
-				// set slice info
-				slice_p->block = block_index;
-				slice_p->tail_offset = 0;
-
-				// set chunk tail info
-				chunk_p->tail_block = block_index;
-				chunk_p->tail_offset = 0;
-
-				// set block info (block for tails don't store checksum)
-				block_p->slice = slice_index;
-				block_p->size = tail_size;
-				block_p->crc = crc64(work_buf, (size_t)tail_size, 0);
-				block_p->state = 2 | 64;
-				block_p++;
-				block_index++;
-
-			} else {	// Put tail after another tail
-				if (par3_ctx->noise_level >= 3){
-					printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-							slice_list[index].block, slice_index, chunk_index, num, file_offset, tail_size, tail_offset);
-				}
-				slice_list[index].next = slice_index;	// update "next" item in the front tail
-
-				// set slice info
-				slice_p->block = slice_list[index].block;
-				slice_p->tail_offset = tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = slice_list[index].block;
-				chunk_p->tail_offset = tail_offset;
-				num_pack++;
-
-				// update block info
-				block_list[slice_p->block].size = tail_offset + tail_size;
-				block_list[slice_p->block].crc = crc64(work_buf, (size_t)tail_size, block_list[slice_p->block].crc);
-			}
-
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + tail_size < 16384){
-				file_p->crc = crc64(work_buf, (size_t)tail_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(work_buf, (size_t)(16384 - file_offset), file_p->crc);
-			}
-			blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-
-			// set common slice info
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = tail_size;
-			slice_p->chunk = chunk_index;
-			slice_p->next = -1;
-			slice_p++;
-			slice_index++;
-
-		} else if (tail_size > 0){
-			// When tail size is 1~39 bytes, it's saved in File Packet.
-			if (fread(buf_tail, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-				perror("Failed to read tail chunk on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-			if (par3_ctx->noise_level >= 3){
-				printf("    block no  : slice no  chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-						chunk_index, num, file_offset, tail_size);
-			}
-
-			// Print progress percent
-			if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-				progress_step += tail_size;
-				time_now = time(NULL);
-				if (time_now != time_old){
-					time_old = time_now;
-					progress_now = (int)((progress_step * 1000) / progress_total);
-					if (progress_now != progress_old){
-						progress_old = progress_now;
-						printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-					}
-				}
-			}
-
-			// calculate CRC-64 of the first 16 KB
-			if (file_offset + tail_size < 16384){
-				file_p->crc = crc64(buf_tail, (size_t)tail_size, file_p->crc);
-			} else if (file_offset < 16384){
-				file_p->crc = crc64(buf_tail, (size_t)(16384 - file_offset), file_p->crc);
-			}
-			blake3_hasher_update(&hasher, buf_tail, (size_t)tail_size);
-
-			// copy 1 ~ 39 bytes
-			memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-			memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-			memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-			memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-		}
-
-		blake3_hasher_finalize(&hasher, file_p->hash, 16);
-		if (fclose(fp) != 0){
-			perror("Failed to close input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		file_p++;
-		chunk_p++;	// Each input file contains single chunk description.
-		chunk_index++;
-	}
-
-	// Release temporary buffer.
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	// Re-allocate memory for actual number of chunk description
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of chunk description = %u (max %u)\n", chunk_index, input_file_count);
-	}
-	if (chunk_index < input_file_count){
-		if (chunk_index > 0){
-			chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_index);
-			if (chunk_p == NULL){
-				perror("Failed to re-allocate memory for chunk description");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->chunk_list = chunk_p;
-		} else {
-			free(par3_ctx->chunk_list);
-			par3_ctx->chunk_list = NULL;
-		}
-	}
-	par3_ctx->chunk_count = chunk_index;
-
-	// Check actual number of slice info
-	if (slice_index != block_count){
-		printf("Number of input file slices = %"PRIu64" (max %"PRIu64")\n", slice_index, block_count);
-		return RET_LOGIC_ERROR;
-	}
-	par3_ctx->slice_count = slice_index;
-
-	// Update actual number of input blocks
-	if (block_index < block_count){
-		block_count = block_index;
-		par3_ctx->block_count = block_count;
-
-		// realloc
-		block_p = realloc(par3_ctx->block_list, sizeof(PAR3_BLOCK_CTX) * block_count);
-		if (block_p == NULL){
-			perror("Failed to re-allocate memory for input blocks");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->block_list = block_p;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Actual block count = %"PRIu64", Tail packing = %u\n", block_count, num_pack);
-	}
-
-	return 0;
-}
-
-
-// map chunk tails, when there are no input blocks.
-int map_chunk_tail(PAR3_CTX *par3_ctx)
-{
-	uint8_t buf_tail[40];
-	uint32_t num;
-	uint32_t input_file_count, chunk_index;
-	uint64_t tail_size;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p;
-	FILE *fp;
-	blake3_hasher hasher;
-
-	// Copy variables from context to local.
-	input_file_count = par3_ctx->input_file_count;
-	if (par3_ctx->block_count != 0)
-		return RET_LOGIC_ERROR;
-
-	// When no deduplication, number of chunks may be same as number of input files.
-	// Note, empty file won't use Chunk Description.
-	chunk_p = malloc(sizeof(PAR3_CHUNK_CTX) * input_file_count);
-	if (chunk_p == NULL){
-		perror("Failed to allocate memory for chunk description");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->chunk_list = chunk_p;
-
-	// Read data of input files on memory
-	chunk_index = 0;
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < input_file_count; num++){
-		blake3_hasher_init(&hasher);
-		if (file_p->size == 0){	// Skip empty files.
-			blake3_hasher_finalize(&hasher, file_p->hash, 16);
-			file_p++;
-			continue;
-		}
-		if (par3_ctx->noise_level >= 2){
-			printf("file size = %"PRIu64" \"%s\"\n", file_p->size, file_p->name);
-		}
-
-		fp = fopen(file_p->name, "rb");
-		if (fp == NULL){
-			perror("Failed to open input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// When no deduplication, chunk's index is same as file's index.
-		file_p->chunk = chunk_index;	// single chunk in each file
-		file_p->chunk_num = 1;
-		chunk_p->size = file_p->size;	// file size = chunk size
-		chunk_p->block = 0;
-
-		tail_size = file_p->size;
-		// When tail size is 1~39-bytes, it's saved in File Packet.
-		if (fread(buf_tail, 1, (size_t)tail_size, fp) != (size_t)tail_size){
-			perror("Failed to read tail chunk on input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-
-		// calculate CRC-64 of the first 16 KB
-		if (tail_size < 16384){
-			file_p->crc = crc64(buf_tail, (size_t)tail_size, file_p->crc);
-		} else {
-			file_p->crc = crc64(buf_tail, 16384, file_p->crc);
-		}
-		blake3_hasher_update(&hasher, buf_tail, (size_t)tail_size);
-
-		// copy 1 ~ 39 bytes
-		memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-		memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-		memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-		memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-
-		blake3_hasher_finalize(&hasher, file_p->hash, 16);
-		if (fclose(fp) != 0){
-			perror("Failed to close input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		file_p++;
-		chunk_p++;	// Each input file contains single chunk description.
-		chunk_index++;
-	}
-
-	// Re-allocate memory for actual number of chunk description
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of chunk description = %u (max %u)\n\n", chunk_index, input_file_count);
-	}
-	if (chunk_index < input_file_count){
-		if (chunk_index > 0){
-			chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_index);
-			if (chunk_p == NULL){
-				perror("Failed to re-allocate memory for chunk description");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->chunk_list = chunk_p;
-		} else {
-			free(par3_ctx->chunk_list);
-			par3_ctx->chunk_list = NULL;
-		}
-	}
-	par3_ctx->chunk_count = chunk_index;
-
-	return 0;
-}
-
-
-// map input file slices into input blocks without reading file
-int map_input_block_trial(PAR3_CTX *par3_ctx)
-{
-	uint32_t num, num_pack;
-	uint32_t input_file_count, chunk_index;
-	uint64_t block_size, tail_size, file_offset, tail_offset;
-	uint64_t block_count, block_index, slice_index, index;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p;
-	PAR3_SLICE_CTX *slice_p, *slice_list;
-	PAR3_BLOCK_CTX *block_p, *block_list;
-
-	// Copy variables from context to local.
-	input_file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	if ( (input_file_count == 0) || (block_size == 0) || (block_count == 0) )
-		return RET_LOGIC_ERROR;
-
-	// When no deduplication, number of chunks may be same as number of input files.
-	// Note, empty file won't use Chunk Description.
-	chunk_p = malloc(sizeof(PAR3_CHUNK_CTX) * input_file_count);
-	if (chunk_p == NULL){
-		perror("Failed to allocate memory for chunk description");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->chunk_list = chunk_p;
-
-	// When no deduplication, number of input file slice is same as number of input blocks.
-	slice_p = malloc(sizeof(PAR3_SLICE_CTX) * block_count);
-	if (slice_p == NULL){
-		perror("Failed to allocate memory for input file slices");
-		return RET_MEMORY_ERROR;
-	}
-	slice_list = slice_p;
-	par3_ctx->slice_list = slice_p;
-
-	// When no deduplication, number of input blocks is calculable.
-	block_p = malloc(sizeof(PAR3_BLOCK_CTX) * block_count);
-	if (block_p == NULL){
-		perror("Failed to allocate memory for input blocks");
-		return RET_MEMORY_ERROR;
-	}
-	block_list = block_p;
-	par3_ctx->block_list = block_p;
-
-	// Read data of input files on memory
-	num_pack = 0;
-	chunk_index = 0;
-	block_index = 0;
-	slice_index = 0;
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < input_file_count; num++){
-		if (file_p->size == 0){	// Skip empty files.
-			file_p++;
-			continue;
-		}
-		if (par3_ctx->noise_level >= 2){
-			printf("file size = %"PRIu64" \"%s\"\n", file_p->size, file_p->name);
-		}
-
-		// When no deduplication, chunk's index is same as file's index.
-		file_p->chunk = chunk_index;	// single chunk in each file
-		file_p->chunk_num = 1;
-		chunk_p->size = file_p->size;	// file size = chunk size
-		chunk_p->block = block_index;
-
-		// Not calculate CRC-64 of the first 16 KB
-		file_p->crc = 0;
-
-		// Read full size blocks
-		file_offset = 0;
-		while (file_offset + block_size <= file_p->size){
-			// set block info
-			block_p->slice = slice_index;
-			block_p->size = block_size;
-			block_p->crc = 0;
-			memset(block_p->hash, 0, 16);	// Not calculate hash
-			block_p->state = 1 | 64 | 128;
-
-			// set slice info
-			slice_p->chunk = chunk_index;
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = block_size;
-			slice_p->block = block_index;
-			slice_p->tail_offset = 0;
-			slice_p->next = -1;
-			if (par3_ctx->noise_level >= 3){
-				printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-						block_index, slice_index, chunk_index, num, file_offset);
-			}
-			slice_p++;
-			slice_index++;
-
-			file_offset += block_size;
-			block_p++;
-			block_index++;
-		}
-
-		// Calculate size of chunk tail, and read it.
-		tail_size = file_p->size - file_offset;
-		//printf("tail_size = %"PRIu64", file size = %"PRIu64", offset %"PRIu64"\n", tail_size, file_p->size, file_offset);
-		if (tail_size >= 40){
-			// Not calculate checksum of chunk tail
-			chunk_p->tail_crc = 0;
-			memset(chunk_p->tail_hash, 0, 16);
-
-			// search existing tails to check available space
-			tail_offset = 0;
-			for (index = 0; index < slice_index; index++){
-				if ( (slice_list[index].next == -1) && (slice_list[index].size < block_size) ){	// the last tail in the block
-					if (slice_list[index].tail_offset + slice_list[index].size + tail_size <= block_size){
-						// When tail can fit in the space, put the tail there.
-						tail_offset = slice_list[index].tail_offset + slice_list[index].size;
-						break;
-					}
-				}
-			}
-			//printf("tail_offset = %"PRId64"\n", tail_offset);
-
-			if (tail_offset == 0){	// Put tail in new block
-				if (par3_ctx->noise_level >= 3){
-					printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-							block_index, slice_index, chunk_index, num, file_offset, tail_size);
-				}
-
-				// set slice info
-				slice_p->block = block_index;
-				slice_p->tail_offset = 0;
-
-				// set chunk tail info
-				chunk_p->tail_block = block_index;
-				chunk_p->tail_offset = 0;
-
-				// set block info (block for tails don't store checksum)
-				block_p->slice = slice_index;
-				block_p->size = tail_size;
-				block_p->state = 2 | 64;
-				block_p++;
-				block_index++;
-
-			} else {	// Put tail after another tail
-				if (par3_ctx->noise_level >= 3){
-					printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-							slice_list[index].block, slice_index, chunk_index, num, file_offset, tail_size, tail_offset);
-				}
-				slice_list[index].next = slice_index;	// update "next" item in the front tail
-
-				// set slice info
-				slice_p->block = slice_list[index].block;
-				slice_p->tail_offset = tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = slice_list[index].block;
-				chunk_p->tail_offset = tail_offset;
-				num_pack++;
-
-				// update block info
-				block_list[slice_p->block].size = tail_offset + tail_size;
-			}
-
-			// set common slice info
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = tail_size;
-			slice_p->chunk = chunk_index;
-			slice_p->next = -1;
-			slice_p++;
-			slice_index++;
-
-		} else if (tail_size > 0){
-			if (par3_ctx->noise_level >= 3){
-				printf("    block no  : slice no  chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-						chunk_index, num, file_offset, tail_size);
-			}
-
-			// Reset
-			chunk_p->tail_crc = 0;
-			memset(chunk_p->tail_hash, 0, 16);
-			chunk_p->tail_block = 0;
-			chunk_p->tail_offset = 0;
-		}
-
-		// Not calculate file hash
-		memset(file_p->hash, 0, 16);
-
-		file_p++;
-		chunk_p++;	// Each input file contains single chunk description.
-		chunk_index++;
-	}
-
-	// Re-allocate memory for actual number of chunk description
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of chunk description = %u (max %u)\n", chunk_index, input_file_count);
-	}
-	if (chunk_index < input_file_count){
-		if (chunk_index > 0){
-			chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_index);
-			if (chunk_p == NULL){
-				perror("Failed to re-allocate memory for chunk description");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->chunk_list = chunk_p;
-		} else {
-			free(par3_ctx->chunk_list);
-			par3_ctx->chunk_list = NULL;
-		}
-	}
-	par3_ctx->chunk_count = chunk_index;
-
-	// Check actual number of slice info
-	if (slice_index != block_count){
-		printf("Number of input file slices = %"PRIu64" (max %"PRIu64")\n", slice_index, block_count);
-		return RET_LOGIC_ERROR;
-	}
-	par3_ctx->slice_count = slice_index;
-
-	// Update actual number of input blocks
-	if (block_index < block_count){
-		block_count = block_index;
-		par3_ctx->block_count = block_count;
-
-		// realloc
-		block_p = realloc(par3_ctx->block_list, sizeof(PAR3_BLOCK_CTX) * block_count);
-		if (block_p == NULL){
-			perror("Failed to re-allocate memory for input blocks");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->block_list = block_p;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Actual block count = %"PRIu64", Tail packing = %u\n", block_count, num_pack);
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/map_slide.c b/windows/src/map_slide.c
deleted file mode 100644
index 902c5b6..0000000
--- a/windows/src/map_slide.c
+++ /dev/null
@@ -1,970 +0,0 @@
-#ifdef _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "hash.h"
-
-
-// map input file slices into input blocks with slide search
-int map_input_block_slide(PAR3_CTX *par3_ctx)
-{
-	uint8_t *buf_p, *work_buf, buf_tail[40], buf_hash[16];
-	int progress_old, progress_now;
-	uint32_t num, num_pack, input_file_count;
-	uint32_t chunk_count, chunk_index, chunk_num;
-	int64_t find_index, previous_index, tail_offset;
-	uint64_t block_size, tail_size, file_offset;
-	uint64_t file_size, read_size, slide_offset;
-	uint64_t block_count, block_index;
-	uint64_t slice_count, slice_index, index, last_index;
-	uint64_t crc, crc_slide, window_mask, *window_table, num_dedup;
-	uint64_t progress_total, progress_step;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_p, *chunk_list;
-	PAR3_SLICE_CTX *slice_p, *slice_list;
-	PAR3_BLOCK_CTX *block_p, *block_list;
-	PAR3_CMP_CTX *crc_list;
-	FILE *fp;
-	blake3_hasher hasher;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	// Copy variables from context to local.
-	input_file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	if ( (input_file_count == 0) || (block_size == 0) || (block_count == 0) )
-		return RET_LOGIC_ERROR;
-
-	// Table setup for slide window search of duplicated blocks.
-	init_crc_slide_table(par3_ctx, 1);
-	window_mask = par3_ctx->window_mask;
-	window_table = par3_ctx->window_table;
-
-	// For deduplication, allocate chunks description as 4 * number of input files.
-	// Note, empty file won't use Chunk Description.
-	chunk_count = input_file_count * 4;
-	if (par3_ctx->noise_level >= 2){
-		printf("Initial chunk count = %u (input file count = %u)\n", chunk_count, input_file_count);
-	}
-	chunk_p = malloc(sizeof(PAR3_CHUNK_CTX) * chunk_count);
-	if (chunk_p == NULL){
-		perror("Failed to allocate memory for chunk description");
-		return RET_MEMORY_ERROR;
-	}
-	chunk_list = chunk_p;
-	par3_ctx->chunk_list = chunk_p;
-
-	// For deduplication, allocate input file slices as 2 * number of input blocks.
-	slice_count = block_count * 2;
-	if (par3_ctx->noise_level >= 2){
-		printf("Initial input file slice count = %"PRIu64" (input block count = %"PRIu64")\n", slice_count, block_count);
-	}
-	slice_p = malloc(sizeof(PAR3_SLICE_CTX) * slice_count);
-	if (slice_p == NULL){
-		perror("Failed to allocate memory for input file slices");
-		return RET_MEMORY_ERROR;
-	}
-	slice_list = slice_p;
-	par3_ctx->slice_list = slice_p;
-
-	// Allocate max number of input blocks at first.
-	block_p = malloc(sizeof(PAR3_BLOCK_CTX) * block_count);
-	if (block_p == NULL){
-		perror("Failed to allocate memory for input blocks");
-		return RET_MEMORY_ERROR;
-	}
-	block_list = block_p;
-	par3_ctx->block_list = block_p;
-
-	// Allocate list of CRC-64 for maximum items
-	crc_list = malloc(sizeof(PAR3_CMP_CTX) * block_count);
-	if (crc_list == NULL){
-		perror("Failed to allocate memory for comparison of CRC-64");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->crc_list = crc_list;
-	par3_ctx->crc_count = 0;	// There is no item yet.
-
-	// Allocate memory to store file data temporary.
-	work_buf = malloc(block_size * 2);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing hash:\n");
-		progress_total = par3_ctx->total_file_size;
-		progress_step = 0;
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Read data of input files on memory
-	num_dedup = 0;
-	num_pack = 0;
-	chunk_index = 0;
-	block_index = 0;
-	slice_index = 0;
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < input_file_count; num++){
-		blake3_hasher_init(&hasher);
-		if (file_p->size == 0){	// Skip empty files.
-			blake3_hasher_finalize(&hasher, file_p->hash, 16);
-			file_p++;
-			continue;
-		}
-		if (par3_ctx->noise_level >= 2){
-			printf("file size = %"PRIu64" \"%s\"\n", file_p->size, file_p->name);
-		}
-
-		fp = fopen(file_p->name, "rb");
-		if (fp == NULL){
-			perror("Failed to open input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Read two blocks at first.
-		file_size = file_p->size;
-		read_size = block_size * 2;
-		if (read_size > file_size)
-			read_size = file_size;
-		if (fread(work_buf, 1, (size_t)read_size, fp) != read_size){
-			perror("Failed to read first blocks on input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-			progress_step += read_size;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (int)((progress_step * 1000) / progress_total);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-
-		// calculate CRC-64 of the first 16 KB
-		if (read_size < 16384){
-			file_p->crc = crc64(work_buf, (size_t)read_size, 0);
-		} else {
-			file_p->crc = crc64(work_buf, 16384, 0);
-		}
-		blake3_hasher_update(&hasher, work_buf, (size_t)read_size);
-
-		// First chunk in this file
-		previous_index = -4;
-		file_p->chunk = chunk_index;	// There is at least one chunk in each file.
-		chunk_p->size = 0;
-		chunk_p->block = 0;
-		chunk_num = 0;
-
-		// Compare input blocks.
-		if (read_size >= block_size)
-			crc = crc64(work_buf, block_size, 0);
-		file_offset = 0;
-		while (file_offset + block_size <= file_size){
-
-			// Compare current CRC-64 with previous blocks.
-			find_index = crc_list_compare(par3_ctx, crc, work_buf, buf_hash);
-			//printf("find_index = %"PRId64", previous_index = %"PRId64"\n", find_index, previous_index);
-			if (find_index < 0){	// No match
-
-				if (par3_ctx->crc_count > 0){	// Slide search
-					//printf("slide: file %d, offset %"PRIu64", crc_count = %"PRIu64"\n", num, file_offset, par3_ctx->crc_count);
-					crc_slide = crc;
-					slide_offset = 0;
-					while (slide_offset + 1 < block_size){
-						crc_slide = window_mask ^ crc_slide_byte(window_mask ^ crc_slide,
-								work_buf[slide_offset + block_size], work_buf[slide_offset], window_table);
-						slide_offset++;
-						//printf("offset = %"PRIu64", crc = 0x%016"PRIx64", 0x%016"PRIx64"\n", slide_offset, crc64(work_buf + slide_offset, block_size, 0), crc_slide);
-
-						find_index = crc_list_compare(par3_ctx, crc_slide, work_buf + slide_offset, buf_hash);
-						if (find_index >= 0)
-							break;
-					}
-				}
-
-				if (find_index >= 0){	// When same block was found while slide search.
-					// Close previous chunk with tail.
-					tail_size = slide_offset;
-					//printf("tail_size = %"PRIu64", offset %"PRIu64"\n", tail_size, file_offset);
-					if (tail_size >= 40){
-						// calculate checksum of chunk tail
-						chunk_p->tail_crc = crc64(work_buf, 40, 0);
-						blake3(work_buf, (size_t)tail_size, chunk_p->tail_hash);
-
-						// search existing tails of same data
-						tail_offset = 0;
-						for (index = 0; index < slice_index; index++){
-							//printf("tail size = %"PRIu64"\n", slice_list[index].size);
-							if (slice_list[index].size == tail_size){	// same size tail
-								//printf("crc = 0x%016"PRIx64", 0x%016"PRIx64" chunk[%2u]\n", chunk_p->tail_crc, chunk_list[slice_list[index].chunk].tail_crc, slice_list[index].chunk);
-								if (chunk_p->tail_crc == chunk_list[slice_list[index].chunk].tail_crc){
-									if (memcmp(chunk_p->tail_hash, chunk_list[slice_list[index].chunk].tail_hash, 16) == 0){
-										tail_offset = -1;
-
-										// find the last slice info in the block
-										last_index = index;
-										while (slice_list[last_index].next != -1){
-											last_index = slice_list[last_index].next;
-										}
-										break;
-									}
-								}
-							}
-						}
-						if (tail_offset == 0){
-							// search existing blocks to check available space
-							for (index = 0; index < block_index; index++){
-								if (block_list[index].size + tail_size <= block_size){
-									// When tail can fit in the space, put the tail there.
-									tail_offset = block_list[index].size;
-
-									// find the last slice info in the block
-									last_index = block_list[index].slice;
-									while (slice_list[last_index].next != -1){
-										last_index = slice_list[last_index].next;
-									}
-									break;
-								}
-							}
-						}
-						//printf("tail_offset = %"PRId64"\n", tail_offset);
-
-						if (tail_offset < 0){	// Same data as previous tail
-							if (par3_ctx->noise_level >= 3){
-								printf("o t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRIu64"\n",
-										slice_list[index].block, slice_index, chunk_index, num, file_offset, tail_size, slice_list[index].tail_offset);
-							}
-							slice_list[last_index].next = slice_index;	// These same tails have same offset and size.
-
-							// set slice info
-							slice_p->block = slice_list[index].block;
-							slice_p->tail_offset = slice_list[index].tail_offset;
-
-							// set chunk tail info
-							chunk_p->tail_block = slice_p->block;
-							chunk_p->tail_offset = slice_p->tail_offset;
-							num_dedup++;
-
-						} else if (tail_offset == 0){	// Put tail in new block
-							if (par3_ctx->noise_level >= 3){
-								printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-										block_index, slice_index, chunk_index, num, file_offset, tail_size);
-							}
-
-							// set slice info
-							slice_p->block = block_index;
-							slice_p->tail_offset = 0;
-
-							// set chunk tail info
-							chunk_p->tail_block = block_index;
-							chunk_p->tail_offset = 0;
-
-							// set block info (block for tails don't store checksum)
-							block_p->slice = slice_index;
-							block_p->size = tail_size;
-							block_p->crc = crc64(work_buf, (size_t)tail_size, 0);
-							block_p->state = 2 | 64;
-							block_p++;
-							block_index++;
-
-						} else {	// Put tail after another tail
-							if (par3_ctx->noise_level >= 3){
-								printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-										index, slice_index, chunk_index, num, file_offset, tail_size, tail_offset);
-							}
-							slice_list[last_index].next = slice_index;	// update "next" item in the front tail
-
-							// set slice info
-							slice_p->block = index;
-							slice_p->tail_offset = tail_offset;
-
-							// set chunk tail info
-							chunk_p->tail_block = index;
-							chunk_p->tail_offset = tail_offset;
-							num_pack++;
-
-							// update block info
-							block_list[slice_p->block].size = tail_offset + tail_size;
-							block_list[slice_p->block].crc = crc64(work_buf, (size_t)tail_size, block_list[slice_p->block].crc);
-						}
-
-						// set common slice info
-						slice_p->file = num;
-						slice_p->offset = file_offset;
-						slice_p->size = tail_size;
-						slice_p->chunk = chunk_index;
-						slice_p->next = -1;
-						slice_index++;
-						if (slice_index >= slice_count){
-							slice_count *= 2;
-							slice_p = realloc(par3_ctx->slice_list, sizeof(PAR3_SLICE_CTX) * slice_count);
-							if (slice_p == NULL){
-								perror("Failed to re-allocate memory for input file slices");
-								fclose(fp);
-								return RET_MEMORY_ERROR;
-							}
-							slice_list = slice_p;
-							par3_ctx->slice_list = slice_p;
-							slice_p += slice_index;
-						} else {
-							slice_p++;
-						}
-
-					} else {	// When tail size is 1~39 bytes, it's saved in File Packet.
-						memcpy(buf_tail, work_buf, tail_size);	// block size may be smaller than 40 bytes.
-						memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-						if (par3_ctx->noise_level >= 3){
-							printf("    block no  : slice no  chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-									chunk_index, num, file_offset, tail_size);
-						}
-
-						// copy 1 ~ 39 bytes
-						memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-						memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-						memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-						memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-					}
-					chunk_p->size += tail_size;
-
-					// Close chunk description
-					chunk_num++;
-					chunk_index++;
-					if (chunk_index >= chunk_count){
-						chunk_count *= 2;
-						chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-						if (chunk_p == NULL){
-							perror("Failed to re-allocate memory for chunk description");
-							fclose(fp);
-							return RET_MEMORY_ERROR;
-						}
-						chunk_list = chunk_p;
-						par3_ctx->chunk_list = chunk_p;
-						chunk_p += chunk_index;
-					} else {
-						chunk_p++;
-					}
-
-					// Match with a previous block while slide search
-					// update the last slice info of previous block
-					index = block_list[find_index].slice;
-					while (slice_list[index].next != -1){
-						index = slice_list[index].next;
-					}
-					//printf("first index = %"PRIu64", same = %"PRIu64", slice_index = %"PRIu64"\n", block_list[find_index].slice, index, slice_index);
-					slice_list[index].next = slice_index;
-
-					// Start this chunk
-					chunk_p->size = 0;
-					chunk_p->block = find_index;
-
-					// set slice info
-					slice_p->chunk = chunk_index;
-					slice_p->file = num;
-					slice_p->offset = file_offset + slide_offset;
-					slice_p->size = block_size;
-					slice_p->block = find_index;
-					slice_p->tail_offset = 0;
-					slice_p->next = -1;
-					if (par3_ctx->noise_level >= 3){
-						printf("o s block[%2"PRId64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-								find_index, slice_index, chunk_index, num, file_offset + slide_offset);
-					}
-					slice_index++;
-					if (slice_index >= slice_count){
-						slice_count *= 2;
-						slice_p = realloc(par3_ctx->slice_list, sizeof(PAR3_SLICE_CTX) * slice_count);
-						if (slice_p == NULL){
-							perror("Failed to re-allocate memory for input file slices");
-							fclose(fp);
-							return RET_MEMORY_ERROR;
-						}
-						slice_list = slice_p;
-						par3_ctx->slice_list = slice_p;
-						slice_p += slice_index;
-					} else {
-						slice_p++;
-					}
-
-					// set chunk info
-					chunk_p->size += block_size;
-					previous_index = find_index;
-					num_dedup++;
-
-					// Read remain of partial block on first position, and next block on second position.
-					file_offset += slide_offset + block_size;
-					if (file_offset >= file_size){
-						//printf("file_offset = %"PRIu64", file_size = %"PRIu64", EOF\n", file_offset, file_size);
-						break;
-					}
-					read_size = slide_offset + block_size;
-					// Slide partial block to the top
-					memcpy(work_buf, work_buf + slide_offset + block_size, (size_t)(block_size - slide_offset));
-					if (file_offset + (block_size - slide_offset) >= file_size){
-						read_size = 0;
-					} else if (file_offset + block_size * 2 > file_size){
-						read_size = file_size - file_offset - (block_size - slide_offset);
-					}
-					//printf("file_offset = %"PRIu64", read_size = %"PRIu64"\n", file_offset, read_size);
-					if (read_size > 0){
-						buf_p = work_buf + (block_size - slide_offset);
-						if (fread(buf_p, 1, (size_t)read_size, fp) != read_size){
-							perror("Failed to read next block on input file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-
-						// Print progress percent
-						if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-							progress_step += read_size;
-							time_now = time(NULL);
-							if (time_now != time_old){
-								time_old = time_now;
-								progress_now = (int)((progress_step * 1000) / progress_total);
-								if (progress_now != progress_old){
-									progress_old = progress_now;
-									printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-								}
-							}
-						}
-
-						// calculate CRC-64 of the first 16 KB
-						if (file_offset + (block_size - slide_offset) + read_size < 16384){
-							file_p->crc = crc64(buf_p, (size_t)read_size, file_p->crc);
-						} else if (file_offset + (block_size - slide_offset) < 16384){
-							file_p->crc = crc64(buf_p, (size_t)(16384 - file_offset - (block_size - slide_offset)), file_p->crc);
-						}
-						blake3_hasher_update(&hasher, buf_p, (size_t)read_size);
-					}
-
-					// Calculate CRC-64 of next block.
-					if (file_offset + block_size <= file_size)
-						crc = crc64(work_buf, block_size, 0);
-
-				} else {	// When same block was not found.
-					// Add full size block into list
-					crc_list_add(par3_ctx, crc, block_index);
-
-					// set block info
-					block_p->slice = slice_index;
-					block_p->size = block_size;
-					block_p->crc = crc;
-					if (find_index == -3){
-						memcpy(block_p->hash, buf_hash, 16);
-					} else {
-						blake3(work_buf, (size_t)block_size, block_p->hash);
-					}
-					block_p->state = 1 | 64;
-
-					// set chunk info
-					if ( (chunk_p->size > 0) && (previous_index >= 0) ){	// When there are old blocks already in the chunk.
-						// Close previous chunk.
-						chunk_num++;
-						chunk_index++;
-						if (chunk_index >= chunk_count){
-							chunk_count *= 2;
-							chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-							if (chunk_p == NULL){
-								perror("Failed to re-allocate memory for chunk description");
-								fclose(fp);
-								return RET_MEMORY_ERROR;
-							}
-							chunk_list = chunk_p;
-							par3_ctx->chunk_list = chunk_p;
-							chunk_p += chunk_index;
-						} else {
-							chunk_p++;
-						}
-						chunk_p->size = 0;
-					}
-					if (chunk_p->size == 0){	// When this is the first block in the chunk.
-						// Save index of starting block.
-						chunk_p->block = block_index;
-					}
-					chunk_p->size += block_size;
-					previous_index = -4;
-
-					// set slice info
-					slice_p->chunk = chunk_index;
-					slice_p->file = num;
-					slice_p->offset = file_offset;
-					slice_p->size = block_size;
-					slice_p->block = block_index;
-					slice_p->tail_offset = 0;
-					slice_p->next = -1;
-					if (par3_ctx->noise_level >= 3){
-						printf("new block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-								block_index, slice_index, chunk_index, num, file_offset);
-					}
-					slice_index++;
-					if (slice_index >= slice_count){
-						slice_count *= 2;
-						slice_p = realloc(par3_ctx->slice_list, sizeof(PAR3_SLICE_CTX) * slice_count);
-						if (slice_p == NULL){
-							perror("Failed to re-allocate memory for input file slices");
-							fclose(fp);
-							return RET_MEMORY_ERROR;
-						}
-						slice_list = slice_p;
-						par3_ctx->slice_list = slice_p;
-						slice_p += slice_index;
-					} else {
-						slice_p++;
-					}
-
-					block_p++;
-					block_index++;
-
-					// Read next block on second position.
-					file_offset += block_size;
-					if (file_offset >= file_size){
-						//printf("file_offset = %"PRIu64", file_size = %"PRIu64", EOF\n", file_offset, file_size);
-						break;
-					}
-					read_size = block_size;
-					if (file_offset + block_size >= file_size){
-						// Slide block of second position to former position.
-						memcpy(work_buf, work_buf + block_size, (size_t)(file_size - file_offset));
-						read_size = 0;
-					} else if (file_offset + block_size * 2 > file_size){
-						read_size = file_size - file_offset - block_size;
-					}
-					//printf("file_offset = %"PRIu64", read_size = %"PRIu64"\n", file_offset, read_size);
-					if (read_size > 0){
-						// Slide block of second position to former position.
-						memcpy(work_buf, work_buf + block_size, (size_t)block_size);
-
-						if (fread(work_buf + block_size, 1, (size_t)read_size, fp) != read_size){
-							perror("Failed to read next block on input file");
-							fclose(fp);
-							return RET_FILE_IO_ERROR;
-						}
-
-						// Print progress percent
-						if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-							progress_step += read_size;
-							time_now = time(NULL);
-							if (time_now != time_old){
-								time_old = time_now;
-								progress_now = (int)((progress_step * 1000) / progress_total);
-								if (progress_now != progress_old){
-									progress_old = progress_now;
-									printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-								}
-							}
-						}
-
-						// calculate CRC-64 of the first 16 KB
-						if (file_offset + block_size + read_size < 16384){
-							file_p->crc = crc64(work_buf + block_size, (size_t)read_size, file_p->crc);
-						} else if (file_offset + block_size < 16384){
-							file_p->crc = crc64(work_buf + block_size, (size_t)(16384 - file_offset - block_size), file_p->crc);
-						}
-						blake3_hasher_update(&hasher, work_buf + block_size, (size_t)read_size);
-					}
-
-					// Calculate CRC-64 of next block.
-					if (file_offset + block_size <= file_size)
-						crc = crc64(work_buf, block_size, 0);
-				}
-
-			} else {	// Match with a previous block
-				// update the last slice info of previous block
-				index = block_list[find_index].slice;
-				while (slice_list[index].next != -1){
-					index = slice_list[index].next;
-				}
-				//printf("first index = %"PRIu64", same = %"PRIu64", slice_index = %"PRIu64"\n", block_list[find_index].slice, index, slice_index);
-				slice_list[index].next = slice_index;
-
-				if ( (chunk_p->size > 0) &&	// When there are blocks already in the chunk.
-						(find_index != previous_index + 1) ){	// If found block isn't the next of previous block.
-
-					// Close previous chunk.
-					chunk_num++;
-					chunk_index++;
-					if (chunk_index >= chunk_count){
-						chunk_count *= 2;
-						chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-						if (chunk_p == NULL){
-							perror("Failed to re-allocate memory for chunk description");
-							fclose(fp);
-							return RET_MEMORY_ERROR;
-						}
-						chunk_list = chunk_p;
-						par3_ctx->chunk_list = chunk_p;
-						chunk_p += chunk_index;
-					} else {
-						chunk_p++;
-					}
-
-					// Start next chunk
-					chunk_p->size = 0;
-				}
-				if (chunk_p->size == 0){	// When this is the first block in the chunk.
-					// Save index of starting block.
-					chunk_p->block = find_index;
-				}
-
-				// set slice info
-				slice_p->chunk = chunk_index;
-				slice_p->file = num;
-				slice_p->offset = file_offset;
-				slice_p->size = block_size;
-				slice_p->block = find_index;
-				slice_p->tail_offset = 0;
-				slice_p->next = -1;
-				if (par3_ctx->noise_level >= 3){
-					printf("old block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64"\n",
-							find_index, slice_index, chunk_index, num, file_offset);
-				}
-				slice_index++;
-				if (slice_index >= slice_count){
-					slice_count *= 2;
-					slice_p = realloc(par3_ctx->slice_list, sizeof(PAR3_SLICE_CTX) * slice_count);
-					if (slice_p == NULL){
-						perror("Failed to re-allocate memory for input file slices");
-						fclose(fp);
-						return RET_MEMORY_ERROR;
-					}
-					slice_list = slice_p;
-					par3_ctx->slice_list = slice_p;
-					slice_p += slice_index;
-				} else {
-					slice_p++;
-				}
-
-				// set chunk info
-				chunk_p->size += block_size;
-				previous_index = find_index;
-				num_dedup++;
-
-				// Read next block on second position.
-				file_offset += block_size;
-				if (file_offset >= file_size){
-					//printf("file_offset = %"PRIu64", file_size = %"PRIu64", EOF\n", file_offset, file_size);
-					break;
-				}
-				read_size = block_size;
-				if (file_offset + block_size >= file_size){
-					// Slide block of second position to former position.
-					memcpy(work_buf, work_buf + block_size, (size_t)(file_size - file_offset));
-					read_size = 0;
-				} else if (file_offset + block_size * 2 > file_size){
-					read_size = file_size - file_offset - block_size;
-				}
-				//printf("file_offset = %"PRIu64", read_size = %"PRIu64"\n", file_offset, read_size);
-				if (read_size > 0){
-					// Slide block of second position to former position.
-					memcpy(work_buf, work_buf + block_size, (size_t)block_size);
-
-					if (fread(work_buf + block_size, 1, (size_t)read_size, fp) != read_size){
-						perror("Failed to read next block on input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-
-					// Print progress percent
-					if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 2) ){
-						progress_step += read_size;
-						time_now = time(NULL);
-						if (time_now != time_old){
-							time_old = time_now;
-							progress_now = (int)((progress_step * 1000) / progress_total);
-							if (progress_now != progress_old){
-								progress_old = progress_now;
-								printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-							}
-						}
-					}
-
-					// calculate CRC-64 of the first 16 KB
-					if (file_offset + block_size + read_size < 16384){
-						file_p->crc = crc64(work_buf + block_size, (size_t)read_size, file_p->crc);
-					} else if (file_offset + block_size < 16384){
-						file_p->crc = crc64(work_buf + block_size, (size_t)(16384 - file_offset - block_size), file_p->crc);
-					}
-					blake3_hasher_update(&hasher, work_buf + block_size, (size_t)read_size);
-				}
-
-				// Calculate CRC-64 of next block.
-				if (file_offset + block_size <= file_size)
-					crc = crc64(work_buf, block_size, 0);
-			}
-		}
-
-		// Calculate size of chunk tail. (tail data was read on work_buf already.)
-		tail_size = file_size - file_offset;
-		//printf("tail_size = %"PRIu64", file size = %"PRIu64", offset %"PRIu64"\n", tail_size, file_size, file_offset);
-		if (tail_size >= 40){
-			// calculate checksum of chunk tail
-			chunk_p->tail_crc = crc64(work_buf, 40, 0);
-			blake3(work_buf, (size_t)tail_size, chunk_p->tail_hash);
-
-			// search existing tails of same data
-			tail_offset = 0;
-			for (index = 0; index < slice_index; index++){
-				//printf("tail size = %"PRIu64"\n", slice_list[index].size);
-				if (slice_list[index].size == tail_size){	// same size tail
-					//printf("crc = 0x%016I64x, 0x%016I64x chunk[%2u]\n", chunk_p->tail_crc, chunk_list[slice_list[index].chunk].tail_crc, slice_list[index].chunk);
-					if (chunk_p->tail_crc == chunk_list[slice_list[index].chunk].tail_crc){
-						if (memcmp(chunk_p->tail_hash, chunk_list[slice_list[index].chunk].tail_hash, 16) == 0){
-							tail_offset = -1;
-
-							// find the last slice info in the block
-							last_index = index;
-							while (slice_list[last_index].next != -1){
-								last_index = slice_list[last_index].next;
-							}
-							break;
-						}
-					}
-				}
-			}
-			if (tail_offset == 0){
-				// search existing blocks to check available space
-				for (index = 0; index < block_index; index++){
-					if (block_list[index].size + tail_size <= block_size){
-						// When tail can fit in the space, put the tail there.
-						tail_offset = block_list[index].size;
-
-						// find the last slice info in the block
-						last_index = block_list[index].slice;
-						while (slice_list[last_index].next != -1){
-							last_index = slice_list[last_index].next;
-						}
-						break;
-					}
-				}
-			}
-			//printf("tail_offset = %"PRId64"\n", tail_offset);
-
-			if (tail_offset < 0){	// Same data as previous tail
-				if (par3_ctx->noise_level >= 3){
-					printf("o t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRIu64"\n",
-							slice_list[index].block, slice_index, chunk_index, num, file_offset, tail_size, slice_list[index].tail_offset);
-				}
-				slice_list[last_index].next = slice_index;	// These same tails have same offset and size.
-
-				// set slice info
-				slice_p->block = slice_list[index].block;
-				slice_p->tail_offset = slice_list[index].tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = slice_p->block;
-				chunk_p->tail_offset = slice_p->tail_offset;
-				num_dedup++;
-
-			} else if (tail_offset == 0){	// Put tail in new block
-				if (par3_ctx->noise_level >= 3){
-					printf("n t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-							block_index, slice_index, chunk_index, num, file_offset, tail_size);
-				}
-
-				// set slice info
-				slice_p->block = block_index;
-				slice_p->tail_offset = 0;
-
-				// set chunk tail info
-				chunk_p->tail_block = block_index;
-				chunk_p->tail_offset = 0;
-
-				// set block info (block for tails don't store checksum)
-				block_p->slice = slice_index;
-				block_p->size = tail_size;
-				block_p->crc = crc64(work_buf, (size_t)tail_size, 0);
-				block_p->state = 2 | 64;
-				block_p++;
-				block_index++;
-
-			} else {	// Put tail after another tail
-				if (par3_ctx->noise_level >= 3){
-					printf("a t block[%2"PRIu64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64", offset %"PRId64"\n",
-							index, slice_index, chunk_index, num, file_offset, tail_size, tail_offset);
-				}
-				slice_list[last_index].next = slice_index;	// update "next" item in the front tail
-
-				// set slice info
-				slice_p->block = index;
-				slice_p->tail_offset = tail_offset;
-
-				// set chunk tail info
-				chunk_p->tail_block = index;
-				chunk_p->tail_offset = tail_offset;
-				num_pack++;
-
-				// update block info
-				block_list[slice_p->block].size = tail_offset + tail_size;
-				block_list[slice_p->block].crc = crc64(work_buf, (size_t)tail_size, block_list[slice_p->block].crc);
-			}
-
-			// set common slice info
-			slice_p->file = num;
-			slice_p->offset = file_offset;
-			slice_p->size = tail_size;
-			slice_p->chunk = chunk_index;
-			slice_p->next = -1;
-			slice_index++;
-			if (slice_index >= slice_count){
-				slice_count *= 2;
-				slice_p = realloc(par3_ctx->slice_list, sizeof(PAR3_SLICE_CTX) * slice_count);
-				if (slice_p == NULL){
-					perror("Failed to re-allocate memory for input file slices");
-					fclose(fp);
-					return RET_MEMORY_ERROR;
-				}
-				slice_list = slice_p;
-				par3_ctx->slice_list = slice_p;
-				slice_p += slice_index;
-			} else {
-				slice_p++;
-			}
-
-		} else if (tail_size > 0){	// When tail size is 1~39 bytes, it's saved in File Packet.
-			memcpy(buf_tail, work_buf, tail_size);	// block size may be smaller than 40 bytes.
-			memset(buf_tail + tail_size, 0, 40 - tail_size);	// zero fill the rest bytes
-			if (par3_ctx->noise_level >= 3){
-				printf("    block no  : slice no  chunk[%2u] file %d, offset %"PRIu64", tail size %"PRIu64"\n",
-						chunk_index, num, file_offset, tail_size);
-			}
-
-			// copy 1 ~ 39 bytes
-			memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-			memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-			memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-			memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-		}
-		chunk_p->size += tail_size;
-
-		// Close chunk description
-		if (chunk_p->size > 0){
-			chunk_num++;
-			chunk_index++;
-			if (chunk_index >= chunk_count){
-				chunk_count *= 2;
-				chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_count);
-				if (chunk_p == NULL){
-					perror("Failed to re-allocate memory for chunk description");
-					fclose(fp);
-					return RET_MEMORY_ERROR;
-				}
-				chunk_list = chunk_p;
-				par3_ctx->chunk_list = chunk_p;
-				chunk_p += chunk_index;
-			} else {
-				chunk_p++;
-			}
-		}
-		file_p->chunk_num = chunk_num;
-
-		blake3_hasher_finalize(&hasher, file_p->hash, 16);
-		if (fclose(fp) != 0){
-			perror("Failed to close input file");
-			return RET_FILE_IO_ERROR;
-		}
-
-		file_p++;
-	}
-
-/*
-	// for debug
-	for (i = 0; i < par3_ctx->crc_count; i++){
-		printf("crc_list[%2u] = 0x%016I64x , %"PRIu64"\n", i, crc_list[i].crc, crc_list[i].index);
-	}
-*/
-
-	// Release temporary buffer.
-	free(crc_list);
-	par3_ctx->crc_list = NULL;
-	par3_ctx->crc_count = 0;
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->noise_level <= 2){
-			if (progress_step < progress_total)
-				printf("Didn't finish progress. %"PRIu64" / %"PRIu64"\n", progress_step, progress_total);
-		}
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-		printf("\n");
-	}
-
-	// Re-allocate memory for actual number of chunk description
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of chunk description = %u (max %u)\n", chunk_index, chunk_count);
-	}
-	if (chunk_index < chunk_count){
-		if (chunk_index > 0){
-			chunk_p = realloc(par3_ctx->chunk_list, sizeof(PAR3_CHUNK_CTX) * chunk_index);
-			if (chunk_p == NULL){
-				perror("Failed to re-allocate memory for chunk description");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->chunk_list = chunk_p;
-		} else {
-			free(par3_ctx->chunk_list);
-			par3_ctx->chunk_list = NULL;
-		}
-	}
-	par3_ctx->chunk_count = chunk_index;
-
-	// Re-allocate memory for actual number of input file slices
-	if (slice_index < slice_count){
-		if (par3_ctx->noise_level >= 1){
-			printf("Number of input file slice = %"PRIu64" (max %"PRIu64")\n", slice_index, slice_count);
-		}
-		if (slice_index > 0){
-			slice_p = realloc(par3_ctx->slice_list, sizeof(PAR3_SLICE_CTX) * slice_index);
-			if (slice_p == NULL){
-				perror("Failed to re-allocate memory for input file slices");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->slice_list = slice_p;
-		} else {
-			free(par3_ctx->slice_list);
-			par3_ctx->slice_list = NULL;
-		}
-	}
-	par3_ctx->slice_count = slice_index;
-
-	// Update actual number of input blocks
-	if (block_index < block_count){
-		block_count = block_index;
-		par3_ctx->block_count = block_count;
-
-		// realloc
-		block_p = realloc(par3_ctx->block_list, sizeof(PAR3_BLOCK_CTX) * block_count);
-		if (block_p == NULL){
-			perror("Failed to re-allocate memory for input blocks");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->block_list = block_p;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Actual block count = %"PRIu64", Tail packing = %u, Deduplication = %"PRIu64"\n", block_count, num_pack, num_dedup);
-	}
-
-	return 0;
-}
diff --git a/windows/src/packet.h b/windows/src/packet.h
deleted file mode 100644
index 8d06f63..0000000
--- a/windows/src/packet.h
+++ /dev/null
@@ -1,23 +0,0 @@
-
-// for creation
-
-void make_packet_header(uint8_t *buf, uint64_t packet_size, uint8_t *set_id, uint8_t *packet_type, int flag_hash);
-
-int make_start_packet(PAR3_CTX *par3_ctx, int flag_trial);
-int make_matrix_packet(PAR3_CTX *par3_ctx);
-int make_file_packet(PAR3_CTX *par3_ctx);
-int make_ext_data_packet(PAR3_CTX *par3_ctx);
-
-int duplicate_common_packet(PAR3_CTX *par3_ctx);
-
-
-// for verification
-
-int check_packet_exist(uint8_t *buf, size_t buf_size, uint8_t *packet, uint64_t packet_size);
-int add_found_packet(PAR3_CTX *par3_ctx, uint8_t *packet);
-int list_found_packet(PAR3_CTX *par3_ctx, uint8_t *packet, char *filename, int64_t offset);
-int check_packet_set(PAR3_CTX *par3_ctx);
-
-int parse_vital_packet(PAR3_CTX *par3_ctx);
-int parse_external_data_packet(PAR3_CTX *par3_ctx);
-
diff --git a/windows/src/packet_add.c b/windows/src/packet_add.c
deleted file mode 100644
index f377be5..0000000
--- a/windows/src/packet_add.c
+++ /dev/null
@@ -1,861 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-
-
-// 0 = no packet yet, 1 = the packet exists already
-int check_packet_exist(uint8_t *buf, size_t buf_size, uint8_t *packet, uint64_t packet_size)
-{
-	size_t offset;
-	uint64_t this_size;
-
-	offset = 0;
-	while (offset + packet_size <= buf_size){
-		// compare packet size
-		memcpy(&this_size, buf + offset + 24, 8);
-		if (this_size == packet_size){
-			// compare checksums
-			if (memcmp(buf + offset + 8, packet + 8, 16) == 0){
-				return 1;
-			}
-		}
-
-		offset += this_size;
-	}
-
-	return 0;
-}
-
-// It allocates memory for each packet type, and stores the packet.
-// -2 = unknown type, -1 = the packet exists already, 0 = added, 1~ = error
-int add_found_packet(PAR3_CTX *par3_ctx, uint8_t *packet)
-{
-	uint8_t *packet_type, *tmp_p;
-	uint64_t packet_size;
-
-	// read packet size
-	memcpy(&packet_size, packet + 24, 8);
-
-	// allocate memory for the packet type
-	packet_type = packet + 40;
-	if (memcmp(packet_type, "PAR CRE\0", 8) == 0){	// Creator Packet
-		if (par3_ctx->creator_packet == NULL){
-			par3_ctx->creator_packet = malloc(packet_size);
-			if (par3_ctx->creator_packet == NULL){
-				perror("Failed to allocate memory for Creator Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->creator_packet, packet, packet_size);
-			par3_ctx->creator_packet_size = packet_size;
-			par3_ctx->creator_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->creator_packet, par3_ctx->creator_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->creator_packet, par3_ctx->creator_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Creator Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->creator_packet = tmp_p;
-			memcpy(par3_ctx->creator_packet + par3_ctx->creator_packet_size, packet, packet_size);
-			par3_ctx->creator_packet_size += packet_size;
-			par3_ctx->creator_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR COM\0", 8) == 0){	// Comment Packet
-		if (par3_ctx->comment_packet == NULL){
-			par3_ctx->comment_packet = malloc(packet_size);
-			if (par3_ctx->comment_packet == NULL){
-				perror("Failed to allocate memory for Comment Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->comment_packet, packet, packet_size);
-			par3_ctx->comment_packet_size = packet_size;
-			par3_ctx->comment_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->comment_packet, par3_ctx->comment_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->comment_packet, par3_ctx->comment_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Comment Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->comment_packet = tmp_p;
-			memcpy(par3_ctx->comment_packet + par3_ctx->comment_packet_size, packet, packet_size);
-			par3_ctx->comment_packet_size += packet_size;
-			par3_ctx->comment_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR STA\0", 8) == 0){	// Start Packet
-		if (par3_ctx->start_packet == NULL){
-			par3_ctx->start_packet = malloc(packet_size);
-			if (par3_ctx->start_packet == NULL){
-				perror("Failed to allocate memory for Start Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->start_packet, packet, packet_size);
-			par3_ctx->start_packet_size = packet_size;
-			par3_ctx->start_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->start_packet, par3_ctx->start_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->start_packet, par3_ctx->start_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Start Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->start_packet = tmp_p;
-			memcpy(par3_ctx->start_packet + par3_ctx->start_packet_size, packet, packet_size);
-			par3_ctx->start_packet_size += packet_size;
-			par3_ctx->start_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR FIL\0", 8) == 0){	// File Packet
-		if (par3_ctx->file_packet == NULL){
-			par3_ctx->file_packet = malloc(packet_size);
-			if (par3_ctx->file_packet == NULL){
-				perror("Failed to allocate memory for File Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->file_packet, packet, packet_size);
-			par3_ctx->file_packet_size = packet_size;
-			par3_ctx->file_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->file_packet, par3_ctx->file_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->file_packet, par3_ctx->file_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for File Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->file_packet = tmp_p;
-			memcpy(par3_ctx->file_packet + par3_ctx->file_packet_size, packet, packet_size);
-			par3_ctx->file_packet_size += packet_size;
-			par3_ctx->file_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR DIR\0", 8) == 0){	// Directory Packet
-		if (par3_ctx->dir_packet == NULL){
-			par3_ctx->dir_packet = malloc(packet_size);
-			if (par3_ctx->dir_packet == NULL){
-				perror("Failed to allocate memory for Directory Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->dir_packet, packet, packet_size);
-			par3_ctx->dir_packet_size = packet_size;
-			par3_ctx->dir_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->dir_packet, par3_ctx->dir_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->dir_packet, par3_ctx->dir_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Directory Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->dir_packet = tmp_p;
-			memcpy(par3_ctx->dir_packet + par3_ctx->dir_packet_size, packet, packet_size);
-			par3_ctx->dir_packet_size += packet_size;
-			par3_ctx->dir_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR ROO\0", 8) == 0){	// Root Packet
-		if (par3_ctx->root_packet == NULL){
-			par3_ctx->root_packet = malloc(packet_size);
-			if (par3_ctx->root_packet == NULL){
-				perror("Failed to allocate memory for Root Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->root_packet, packet, packet_size);
-			par3_ctx->root_packet_size = packet_size;
-			par3_ctx->root_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->root_packet, par3_ctx->root_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->root_packet, par3_ctx->root_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Root Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->root_packet = tmp_p;
-			memcpy(par3_ctx->root_packet + par3_ctx->root_packet_size, packet, packet_size);
-			par3_ctx->root_packet_size += packet_size;
-			par3_ctx->root_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR EXT\0", 8) == 0){	// External Data Packet
-		if (par3_ctx->ext_data_packet == NULL){
-			par3_ctx->ext_data_packet = malloc(packet_size);
-			if (par3_ctx->ext_data_packet == NULL){
-				perror("Failed to allocate memory for External Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->ext_data_packet, packet, packet_size);
-			par3_ctx->ext_data_packet_size = packet_size;
-			par3_ctx->ext_data_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->ext_data_packet, par3_ctx->ext_data_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->ext_data_packet, par3_ctx->ext_data_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for External Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->ext_data_packet = tmp_p;
-			memcpy(par3_ctx->ext_data_packet + par3_ctx->ext_data_packet_size, packet, packet_size);
-			par3_ctx->ext_data_packet_size += packet_size;
-			par3_ctx->ext_data_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR CAU\0", 8) == 0){	// Cauchy Matrix Packet
-		if (par3_ctx->matrix_packet == NULL){
-			par3_ctx->matrix_packet = malloc(packet_size);
-			if (par3_ctx->matrix_packet == NULL){
-				perror("Failed to allocate memory for Matrix Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->matrix_packet, packet, packet_size);
-			par3_ctx->matrix_packet_size = packet_size;
-			par3_ctx->matrix_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->matrix_packet, par3_ctx->matrix_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->matrix_packet, par3_ctx->matrix_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Matrix Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->matrix_packet = tmp_p;
-			memcpy(par3_ctx->matrix_packet + par3_ctx->matrix_packet_size, packet, packet_size);
-			par3_ctx->matrix_packet_size += packet_size;
-			par3_ctx->matrix_packet_count++;
-		}
-
-	} else if (memcmp(packet_type, "PAR FFT\0", 8) == 0){	// FFT Matrix Packet
-		if (par3_ctx->matrix_packet == NULL){
-			par3_ctx->matrix_packet = malloc(packet_size);
-			if (par3_ctx->matrix_packet == NULL){
-				perror("Failed to allocate memory for Matrix Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->matrix_packet, packet, packet_size);
-			par3_ctx->matrix_packet_size = packet_size;
-			par3_ctx->matrix_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->matrix_packet, par3_ctx->matrix_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->matrix_packet, par3_ctx->matrix_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Matrix Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->matrix_packet = tmp_p;
-			memcpy(par3_ctx->matrix_packet + par3_ctx->matrix_packet_size, packet, packet_size);
-			par3_ctx->matrix_packet_size += packet_size;
-			par3_ctx->matrix_packet_count++;
-		}
-
-	// UNIX Permissions Packet or FAT Permissions Packet
-	} else if ( (memcmp(packet_type, "PAR UNX\0", 8) == 0)
-				|| (memcmp(packet_type, "PAR FAT\0", 8) == 0) ){
-		if (par3_ctx->file_system_packet == NULL){
-			par3_ctx->file_system_packet = malloc(packet_size);
-			if (par3_ctx->file_system_packet == NULL){
-				perror("Failed to allocate memory for File System Packet");
-				return RET_MEMORY_ERROR;
-			}
-			memcpy(par3_ctx->file_system_packet, packet, packet_size);
-			par3_ctx->file_system_packet_size = packet_size;
-			par3_ctx->file_system_packet_count = 1;
-		} else if (check_packet_exist(par3_ctx->file_system_packet, par3_ctx->file_system_packet_size, packet, packet_size) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			tmp_p = realloc(par3_ctx->file_system_packet, par3_ctx->file_system_packet_size + packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for File System Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->file_system_packet = tmp_p;
-			memcpy(par3_ctx->file_system_packet + par3_ctx->file_system_packet_size, packet, packet_size);
-			par3_ctx->file_system_packet_size += packet_size;
-			par3_ctx->file_system_packet_count++;
-		}
-
-	} else {	// Unknown packet type
-		return -2;
-/*
-make context for large packets ?
-Data Packet and Recovery Data Packet will be too large to store on memory.
-*/
-	}
-
-	return 0;
-}
-
-// 0 = no packet yet, 1 = the packet exists already
-static int check_item_exist(PAR3_PKT_CTX *list, uint64_t item_count, uint64_t id, uint64_t index, uint8_t *cmp_buf)
-{
-	while (item_count != 0){
-		// compare InputSetID and index
-		if ( (list->id == id) && (list->index == index) ){
-			if (cmp_buf == NULL){	// Data Packet
-				return 1;
-			} else {	// Recovery Data Packet
-				// comprare other values
-				if ( (memcmp(list->root, cmp_buf, 16) == 0) && (memcmp(list->matrix, cmp_buf + 16, 16) == 0) ){
-					return 1;
-				}
-			}
-		}
-
-		list++;
-		item_count--;
-	}
-
-	return 0;
-}
-
-// It allocates memory for each packet type, and lists the packet.
-// -2 = unknown type, -1 = the packet exists already, 0 = added, 1~ = error
-int list_found_packet(PAR3_CTX *par3_ctx, uint8_t *packet, char *filename, int64_t offset)
-{
-	uint8_t *packet_type, cmp_buf[32];
-	uint64_t set_id, index, count;
-	PAR3_PKT_CTX *list;
-
-	// allocate memory for the packet type
-	packet_type = packet + 40;
-	if (memcmp(packet_type, "PAR DAT\0", 8) == 0){	// Data Packet
-		memcpy(&set_id, packet + 32, 8);	// InputSetID
-		memcpy(&index, packet + 48, 8);		// Index of input block
-		if (par3_ctx->data_packet_list == NULL){
-			list = malloc(sizeof(PAR3_PKT_CTX));
-			if (list == NULL){
-				perror("Failed to allocate memory for Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->data_packet_list = list;
-			list[0].id = set_id;
-			memset(list[0].root, 0, 16);	// Zero fill unused values
-			memset(list[0].matrix, 0, 16);
-			list[0].index = index;
-			list[0].name = filename;
-			list[0].offset = offset;
-			par3_ctx->data_packet_count = 1;
-		} else if (check_item_exist(par3_ctx->data_packet_list, par3_ctx->data_packet_count, set_id, index, NULL) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			count = par3_ctx->data_packet_count;
-			list = realloc(par3_ctx->data_packet_list, sizeof(PAR3_PKT_CTX) * (count + 1));
-			if (list == NULL){
-				perror("Failed to re-allocate memory for Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->data_packet_list = list;
-			list[count].id = set_id;
-			memset(list[count].root, 0, 16);	// Zero fill unused values
-			memset(list[count].matrix, 0, 16);
-			list[count].index = index;
-			list[count].name = filename;
-			list[count].offset = offset;
-			par3_ctx->data_packet_count += 1;
-		}
-
-	} else if (memcmp(packet_type, "PAR REC\0", 8) == 0){	// Recovery Data Packet
-		memcpy(&set_id, packet + 32, 8);		// InputSetID
-		memcpy(cmp_buf, packet + 48, 16);		// checksum from Root packet
-		memcpy(cmp_buf + 16, packet + 64, 16);	// checksum from Matrix packet
-		memcpy(&index, packet + 80, 8);			// Index of recovery block
-		if (par3_ctx->recv_packet_list == NULL){
-			list = malloc(sizeof(PAR3_PKT_CTX));
-			if (list == NULL){
-				perror("Failed to allocate memory for Recovery Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->recv_packet_list = list;
-			list[0].id = set_id;
-			memcpy(list[0].root, cmp_buf, 16);
-			memcpy(list[0].matrix, cmp_buf + 16, 16);
-			list[0].index = index;
-			list[0].name = filename;
-			list[0].offset = offset;
-			par3_ctx->recv_packet_count = 1;
-		} else if (check_item_exist(par3_ctx->recv_packet_list, par3_ctx->recv_packet_count, set_id, index, cmp_buf) == 1){
-			// If there is the packet already, just exit.
-			return -1;
-		} else {
-			// Add this packet after other packets.
-			count = par3_ctx->recv_packet_count;
-			list = realloc(par3_ctx->recv_packet_list, sizeof(PAR3_PKT_CTX) * (count + 1));
-			if (list == NULL){
-				perror("Failed to re-allocate memory for Recovery Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->recv_packet_list = list;
-			list[count].id = set_id;
-			memcpy(list[count].root, cmp_buf, 16);
-			memcpy(list[count].matrix, cmp_buf + 16, 16);
-			list[count].index = index;
-			list[count].name = filename;
-			list[count].offset = offset;
-			par3_ctx->recv_packet_count += 1;
-		}
-
-	} else {
-		return -2;
-	}
-
-	return 0;
-}
-
-// Delete packets and move former, return new size.
-static size_t adjust_packet_buf(uint8_t *buf, size_t buf_size, uint64_t *id_list, int id_count, uint32_t *new_count)
-{
-	int i;
-	uint32_t count;
-	size_t offset;
-	uint64_t packet_size, this_id;
-
-	count = 0;
-	offset = 0;
-	while (offset < buf_size){
-		// read packet size
-		memcpy(&packet_size, buf + offset + 24, 8);
-
-		// check SetID
-		memcpy(&this_id, buf + offset + 32, 8);
-		for (i = 0; i < id_count; i++){
-			if (id_list[i] == this_id)
-				break;
-		}
-		if (i == id_count){	// When packet didn't match, delete it.
-			memmove(buf + offset, buf + offset + packet_size, buf_size - offset - packet_size);
-			buf_size -= packet_size;
-
-		} else {	// goto next packet
-			count++;
-			offset += packet_size;
-		}
-	}
-	*new_count = count;
-
-	return buf_size;
-}
-
-// Delete items and move former, return new count.
-static uint64_t adjust_packet_list(PAR3_PKT_CTX *list, uint64_t item_count, uint64_t *id_list, int id_count, uint8_t *root)
-{
-	int i;
-	uint64_t this_id, item_index;
-
-	item_index = 0;
-	while (item_index < item_count){
-		// check SetID
-		this_id = list[item_index].id;
-		for (i = 0; i < id_count; i++){
-			if (id_list[i] == this_id){
-				if (root == NULL){	// Data Packet
-					break;
-				} else {	// Recovery Data Packet
-					// Use the recovery data after confirming checksum from Root Packet
-					if (memcmp(list[item_index].root, root, 16) == 0){
-						break;
-					}
-				}
-			}
-		}
-		if (i == id_count){	// When packet didn't match, delete it.
-			memmove(list + item_index, list + item_index + 1, sizeof(PAR3_PKT_CTX) * (item_count - item_index - 1));
-			item_count--;
-
-		} else {	// goto next packet
-			item_index++;
-		}
-	}
-
-	return item_index;
-}
-
-// Remove useless packets
-static int remove_other_packet(PAR3_CTX *par3_ctx, uint64_t *id_list, int id_count)
-{
-	uint8_t *tmp_p;
-	uint32_t new_count;
-	size_t new_size;
-	uint64_t item_count;
-	PAR3_PKT_CTX *list;
-
-	if (par3_ctx->creator_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->creator_packet, par3_ctx->creator_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->creator_packet);
-			par3_ctx->creator_packet = NULL;
-			par3_ctx->creator_packet_size = 0;
-			par3_ctx->creator_packet_count = 0;
-		} else if (new_size < par3_ctx->creator_packet_size){
-			tmp_p = realloc(par3_ctx->creator_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Creator Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->creator_packet = tmp_p;
-			par3_ctx->creator_packet_size = new_size;
-			par3_ctx->creator_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->comment_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->comment_packet, par3_ctx->comment_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->comment_packet);
-			par3_ctx->comment_packet = NULL;
-			par3_ctx->comment_packet_size = 0;
-			par3_ctx->comment_packet_count = 0;
-		} else if (new_size < par3_ctx->comment_packet_size){
-			tmp_p = realloc(par3_ctx->comment_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Comment Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->comment_packet = tmp_p;
-			par3_ctx->comment_packet_size = new_size;
-			par3_ctx->comment_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->start_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->start_packet, par3_ctx->start_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->start_packet);
-			par3_ctx->start_packet = NULL;
-			par3_ctx->start_packet_size = 0;
-			par3_ctx->start_packet_count = 0;
-		} else if (new_size < par3_ctx->start_packet_size){
-			tmp_p = realloc(par3_ctx->start_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Start Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->start_packet = tmp_p;
-			par3_ctx->start_packet_size = new_size;
-			par3_ctx->start_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->matrix_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->matrix_packet, par3_ctx->matrix_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->matrix_packet);
-			par3_ctx->matrix_packet = NULL;
-			par3_ctx->matrix_packet_size = 0;
-			par3_ctx->matrix_packet_count = 0;
-		} else if (new_size < par3_ctx->matrix_packet_size){
-			tmp_p = realloc(par3_ctx->matrix_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Matrix Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->matrix_packet = tmp_p;
-			par3_ctx->matrix_packet_size = new_size;
-			par3_ctx->matrix_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->file_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->file_packet, par3_ctx->file_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->file_packet);
-			par3_ctx->file_packet = NULL;
-			par3_ctx->file_packet_size = 0;
-			par3_ctx->file_packet_count = 0;
-		} else if (new_size < par3_ctx->file_packet_size){
-			tmp_p = realloc(par3_ctx->file_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for File Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->file_packet = tmp_p;
-			par3_ctx->file_packet_size = new_size;
-			par3_ctx->file_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->dir_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->dir_packet, par3_ctx->dir_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->dir_packet);
-			par3_ctx->dir_packet = NULL;
-			par3_ctx->dir_packet_size = 0;
-			par3_ctx->dir_packet_count = 0;
-		} else if (new_size < par3_ctx->dir_packet_size){
-			tmp_p = realloc(par3_ctx->dir_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Directory Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->dir_packet = tmp_p;
-			par3_ctx->dir_packet_size = new_size;
-			par3_ctx->dir_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->root_packet_size > 0){
-		// Root Packet is only the last descendant SetID.
-		new_size = adjust_packet_buf(par3_ctx->root_packet, par3_ctx->root_packet_size, (uint64_t *)(par3_ctx->set_id), 1, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->root_packet);
-			par3_ctx->root_packet = NULL;
-			par3_ctx->root_packet_size = 0;
-			par3_ctx->root_packet_count = 0;
-		} else if (new_size < par3_ctx->root_packet_size){
-			tmp_p = realloc(par3_ctx->root_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Root Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->root_packet = tmp_p;
-			par3_ctx->root_packet_size = new_size;
-			par3_ctx->root_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->file_system_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->file_system_packet, par3_ctx->file_system_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->file_system_packet);
-			par3_ctx->file_system_packet = NULL;
-			par3_ctx->file_system_packet_size = 0;
-			par3_ctx->file_system_packet_count = 0;
-		} else if (new_size < par3_ctx->file_system_packet_size){
-			tmp_p = realloc(par3_ctx->file_system_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for File System Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->file_system_packet = tmp_p;
-			par3_ctx->file_system_packet_size = new_size;
-			par3_ctx->file_system_packet_count = new_count;
-		}
-	}
-	if (par3_ctx->ext_data_packet_size > 0){
-		new_size = adjust_packet_buf(par3_ctx->ext_data_packet, par3_ctx->ext_data_packet_size, id_list, id_count, &new_count);
-		if (new_size == 0){
-			free(par3_ctx->ext_data_packet);
-			par3_ctx->ext_data_packet = NULL;
-			par3_ctx->ext_data_packet_size = 0;
-			par3_ctx->ext_data_packet_count = 0;
-		} else if (new_size < par3_ctx->ext_data_packet_size){
-			tmp_p = realloc(par3_ctx->ext_data_packet, new_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for External Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->ext_data_packet = tmp_p;
-			par3_ctx->ext_data_packet_size = new_size;
-			par3_ctx->ext_data_packet_count = new_count;
-		}
-	}
-
-	if (par3_ctx->data_packet_count > 0){
-		item_count = adjust_packet_list(par3_ctx->data_packet_list, par3_ctx->data_packet_count, id_list, id_count, NULL);
-		if (item_count == 0){
-			free(par3_ctx->data_packet_list);
-			par3_ctx->data_packet_list = NULL;
-			par3_ctx->data_packet_count = 0;
-		} else if (item_count < par3_ctx->data_packet_count){
-			list = realloc(par3_ctx->data_packet_list, sizeof(PAR3_PKT_CTX) * item_count);
-			if (list == NULL){
-				perror("Failed to re-allocate memory for Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->data_packet_list = list;
-			par3_ctx->data_packet_count = item_count;
-		}
-	}
-	if (par3_ctx->recv_packet_count > 0){
-		if (par3_ctx->root_packet != NULL){
-			tmp_p = par3_ctx->root_packet + 8;	// checksum from Root Packet
-		} else {
-			tmp_p = NULL;
-		}
-		item_count = adjust_packet_list(par3_ctx->recv_packet_list, par3_ctx->recv_packet_count, id_list, id_count, tmp_p);
-		if (item_count == 0){
-			free(par3_ctx->recv_packet_list);
-			par3_ctx->recv_packet_list = NULL;
-			par3_ctx->recv_packet_count = 0;
-		} else if (item_count < par3_ctx->recv_packet_count){
-			list = realloc(par3_ctx->recv_packet_list, sizeof(PAR3_PKT_CTX) * item_count);
-			if (list == NULL){
-				perror("Failed to re-allocate memory for Recovery Data Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->recv_packet_list = list;
-			par3_ctx->recv_packet_count = item_count;
-		}
-	}
-
-	return 0;
-}
-
-// check InputSetID of packets
-int check_packet_set(PAR3_CTX *par3_ctx)
-{
-	if (par3_ctx->start_packet_count == 0){
-		printf("Failed to find PAR3 Start Packet\n");
-		return RET_INSUFFICIENT_DATA;
-	}
-
-	if (par3_ctx->start_packet_count == 1){
-		// copy SetID from the packet
-		memcpy(par3_ctx->set_id, par3_ctx->start_packet + 32, 8);
-		if (par3_ctx->noise_level >= 2){
-			printf("\n");
-			printf("InputSetID = %02X %02X %02X %02X %02X %02X %02X %02X\n",
-					par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-					par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-		}
-
-		// remove uesless packets by checking SetID
-		if (remove_other_packet(par3_ctx, (uint64_t *)(par3_ctx->set_id), 1) != 0){
-			return RET_MEMORY_ERROR;
-		}
-
-	} else {	// When there are multiple Start Packets, test "incremental backup".
-		uint8_t *tmp_p;
-		int i, id_count;
-		size_t max, offset, packet_size;
-		uint64_t *id_list, parent_id, this_id;
-
-		id_list = malloc(sizeof(uint64_t) * par3_ctx->start_packet_count);
-		if (id_list == NULL){
-			perror("Failed to allocate memory for InputSetID");
-			return RET_MEMORY_ERROR;
-		}
-
-		// check SetID of the first packet.
-		id_count = 0;
-		tmp_p = par3_ctx->start_packet;
-		max = par3_ctx->start_packet_size;
-		memcpy(&this_id, tmp_p + 32, 8);
-		memcpy(&parent_id, tmp_p + 48 + 8, 8);
-		memcpy(id_list + id_count, &this_id, 8);
-		id_count++;
-
-		if (parent_id == 0){	// This packet is the ancestor of PAR3 Sets.
-			// search descendant Sets
-			while (parent_id == 0){
-				offset = 0;
-				while (offset < max){
-					// check parent's SetID
-					if (memcmp(&this_id, tmp_p + offset + 48 + 8, 8) == 0){
-						memcpy(&this_id, tmp_p + offset + 32, 8);
-						memcpy(id_list + id_count, &this_id, 8);
-						id_count++;
-
-						parent_id = 0;	// search child again
-						break;
-					} else {
-						parent_id = 1;
-					}
-
-					// goto next packet
-					memcpy(&packet_size, tmp_p + offset + 24, 8);
-					offset += packet_size;
-				}
-			}
-			// use the last descendant's SetID
-			memcpy(par3_ctx->set_id, &this_id, 8);
-
-		} else {	// This packet is the child of another PAR3 Set.
-			// use this SetID
-			memcpy(par3_ctx->set_id, &this_id, 8);
-
-			// search ancestor Sets
-			while (parent_id != 0){
-				offset = 0;
-				while (offset < max){
-					// check another SetID
-					if (memcmp(&parent_id, tmp_p + offset + 32, 8) == 0){
-						memcpy(id_list + id_count, &parent_id, 8);
-						id_count++;
-
-						memcpy(&parent_id, tmp_p + offset + 48 + 8, 8);
-						// If parent_id isn't 0, search parent again.
-						break;
-					} else {
-						parent_id = 0;
-					}
-
-					// goto next packet
-					memcpy(&packet_size, tmp_p + offset + 24, 8);
-					offset += packet_size;
-				}
-			}
-		}
-
-		if (par3_ctx->noise_level >= 2){
-			printf("\n");
-			// show SetIDs of PAR3 Sets.
-			for (i = 0; i < id_count; i++){
-				printf("InputSetID = %02"PRIx64" %02"PRIx64" %02"PRIx64" %02"PRIx64" %02"PRIx64" %02"PRIx64" %02"PRIx64" %02"PRIx64"\n",
-						(id_list[i] & 0xFF), (id_list[i] >> 8) & 0xFF,
-						(id_list[i] >> 16) & 0xFF, (id_list[i] >> 24) & 0xFF,
-						(id_list[i] >> 32) & 0xFF, (id_list[i] >> 40) & 0xFF, (id_list[i] >> 48) & 0xFF, id_list[i] >> 56);
-			}
-		}
-
-		// remove uesless packets by checking SetID
-		if (remove_other_packet(par3_ctx, id_list, id_count) != 0){
-			free(id_list);
-			return RET_MEMORY_ERROR;
-		}
-		free(id_list);
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		printf("\nSet packet:\n");
-		if (par3_ctx->creator_packet_count > 0)
-			printf("Number of Creator Packet       =%3u (%4"PRId64" bytes)\n", par3_ctx->creator_packet_count, par3_ctx->creator_packet_size);
-		if (par3_ctx->comment_packet_count > 0)
-			printf("Number of Comment Packet       =%3u (%4"PRId64" bytes)\n", par3_ctx->comment_packet_count, par3_ctx->comment_packet_size);
-		if (par3_ctx->start_packet_count > 0)
-			printf("Number of Start Packet         =%3u (%4"PRId64" bytes)\n", par3_ctx->start_packet_count, par3_ctx->start_packet_size);
-		if (par3_ctx->matrix_packet_count > 0)
-			printf("Number of Matrix Packet        =%3u (%4"PRId64" bytes)\n", par3_ctx->matrix_packet_count, par3_ctx->matrix_packet_size);
-		if (par3_ctx->file_packet_count > 0)
-			printf("Number of File Packet          =%3u (%4"PRId64" bytes)\n", par3_ctx->file_packet_count, par3_ctx->file_packet_size);
-		if (par3_ctx->dir_packet_count > 0)
-			printf("Number of Directory Packet     =%3u (%4"PRId64" bytes)\n", par3_ctx->dir_packet_count, par3_ctx->dir_packet_size);
-		if (par3_ctx->root_packet_count > 0)
-			printf("Number of Root Packet          =%3u (%4"PRId64" bytes)\n", par3_ctx->root_packet_count, par3_ctx->root_packet_size);
-		if (par3_ctx->file_system_packet_count > 0)
-			printf("Number of File System Packet   =%3u (%4"PRId64" bytes)\n", par3_ctx->file_system_packet_count, par3_ctx->file_system_packet_size);
-		if (par3_ctx->ext_data_packet_count > 0)
-			printf("Number of External Data Packet =%3u (%4"PRId64" bytes)\n", par3_ctx->ext_data_packet_count, par3_ctx->ext_data_packet_size);
-		if (par3_ctx->data_packet_count > 0)
-			printf("Number of Data Packet          =%3"PRIu64"\n", par3_ctx->data_packet_count);
-		if (par3_ctx->recv_packet_count > 0)
-			printf("Number of Recovery Data Packet =%3"PRIu64"\n", par3_ctx->recv_packet_count);
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/packet_make.c b/windows/src/packet_make.c
deleted file mode 100644
index 74fc881..0000000
--- a/windows/src/packet_make.c
+++ /dev/null
@@ -1,1235 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _stat64 stat
-#elif _WIN32
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <sys/stat.h>
-
-#elif _WIN32
-
-// MSVC headers
-#include <sys/stat.h>
-
-#endif
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "hash.h"
-#include "common.h"
-#include "file.h"
-
-
-// Fill each field in packet header, and calculate hash of packet.
-void make_packet_header(uint8_t *buf, uint64_t packet_size, uint8_t *set_id, uint8_t *packet_type, int flag_hash)
-{
-	memcpy(buf, "PAR3\0PKT", 8);
-	if (packet_size > 0)
-		memcpy(buf + 24, &packet_size, 8);
-	if (set_id != NULL)
-		memcpy(buf + 32, set_id, 8);
-	if (packet_type != NULL)
-		memcpy(buf + 40, packet_type, 8);
-	if (flag_hash)
-		blake3(buf + 24, packet_size - 24, buf + 8);
-}
-
-
-// Input is packet body of Start Packet.
-// Return generated InputSetID at "buf - 16".
-static void generate_set_id(PAR3_CTX *par3_ctx, uint8_t *buf, size_t body_size)
-{
-	uint32_t num, chunk_num;
-	size_t len;
-	blake3_hasher hasher;
-	struct _stat64 stat_buf;
-
-	// prepare a globally unique random number
-	blake3_hasher_init(&hasher);
-
-	// all the files' contents
-	if (par3_ctx->input_file_count > 0){
-		uint32_t index;
-		uint64_t total_size, block_size, chunk_size;
-		PAR3_FILE_CTX *file_p;
-		PAR3_CHUNK_CTX *chunk_list;
-
-		block_size = par3_ctx->block_size;
-		chunk_list = par3_ctx->chunk_list;
-		file_p = par3_ctx->input_file_list;
-		num = par3_ctx->input_file_count;
-		while (num > 0){
-			// file name
-			len = strlen(file_p->name) + 1;	// Include null string as boundary mark.
-			blake3_hasher_update(&hasher, file_p->name, len);
-
-			// file size
-			blake3_hasher_update(&hasher, &(file_p->size), 8);
-
-			// file hash of protected chunks
-			blake3_hasher_update(&hasher, file_p->hash, 16);
-
-			// If it includes options (releated packets), calculate the data also.
-			if (par3_ctx->file_system & 0x10003){
-				if (_stat64(file_p->name, &stat_buf) == 0){
-					if (par3_ctx->file_system & 1)
-						blake3_hasher_update(&hasher, &(stat_buf.st_mtime), 8);
-					if (par3_ctx->file_system & 2)
-						blake3_hasher_update(&hasher, &(stat_buf.st_mode), 8);
-					if (par3_ctx->file_system & 0x10000)
-						blake3_hasher_update(&hasher, &(stat_buf.st_mtime), 8);
-				}
-			}
-
-			// Chunk Descriptions
-			if (file_p->size > 0){
-				total_size = 0;
-				index = file_p->chunk;
-				chunk_num = file_p->chunk_num;
-				while (chunk_num > 0){
-					// size of chunk
-					chunk_size = chunk_list[index].size;
-					total_size += chunk_size;
-					blake3_hasher_update(&hasher, &chunk_size, 8);
-
-					if ( (chunk_size == 0) || (chunk_size >= block_size) ){
-						// index of first input block holding chunk
-						blake3_hasher_update(&hasher, &(chunk_list[index].block), 8);
-					}
-
-					if (chunk_size % block_size >= 40){
-						// index of block holding tail
-						blake3_hasher_update(&hasher, &(chunk_list[index].tail_block), 8);
-						blake3_hasher_update(&hasher, &(chunk_list[index].tail_offset), 8);
-					}
-
-					// When there are multiple chunks in the file.
-					index++;
-					chunk_num--;
-				}
-			}
-
-			file_p++;
-			num--;
-		}
-	}
-
-	// all the directories' contents
-	if (par3_ctx->input_dir_count > 0){
-		PAR3_DIR_CTX *dir_p;
-
-		dir_p = par3_ctx->input_dir_list;
-		num = par3_ctx->input_dir_count;
-		while (num > 0){
-			// directory name
-			len = strlen(dir_p->name) + 1;	// Include null string as boundary mark.
-			blake3_hasher_update(&hasher, dir_p->name, len);
-
-			// If it includes options (releated packets), calculate the data also.
-			if ( ((par3_ctx->file_system & 4) != 0) && ((par3_ctx->file_system & 3) != 0) ){
-				if (_stat64(dir_p->name, &stat_buf) == 0){
-					if (par3_ctx->file_system & 1)
-						blake3_hasher_update(&hasher, &(stat_buf.st_mtime), 8);
-					if (par3_ctx->file_system & 2)
-						blake3_hasher_update(&hasher, &(stat_buf.st_mode), 8);
-				}
-			}
-
-			dir_p++;
-			num--;
-		}
-	}
-
-	// absolute path
-	if (par3_ctx->absolute_path != 0){
-		uint8_t *tmp_p;
-
-		// convert Windows's directory mark "\" to UNIX's one "/".
-		tmp_p = par3_ctx->base_path;
-		while (tmp_p[0] != 0){
-			if (tmp_p[0] == '\\')
-				tmp_p[0] = '/';
-			tmp_p++;
-		}
-
-		len = strlen(par3_ctx->base_path) + 1;	// Include null string as boundary mark.
-		blake3_hasher_update(&hasher, par3_ctx->base_path, len);
-	}
-
-	// result in 8-bytes hash for a globally unique random number
-	blake3_hasher_finalize(&hasher, buf - 8, 8);
-
-	// calculate hash of packet body for InputSetID
-	blake3_hasher_init(&hasher);
-	// include bytes of the random number at first
-	blake3_hasher_update(&hasher, buf - 8, 8);
-	// parent's InputSetID, parent's Root, block size, Galois field parameters
-	blake3_hasher_update(&hasher, buf, body_size);
-	blake3_hasher_finalize(&hasher, buf - 16, 8);
-}
-
-
-// Start Packet, Creator Packet, Comment Packet
-int make_start_packet(PAR3_CTX *par3_ctx, int flag_trial)
-{
-	uint8_t *tmp_p;
-	size_t packet_size;
-
-	// When there is packet already, just exit.
-	if (par3_ctx->start_packet_size > 0)
-		return 0;
-
-	// Packet size depends on galois field size.
-	packet_size = 48 + 8 + 16 + 8 + 1;	// 81 + additional bytes
-	if (par3_ctx->start_packet == NULL){
-		par3_ctx->start_packet = malloc(packet_size + 4);	// Upto 32-bit Galois Field
-		if (par3_ctx->start_packet == NULL){
-			perror("Failed to allocate memory for Start Packet");
-			return RET_MEMORY_ERROR;
-		}
-	}
-
-	// Set initial value temporary.
-	tmp_p = par3_ctx->start_packet + 48;
-	// At this time, "incremental backup" feature isn't made.
-	memset(tmp_p, 0, 24);	// When there is no parent, fill zeros.
-	tmp_p += 24;
-	memcpy(tmp_p, &(par3_ctx->block_size), 8);	// Block size
-	tmp_p += 8;
-	// Galois Field is varied by using Error Correction Codes.
-	if (par3_ctx->ecc_method & 1){	// Reed-Solomon Erasure Codes with Cauchy Matrix
-		if ( ( (par3_ctx->block_count > 128) && (par3_ctx->max_recovery_block == 0) )
-				|| (par3_ctx->block_count + par3_ctx->first_recovery_block + par3_ctx->recovery_block_count > 256)
-				|| (par3_ctx->block_count + par3_ctx->max_recovery_block > 256) ){
-			// When there are 129 or more input blocks, use 16-bit Galois Field (0x1100B).
-			par3_ctx->galois_poly = 0x1100B;
-			par3_ctx->gf_size = 2;
-			tmp_p[0] = 2;
-			tmp_p[1] = 0x0B;
-			tmp_p[2] = 0x10;
-		} else if (par3_ctx->block_count > 0){
-			// When there are 128 or less input blocks, use 8-bit Galois Field (0x11D).
-			par3_ctx->galois_poly = 0x11D;
-			par3_ctx->gf_size = 1;
-			tmp_p[0] = 1;
-			tmp_p[1] = 0x1D;
-		}
-
-	} else if (par3_ctx->ecc_method & 8){	// FFT based Reed-Solomon Codes
-		// This value is used in Leopard-RS library.
-		if ((par3_ctx->first_recovery_block + par3_ctx->recovery_block_count == 1) || (par3_ctx->max_recovery_block == 1)){
-			par3_ctx->gf_size = 0;	// XOR sum
-		} else if (par3_ctx->block_count > 0){
-			uint64_t possible_count, m, n;
-			possible_count = par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-			if (possible_count < par3_ctx->max_recovery_block)
-				possible_count = par3_ctx->max_recovery_block;
-			// Number of recovery block per cohort
-			if (par3_ctx->interleave > 0)
-				possible_count = (possible_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);
-			m = next_pow2(possible_count);
-			//printf("m = next_pow2(%"PRIu64") = %"PRIu64"\n", possible_count, m);
-			possible_count = par3_ctx->block_count;
-			// Number of input block per cohort
-			if (par3_ctx->interleave > 0)
-				possible_count = (possible_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);
-			n = next_pow2(m + possible_count);
-			//printf("n = next_pow2(%"PRIu64" + %"PRIu64") = %"PRIu64"\n", m, possible_count, n);
-			if (n <= 256){	// LEO_HAS_FF8
-				par3_ctx->galois_poly = 0x11D;
-				par3_ctx->gf_size = 1;
-				tmp_p[0] = 1;
-				tmp_p[1] = 0x1D;
-			} else {	// LEO_HAS_FF16
-				par3_ctx->galois_poly = 0x1002D;
-				par3_ctx->gf_size = 2;
-				tmp_p[0] = 2;
-				tmp_p[1] = 0x2D;
-				tmp_p[2] = 0x00;
-			}
-		}
-	}
-	if (par3_ctx->gf_size == 0){	// When there is no input blocks, no need to set Galois Field.
-		par3_ctx->galois_poly = 0;
-		tmp_p[0] = 0;
-	} else {
-		if (par3_ctx->noise_level >= 1){
-			printf("\nGalois field size = %u\n", par3_ctx->gf_size);
-			printf("Galois field generator = 0x%X\n", par3_ctx->galois_poly);
-		}
-	}
-	packet_size += par3_ctx->gf_size;
-	par3_ctx->start_packet_size = packet_size;
-	par3_ctx->start_packet_count = 1;
-
-	if (flag_trial == 0){	// Trial mode doesn't calculate InputSetID.
-		// generate InputSetID
-		generate_set_id(par3_ctx, par3_ctx->start_packet + 48, par3_ctx->start_packet_size - 48);
-		memcpy(par3_ctx->set_id, par3_ctx->start_packet + 32, 8);
-		if (par3_ctx->noise_level >= 1){
-			printf("InputSetID = %02X %02X %02X %02X %02X %02X %02X %02X\n",
-					par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-					par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-		}
-	}
-
-	// Because SetID was written already, ignore SetID here.
-	make_packet_header(par3_ctx->start_packet, par3_ctx->start_packet_size, NULL, "PAR STA\0", 1);
-
-	// Make header of Creator Packet and Comment Packet, too.
-	if (par3_ctx->creator_packet_size > 0)
-		make_packet_header(par3_ctx->creator_packet, par3_ctx->creator_packet_size, par3_ctx->set_id, "PAR CRE\0", 1);
-	if (par3_ctx->comment_packet_size > 0)
-		make_packet_header(par3_ctx->comment_packet, par3_ctx->comment_packet_size, par3_ctx->set_id, "PAR COM\0", 1);
-
-	if (par3_ctx->noise_level >= 1){
-		printf("Size of Start Packet = %zu\n", par3_ctx->start_packet_size);
-		if (par3_ctx->creator_packet_size > 0)
-			printf("Size of Creator Packet = %zu\n", par3_ctx->creator_packet_size);
-		if (par3_ctx->comment_packet_size > 0)
-			printf("Size of Comment Packet = %zu\n", par3_ctx->comment_packet_size);
-	}
-
-	return 0;
-}
-
-// Matrix Packet
-int make_matrix_packet(PAR3_CTX *par3_ctx)
-{
-	uint8_t *tmp_p;
-	size_t packet_size;
-
-	// When there is no input blocks, just exit.
-	if (par3_ctx->block_count == 0)
-		return 0;
-
-	// When there is no packet yet, error exit.
-	if (par3_ctx->start_packet_size == 0)
-		return RET_LOGIC_ERROR;
-
-	// When there is packet already, just exit.
-	if (par3_ctx->matrix_packet_size > 0)
-		return 0;
-
-	// Max packet size of each type
-	// Cauchy Matrix Packet : 48 + 24 = 72
-	// Sparse Random Matrix Packet : 48 + 40 = 88
-	// Explicit Matrix Packet : not supported yet
-	packet_size = 88;	// Set the largest size temporary.
-	if (par3_ctx->matrix_packet == NULL){
-		par3_ctx->matrix_packet = malloc(packet_size);
-		if (par3_ctx->matrix_packet == NULL){
-			perror("Failed to allocate memory for Matrix Packet");
-			return RET_MEMORY_ERROR;
-		}
-	}
-
-	if (par3_ctx->ecc_method & 1){	// Cauchy Matrix Packet
-		tmp_p = par3_ctx->matrix_packet + 48;
-		// If the encoding client wants to compute recovery data for every input block, they use the values 0 and 0.
-		if (par3_ctx->max_recovery_block == 0){
-			// If the number of rows is unknown, the hint is set to zero.
-			memset(tmp_p, 0, 24);	// Thus, three items are zero.
-			tmp_p += 24;
-		} else {
-			memset(tmp_p, 0, 16);	// Two items are zero.
-			tmp_p += 16;
-			// Set hint for number of recovery blocks
-			// This will cause compatibility issue. Be careful !
-			memcpy(tmp_p, &(par3_ctx->max_recovery_block), 8);
-			tmp_p += 8;
-		}
-		packet_size = 72;
-		make_packet_header(par3_ctx->matrix_packet, packet_size, par3_ctx->set_id, "PAR CAU\0", 1);
-
-
-/*
-	} else if (par3_ctx->ecc_method & 2){	// Sparse Random Matrix Packet
-		par3_ctx->ecc_method = 2;
-		tmp_p = par3_ctx->matrix_packet + 48;
-		// How to know maximum number of recovery blocks ?
-		memset(tmp_p, 0, 24);
-		tmp_p += 24;
-		// How to select number of non-zero elements per input block ?
-		// Is it a density rate against number of input blocks ? such like 1%
-		
-		// How is random number generator seed ?
-		// Is it ok to set a fixed value always ?
-		// Start Packet has unique random number already.
-
-		packet_size = 88;
-		make_packet_header(par3_ctx->matrix_packet, packet_size, par3_ctx->set_id, "PAR SPA\0", 1);
-*/
-
-	} else if (par3_ctx->ecc_method & 8){	// FFT Matrix Packet
-		tmp_p = par3_ctx->matrix_packet + 48;
-		memset(tmp_p, 0, 16);	// At this time, two items are zero.
-		tmp_p += 16;
-		// If the max count was not set, use the creating number of recovery blocks.
-		if (par3_ctx->max_recovery_block < par3_ctx->first_recovery_block + par3_ctx->recovery_block_count)
-			par3_ctx->max_recovery_block = par3_ctx->first_recovery_block + par3_ctx->recovery_block_count;
-		// Store max count as power. Because the value range is 1 ~ 32768, log2 range is 0 ~ 15.
-		if (par3_ctx->interleave == 0){
-			tmp_p[0] = roundup_log2(par3_ctx->max_recovery_block);
-		} else {	// When interleaving, max count is divided by number of cohorts.
-			tmp_p[0] = roundup_log2(par3_ctx->max_recovery_block / (par3_ctx->interleave + 1));
-		}
-		tmp_p += 1;
-		// Store number of interleaving blocks
-		// In normal usage, it will be less than 2 bytes.
-		// 32,768 blocks * 256 cohorts = max 8,388,608 blocks
-		// 32,768 blocks * 65,536 cohorts = max 2,147,483,648 blocks
-		// So, it won't use 2 bytes mostly. It won't use 3 or 4 bytes really.
-		// Then, par3cmdline doesn't support 5 or more bytes at this time.
-		if (par3_ctx->interleave == 0){	// None (0 bytes)
-			packet_size = 65;
-		} else if (par3_ctx->interleave < 256){	// 1 byte = 1 ~ 255
-			memcpy(tmp_p, &(par3_ctx->interleave), 1);
-			tmp_p += 1;
-			packet_size = 66;
-		} else if (par3_ctx->interleave < 65536){	// 2 bytes = 256 ~ 65535
-			memcpy(tmp_p, &(par3_ctx->interleave), 2);
-			tmp_p += 2;
-			packet_size = 67;
-		} else if (par3_ctx->interleave < 16777216){	// 3 bytes = 65536 ~ 16777215
-			memcpy(tmp_p, &(par3_ctx->interleave), 3);
-			tmp_p += 3;
-			packet_size = 68;
-		} else {	// 4 bytes = 16777216 ~ 4294967295
-			memcpy(tmp_p, &(par3_ctx->interleave), 4);
-			tmp_p += 4;
-			packet_size = 69;
-		}
-		make_packet_header(par3_ctx->matrix_packet, packet_size, par3_ctx->set_id, "PAR FFT\0", 1);
-		//printf("max_recovery_block = %"PRIu64", interleave = %u\n", par3_ctx->max_recovery_block, par3_ctx->interleave);
-
-	} else {
-		printf("The specified Error Correction Codes (%u) isn't implemented yet.\n", par3_ctx->ecc_method);
-		return RET_LOGIC_ERROR;
-	}
-
-	par3_ctx->matrix_packet_size = packet_size;
-	par3_ctx->matrix_packet_count = 1;
-	if (par3_ctx->noise_level >= 1){
-		printf("Size of Matrix Packet = %zu\n", packet_size);
-	}
-
-	return 0;
-}
-
-static int compare_checksum( const void *arg1, const void *arg2 )
-{
-	return memcmp( ( unsigned char* ) arg1, ( unsigned char* ) arg2, 16);
-}
-
-// File Packet, Directory Packet, Root Packet
-int make_file_packet(PAR3_CTX *par3_ctx)
-{
-	uint8_t *tmp_p, *name_p, *chk_p;
-	uint32_t num, max, i, packet_count, absolute_num, option_num;
-	size_t alloc_size, packet_size, total_packet_size, len, option_offset;
-	size_t file_alloc_size, dir_alloc_size, root_alloc_size, file_system_alloc_size;
-	PAR3_FILE_CTX *file_p, *file_list;
-	PAR3_DIR_CTX *dir_p, *dir_list;
-
-	// When there is no packet yet, error exit.
-	if (par3_ctx->start_packet_size == 0)
-		return RET_LOGIC_ERROR;
-
-	// When there is packet already, just exit.
-	if (par3_ctx->root_packet_size > 0)
-		return 0;
-
-	// Allocate buffer for packets. (This isn't strict size, but a little larger.)
-	num = par3_ctx->input_file_count;
-	if (num > 0){
-		alloc_size = (48 + 2 + 8 + 16 + 1) * num;	// packet header, length of name, CRC-64, hash, options
-		if (par3_ctx->file_system & 3)
-			alloc_size += 16 * num;	// UNIX Permissions Packet
-		if (par3_ctx->file_system & 0x10000)
-			alloc_size += 16 * num;	// FAT Permissions Packet
-		alloc_size += par3_ctx->input_file_name_len - num;	// subtle null-string of each name
-		alloc_size += (16 + 40) * par3_ctx->chunk_count;	// chunk description with tail info
-		if (par3_ctx->noise_level >= 2){
-			printf("Possible total size of File Packet = %zu\n", alloc_size);
-		}
-		file_alloc_size = alloc_size;
-		if (par3_ctx->file_packet == NULL){
-			par3_ctx->file_packet = malloc(alloc_size);
-			if (par3_ctx->file_packet == NULL){
-				perror("Failed to allocate memory for File Packet");
-				return RET_MEMORY_ERROR;
-			}
-		}
-	}
-
-	absolute_num = 0;
-	if (par3_ctx->absolute_path != 0){	// Enable absolute path
-		par3_ctx->attribute |= 1;
-		if (par3_ctx->absolute_path == 'A')	// include drive letter on Windows OS
-			absolute_num = 1;
-		tmp_p = par3_ctx->base_path;
-		while (tmp_p[0] != 0){
-			if (tmp_p[0] == '/')
-				absolute_num++;
-			tmp_p++;
-		}
-		//printf("Number of directory part in absolute path = %u\n", absolute_num);
-	}
-	num = par3_ctx->input_dir_count;
-	if (num + absolute_num > 0){
-		alloc_size = (48 + 2 + 4) * (num + absolute_num);	// packet header, length of name, CRC-64, options
-		alloc_size += par3_ctx->input_dir_name_len - num;	// subtle null-string of each name
-		if (absolute_num > 0)
-			alloc_size += strlen(par3_ctx->base_path);
-		num = par3_ctx->input_file_count + par3_ctx->input_dir_count + absolute_num;
-		alloc_size += 16 * num;	// checksums of File Packet and Directory Packet
-		if (par3_ctx->noise_level >= 2){
-			printf("Possible total size of Directory Packet = %zu\n", alloc_size);
-		}
-		dir_alloc_size = alloc_size;
-		if (par3_ctx->dir_packet == NULL){
-			par3_ctx->dir_packet = malloc(alloc_size);
-			if (par3_ctx->dir_packet == NULL){
-				perror("Failed to allocate memory for Directory Packet");
-				return RET_MEMORY_ERROR;
-			}
-		}
-	}
-
-	if (par3_ctx->file_system & 0x10003){	// UNIX Permissions Packet or FAT Permissions Packet
-		alloc_size = 0;
-		if (par3_ctx->file_system & 3){	// UNIX Permissions Packet
-			// Every files and directories may have this optional packet.
-			num = par3_ctx->input_file_count + par3_ctx->input_dir_count + absolute_num;
-			alloc_size += 84 * num;	// mtime and i_mode are set.
-			if (par3_ctx->noise_level >= 2){
-				printf("Possible total size of UNIX Permissions Packet = %u\n", 84 * num);
-			}
-		}
-		if (par3_ctx->file_system & 0x10000){	// FAT Permissions Packet
-			// Every files may have this optional packet. (exclude directories)
-			num = par3_ctx->input_file_count;
-			alloc_size += 74 * num;	// LastWriteTimestamp is set.
-			if (par3_ctx->noise_level >= 2){
-				printf("Possible total size of FAT Permissions Packet = %u\n", 74 * num);
-			}
-		}
-		file_system_alloc_size = alloc_size;
-		if (par3_ctx->file_system_packet == NULL){
-			par3_ctx->file_system_packet = malloc(alloc_size);
-			if (par3_ctx->file_system_packet == NULL){
-				perror("Failed to allocate memory for File System Packet");
-				return RET_MEMORY_ERROR;
-			}
-		}
-		par3_ctx->file_system_packet_size = 0;
-		par3_ctx->file_system_packet_count = 0;
-	}
-
-	alloc_size = 48 + 8 + 1 + 4;	// packet header, index, attributes, options
-	num = par3_ctx->input_file_count + par3_ctx->input_dir_count;
-	alloc_size += 16 * num;	// checksums of File Packets and Directory Packets
-	if (par3_ctx->noise_level >= 2){
-		printf("Possible total size of Root Packet = %zu\n", alloc_size);
-	}
-	root_alloc_size = alloc_size;
-	if (par3_ctx->root_packet == NULL){
-		par3_ctx->root_packet = malloc(alloc_size);
-		if (par3_ctx->root_packet == NULL){
-			perror("Failed to allocate memory for Root Packet");
-			return RET_MEMORY_ERROR;
-		}
-	}
-
-	// Number of File Packet may be same as number of input files.
-	// When there are same files in different directories, deduplication detects them.
-	// Deduplication may reduce number of File Packets.
-	file_list = par3_ctx->input_file_list;
-	packet_count = 0;
-	num = par3_ctx->input_file_count;
-	if (num > 0){
-		uint8_t buf_tail[40];
-		uint32_t chunk_index, chunk_num;
-		uint64_t block_size, tail_size, total_size;
-		PAR3_CHUNK_CTX *chunk_p;
-
-		total_packet_size = 0;
-		block_size = par3_ctx->block_size;
-		tmp_p = par3_ctx->file_packet;
-		file_p = par3_ctx->input_file_list;
-		chunk_p = par3_ctx->chunk_list;
-		while (num > 0){
-			// offset of this packet
-			file_p->offset = tmp_p - par3_ctx->file_packet;
-			packet_size = 48;
-			// Remove sub-directories to store name only.
-			name_p = strrchr(file_p->name, '/');
-			if (name_p == NULL){	// There is no sub-directory.
-				name_p = file_p->name;
-			} else {	// When there is sub-directory.
-				name_p++;
-			}
-
-			// length of filename in bytes
-			len = strlen(name_p);
-			memcpy(tmp_p + packet_size, &len, 2);
-			packet_size += 2;
-			// filename
-			memcpy(tmp_p + packet_size, name_p, len);
-			packet_size += len;
-			// hash of the first 16kB of the file
-			memcpy(tmp_p + packet_size, &(file_p->crc), 8);
-			packet_size += 8;
-			// hash of the protected data in the file
-			memcpy(tmp_p + packet_size, file_p->hash, 16);
-			packet_size += 16;
-
-			// number of options
-			option_offset = packet_size;
-			option_num = 0;
-			packet_size += 1;
-			// UNIX Permissions Packet
-			if (par3_ctx->file_system & 3){
-				if (make_unix_permission_packet(par3_ctx, file_p->name, tmp_p + packet_size) == 0){
-					option_num++;
-					packet_size += 16;
-				}
-			}
-			// FAT Permissions Packet
-			if (par3_ctx->file_system & 0x10000){
-				if (make_fat_permission_packet(par3_ctx, file_p->name, tmp_p + packet_size) == 0){
-					option_num++;
-					packet_size += 16;
-				}
-			}
-			tmp_p[option_offset] = option_num;	// Value is saved in 1-byte.
-
-			if (file_p->size > 0){	// chunk descriptions
-				total_size = 0;
-				chunk_index = file_p->chunk;
-				chunk_num = file_p->chunk_num;
-				while (chunk_num > 0){
-					// If the first field is zero, it means Unprotected Chunk Description.
-					if (chunk_p[chunk_index].size == 0){	// Unprotected Chunk Description
-						file_p->state |= 0x80000000;
-						// zeros
-						memset(tmp_p + packet_size, 0, 8);
-						packet_size += 8;
-						// length of chunk
-						total_size += chunk_p[chunk_index].block;
-						memcpy(tmp_p + packet_size, &(chunk_p[chunk_index].block), 8);
-						packet_size += 8;
-
-					} else {	// Protected Chunk Description
-						// length of protected chunk
-						total_size += chunk_p[chunk_index].size;
-						memcpy(tmp_p + packet_size, &(chunk_p[chunk_index].size), 8);
-						packet_size += 8;
-						if (chunk_p[chunk_index].size >= block_size){
-							// index of first input block holding chunk
-							memcpy(tmp_p + packet_size, &(chunk_p[chunk_index].block), 8);
-							packet_size += 8;
-							//printf("chunk[%2u], block[%2"PRIu64"], %s\n", chunk_index, chunk_p[chunk_index].index, file_p->name);
-						}
-						tail_size = chunk_p[chunk_index].size % block_size;
-						if (tail_size >= 40){
-							// hash of first 40 bytes of tail
-							memcpy(tmp_p + packet_size, &(chunk_p[chunk_index].tail_crc), 8);
-							packet_size += 8;
-							// hash of all of tail
-							memcpy(tmp_p + packet_size, chunk_p[chunk_index].tail_hash, 16);
-							packet_size += 16;
-							// index of block holding tail
-							memcpy(tmp_p + packet_size, &(chunk_p[chunk_index].tail_block), 8);
-							packet_size += 8;
-							// offset of tail inside block
-							memcpy(tmp_p + packet_size, &(chunk_p[chunk_index].tail_offset), 8);
-							packet_size += 8;
-						} else if (tail_size > 0){
-							memcpy(buf_tail, &(chunk_p[chunk_index].tail_crc), 8);
-							memcpy(buf_tail + 8, chunk_p[chunk_index].tail_hash, 16);
-							memcpy(buf_tail + 24, &(chunk_p[chunk_index].tail_block), 8);
-							memcpy(buf_tail + 32, &(chunk_p[chunk_index].tail_offset), 8);
-							// tail's contents
-							memcpy(tmp_p + packet_size, buf_tail, tail_size);
-							packet_size += tail_size;
-						}
-					}
-
-					chunk_index++;	// goto next chunk
-					chunk_num--;
-				}
-
-				// When all chunks are protected, check total size of chunks.
-				if ( ((file_p->state & 0x80000000) == 0) && (total_size != file_p->size) ){
-					printf("Error: total size of chunks = %"PRIu64", file size = %"PRIu64"\n", total_size, file_p->size);
-					return RET_LOGIC_ERROR;
-				}
-			}
-
-			// packet header
-			make_packet_header(tmp_p, packet_size, par3_ctx->set_id, "PAR FIL\0", 1);
-			// Copy checksum of packet for Directory & Root Packet
-			memcpy(file_p->chk, tmp_p + 8, 16);
-
-			// Checksum of packet for empty files with same filename may be same.
-			// If there is a same checksum already, erase the later duplicated packet.
-			max = par3_ctx->input_file_count - num;
-			for (i = 0; i < max; i++){
-				if ( (file_p->chk[0] == file_list[i].chk[0]) && (file_p->chk[1] == file_list[i].chk[1]) ){
-					//printf("find duplicated File Packet ! %u and %u\n", i, max);
-					//printf("offset %"PRId64" and %"PRId64"\n", file_list[i].offset, file_p->offset);
-					file_p->offset = file_list[i].offset;
-					break;
-				}
-			}
-			if (i == max){
-				packet_count++;
-				tmp_p += packet_size;
-				total_packet_size += packet_size;
-			}
-
-			file_p++;
-			num--;
-		}
-
-		if (total_packet_size < file_alloc_size){	// Reduce memory usage to used size.
-			tmp_p = realloc(par3_ctx->file_packet, total_packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for File Packet");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->file_packet = tmp_p;
-		}
-		par3_ctx->file_packet_size = total_packet_size;
-		par3_ctx->file_packet_count = packet_count;
-		if (par3_ctx->noise_level >= 1){
-			printf("Total size of File Packet = %zu (count = %u / %u)\n", total_packet_size, packet_count, par3_ctx->input_file_count);
-		}
-	}
-
-	// Allocate buffer for children's checksums.
-	alloc_size = packet_count + par3_ctx->input_dir_count;
-	if (alloc_size < 1)
-		alloc_size = 1;
-	//printf("Possible number of File Packet and Diretory Packet = %zu\n", alloc_size);
-	alloc_size *= 16;	// size of total checksums
-	chk_p = malloc(alloc_size);
-
-	// Number of Directory Packet may be same as number of input directories.
-	// When there are same empty folder in different directories, there are less packets.
-	dir_list = par3_ctx->input_dir_list;
-	num = par3_ctx->input_dir_count;
-	if (num + absolute_num > 0){
-		total_packet_size = 0;
-		packet_count = 0;
-		tmp_p = par3_ctx->dir_packet;
-		dir_p = par3_ctx->input_dir_list;
-		while (num > 0){
-			// offset of this packet
-			dir_p->offset = tmp_p - par3_ctx->dir_packet;
-			packet_size = 48;
-			// Remove sub-directories to store name only.
-			name_p = strrchr(dir_p->name, '/');
-			if (name_p == NULL){	// There is no sub-directory.
-				name_p = dir_p->name;
-			} else {	// When there is sub-directory.
-				name_p++;
-			}
-
-			// length of string in bytes
-			len = strlen(name_p);
-			memcpy(tmp_p + packet_size, &len, 2);
-			packet_size += 2;
-			// name of directory
-			memcpy(tmp_p + packet_size, name_p, len);
-			packet_size += len;
-
-			// number of options
-			option_offset = packet_size;
-			option_num = 0;
-			packet_size += 4;
-			// UNIX Permissions Packet
-			if ( ((par3_ctx->file_system & 4) != 0) && ((par3_ctx->file_system & 3) != 0) ){
-				if (make_unix_permission_packet(par3_ctx, dir_p->name, tmp_p + packet_size) == 0){
-					option_num++;
-					packet_size += 16;
-				}
-			}
-			memcpy(tmp_p + option_offset, &option_num, 4);	// Value is saved in 4-bytes.
-
-			// Search children files
-			//printf("search children of \"%s\"\n", dir_p->name);
-			len = strlen(dir_p->name);
-			alloc_size = 0;
-			max = par3_ctx->input_file_count;
-			for (i = 0; i < max; i++){
-				if ((file_list[i].name[len] == '/') && (strncmp(dir_p->name, file_list[i].name, len) == 0)){
-					if (strchr(file_list[i].name + len + 1, '/') == NULL){
-						//printf("find child F[%2u] \"%s\"\n", i, file_list[i].name);
-						memcpy(chk_p + alloc_size, file_list[i].chk, 16);
-						alloc_size += 16;
-					}
-				}
-			}
-			// Search children directories
-			max = par3_ctx->input_dir_count - num;
-			for (i = 0; i < max; i++){
-				if ((dir_list[i].name[len] == '/') && (strncmp(dir_p->name, dir_list[i].name, len) == 0)){
-					if (strchr(dir_list[i].name + len + 1, '/') == NULL){
-						//printf("find child D[%2u] \"%s\"\n", i, dir_list[i].name);
-						memcpy(chk_p + alloc_size, dir_list[i].chk, 16);
-						alloc_size += 16;
-					}
-				}
-			}
-			//printf("found children of \"%s\" = %zu\n", dir_p->name, alloc_size / 16);
-			if (alloc_size > 16){
-				// quick sort
-				qsort( (void *)chk_p, alloc_size / 16, 16, compare_checksum );
-			}
-
-			// checksums of File and Directory packets
-			memcpy(tmp_p + packet_size, chk_p, alloc_size);
-			packet_size += alloc_size;
-
-			// packet header
-			make_packet_header(tmp_p, packet_size, par3_ctx->set_id, "PAR DIR\0", 1);
-			// Copy checksum of packet for Directory & Root Packet
-			memcpy(dir_p->chk, tmp_p + 8, 16);
-
-			// Checksum of packet for empty files with same filename may be same.
-			// If there is a same checksum already, erase the later duplicated packet.
-			max = par3_ctx->input_dir_count - num;
-			for (i = 0; i < max; i++){
-				if ( (dir_p->chk[0] == dir_list[i].chk[0]) && (dir_p->chk[1] == dir_list[i].chk[1]) ){
-					//printf("find duplicated Directory Packet ! %u and %u\n", i, max);
-					dir_p->offset = dir_list[i].offset;
-					break;
-				}
-			}
-			if (i == max){
-				packet_count++;
-				tmp_p += packet_size;
-				total_packet_size += packet_size;
-			}
-
-			dir_p++;
-			num--;
-		}
-
-		if (absolute_num > 0){	// Add parts of absolute path
-			name_p = strrchr(par3_ctx->base_path, '/');
-			if (name_p != NULL){
-				name_p++;
-
-				packet_size = 48;
-				// length of string in bytes
-				len = strlen(name_p);
-				memcpy(tmp_p + packet_size, &len, 2);
-				packet_size += 2;
-				// name of directory
-				memcpy(tmp_p + packet_size, name_p, len);
-				packet_size += len;
-				// number of options
-				memset(tmp_p + packet_size, 0, 4);
-				packet_size += 4;
-				// Directories of base path don't store File System Specific Packets.
-				// Changing property of parent directories will be a security risk.
-
-				// Search children files (similar to Root Packet's children)
-				//printf("search base path's children\n");
-				alloc_size = 0;
-				max = par3_ctx->input_file_count;
-				for (i = 0; i < max; i++){
-					if (strchr(file_list[i].name, '/') == NULL){
-						//printf("find child F[%2u] \"%s\"\n", i, file_list[i].name);
-						memcpy(chk_p + alloc_size, file_list[i].chk, 16);
-						alloc_size += 16;
-					}
-				}
-				// Search children directories
-				max = par3_ctx->input_dir_count;
-				for (i = 0; i < max; i++){
-					if (strchr(dir_list[i].name, '/') == NULL){
-						//printf("find child D[%2u] \"%s\"\n", i, dir_list[i].name);
-						memcpy(chk_p + alloc_size, dir_list[i].chk, 16);
-						alloc_size += 16;
-					}
-				}
-				//printf("found base path's children = %zu\n", alloc_size / 16);
-				if (alloc_size > 16){
-					// quick sort
-					qsort( (void *)chk_p, alloc_size / 16, 16, compare_checksum );
-				}
-
-				// checksums of File and Directory packets
-				memcpy(tmp_p + packet_size, chk_p, alloc_size);
-				packet_size += alloc_size;
-
-				// packet header
-				make_packet_header(tmp_p, packet_size, par3_ctx->set_id, "PAR DIR\0", 1);
-				// Copy checksum of packet for Directory & Root Packet
-				memcpy(chk_p, tmp_p + 8, 16);
-
-				packet_count++;
-				tmp_p += packet_size;
-				total_packet_size += packet_size;
-
-				name_p--;
-			}
-
-			// check other directory marks
-			while (name_p >= par3_ctx->base_path){
-				if ( (name_p[0] == '/') && (name_p > par3_ctx->base_path) )
-					name_p--;
-
-				// find next directory mark
-				while ( (name_p[0] != '/') && (name_p > par3_ctx->base_path) )
-					name_p--;
-				if (name_p[0] == '/')
-					name_p++;
-
-				if (par3_ctx->absolute_path != 'A'){
-					// don't include drive letter on Windows OS
-					if ( (name_p[1] == ':') && (name_p[2] == '/') )
-						break;
-				}
-
-				packet_size = 48;
-				// length of string in bytes
-				len = 0;
-				while (name_p[len] != '/')
-					len++;
-				memcpy(tmp_p + packet_size, &len, 2);
-				packet_size += 2;
-				// name of directory
-				memcpy(tmp_p + packet_size, name_p, len);
-				packet_size += len;
-				// number of options
-				memset(tmp_p + packet_size, 0, 4);
-				packet_size += 4;
-
-				// checksums of Directory packet (sub directory is only one.)
-				memcpy(tmp_p + packet_size, chk_p, 16);
-				packet_size += 16;
-
-				// packet header
-				make_packet_header(tmp_p, packet_size, par3_ctx->set_id, "PAR DIR\0", 1);
-				// Copy checksum of packet for Directory & Root Packet
-				memcpy(chk_p, tmp_p + 8, 16);
-
-				packet_count++;
-				tmp_p += packet_size;
-				total_packet_size += packet_size;
-
-				if (name_p > par3_ctx->base_path){
-					if (name_p[-1] == '/')
-						name_p--;
-				}
-				if (name_p <= par3_ctx->base_path)
-					break;
-			}
-		}
-
-		if (total_packet_size < dir_alloc_size){	// Reduce memory usage to used size.
-			tmp_p = realloc(par3_ctx->dir_packet, total_packet_size);
-			if (tmp_p == NULL){
-				perror("Failed to re-allocate memory for Directory Packet");
-				free(chk_p);
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->dir_packet = tmp_p;
-		}
-		par3_ctx->dir_packet_size = total_packet_size;
-		par3_ctx->dir_packet_count = packet_count;
-		if (par3_ctx->noise_level >= 1){
-			printf("Total size of Directory Packet = %zu (count = %u / %u)\n", total_packet_size, packet_count, par3_ctx->input_dir_count);
-		}
-	}
-
-	// Root Packet
-	tmp_p = par3_ctx->root_packet;
-	packet_count = 0;
-	packet_size = 48;
-	// Lowest unused index for input blocks.
-	memcpy(tmp_p + packet_size, &(par3_ctx->block_count), 8);
-	packet_size += 8;
-	// attributes
-	tmp_p[packet_size] = par3_ctx->attribute;
-	packet_size += 1;
-	// number of options
-	memset(tmp_p + packet_size, 0, 4);
-	packet_size += 4;
-	// This doesn't support packets for options yet.
-
-	if (absolute_num > 0){	// Add parts of absolute path
-		alloc_size = 16;
-	} else {
-		// Search children files
-		//printf("search root's children\n");
-		alloc_size = 0;
-		max = par3_ctx->input_file_count;
-		for (i = 0; i < max; i++){
-			if (strchr(file_list[i].name, '/') == NULL){
-				//printf("find child F[%2u] \"%s\"\n", i, file_list[i].name);
-				memcpy(chk_p + alloc_size, file_list[i].chk, 16);
-				alloc_size += 16;
-			}
-		}
-		// Search children directories
-		max = par3_ctx->input_dir_count;
-		for (i = 0; i < max; i++){
-			if (strchr(dir_list[i].name, '/') == NULL){
-				//printf("find child D[%2u] \"%s\"\n", i, dir_list[i].name);
-				memcpy(chk_p + alloc_size, dir_list[i].chk, 16);
-				alloc_size += 16;
-			}
-		}
-		//printf("found root's children = %zu\n", alloc_size / 16);
-		if (alloc_size > 16){
-			// quick sort
-			qsort( (void *)chk_p, alloc_size / 16, 16, compare_checksum );
-		}
-	}
-	// checksums of File and Directory packets
-	memcpy(tmp_p + packet_size, chk_p, alloc_size);
-	packet_size += alloc_size;
-
-	// packet header
-	make_packet_header(tmp_p, packet_size, par3_ctx->set_id, "PAR ROO\0", 1);
-
-	if (packet_size < root_alloc_size){	// Reduce memory usage to used size.
-		tmp_p = realloc(par3_ctx->root_packet, packet_size);
-		if (tmp_p == NULL){
-			perror("Failed to re-allocate memory for Root Packet");
-			free(chk_p);
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->root_packet = tmp_p;
-	}
-	par3_ctx->root_packet_size = packet_size;
-	par3_ctx->root_packet_count = 1;
-	if (par3_ctx->noise_level >= 1){
-		printf("Size of Root Packet = %zu (children = %zu)\n", packet_size, alloc_size / 16);
-	}
-	free(chk_p);
-
-	if (par3_ctx->file_system & 0x10003){	// UNIX Permissions Packet or FAT Permissions Packet
-		if (par3_ctx->file_system_packet_size == 0){
-			free(par3_ctx->file_system_packet);
-			par3_ctx->file_system_packet = NULL;
-		} else {
-			if (par3_ctx->file_system_packet_size < file_system_alloc_size){	// Reduce memory usage to used size.
-				tmp_p = realloc(par3_ctx->file_system_packet, par3_ctx->file_system_packet_size);
-				if (tmp_p == NULL){
-					perror("Failed to re-allocate memory for File System Packet");
-					return RET_MEMORY_ERROR;
-				}
-				par3_ctx->file_system_packet = tmp_p;
-			}
-			if (par3_ctx->noise_level >= 1){
-				printf("Size of File System Packet = %zu (count = %u)\n", par3_ctx->file_system_packet_size, par3_ctx->file_system_packet_count);
-			}
-		}
-	}
-
-	return 0;
-}
-
-// External Data Packet
-int make_ext_data_packet(PAR3_CTX *par3_ctx)
-{
-	uint8_t *tmp_p;
-	size_t write_packet_count, packet_size;
-	int64_t find_block_count;	// use sign for flag
-	uint64_t block_count, block_size;
-	PAR3_BLOCK_CTX *block_p;
-
-	// When there is no input blocks, just exit.
-	if (par3_ctx->block_count == 0)
-		return 0;
-
-	// When there is no packet yet, error exit.
-	if (par3_ctx->start_packet_size == 0)
-		return RET_LOGIC_ERROR;
-
-	// When there is packet already, just exit.
-	if (par3_ctx->ext_data_packet_size > 0)
-		return 0;
-
-	// Count how many packets to make.
-	block_count = par3_ctx->block_count;
-	block_size = par3_ctx->block_size;
-	block_p = par3_ctx->block_list;
-	//printf("Number of input blocks = %"PRIu64"\n", block_count);
-	find_block_count = 0;
-	write_packet_count = 0;
-	while (block_count > 0){
-		if (block_p->state & 1){	// block of full size data
-			if (find_block_count < 0)
-				find_block_count *= -1;
-			find_block_count++;
-		} else {	// block of chunk tail
-			if (find_block_count > 0){	// after full block
-				find_block_count *= -1;
-				write_packet_count++;
-			}
-		}
-
-		block_count--;
-		block_p++;
-	}
-	if (find_block_count > 0){	// after full block
-		write_packet_count++;
-	} else {
-		find_block_count *= -1;
-	}
-
-	// If there is no full size blocks, checksums are saved in File Packets.
-	if (par3_ctx->noise_level >= 2){
-		printf("Number of External Data Packet = %zu (number of full size blocks = %"PRId64")\n", write_packet_count, find_block_count);
-	}
-	if (write_packet_count == 0)
-		return 0;
-
-	// Calculate total size of packets
-	packet_size = write_packet_count * (48 + 8);	// packet header (48-bytes) + index (8-bytes)
-	packet_size += find_block_count * 24;	// CRC-64 + BLAKE3 (24-bytes) per full size blocks
-	if (par3_ctx->noise_level >= 1){
-		printf("Total size of External Data Packet = %zu\n", packet_size);
-	}
-	if (par3_ctx->ext_data_packet == NULL){
-		par3_ctx->ext_data_packet = malloc(packet_size);
-		if (par3_ctx->ext_data_packet == NULL){
-			perror("Failed to allocate memory for External Data Packet");
-			return RET_MEMORY_ERROR;
-		}
-	}
-	par3_ctx->ext_data_packet_size = packet_size;
-	par3_ctx->ext_data_packet_count = (uint32_t)write_packet_count;
-
-	// Copy checksums
-	block_count = par3_ctx->block_count;
-	block_p = par3_ctx->block_list;
-	tmp_p = par3_ctx->ext_data_packet;
-	find_block_count = 0;
-	write_packet_count = 0;
-	while (block_count > 0){
-		if (block_p->state & 1){	// block of full size data
-			if (write_packet_count == 0){
-				tmp_p += 48;	// skip packet header
-				memcpy(tmp_p, &find_block_count, 8);	// Index of the first input block
-				tmp_p += 8;
-			}
-			memcpy(tmp_p, &(block_p->crc), 8);	// rolling hash
-			tmp_p += 8;
-			memcpy(tmp_p, block_p->hash, 16);	// 16-byte fingerprint hash
-			tmp_p += 16;
-			write_packet_count++;
-		} else {	// block of chunk tail
-			if (write_packet_count > 0){	// after full block
-				// make packet header
-				packet_size = 48 + 8 + write_packet_count * 24;
-				make_packet_header(tmp_p - packet_size, packet_size, par3_ctx->set_id, "PAR EXT\0", 1);
-				write_packet_count = 0;
-			}
-		}
-
-		find_block_count++;
-		block_count--;
-		block_p++;
-	}
-	if (write_packet_count > 0){	// after full block
-		// make packet header
-		packet_size = 48 + 8 + write_packet_count * 24;
-		make_packet_header(tmp_p - packet_size, packet_size, par3_ctx->set_id, "PAR EXT\0", 1);
-	}
-
-	return 0;
-}
-
-// Duplicate common packets between PAR3 files
-int duplicate_common_packet(PAR3_CTX *par3_ctx)
-{
-	uint8_t *tmp_p;
-	size_t packet_size, packet_count;
-
-	// When there is no packet yet, error exit.
-	if (par3_ctx->start_packet_size == 0)
-		return RET_LOGIC_ERROR;
-
-	// When there are packets already, just exit.
-	if (par3_ctx->common_packet_size > 0)
-		return 0;
-
-	// Creator Packet and Comment Packet are not repeated.
-	// Other important optional packets may be included in future.
-	packet_size = par3_ctx->start_packet_size + par3_ctx->matrix_packet_size
-			+ par3_ctx->file_packet_size + par3_ctx->dir_packet_size + par3_ctx->root_packet_size
-			+ par3_ctx->ext_data_packet_size + par3_ctx->file_system_packet_size;
-
-	if (par3_ctx->common_packet == NULL){
-		par3_ctx->common_packet = malloc(packet_size);
-		if (par3_ctx->common_packet == NULL){
-			perror("Failed to allocate memory for duplicated packets");
-			return RET_MEMORY_ERROR;
-		}
-	}
-
-	// Copy packets
-	tmp_p = par3_ctx->common_packet;
-	memcpy(tmp_p, par3_ctx->start_packet, par3_ctx->start_packet_size);
-	tmp_p += par3_ctx->start_packet_size;
-	packet_count = 1;
-	if (par3_ctx->matrix_packet_size > 0){
-		memcpy(tmp_p, par3_ctx->matrix_packet, par3_ctx->matrix_packet_size);
-		tmp_p += par3_ctx->matrix_packet_size;
-		packet_count += par3_ctx->matrix_packet_count;
-	}
-	if (par3_ctx->file_packet_size > 0){
-		memcpy(tmp_p, par3_ctx->file_packet, par3_ctx->file_packet_size);
-		tmp_p += par3_ctx->file_packet_size;
-		packet_count += par3_ctx->file_packet_count;
-	}
-	if (par3_ctx->dir_packet_size > 0){
-		memcpy(tmp_p, par3_ctx->dir_packet, par3_ctx->dir_packet_size);
-		tmp_p += par3_ctx->dir_packet_size;
-		packet_count += par3_ctx->dir_packet_count;
-	}
-	// Root Packet exists always.
-	memcpy(tmp_p, par3_ctx->root_packet, par3_ctx->root_packet_size);
-	tmp_p += par3_ctx->root_packet_size;
-	packet_count++;
-	if (par3_ctx->ext_data_packet_size > 0){
-		memcpy(tmp_p, par3_ctx->ext_data_packet, par3_ctx->ext_data_packet_size);
-		tmp_p += par3_ctx->ext_data_packet_size;
-		packet_count += par3_ctx->ext_data_packet_count;
-	}
-	// Optional packets
-	if (par3_ctx->file_system_packet_size > 0){
-		memcpy(tmp_p, par3_ctx->file_system_packet, par3_ctx->file_system_packet_size);
-		tmp_p += par3_ctx->file_system_packet_size;
-		packet_count += par3_ctx->file_system_packet_count;
-	}
-
-	par3_ctx->common_packet_size = packet_size;
-	par3_ctx->common_packet_count = packet_count;
-
-	if (par3_ctx->noise_level >= 1){
-		printf("\nTotal size of common packets = %zu (count = %zu)\n", packet_size, packet_count);
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/packet_parse.c b/windows/src/packet_parse.c
deleted file mode 100644
index d418648..0000000
--- a/windows/src/packet_parse.c
+++ /dev/null
@@ -1,780 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-#include "common.h"
-
-
-// Count number of chunk descriptions.
-static int count_chunk_description(PAR3_CTX *par3_ctx, uint8_t *chunk, size_t description_size)
-{
-	size_t offset;
-	uint64_t block_size, chunk_size, tail_size;
-
-	block_size = par3_ctx->block_size;
-
-	offset = 0;
-	while (offset < description_size){
-		par3_ctx->chunk_count++;
-
-		memcpy(&chunk_size, chunk + offset, 8);
-		offset += 8;	// length of chunk
-		if (chunk_size == 0){	// zeros if not protected
-			offset += 8;
-		} else {
-			if (block_size == 0){
-				printf("Block size must be larger than 0 for chunk.\n");
-				return RET_LOGIC_ERROR;
-			}
-			if (chunk_size >= block_size){
-				offset += 8;	// index of first input block holding chunk
-			}
-			tail_size = chunk_size % block_size;
-			if (tail_size < 40){
-				offset += tail_size;	// tail is 1 ~ 39.
-			} else {
-				offset += 40;
-			}
-		}
-	}
-	if (offset != description_size){
-		printf("Size of chunk description is wrong, %zu\n", description_size);
-		return RET_LOGIC_ERROR;
-	}
-
-	return 0;
-}
-
-// Read packets and count number of files and directories.
-// Check error or missing data, too.
-static int count_directory_tree(PAR3_CTX *par3_ctx, uint8_t *checksum, size_t checksum_size, size_t dir_len)
-{
-	uint8_t *file_packet, *dir_packet;
-	int ret, flag_find;
-	uint32_t num;
-	size_t checksum_offset, len, offset;
-	size_t file_packet_size, dir_packet_size, packet_size, packet_offset;
-
-	if ( (checksum_size == 0) || (checksum_size & 15) ){
-		printf("Size of checksums for children is wrong, %zu\n", checksum_size);
-		return RET_LOGIC_ERROR;
-	}
-
-	file_packet = par3_ctx->file_packet;
-	file_packet_size = par3_ctx->file_packet_size;
-	dir_packet = par3_ctx->dir_packet;
-	dir_packet_size = par3_ctx->dir_packet_size;
-
-	checksum_offset = 0;
-	while (checksum_offset < checksum_size){
-		flag_find = 0;
-		if (file_packet_size > 0){
-			packet_offset = 0;
-			while (packet_offset < file_packet_size){
-				memcpy(&packet_size, file_packet + packet_offset + 24, 8);
-				if (packet_size >= 60){
-					if (memcmp(checksum + checksum_offset, file_packet + packet_offset + 8, 16) == 0){
-						flag_find = 1;
-						par3_ctx->input_file_count++;
-
-						// file name
-						offset = 48;
-						len = 0;
-						memcpy(&len, file_packet + packet_offset + offset, 2);	// length of string in bytes
-						if (len == 0){
-							printf("file name is too short.\n");
-							return RET_LOGIC_ERROR;
-						} else if (dir_len + len >= _MAX_PATH){
-							printf("Input file's path is too long.\n");
-							return RET_LOGIC_ERROR;
-						}
-						par3_ctx->input_file_name_max += dir_len + len + 1;
-
-						// options
-						offset += 2 + len + 8 + 16;
-						num = 0;
-						memcpy(&num, file_packet + packet_offset + offset, 1);	// number of options
-						//printf("number of options = %u\n", num);
-
-						// chunk descriptions
-						offset += 1 + 16 * num;
-						if (offset < packet_size){
-							ret = count_chunk_description(par3_ctx, file_packet + packet_offset + offset, packet_size - offset);
-							if (ret != 0)
-								return ret;
-						} else if (offset > packet_size){	// Either length of name or number of options is wrong.
-							printf("File Packet data is wrong.\n");
-							return RET_LOGIC_ERROR;
-						}
-						break;
-					}
-				}
-				packet_offset += packet_size;
-			}
-		}
-		if ( (flag_find == 0) && (dir_packet_size > 0) ){
-			packet_offset = 0;
-			while (packet_offset < dir_packet_size){
-				memcpy(&packet_size, dir_packet + packet_offset + 24, 8);
-				if (packet_size >= 55){
-					if (memcmp(checksum + checksum_offset, dir_packet + packet_offset + 8, 16) == 0){
-						flag_find = 1;
-						par3_ctx->input_dir_count++;
-
-						// directory name
-						offset = 48;
-						len = 0;
-						memcpy(&len, dir_packet + packet_offset + offset, 2);	// length of string in bytes
-						if (len == 0){
-							printf("directory name is too short.\n");
-							return RET_LOGIC_ERROR;
-						} else if (dir_len + len >= _MAX_PATH){
-							printf("Input directory's path is too long.\n");
-							return RET_LOGIC_ERROR;
-						}
-						// PAR3 file's absolute path is enabled, only when a user set option.
-						if ( (dir_len == 0) && ((par3_ctx->attribute & 1) != 0) && (par3_ctx->absolute_path != 0) ){
-							// It doesn't check drive letter at this time.
-							dir_len++;	// add "/" at the top
-						}
-						par3_ctx->input_dir_name_max += dir_len + len + 1;
-
-						// options
-						offset += 2 + len;
-						memcpy(&num, dir_packet + packet_offset + offset, 4);	// number of options
-						offset += 4 + 16 * num;
-						if (offset < packet_size){
-							// goto children
-							ret = count_directory_tree(par3_ctx, dir_packet + packet_offset + offset, packet_size - offset, dir_len + len + 1);
-							if (ret != 0)
-								return ret;
-						} else if (offset > packet_size){	// Either length of name or number of options is wrong.
-							printf("Directory Packet data is wrong.\n");
-							return RET_LOGIC_ERROR;
-						}
-						break;
-					}
-				}
-				packet_offset += packet_size;
-			}
-		}
-		if (flag_find == 0){
-			printf("File Packet or Directory Packet is missing.\n");
-			return RET_INSUFFICIENT_DATA;
-		}
-
-		checksum_offset += 16;
-	}
-
-	return 0;
-}
-
-static int parse_chunk_description(PAR3_CTX *par3_ctx, uint8_t *chunk, size_t description_size)
-{
-	uint8_t buf_tail[40];
-	uint32_t chunk_num;
-	size_t offset;
-	uint64_t block_size, block_count;
-	uint64_t chunk_size, tail_size, file_size;
-	PAR3_CHUNK_CTX *chunk_p;
-	PAR3_FILE_CTX *file_p;
-
-	block_size = par3_ctx->block_size;
-	block_count = par3_ctx->block_count;
-	chunk_p = par3_ctx->chunk_list + par3_ctx->chunk_count;
-	file_p = par3_ctx->input_file_list + par3_ctx->input_file_count;
-
-	chunk_num = 0;
-	file_size = 0;
-	offset = 0;
-	while (offset < description_size){
-		memcpy(&chunk_size, chunk + offset, 8);
-		offset += 8;	// length of chunk
-		chunk_p->size = chunk_size;
-		if (chunk_size == 0){	// zeros if not protected
-			// Unprotected Chunk Description
-			file_p->state |= 0x80000000;
-			memcpy(&(chunk_p->block), chunk + offset, 8);	// length of chunk
-			offset += 8;
-			file_size += chunk_p->block;
-
-		} else {
-			// Protected Chunk Description
-			if (block_size == 0){
-				printf("Block size must be larger than 0 for chunk.\n");
-				return RET_LOGIC_ERROR;
-			}
-			file_size += chunk_size;
-			if (chunk_size >= block_size){
-				memcpy(&(chunk_p->block), chunk + offset, 8);
-				if (chunk_p->block >= block_count){
-					printf("First block of chunk exceeds block count. %"PRIu64"\n", chunk_p->block);
-					return RET_LOGIC_ERROR;
-				}
-				offset += 8;	// index of first input block holding chunk
-			} else {
-				chunk_p->block = 0;
-			}
-			tail_size = chunk_size % block_size;
-			if (tail_size < 40){
-				memcpy(buf_tail, chunk + offset, tail_size);
-				memset(buf_tail + tail_size, 0, 40 - tail_size);
-				offset += tail_size;	// tail is 1 ~ 39.
-			} else {
-				memcpy(buf_tail, chunk + offset, 40);
-				offset += 40;
-			}
-			memcpy(&(chunk_p->tail_crc), buf_tail, 8);
-			memcpy(chunk_p->tail_hash, buf_tail + 8, 16);
-			memcpy(&(chunk_p->tail_block), buf_tail + 24, 8);
-			memcpy(&(chunk_p->tail_offset), buf_tail + 32, 8);
-			if (tail_size >= 40){
-				if (chunk_p->tail_block >= block_count){
-					printf("Tail block of chunk exceeds block count. %"PRIu64"\n", chunk_p->tail_block);
-					return RET_LOGIC_ERROR;
-				}
-			}
-		}
-
-		chunk_num++;
-		par3_ctx->chunk_count++;
-		chunk_p++;
-	}
-	if (offset != description_size){
-		printf("Size of chunk description is wrong, %zu\n", description_size);
-		return RET_LOGIC_ERROR;
-	}
-	file_p->size = file_size;
-	file_p->chunk_num = chunk_num;
-
-	return 0;
-}
-
-// construct directory tree from root to child
-static int construct_directory_tree(PAR3_CTX *par3_ctx, uint8_t *checksum, size_t checksum_size, char *sub_dir)
-{
-	uint8_t *file_packet, *dir_packet;
-	int ret, flag_find;
-	uint32_t num;
-	size_t checksum_offset, dir_len, len, offset;
-	size_t file_packet_size, dir_packet_size, packet_size, packet_offset;
-	PAR3_FILE_CTX *file_p;
-	PAR3_DIR_CTX *dir_p;
-
-	if ( (checksum_size == 0) || (checksum_size & 15) ){
-		printf("Size of checksums for children is wrong, %zu\n", checksum_size);
-		return RET_LOGIC_ERROR;
-	}
-
-	file_packet = par3_ctx->file_packet;
-	file_packet_size = par3_ctx->file_packet_size;
-	dir_packet = par3_ctx->dir_packet;
-	dir_packet_size = par3_ctx->dir_packet_size;
-
-	dir_len = strlen(sub_dir);
-	checksum_offset = 0;
-	while (checksum_offset < checksum_size){
-		flag_find = 0;
-		if (file_packet_size > 0){
-			packet_offset = 0;
-			while (packet_offset < file_packet_size){
-				memcpy(&packet_size, file_packet + packet_offset + 24, 8);
-				if (packet_size >= 60){
-					if (memcmp(checksum + checksum_offset, file_packet + packet_offset + 8, 16) == 0){
-						flag_find = 1;
-						file_p = par3_ctx->input_file_list + par3_ctx->input_file_count;
-						file_p->offset = packet_offset;	// offset of packet
-						memcpy(file_p->chk, file_packet + packet_offset + 8, 16);	// checksum of packet
-
-						// file name
-						offset = 48;
-						len = 0;
-						memcpy(&len, file_packet + packet_offset + offset, 2);
-						if (len == 0){
-							printf("file name is too short.\n");
-							return RET_LOGIC_ERROR;
-						} else if (dir_len + len >= _MAX_PATH){
-							printf("Input file's path is too long.\n");
-							return RET_LOGIC_ERROR;
-						}
-						offset += 2;
-						memcpy(sub_dir + dir_len, file_packet + packet_offset + offset, len);
-						sub_dir[dir_len + len] = 0;
-						if (par3_ctx->noise_level >= 3){
-							printf("input file = \"%s\"\n", sub_dir);
-						}
-						ret = sanitize_file_name(sub_dir + dir_len);
-						if (par3_ctx->noise_level >= 0){
-							if (ret & 1){
-								printf("Warning, file name was sanitized to \"%s\".\n", sub_dir + dir_len);
-							} else if (ret & 2){
-								printf("Warning, file name \"%s\" is bad.\n", sub_dir + dir_len);
-							}
-						}
-
-						// check name in list
-						if (namez_search(par3_ctx->input_file_name, par3_ctx->input_file_name_len, sub_dir) != NULL){
-							printf("There is same file name already. %s\n", sub_dir);
-							return RET_LOGIC_ERROR;
-						}
-
-						// add found filename
-						if (namez_add(&(par3_ctx->input_file_name), &(par3_ctx->input_file_name_len), &(par3_ctx->input_file_name_max), sub_dir) != 0){
-							printf("Failed to add file name. %s\n", sub_dir);
-							return RET_MEMORY_ERROR;
-						}
-						file_p->name = par3_ctx->input_file_name + par3_ctx->input_file_name_len - (dir_len + len + 1);
-
-						// hash of the first 16kB of the file
-						offset += len;
-						memcpy(&(file_p->crc), file_packet + packet_offset + offset, 8);
-
-						// hash of the protected data in the file
-						offset += 8;
-						memcpy(file_p->hash, file_packet + packet_offset + offset, 16);
-
-						// options
-						offset += 16;
-						num = 0;
-						memcpy(&num, file_packet + packet_offset + offset, 1);	// number of options
-
-						// At this time, this doesn't support options yet.
-						//printf("number of options = %u\n", num);
-
-						// chunk descriptions
-						file_p->size = 0;
-						file_p->chunk = par3_ctx->chunk_count;
-						file_p->chunk_num = 0;
-						file_p->state = 0;
-						offset += 1 + 16 * num;
-						if (offset < packet_size){	// When there are chunk descriptions.
-							ret = parse_chunk_description(par3_ctx, file_packet + packet_offset + offset, packet_size - offset);
-							if (ret != 0)
-								return ret;
-						} else if (offset > packet_size){	// Either length of name or number of options is wrong.
-							printf("File Packet data is wrong.\n");
-							return RET_LOGIC_ERROR;
-						}
-						par3_ctx->input_file_count++;
-
-						break;
-					}
-				}
-				packet_offset += packet_size;
-			}
-		}
-		if ( (flag_find == 0) && (dir_packet_size > 0) ){
-			packet_offset = 0;
-			while (packet_offset < dir_packet_size){
-				memcpy(&packet_size, dir_packet + packet_offset + 24, 8);
-				if (packet_size >= 55){
-					if (memcmp(checksum + checksum_offset, dir_packet + packet_offset + 8, 16) == 0){
-						flag_find = 1;
-						dir_p = par3_ctx->input_dir_list + par3_ctx->input_dir_count;
-						dir_p->offset = packet_offset;	// offset of packet
-						memcpy(dir_p->chk, dir_packet + packet_offset + 8, 16);	// checksum of packet
-
-						// directory name
-						offset = 48;
-						len = 0;
-						memcpy(&len, dir_packet + packet_offset + offset, 2);	// length of string in bytes
-						if (len == 0){
-							printf("directory name is too short.\n");
-							return RET_LOGIC_ERROR;
-						} else if (dir_len + len >= _MAX_PATH){
-							printf("Input directory's path is too long.\n");
-							return RET_LOGIC_ERROR;
-						}
-						offset += 2;
-						memcpy(sub_dir + dir_len, dir_packet + packet_offset + offset, len);
-						sub_dir[dir_len + len] = 0;
-						if (par3_ctx->noise_level >= 3){
-							printf("input dir  = \"%s\"\n", sub_dir);
-						}
-						// PAR3 file's absolute path is enabled, only when a user set option.
-						if ( (dir_len == 0) && ((par3_ctx->attribute & 1) != 0) && (par3_ctx->absolute_path != 0) ){
-							if ( (len == 2) && (sub_dir[1] == ':') ){
-								sub_dir[1] = '_';	// replace drive letter mark temporary
-								ret = sanitize_file_name(sub_dir);
-								sub_dir[1] = ':';	// return to original mark
-							} else {
-								ret = sanitize_file_name(sub_dir);
-								memmove(sub_dir + 1, sub_dir, len + 1);	// slide name by including the last null-string
-								sub_dir[0] = '/';
-								dir_len++;	// add "/" at the top
-							}
-						} else {
-							ret = sanitize_file_name(sub_dir + dir_len);
-						}
-						if (par3_ctx->noise_level >= 0){
-							if (ret & 1){
-								printf("Warning, directory name was sanitized to \"%s\".\n", sub_dir + dir_len);
-							} else if (ret & 2){
-								printf("Warning, directory name \"%s\" is bad.\n", sub_dir + dir_len);
-							}
-						}
-
-						// check name in list
-						if (namez_search(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len, sub_dir) != NULL){
-							printf("There is same directory name already. %s\n", sub_dir);
-							return RET_LOGIC_ERROR;
-						}
-
-						// add found name
-						if (namez_add(&(par3_ctx->input_dir_name), &(par3_ctx->input_dir_name_len), &(par3_ctx->input_dir_name_max), sub_dir) != 0){
-							printf("Failed to add directory name. %s\n", sub_dir);
-							return RET_MEMORY_ERROR;
-						}
-						dir_p->name = par3_ctx->input_dir_name + par3_ctx->input_dir_name_len - (dir_len + len + 1);
-						par3_ctx->input_dir_count++;
-
-						// options
-						offset += len;
-						memcpy(&num, dir_packet + packet_offset + offset, 4);	// number of options
-						offset += 4 + 16 * num;
-						if (offset < packet_size){
-							// goto children
-							// Though Windows OS supports both "/" and "\" as directory mark, I use "/" here for compatibility.
-							sub_dir[dir_len + len] = '/';	// directory mark
-							sub_dir[dir_len + len + 1] = 0;
-							ret = construct_directory_tree(par3_ctx, dir_packet + packet_offset + offset, packet_size - offset, sub_dir);
-							if (ret != 0)
-								return ret;
-						} else if (offset > packet_size){	// Either length of name or number of options is wrong.
-							printf("Directory Packet data is wrong.\n");
-							return RET_LOGIC_ERROR;
-						}
-
-						break;
-					}
-				}
-				packet_offset += packet_size;
-			}
-		}
-		if (flag_find == 0){
-			printf("File Packet or Directory Packet is missing.\n");
-			return RET_INSUFFICIENT_DATA;
-		}
-
-		checksum_offset += 16;
-	}
-
-	return 0;
-}
-
-// parse information in packets
-int parse_vital_packet(PAR3_CTX *par3_ctx)
-{
-	char file_path[_MAX_PATH];
-	uint8_t *tmp_p;
-	int ret;
-	uint32_t num;
-	size_t len;
-	uint64_t packet_size;
-
-	if (par3_ctx->noise_level >= 0){
-		// Read and show Creator text
-		if (par3_ctx->creator_packet_size > 0){
-			memcpy(&packet_size, par3_ctx->creator_packet + 24, 8);
-			if (packet_size > 48){
-				len = packet_size - 48;
-				//printf("creator len = %zu\n", len);
-				tmp_p = malloc(len + 1);	// allocate buffer for the last null-string
-				if (tmp_p != NULL){
-					memcpy(tmp_p, par3_ctx->creator_packet + 48, len);
-					tmp_p[len] = 0;
-					trim_text(tmp_p, len);	// Erase return code at the end of text
-					printf("\nCreator text:\n%s\n", tmp_p);
-					free(tmp_p);
-				}
-			}
-		}
-
-		// Read and show Comment text
-		if (par3_ctx->comment_packet_size > 0){
-			memcpy(&packet_size, par3_ctx->comment_packet + 24, 8);
-			if (packet_size > 48){
-				len = packet_size - 48;
-				tmp_p = malloc(len + 1);	// allocate buffer for the last null-string
-				if (tmp_p != NULL){
-					memcpy(tmp_p, par3_ctx->comment_packet + 48, len);
-					tmp_p[len] = 0;
-					trim_text(tmp_p, len);	// Erase return code at the end of text
-					if (strchr(tmp_p, '\n') == NULL){
-						printf("\nComment text: %s\n", tmp_p);
-					} else {
-						printf("\nComment text:\n%s\n", tmp_p);
-					}
-					free(tmp_p);
-				}
-			}
-		}
-	}
-
-	// Read Start Packet
-	if (par3_ctx->start_packet_size > 0){
-		memcpy(&packet_size, par3_ctx->start_packet + 24, 8);
-		len = 48;	// size of packet header
-		if (packet_size >= 89){	// To support old Start Packet for compatibility
-			// This will be removed in future, when PAR3 spec is updated.
-			len += 8;
-			//printf("Start Packet is old, %"PRIu64"\n", packet_size);
-		}
-		if (mem_or8(par3_ctx->start_packet + len) != 0){	// check parent's InputSetID
-			if (mem_or16(par3_ctx->start_packet + len + 8) == 0){	// check parent's Root packet
-				printf("Checksum of the parent's Root Packet is wrong.\n");
-				return RET_LOGIC_ERROR;
-			}
-		}
-		memcpy(&(par3_ctx->block_size), par3_ctx->start_packet + len + 24, 8);
-		memcpy(&(par3_ctx->gf_size), par3_ctx->start_packet + len + 32, 1);
-		if (par3_ctx->gf_size > 2){	// At this time, this supports 8-bit or 16-bit Galois Field only.
-			printf("Size of Galois Field is too large, %u\n", par3_ctx->gf_size);
-			return RET_LOGIC_ERROR;
-		}
-		if ( (par3_ctx->gf_size > 0) && (par3_ctx->gf_size < 4) ){
-			memcpy(&(par3_ctx->galois_poly), par3_ctx->start_packet + len + 33, par3_ctx->gf_size);
-			par3_ctx->galois_poly |= 1 << (par3_ctx->gf_size * 8);
-		}
-		if (packet_size != len + 33 + par3_ctx->gf_size){	// check packet size is valid
-			printf("Start Packet size is wrong, %"PRIu64"\n", packet_size);
-			return RET_LOGIC_ERROR;
-		}
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("\n");
-		printf("Block size = %"PRIu64"\n", par3_ctx->block_size);
-		if (par3_ctx->noise_level >= 1){
-			printf("Galois field size = %u\n", par3_ctx->gf_size);
-			printf("Galois field generator = 0x%X\n", par3_ctx->galois_poly);
-		}
-	}
-
-	// Read Root Packet
-	if ( (par3_ctx->root_packet_size == 0) || (par3_ctx->root_packet_count == 0) ){
-		printf("There is no Root Packet.\n");
-		return RET_INSUFFICIENT_DATA;
-	}
-	if (par3_ctx->root_packet_count > 1){
-		printf("There are multiple different Root Packets.\n");
-		return RET_LOGIC_ERROR;
-	}
-	memcpy(&packet_size, par3_ctx->root_packet + 24, 8);
-	if (packet_size <= 61){
-		printf("Root Packet is too small, %"PRIu64"\n", packet_size);
-		return RET_INSUFFICIENT_DATA;
-	}
-	tmp_p = par3_ctx->root_packet + 48;	// packet body
-	memcpy(&(par3_ctx->block_count), tmp_p, 8);
-	memcpy(&(par3_ctx->attribute), tmp_p + 8, 1);
-	memcpy(&num, tmp_p + 9, 4);	// number of options
-	if (packet_size < 48 + 8 + 1 + 4 + (16 * num)){
-		printf("Root Packet is too small, %"PRIu64"\n", packet_size);
-		return RET_INSUFFICIENT_DATA;
-	}
-	tmp_p += 8 + 1 + 4 + (16 * num);	// skip options at this time
-	len = packet_size - 48 - (8 + 1 + 4) - (16 * num);
-	if ( (len == 0) || (len & 15) ){
-		printf("Size of checksums for children is wrong, %zu\n", len);
-		return RET_LOGIC_ERROR;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Block count = %"PRIu64"\n", par3_ctx->block_count);
-		printf("Root attribute = %u\n", par3_ctx->attribute);
-	}
-
-	// count number of files and directories
-	// total length of file and directory names
-	par3_ctx->chunk_count = 0;
-	par3_ctx->input_file_count = 0;
-	par3_ctx->input_dir_count = 0;
-	par3_ctx->input_file_name_max = 0;
-	par3_ctx->input_dir_name_max = 0;
-	ret = count_directory_tree(par3_ctx, tmp_p, len, 0);
-	if (ret != 0)
-		return ret;
-	if ( (par3_ctx->block_count > 0) && (par3_ctx->chunk_count == 0) ){
-		printf("There is no chunk description.\n");
-		return RET_LOGIC_ERROR;
-	}
-	if (par3_ctx->noise_level >= 0){
-		printf("Number of input file = %u, directory = %u\n", par3_ctx->input_file_count, par3_ctx->input_dir_count);
-		printf("Number of chunk description = %u\n", par3_ctx->chunk_count);
-	}
-	//printf("input_file_name_max = %zu, input_dir_name_max = %zu\n", par3_ctx->input_file_name_max, par3_ctx->input_dir_name_max);
-
-	// allocate memory for file and directory name
-	if (par3_ctx->input_file_name != NULL){
-		free(par3_ctx->input_file_name);
-		par3_ctx->input_file_name = NULL;
-	}
-	if (par3_ctx->input_file_name_max > 0){
-		par3_ctx->input_file_name = malloc(par3_ctx->input_file_name_max);
-		if (par3_ctx->input_file_name == NULL){
-			perror("Failed to allocate memory for file name");
-			return RET_MEMORY_ERROR;
-		}
-	}
-	par3_ctx->input_file_name_len = 0;
-	if (par3_ctx->input_dir_name != NULL){
-		free(par3_ctx->input_dir_name);
-		par3_ctx->input_dir_name = NULL;
-	}
-	if (par3_ctx->input_dir_name_max > 0){
-		par3_ctx->input_dir_name = malloc(par3_ctx->input_dir_name_max);
-		if (par3_ctx->input_dir_name == NULL){
-			perror("Failed to allocate memory for directory name");
-			return RET_MEMORY_ERROR;
-		}
-	}
-	par3_ctx->input_dir_name_len = 0;
-
-	// allocate memory for chunk, file, and directory info
-	if (par3_ctx->chunk_list != NULL){
-		free(par3_ctx->chunk_list);
-		par3_ctx->chunk_list = NULL;
-	}
-	if (par3_ctx->chunk_count > 0){
-		par3_ctx->chunk_list = malloc(sizeof(PAR3_CHUNK_CTX) * par3_ctx->chunk_count);
-		if (par3_ctx->chunk_list == NULL){
-			perror("Failed to allocate memory for chunk description");
-			return RET_MEMORY_ERROR;
-		}
-	}
-	par3_ctx->chunk_count = 0;
-	if (par3_ctx->input_file_list != NULL){
-		free(par3_ctx->input_file_list);
-		par3_ctx->input_file_list = NULL;
-	}
-	if (par3_ctx->input_file_count > 0){
-		par3_ctx->input_file_list = malloc(sizeof(PAR3_FILE_CTX) * par3_ctx->input_file_count);
-		if (par3_ctx->input_file_list == NULL){
-			perror("Failed to allocate memory for input file");
-			return RET_MEMORY_ERROR;
-		}
-	}
-	par3_ctx->input_file_count = 0;
-	if (par3_ctx->input_dir_list != NULL){
-		free(par3_ctx->input_dir_list);
-		par3_ctx->input_dir_list = NULL;
-	}
-	if (par3_ctx->input_dir_count > 0){
-		par3_ctx->input_dir_list = malloc(sizeof(PAR3_DIR_CTX) * par3_ctx->input_dir_count);
-		if (par3_ctx->input_dir_list == NULL){
-			perror("Failed to allocate memory for input directory");
-			return RET_MEMORY_ERROR;
-		}
-	}
-	par3_ctx->input_dir_count = 0;
-
-	// directory tree
-	file_path[0] = 0;
-	ret = construct_directory_tree(par3_ctx, tmp_p, len, file_path);
-	if (ret != 0)
-		return ret;
-	//printf("input_file_name_len = %zu, input_file_name_max = %zu\n", par3_ctx->input_file_name_len, par3_ctx->input_file_name_max);
-	//printf("input_dir_name_len = %zu, input_dir_name_max = %zu\n", par3_ctx->input_dir_name_len, par3_ctx->input_dir_name_max);
-
-
-/*
-	// read OK ?
-	if (par3_ctx->noise_level >= 0){
-		if (par3_ctx->input_file_count > 0){
-			PAR3_FILE_CTX *file_p;
-
-			printf("\nNumber of input file = %u (chunk = %u)\n", par3_ctx->input_file_count, par3_ctx->chunk_count);
-			file_p = par3_ctx->input_file_list;
-			num = par3_ctx->input_file_count;
-			while (num > 0){
-				printf("input file = \"%s\", size = %"PRIu64"\n", file_p->name, file_p->size);
-				//printf("index of file = %u, index of the first chunk = %u\n", par3_ctx->input_file_count, file_p->chunk);
-
-				file_p++;
-				num--;
-			}
-		}
-		if (par3_ctx->input_dir_count > 0){
-			PAR3_DIR_CTX *dir_p;
-
-		printf("\nNumber of input directory = %u\n", par3_ctx->input_dir_count);
-			dir_p = par3_ctx->input_dir_list;
-			num = par3_ctx->input_dir_count;
-			while (num > 0){
-				printf("input dir  = \"%s\"\n", dir_p->name);
-
-				dir_p++;
-				num--;
-			}
-		}
-	}
-	printf("Done\n");
-*/
-
-	return 0;
-}
-
-// parse information in External Data Packets
-int parse_external_data_packet(PAR3_CTX *par3_ctx)
-{
-	uint8_t *tmp_p, *hash;
-	uint32_t num;
-	uint64_t block_count, packet_size, index, count;
-	PAR3_BLOCK_CTX *block_p, *block_list;
-
-	num = par3_ctx->ext_data_packet_count;
-	block_count = par3_ctx->block_count;
-	block_list = par3_ctx->block_list;
-	tmp_p = par3_ctx->ext_data_packet;
-	while (num > 0){
-		memcpy(&packet_size, tmp_p + 24, 8);
-		if (packet_size < 48 + 8 + 24){
-			printf("External Data Packet is too small.\n");
-			return RET_LOGIC_ERROR;
-		}
-		memcpy(&index, tmp_p + 48, 8);	// Index of the first input block
-		hash = tmp_p + 56;
-		count = packet_size - 56;
-		if (count % 24 != 0){
-			printf("External Data Packet for %"PRIu64" is bad.\n", index);
-			return RET_LOGIC_ERROR;
-		}
-		count /= 24;
-		if (index + count > block_count){
-			printf("External Data Packet for %"PRIu64" is too large (%"PRIu64").\n", index, count);
-			return RET_LOGIC_ERROR;
-		}
-
-		// set hash values for blocks
-		block_p = block_list + index;
-		while (count > 0){
-			memcpy(&(block_p->crc), hash, 8);
-			hash += 8;
-			memcpy(block_p->hash, hash, 16);
-			hash += 16;
-			block_p->state |= 64;	// mark of setting checksum for this block
-
-			block_p++;
-			count--;
-		}
-
-		tmp_p += packet_size;
-		num--;
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		// Checksum for full size blocks is required for verification.
-		// But, checking complete file by file's hash may be possible.
-		for (index = 0; index < block_count; index++){
-			//printf("block[%2"PRIu64"] crc = 0x%016"PRIx64"\n", index, block_list[index].crc);
-			if ((block_list[index].state & (1 | 64)) == 1){
-				printf("Warning, checksum of input block[%"PRIu64"] doesn't exist.\n", index);
-			}
-		}
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/par3cmdline.sln b/windows/src/par3cmdline.sln
deleted file mode 100644
index 3e8583b..0000000
--- a/windows/src/par3cmdline.sln
+++ /dev/null
@@ -1,31 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.3.32901.215
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "par3cmdline", "par3cmdline.vcxproj", "{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Debug|x64.ActiveCfg = Debug|x64
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Debug|x64.Build.0 = Debug|x64
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Debug|x86.ActiveCfg = Debug|Win32
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Debug|x86.Build.0 = Debug|Win32
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Release|x64.ActiveCfg = Release|x64
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Release|x64.Build.0 = Release|x64
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Release|x86.ActiveCfg = Release|Win32
-		{FEAA3BBB-6A6E-4EF3-B444-9F13E69A9795}.Release|x86.Build.0 = Release|Win32
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-	GlobalSection(ExtensibilityGlobals) = postSolution
-		SolutionGuid = {F9C6C3D2-B15F-4A66-993D-29CDFBDA05B2}
-	EndGlobalSection
-EndGlobal
diff --git a/windows/src/par3cmdline.vcxproj b/windows/src/par3cmdline.vcxproj
deleted file mode 100644
index e5eae2b..0000000
--- a/windows/src/par3cmdline.vcxproj
+++ /dev/null
@@ -1,218 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <VCProjectVersion>16.0</VCProjectVersion>
-    <Keyword>Win32Proj</Keyword>
-    <ProjectGuid>{feaa3bbb-6a6e-4ef3-b444-9f13e69a9795}</ProjectGuid>
-    <RootNamespace>par3cmdline</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v143</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Label="Shared">
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <GenerateManifest>false</GenerateManifest>
-    <TargetName>par3</TargetName>
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <TargetName>par3</TargetName>
-    <GenerateManifest>false</GenerateManifest>
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>false</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>false</GenerateDebugInformation>
-      <LinkTimeCodeGeneration>UseFastLinkTimeCodeGeneration</LinkTimeCodeGeneration>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClCompile Include="blake3\blake3.c" />
-    <ClCompile Include="blake3\blake3_avx2.c" />
-    <ClCompile Include="blake3\blake3_avx512.c" />
-    <ClCompile Include="blake3\blake3_dispatch.c" />
-    <ClCompile Include="blake3\blake3_portable.c" />
-    <ClCompile Include="blake3\blake3_sse2.c" />
-    <ClCompile Include="blake3\blake3_sse41.c" />
-    <ClCompile Include="block_check.c" />
-    <ClCompile Include="block_create.c" />
-    <ClCompile Include="block_map.c" />
-    <ClCompile Include="block_recover.c" />
-    <ClCompile Include="common.c" />
-    <ClCompile Include="file.c" />
-    <ClCompile Include="galois16.c" />
-    <ClCompile Include="galois8.c" />
-    <ClCompile Include="inside_zip.c" />
-    <ClCompile Include="leopard\leopard.cpp" />
-    <ClCompile Include="leopard\LeopardCommon.cpp" />
-    <ClCompile Include="leopard\LeopardFF16.cpp" />
-    <ClCompile Include="leopard\LeopardFF8.cpp" />
-    <ClCompile Include="libpar3_create.c" />
-    <ClCompile Include="libpar3_extra.c" />
-    <ClCompile Include="libpar3_inside.c" />
-    <ClCompile Include="libpar3_verify.c" />
-    <ClCompile Include="map.c" />
-    <ClCompile Include="map_inside.c" />
-    <ClCompile Include="map_simple.c" />
-    <ClCompile Include="map_slide.c" />
-    <ClCompile Include="hash.c" />
-    <ClCompile Include="libpar3.c" />
-    <ClCompile Include="main.c" />
-    <ClCompile Include="packet_add.c" />
-    <ClCompile Include="packet_make.c" />
-    <ClCompile Include="packet_parse.c" />
-    <ClCompile Include="read.c" />
-    <ClCompile Include="reedsolomon.c" />
-    <ClCompile Include="reedsolomon16.c" />
-    <ClCompile Include="reedsolomon8.c" />
-    <ClCompile Include="repair.c" />
-    <ClCompile Include="verify.c" />
-    <ClCompile Include="verify_check.c" />
-    <ClCompile Include="write_inside.c" />
-    <ClCompile Include="write_trial.c" />
-    <ClCompile Include="write.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="blake3\blake3.h" />
-    <ClInclude Include="blake3\blake3_impl.h" />
-    <ClInclude Include="block.h" />
-    <ClInclude Include="common.h" />
-    <ClInclude Include="file.h" />
-    <ClInclude Include="inside.h" />
-    <ClInclude Include="leopard\leopard.h" />
-    <ClInclude Include="leopard\LeopardCommon.h" />
-    <ClInclude Include="leopard\LeopardFF16.h" />
-    <ClInclude Include="leopard\LeopardFF8.h" />
-    <ClInclude Include="map.h" />
-    <ClInclude Include="hash.h" />
-    <ClInclude Include="libpar3.h" />
-    <ClInclude Include="packet.h" />
-    <ClInclude Include="read.h" />
-    <ClInclude Include="reedsolomon.h" />
-    <ClInclude Include="repair.h" />
-    <ClInclude Include="verify.h" />
-    <ClInclude Include="write.h" />
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/windows/src/par3cmdline.vcxproj.filters b/windows/src/par3cmdline.vcxproj.filters
deleted file mode 100644
index 1b56d05..0000000
--- a/windows/src/par3cmdline.vcxproj.filters
+++ /dev/null
@@ -1,222 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="ソース ファイル">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="ヘッダー ファイル">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
-    </Filter>
-    <Filter Include="リソース ファイル">
-      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
-      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
-    </Filter>
-    <Filter Include="ソース ファイル\blake3">
-      <UniqueIdentifier>{f8e04432-ac2d-4fb7-86c5-ecfd5d89c909}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="ヘッダー ファイル\blake3">
-      <UniqueIdentifier>{4d183ad8-05e1-45ae-983b-837f0437b8e4}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="ソース ファイル\leopard">
-      <UniqueIdentifier>{f560bf0f-2468-4c3e-bf05-7c6caa32666a}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="ヘッダー ファイル\leopard">
-      <UniqueIdentifier>{8e61fd08-3522-4964-a13d-b53bc20a0ab2}</UniqueIdentifier>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="common.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="libpar3.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="main.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="hash.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="map.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="packet_make.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="write.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="map_slide.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="map_simple.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="write_trial.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="read.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="packet_add.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="packet_parse.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="block_map.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="verify.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="verify_check.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="libpar3_create.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="libpar3_verify.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="block_check.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="galois8.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="block_create.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="reedsolomon.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="repair.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="reedsolomon8.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="block_recover.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="galois16.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="reedsolomon16.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="leopard\leopard.cpp">
-      <Filter>ソース ファイル\leopard</Filter>
-    </ClCompile>
-    <ClCompile Include="leopard\LeopardCommon.cpp">
-      <Filter>ソース ファイル\leopard</Filter>
-    </ClCompile>
-    <ClCompile Include="leopard\LeopardFF16.cpp">
-      <Filter>ソース ファイル\leopard</Filter>
-    </ClCompile>
-    <ClCompile Include="leopard\LeopardFF8.cpp">
-      <Filter>ソース ファイル\leopard</Filter>
-    </ClCompile>
-    <ClCompile Include="file.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3_avx2.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3_avx512.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3_dispatch.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3_sse2.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3_sse41.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="blake3\blake3_portable.c">
-      <Filter>ソース ファイル\blake3</Filter>
-    </ClCompile>
-    <ClCompile Include="libpar3_extra.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="libpar3_inside.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="inside_zip.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="map_inside.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-    <ClCompile Include="write_inside.c">
-      <Filter>ソース ファイル</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="libpar3.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="common.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="blake3\blake3.h">
-      <Filter>ヘッダー ファイル\blake3</Filter>
-    </ClInclude>
-    <ClInclude Include="blake3\blake3_impl.h">
-      <Filter>ヘッダー ファイル\blake3</Filter>
-    </ClInclude>
-    <ClInclude Include="hash.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="map.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="packet.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="write.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="read.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="block.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="verify.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="reedsolomon.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="repair.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="leopard\leopard.h">
-      <Filter>ヘッダー ファイル\leopard</Filter>
-    </ClInclude>
-    <ClInclude Include="leopard\LeopardCommon.h">
-      <Filter>ヘッダー ファイル\leopard</Filter>
-    </ClInclude>
-    <ClInclude Include="leopard\LeopardFF16.h">
-      <Filter>ヘッダー ファイル\leopard</Filter>
-    </ClInclude>
-    <ClInclude Include="leopard\LeopardFF8.h">
-      <Filter>ヘッダー ファイル\leopard</Filter>
-    </ClInclude>
-    <ClInclude Include="file.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-    <ClInclude Include="inside.h">
-      <Filter>ヘッダー ファイル</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/windows/src/par3cmdline.vcxproj.user b/windows/src/par3cmdline.vcxproj.user
deleted file mode 100644
index dc63f8a..0000000
--- a/windows/src/par3cmdline.vcxproj.user
+++ /dev/null
@@ -1,6 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <PropertyGroup>
-    <ShowAllFiles>false</ShowAllFiles>
-  </PropertyGroup>
-</Project>
\ No newline at end of file
diff --git a/windows/src/read.c b/windows/src/read.c
deleted file mode 100644
index b9e1695..0000000
--- a/windows/src/read.c
+++ /dev/null
@@ -1,369 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fileno fileno
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <sys/stat.h>
-
-#elif _WIN32
-
-// MSVC headers
-#include <io.h>
-
-#endif
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "common.h"
-#include "hash.h"
-#include "packet.h"
-#include "file.h"
-
-
-int read_packet(PAR3_CTX *par3_ctx)
-{
-	char *namez, packet_type[9];
-	uint8_t *buf, buf_hash[16];
-	int ret;
-	size_t namez_len, namez_off;
-	size_t buf_size, read_size, max, offset;
-	uint64_t file_size, file_offset, packet_size;
-	uint64_t packet_count, new_packet_count;
-	FILE *fp;
-
-	//for debug
-	//par3_ctx->memory_limit = 1024;
-
-	packet_type[8] = 0;	// Set null string.
-
-	// Allocate buffer to keep PAR file.
-	buf_size = par3_ctx->max_file_size;
-	if ( (par3_ctx->memory_limit != 0) && (buf_size > par3_ctx->memory_limit) ){
-		buf_size = par3_ctx->memory_limit;	// multiple of MB
-		// When buffer size is 1 MB, readable minimum packet size becomes 1 MB, too.
-		// So, a user should not set small limit.
-	}
-	if (par3_ctx->noise_level >= 2){
-		printf("buffer size for PAR files = %"PRIu64"\n", buf_size);
-	}
-	buf = malloc(buf_size);
-	if (buf == NULL){
-		perror("Failed to allocate memory for PAR files");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = buf;
-
-	namez = par3_ctx->par_file_name;
-	namez_len = par3_ctx->par_file_name_len;
-	namez_off = 0;
-	while (namez_off < namez_len){
-		if (par3_ctx->noise_level >= -1){
-			printf("Loading \"%s\".\n", namez + namez_off);
-		}
-
-		fp = fopen(namez + namez_off, "rb");
-		if (fp == NULL){
-			printf("Failed to open \"%s\", skip to next file.\n", namez + namez_off);
-			namez_off += strlen(namez + namez_off) + 1;
-			continue;
-		}
-
-		// get file size
-		file_size = _filelengthi64(_fileno(fp));
-
-		// Read file data at first.
-		read_size = buf_size;
-		if (file_size < buf_size)
-			read_size = file_size;
-		//printf("file data = %"PRIu64", read_size = %zu, remain = %zu\n", file_size, read_size, file_size - read_size);
-		if (fread(buf, 1, read_size, fp) != read_size){
-			printf("Failed to read \"%s\", skip to next file.\n", namez + namez_off);
-			namez_off += strlen(namez + namez_off) + 1;
-			fclose(fp);
-			continue;
-		}
-		file_size -= read_size;
-		max = read_size;
-
-		file_offset = 0;
-		packet_count = 0;
-		new_packet_count = 0;
-		offset = 0;
-		while (offset + 48 < max){
-			if (memcmp(buf + offset, "PAR3\0PKT", 8) == 0){	// check Magic sequence
-				// read packet size
-				memcpy(&packet_size, buf + (offset + 24), 8);
-				if (packet_size <= 48){	// If packet is too small, just ignore it.
-					offset += 8;
-					continue;
-				}
-				if (offset + packet_size > buf_size + file_size){	// If not enough data, ignore the packet.
-					offset += 8;
-					continue;
-				}
-				if (packet_size > buf_size){	// If packet is larger than buffer, show error and continue.
-					if (par3_ctx->noise_level >= 1){
-						memcpy(packet_type, buf + (offset + 40), 8);
-						printf("Warning, packet is too large. size = %"PRIu64", type = %s\n", packet_size, packet_type);
-					}
-					offset += 8;
-					continue;
-				}
-				// If packet exceeds buffer, read more bytes.
-				if (offset + packet_size > buf_size){
-					read_size = offset;
-					if (read_size > file_size)
-						read_size = file_size;
-
-					// slide data to top
-					memmove(buf, buf + offset, buf_size - offset);
-					//printf("file data = %"PRIu64", offset = %zu, read_size = %zu, ", file_size, offset, read_size);
-					if (fread(buf + buf_size - offset, 1, read_size, fp) != read_size){
-						printf("Failed to read \"%s\", skip to next file.\n", namez + namez_off);
-						namez_off += strlen(namez + namez_off) + 1;
-						fclose(fp);
-						continue;
-					}
-					file_size -= read_size;
-					max = buf_size - offset + read_size;
-					//printf("remain = %"PRIu64", max = %zu\n", file_size, max);
-					file_offset += offset;
-					offset = 0;
-				}
-
-				// check fingerprint hash of the packet
-				blake3(buf + (offset + 24), packet_size - 24, buf_hash);
-				if (memcmp(buf + (offset + 8), buf_hash, 16) != 0){
-					// If checksum is different, ignore the packet.
-					offset += 8;
-					continue;
-				}
-				packet_count++;
-
-				// read packet type
-				memcpy(packet_type, buf + (offset + 40), 8);
-				if (par3_ctx->noise_level >= 3){
-					printf("offset =%6"PRIu64", size =%5"PRIu64", type = %s\n", file_offset + offset, packet_size, packet_type);
-				}
-
-				// store the found packet
-				ret = add_found_packet(par3_ctx, buf + offset);
-				if (ret == -2){
-					ret = list_found_packet(par3_ctx, buf + offset, namez + namez_off, file_offset + offset);
-				}
-				if (ret > 0){
-					fclose(fp);
-					return ret;
-				} else if (ret == 0){
-					new_packet_count++;
-				}
-
-				offset += packet_size;
-			} else {
-				offset++;
-			}
-
-			if ( (file_size > 0) && (offset + 48 >= max) ){	// read more bytes
-				read_size = offset;
-				if (read_size > file_size)
-					read_size = file_size;
-
-				// slide data to top
-				memmove(buf, buf + offset, buf_size - offset);
-				//printf("file_size = %"PRIu64", offset = %zu, read_size = %zu, ", file_size, offset, read_size);
-				if (fread(buf + buf_size - offset, 1, read_size, fp) != read_size){
-					printf("Failed to read \"%s\", skip to next file.\n", namez + namez_off);
-					namez_off += strlen(namez + namez_off) + 1;
-					fclose(fp);
-					continue;
-				}
-				file_size -= read_size;
-				max = buf_size - offset + read_size;
-				//printf("remain = %"PRIu64", max = %zu\n", file_size, max);
-				file_offset += offset;
-				offset = 0;
-			}
-		}
-
-		if (fclose(fp) != 0){
-			printf("Failed to close \"%s\", skip to next file.\n", namez + namez_off);
-			namez_off += strlen(namez + namez_off) + 1;
-			continue;
-		}
-
-		if (par3_ctx->noise_level >= 0){
-			printf("Loaded %"PRIu64" new packets (found %"PRIu64" packets)\n", new_packet_count, packet_count);
-		}
-
-		namez_off += strlen(namez + namez_off) + 1;
-	}
-	free(buf);
-	par3_ctx->work_buf = NULL;
-
-	if (par3_ctx->noise_level >= 2){
-		printf("\nTotal packet:\n");
-		if (par3_ctx->creator_packet_count > 0)
-			printf("Number of Creator Packet       =%3u (%4"PRId64" bytes)\n", par3_ctx->creator_packet_count, par3_ctx->creator_packet_size);
-		if (par3_ctx->comment_packet_count > 0)
-			printf("Number of Comment Packet       =%3u (%4"PRId64" bytes)\n", par3_ctx->comment_packet_count, par3_ctx->comment_packet_size);
-		if (par3_ctx->start_packet_count > 0)
-			printf("Number of Start Packet         =%3u (%4"PRId64" bytes)\n", par3_ctx->start_packet_count, par3_ctx->start_packet_size);
-		if (par3_ctx->matrix_packet_count > 0)
-			printf("Number of Matrix Packet        =%3u (%4"PRId64" bytes)\n", par3_ctx->matrix_packet_count, par3_ctx->matrix_packet_size);
-		if (par3_ctx->file_packet_count > 0)
-			printf("Number of File Packet          =%3u (%4"PRId64" bytes)\n", par3_ctx->file_packet_count, par3_ctx->file_packet_size);
-		if (par3_ctx->dir_packet_count > 0)
-			printf("Number of Directory Packet     =%3u (%4"PRId64" bytes)\n", par3_ctx->dir_packet_count, par3_ctx->dir_packet_size);
-		if (par3_ctx->root_packet_count > 0)
-			printf("Number of Root Packet          =%3u (%4"PRId64" bytes)\n", par3_ctx->root_packet_count, par3_ctx->root_packet_size);
-		if (par3_ctx->file_system_packet_count > 0)
-			printf("Number of File System Packet   =%3u (%4"PRId64" bytes)\n", par3_ctx->file_system_packet_count, par3_ctx->file_system_packet_size);
-		if (par3_ctx->ext_data_packet_count > 0)
-			printf("Number of External Data Packet =%3u (%4"PRId64" bytes)\n", par3_ctx->ext_data_packet_count, par3_ctx->ext_data_packet_size);
-		if (par3_ctx->data_packet_count > 0)
-			printf("Number of Data Packet          =%3"PRIu64"\n", par3_ctx->data_packet_count);
-		if (par3_ctx->recv_packet_count > 0)
-			printf("Number of Recovery Data Packet =%3"PRIu64"\n", par3_ctx->recv_packet_count);
-	}
-	ret = check_packet_set(par3_ctx);
-	if (ret != 0)
-		return ret;
-
-	return 0;
-}
-
-void show_read_result(PAR3_CTX *par3_ctx, int flag_detail)
-{
-	uint32_t num;
-
-	if (par3_ctx->input_file_count > 0){
-		PAR3_FILE_CTX *file_p;
-
-		num = (uint32_t)namez_maxlen(par3_ctx->input_file_name, par3_ctx->input_file_name_len);
-		if (num < 8)
-			num = 8;
-		printf("\n");
-		if (flag_detail == 0){
-			if (num > 119)
-				num = 119;	// max characters per line
-			printf(" File (%d)\n", par3_ctx->input_file_count);
-			printf(" ");
-		} else if (flag_detail == 1){
-			if (num > 104)
-				num = 104;	// 119 - 15 = 104
-			printf(" Size (Bytes)  File (%d)\n", par3_ctx->input_file_count);
-			printf(" ------------  ");
-		} else {
-			if (num > 71)
-				num = 71;	// 119 - 48 = 71
-			printf(" Size (Bytes)            BLAKE3 Hash            File (%d)\n", par3_ctx->input_file_count);
-			printf(" ------------ --------------------------------  ");
-		}
-		while (num > 0){
-			printf("-");
-			num--;
-		}
-		printf("\n");
-
-		file_p = par3_ctx->input_file_list;
-		num = par3_ctx->input_file_count;
-		while (num > 0){
-			if (flag_detail == 0){
-				printf("\"%s\"\n", file_p->name);
-			} else if (flag_detail == 1){
-				printf("%13"PRIu64" \"%s\"\n", file_p->size, file_p->name);
-			} else {
-				printf("%13"PRIu64" ", file_p->size);
-				printf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x ",
-					file_p->hash[0], file_p->hash[1], file_p->hash[2], file_p->hash[3],
-					file_p->hash[4], file_p->hash[5], file_p->hash[6], file_p->hash[7],
-					file_p->hash[8], file_p->hash[9], file_p->hash[10], file_p->hash[11],
-					file_p->hash[12], file_p->hash[13], file_p->hash[14], file_p->hash[15]);
-				printf("\"%s\"\n", file_p->name);
-
-				if (par3_ctx->file_system & 0x10003){	// UNIX Permissions Packet or FAT Permissions Packet
-					//printf("offset of File Packet = %"PRId64"\n", file_p->offset);
-					read_file_system_option(par3_ctx, 1, file_p->offset);
-				}
-			}
-			//printf("index of file = %u, index of the first chunk = %u\n", par3_ctx->input_file_count, file_p->chunk);
-
-			file_p++;
-			num--;
-		}
-	}
-	if (par3_ctx->input_dir_count > 0){
-		PAR3_DIR_CTX *dir_p;
-
-		num = (uint32_t)namez_maxlen(par3_ctx->input_dir_name, par3_ctx->input_dir_name_len);
-		if (num < 13)
-			num = 13;
-		if (num > 119)
-			num = 119;	// max 120 characters per line
-		printf("\n");
-		printf(" Directory (%d)\n", par3_ctx->input_dir_count);
-		printf(" ");
-		while (num > 0){
-			printf("-");
-			num--;
-		}
-		printf("\n");
-
-		dir_p = par3_ctx->input_dir_list;
-		num = par3_ctx->input_dir_count;
-		while (num > 0){
-			printf("\"%s\"\n", dir_p->name);
-
-			if ( ((par3_ctx->file_system & 4) != 0) && ((par3_ctx->file_system & 3) != 0) ){	// UNIX Permissions Packet
-				//printf("offset of Directory Packet = %"PRId64"\n", dir_p->offset);
-				read_file_system_option(par3_ctx, 2, dir_p->offset);
-			}
-
-			dir_p++;
-			num--;
-		}
-	}
-	printf("\n");
-}
-
-void show_data_size(PAR3_CTX *par3_ctx)
-{
-	uint32_t num;
-	uint64_t max_size, total_size;
-	PAR3_FILE_CTX *file_p;
-
-	if (par3_ctx->input_file_count == 0)
-		return;
-
-	max_size = 0;
-	total_size = 0;
-	file_p = par3_ctx->input_file_list;
-	num = par3_ctx->input_file_count;
-	while (num > 0){
-		total_size += file_p->size;
-		if (max_size < file_p->size)
-			max_size = file_p->size;
-
-		file_p++;
-		num--;
-	}
-
-	par3_ctx->total_file_size = total_size;
-	par3_ctx->max_file_size = max_size;
-
-	printf("Total file size = %"PRIu64"\n", total_size);
-	printf("Max file size = %"PRIu64"\n", max_size);
-}
-
diff --git a/windows/src/read.h b/windows/src/read.h
deleted file mode 100644
index 3243617..0000000
--- a/windows/src/read.h
+++ /dev/null
@@ -1,6 +0,0 @@
-
-int read_packet(PAR3_CTX *par3_ctx);
-
-void show_read_result(PAR3_CTX *par3_ctx, int flag_detail);
-void show_data_size(PAR3_CTX *par3_ctx);
-
diff --git a/windows/src/reedsolomon.c b/windows/src/reedsolomon.c
deleted file mode 100644
index e56749c..0000000
--- a/windows/src/reedsolomon.c
+++ /dev/null
@@ -1,310 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "libpar3.h"
-#include "galois.h"
-#include "hash.h"
-#include "reedsolomon.h"
-
-
-// Create all recovery blocks from one input block.
-void rs_create_one_all(PAR3_CTX *par3_ctx, int x_index)
-{
-	void *gf_table;
-	uint8_t *work_buf, *buf_p;
-	uint8_t gf_size;
-	int first_num, element;
-	int y_index, y_R;
-	int recovery_block_count;
-	size_t region_size;
-
-	recovery_block_count = (int)(par3_ctx->recovery_block_count);
-	first_num = (int)(par3_ctx->first_recovery_block);
-	gf_size = par3_ctx->gf_size;
-	gf_table = par3_ctx->galois_table;
-	work_buf = par3_ctx->work_buf;
-	buf_p = par3_ctx->block_data;
-
-	// For every recovery block
-	region_size = (par3_ctx->block_size + 4 + 3) & ~3;
-	for (y_index = 0; y_index < recovery_block_count; y_index++){
-		// Calculate Matrix elements
-		if (par3_ctx->gf_size == 2){	// 16-bit Galois Field
-			y_R = 65535 - (y_index + first_num);
-			element = gf16_reciprocal(gf_table, x_index ^ y_R);	// inv( x_index ^ y_R )
-
-			// If x_index == 0, just put values.
-			// If x_index > 0, add values on previous values.
-			gf16_region_multiply(gf_table, work_buf, element, region_size, buf_p, x_index);
-
-		} else {	// 8-bit Galois Field
-			y_R = 255 - (y_index + first_num);
-			element = gf8_reciprocal(gf_table, x_index ^ y_R);	// inv( x_index ^ y_R )
-
-			// If x_index == 0, just put values.
-			// If x_index > 0, add values on previous values.
-			gf8_region_multiply(gf_table, work_buf, element, region_size, buf_p, x_index);
-		}
-		//printf("x = %d, R = %d, y_R = %d, element = %d\n", x_index, y_index + first_num, y_R, element);
-
-		buf_p += region_size;
-	}
-}
-
-// Create all recovery blocks from all input blocks.
-void rs_create_all(PAR3_CTX *par3_ctx, size_t region_size, uint64_t progress_total, uint64_t progress_step)
-{
-	void *gf_table;
-	uint8_t *block_data, *input_p, *recv_p;
-	uint8_t gf_size;
-	int first_num, element;
-	int x_index, y_index, y_R;
-	int block_count, recovery_block_count;
-	int progress_old, progress_now;
-	time_t time_old, time_now;
-
-	block_count = (int)(par3_ctx->block_count);
-	recovery_block_count = (int)(par3_ctx->recovery_block_count);
-	first_num = (int)(par3_ctx->first_recovery_block);
-	gf_size = par3_ctx->gf_size;
-	gf_table = par3_ctx->galois_table;
-	block_data = par3_ctx->block_data;
-	recv_p = block_data + region_size * block_count;
-
-	if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 1) ){
-		progress_old = 0;
-		time_old = time(NULL);
-	}
-
-	// For every recovery block
-	for (y_index = 0; y_index < recovery_block_count; y_index++){
-		input_p = block_data;
-
-		// For every input block
-		for (x_index = 0; x_index < block_count; x_index++){
-
-			// Calculate Matrix elements
-			if (par3_ctx->gf_size == 2){	// 16-bit Galois Field
-				y_R = 65535 - (y_index + first_num);
-				element = gf16_reciprocal(gf_table, x_index ^ y_R);	// inv( x_index ^ y_R )
-
-				// If x_index == 0, just put values.
-				// If x_index > 0, add values on previous values.
-				gf16_region_multiply(gf_table, input_p, element, region_size, recv_p, x_index);
-
-			} else {	// 8-bit Galois Field
-				y_R = 255 - (y_index + first_num);
-				element = gf8_reciprocal(gf_table, x_index ^ y_R);	// inv( x_index ^ y_R )
-
-				// If x_index == 0, just put values.
-				// If x_index > 0, add values on previous values.
-				gf8_region_multiply(gf_table, input_p, element, region_size, recv_p, x_index);
-			}
-			//printf("x = %d, R = %d, y_R = %d, element = %d\n", x_index, y_index + first_num, y_R, element);
-
-			input_p += region_size;
-		}
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 1) ){
-			progress_step += block_count;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (int)((progress_step * 1000) / progress_total);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-
-		recv_p += region_size;
-	}
-}
-
-
-// Construct matrix for Cauchy Reed-Solomon, and solve linear equation.
-int rs_compute_matrix(PAR3_CTX *par3_ctx, uint64_t lost_count)
-{
-	int ret;
-	size_t alloc_size, region_size;
-
-	// Only when it uses Reed-Solomon Erasure Codes.
-	if ((par3_ctx->ecc_method & 1) == 0)
-		return RET_LOGIC_ERROR;
-
-	if (par3_ctx->gf_size == 2){	// 16-bit Galois Field
-		par3_ctx->galois_table = gf16_create_table(par3_ctx->galois_poly);
-
-	} else if (par3_ctx->gf_size == 1){	// 8-bit Galois Field
-		par3_ctx->galois_table = gf8_create_table(par3_ctx->galois_poly);
-
-	} else {
-		printf("Galois Field (0x%X) isn't supported.\n", par3_ctx->galois_poly);
-		return RET_LOGIC_ERROR;
-	}
-	if (par3_ctx->galois_table == NULL){
-		printf("Failed to create tables for Galois Field (0x%X)\n", par3_ctx->galois_poly);
-		return RET_MEMORY_ERROR;
-	}
-
-	// Make matrix
-	if (par3_ctx->gf_size == 2){	// 16-bit Reed-Solomon Codes
-		// Either functions should work.
-		// As blocks are more, Gaussian elimination become too slow.
-		//ret = rs16_gaussian_elimination(par3_ctx, (int)lost_count);
-		ret = rs16_invert_matrix_cauchy(par3_ctx, (int)lost_count);
-		if (ret != 0)
-			return ret;
-
-	} else if (par3_ctx->gf_size == 1){	// 8-bit Reed-Solomon Codes
-		// Either functions should work.
-		// Gaussian elimination is enough fast for a few blocks.
-		ret = rs8_gaussian_elimination(par3_ctx, (int)lost_count);
-		//ret = rs8_invert_matrix_cauchy(par3_ctx, (int)lost_count);
-		if (ret != 0)
-			return ret;
-	}
-
-	// Set memory alignment of block data to be 4.
-	// Increase at least 1 byte as checksum.
-	region_size = (par3_ctx->block_size + 4 + 3) & ~3;
-
-	// Limited memory usage
-	alloc_size = region_size * lost_count;
-	if ( (par3_ctx->memory_limit > 0) && (alloc_size > par3_ctx->memory_limit) )
-		return 0;
-
-	// Allocate memory to keep lost blocks
-	par3_ctx->block_data = malloc(alloc_size);
-	//par3_ctx->block_data = NULL;	// For testing another method
-	if (par3_ctx->block_data != NULL){
-		par3_ctx->ecc_method |= 0x8000;	// Keep all lost blocks on memory
-		if (par3_ctx->noise_level >= 2){
-			printf("\nAligned size of block data = %zu\n", region_size);
-			printf("Keep all lost blocks on memory (%zu * %"PRIu64" = %zu)\n", region_size, lost_count, alloc_size);
-		}
-	}
-
-	return 0;
-}
-
-// Recover all lost input blocks from one block.
-void rs_recover_one_all(PAR3_CTX *par3_ctx, int x_index, int lost_count)
-{
-	void *gf_table, *matrix;
-	uint8_t *work_buf, *buf_p;
-	uint8_t gf_size;
-	int y_index, factor;
-	int block_count;
-	size_t region_size;
-
-	block_count = (int)(par3_ctx->block_count);
-	gf_size = par3_ctx->gf_size;
-	gf_table = par3_ctx->galois_table;
-	matrix = par3_ctx->matrix;
-	work_buf = par3_ctx->work_buf;
-	buf_p = par3_ctx->block_data;
-
-	// For every lost block
-	region_size = (par3_ctx->block_size + 4 + 3) & ~3;
-	for (y_index = 0; y_index < lost_count; y_index++){
-		if (gf_size == 2){
-			factor = ((uint16_t *)matrix)[ block_count * y_index + x_index ];
-			gf16_region_multiply(gf_table, work_buf, factor, region_size, buf_p, 1);
-		} else {
-			factor = ((uint8_t *)matrix)[ block_count * y_index + x_index ];
-			gf8_region_multiply(gf_table, work_buf, factor, region_size, buf_p, 1);
-		}
-		//printf("%d-th lost block += input block[%d] * %2x\n", y_index, x_index, factor);
-
-		buf_p += region_size;
-	}
-}
-
-// Recover all lost input blocks from all blocks.
-void rs_recover_all(PAR3_CTX *par3_ctx, size_t region_size, int lost_count, uint64_t progress_total, uint64_t progress_step)
-{
-	void *gf_table, *matrix;
-	uint8_t *block_data, *buf_p, *input_p, *recv_p;
-	uint8_t gf_size;
-	int *lost_id;
-	int x_index, y_index, lost_index, factor;
-	int block_count;
-	int progress_old, progress_now;
-	time_t time_old, time_now;
-
-	block_count = (int)(par3_ctx->block_count);
-	gf_size = par3_ctx->gf_size;
-	gf_table = par3_ctx->galois_table;
-	matrix = par3_ctx->matrix;
-	lost_id = par3_ctx->recv_id_list + lost_count;
-	block_data = par3_ctx->block_data;
-	recv_p = block_data + region_size * block_count;
-
-	if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 1) ){
-		progress_old = 0;
-		time_old = time(NULL);
-	}
-
-	// For every lost block
-	for (y_index = 0; y_index < lost_count; y_index++){
-		buf_p = block_data + region_size * lost_id[y_index];
-		input_p = block_data;
-
-		// For every available input block
-		lost_index = 0;
-		for (x_index = 0; x_index < block_count; x_index++){
-			if (x_index == lost_id[lost_index]){
-				lost_index++;
-				input_p += region_size;
-				continue;
-			}
-
-			if (gf_size == 2){
-				factor = ((uint16_t *)matrix)[ block_count * y_index + x_index ];
-				gf16_region_multiply(gf_table, input_p, factor, region_size, buf_p, 1);
-			} else {
-				factor = ((uint8_t *)matrix)[ block_count * y_index + x_index ];
-				gf8_region_multiply(gf_table, input_p, factor, region_size, buf_p, 1);
-			}
-
-			input_p += region_size;
-		}
-
-		// For every using recovery block
-		for (lost_index = 0; lost_index < lost_count; lost_index++){
-			x_index = lost_id[lost_index];
-
-			if (gf_size == 2){
-				factor = ((uint16_t *)matrix)[ block_count * y_index + x_index ];
-				gf16_region_multiply(gf_table, input_p, factor, region_size, buf_p, 1);
-			} else {
-				factor = ((uint8_t *)matrix)[ block_count * y_index + x_index ];
-				gf8_region_multiply(gf_table, input_p, factor, region_size, buf_p, 1);
-			}
-
-			input_p += region_size;
-		}
-
-		// Print progress percent
-		if ( (par3_ctx->noise_level >= 0) && (par3_ctx->noise_level <= 1) ){
-			progress_step += block_count;
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				progress_now = (int)((progress_step * 1000) / progress_total);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-	}
-}
-
diff --git a/windows/src/reedsolomon.h b/windows/src/reedsolomon.h
deleted file mode 100644
index 2e9144d..0000000
--- a/windows/src/reedsolomon.h
+++ /dev/null
@@ -1,28 +0,0 @@
-
-// Create all recovery blocks from one input block.
-void rs_create_one_all(PAR3_CTX *par3_ctx, int x_index);
-
-// Create all recovery blocks from all input blocks.
-void rs_create_all(PAR3_CTX *par3_ctx, size_t region_size,
-				uint64_t progress_total, uint64_t progress_step);
-
-
-// Construct matrix for Reed-Solomon, and solve linear equation.
-int rs_compute_matrix(PAR3_CTX *par3_ctx, uint64_t lost_count);
-
-
-// for 8-bit Cauchy Reed-Solomon
-int rs8_gaussian_elimination(PAR3_CTX *par3_ctx, int lost_count);
-int rs8_invert_matrix_cauchy(PAR3_CTX *par3_ctx, int lost_count);
-
-// for 16-bit Cauchy Reed-Solomon
-int rs16_gaussian_elimination(PAR3_CTX *par3_ctx, int lost_count);
-int rs16_invert_matrix_cauchy(PAR3_CTX *par3_ctx, int lost_count);
-
-// Recover all lost input blocks from one block.
-void rs_recover_one_all(PAR3_CTX *par3_ctx, int x_index, int lost_count);
-
-// Recover all lost input blocks from all blocks.
-void rs_recover_all(PAR3_CTX *par3_ctx, size_t region_size, int lost_count,
-				uint64_t progress_total, uint64_t progress_step);
-
diff --git a/windows/src/reedsolomon16.c b/windows/src/reedsolomon16.c
deleted file mode 100644
index 0691d1b..0000000
--- a/windows/src/reedsolomon16.c
+++ /dev/null
@@ -1,328 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "libpar3.h"
-#include "galois.h"
-
-
-// Gaussian elimination of matrix for Cauchy Reed-Solomon
-int rs16_gaussian_elimination(PAR3_CTX *par3_ctx, int lost_count)
-{
-	uint16_t *gf_table, *matrix;
-	int x, y, y_R, y2;
-	int *lost_id, *recv_id;
-	int block_count;
-	int pivot, factor, factor2;
-	int progress_old, progress_now;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	if (lost_count == 0)
-		return 0;
-
-	block_count = (int)(par3_ctx->block_count);
-	gf_table = par3_ctx->galois_table;
-	recv_id = par3_ctx->recv_id_list;
-	lost_id = recv_id + lost_count;
-
-	// Allocate matrix on memory
-	matrix = malloc(sizeof(uint16_t) * block_count * lost_count);
-	if (matrix == NULL){
-		printf("Failed to allocate memory for matrix\n");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->matrix = matrix;
-
-	// Set matrix elements
-	for (y = 0; y < lost_count; y++){	// per each recovery block
-		// These are elements of generator matrix.
-		y_R = 65535 - recv_id[y];	// y_R = MAX - y_index
-		for (x = 0; x < block_count; x++){
-			// inv( x_index ^ y_R )
-			matrix[block_count * y + x] = gf16_reciprocal(gf_table, x ^ y_R);
-		}
-
-		// No need to set values for recovery blocks,
-		// because they will be put in positions of lost blocks.
-	}
-
-	if (par3_ctx->noise_level >= 3){
-		printf("\n generator matrix (%d * %d):\n", block_count, lost_count);
-		for (y = 0; y < lost_count; y++){
-			printf("lost%5d <- recv%5d =", lost_id[y], recv_id[y]);
-			for (x = 0; x < block_count; x++){
-				printf(" %4x", matrix[block_count * y + x]);
-			}
-			printf("\n");
-		}
-	}
-
-	// Gaussian elimination
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing Reed Solomon matrix:\n");
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-	for (y = 0; y < lost_count; y++){
-		// Let pivot value to be 1.
-		pivot = lost_id[y];
-		factor = matrix[block_count * y + pivot];
-		if (factor == 0){
-			printf("Failed to invert matrix\n");
-			return RET_LOGIC_ERROR;
-		}
-		factor = gf16_reciprocal(gf_table, factor);
-		gf16_region_multiply(gf_table, (uint8_t *)(matrix + block_count * y), factor, block_count * 2, NULL, 0);
-
-		// Erase values of same pivot on other rows.
-		for (y2 = 0; y2 < lost_count; y2++){
-			if (y2 == y)
-				continue;
-
-			factor2 = matrix[block_count * y2 + pivot];
-			gf16_region_multiply(gf_table, (uint8_t *)(matrix + block_count * y), factor2, block_count * 2, (uint8_t *)(matrix + block_count * y2), 1);
-
-			// After eliminate the pivot value, store "factor * factor2" value on the pivot.
-			matrix[block_count * y2 + pivot] = gf16_multiply(gf_table, factor, factor2);
-		}
-
-		// After eliminate the pivot columun, store "factor" value on the pivot.
-		matrix[block_count * y + pivot] = factor;
-
-		// Print progress percent
-		if (par3_ctx->noise_level >= 0){
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				// Complexity is "lost_count * lost_count * block_count".
-				// Because lost_count is 16-bit value, "int" (32-bit signed integer) is enough.
-				progress_now = (y * 1000) / lost_count;
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-	}
-	if (par3_ctx->noise_level >= 0){
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-	}
-
-	if (par3_ctx->noise_level >= 3){
-		printf("\n recovery matrix (%d * %d):\n", block_count, lost_count);
-		for (y = 0; y < lost_count; y++){
-			printf("recv%5d -> lost%5d =", recv_id[y], lost_id[y]);
-			for (x = 0; x < block_count; x++){
-				printf(" %4x", matrix[block_count * y + x]);
-			}
-			printf("\n");
-		}
-	}
-
-	return 0;
-}
-
-
-/*
-
-Fast inversion for Cauchy matrix
-This method is based on sample code of persicum's RSC32.
-
-The inverting theory may be described in these pages;
-
-Cauchy matrix
-https://en.wikipedia.org/wiki/Cauchy_matrix
-
-Inverse of Cauchy Matrix
-https://proofwiki.org/wiki/Inverse_of_Cauchy_Matrix
-
-*/
-int rs16_invert_matrix_cauchy(PAR3_CTX *par3_ctx, int lost_count)
-{
-	uint16_t *gf_table, *matrix;
-	int *x, *y, *a, *b, *c, *d;
-	int i, j, k;
-	int *lost_id, *recv_id;
-	int block_count;
-	int progress_old, progress_now;
-	time_t time_old, time_now;
-	clock_t clock_now;
-
-	if (lost_count == 0)
-		return 0;
-
-	block_count = (int)(par3_ctx->block_count);
-	gf_table = par3_ctx->galois_table;
-	recv_id = par3_ctx->recv_id_list;
-	lost_id = recv_id + lost_count;
-
-	// Allocate matrix on memory
-	matrix = malloc(sizeof(uint16_t) * block_count * lost_count);
-	if (matrix == NULL){
-		printf("Failed to allocate memory for matrix\n");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->matrix = matrix;
-
-	// Allocate working buffer on memory
-	a = calloc(block_count * 6, sizeof(int));
-	if (a == NULL){
-		printf("Failed to allocate memory for inversion\n");
-		return RET_MEMORY_ERROR;
-	}
-	b = a + block_count;
-	c = b + block_count;
-	d = c + block_count;
-	x = d + block_count;
-	y = x + block_count;
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nComputing Reed Solomon matrix:\n");
-		progress_old = 0;
-		time_old = time(NULL);
-		clock_now = clock();
-	}
-
-	// Set index of lost input blocks
-	for (i = 0; i < lost_count; i++){
-		y[i] = lost_id[i];
-	}
-	// Set index of existing input blocks after
-	k = 0;
-	for (j = 0; j < block_count; j++){
-		if (k < lost_count && j == lost_id[k]){
-			k++;
-			continue;
-		}
-		y[i] = j;
-		i++;
-	}
-
-	// Set index of using recovery blocks
-	for (i = 0; i < lost_count; i++){
-		x[i] = 65535 - recv_id[i];	// y_R = MAX - y_index
-	}
-	// Set index of existing input blocks after
-	for (; i < block_count; i++){
-		x[i] = y[i];
-	}
-
-	for (i = 0; i < block_count; i++){
-		a[i] = 1;
-		b[i] = 1;
-		c[i] = 1;
-		d[i] = 1;
-
-		for (j = 0; j < lost_count; j++){
-			if (i != j){
-				a[i] = gf16_multiply(gf_table, a[i], x[i] ^ x[j]);
-				b[i] = gf16_multiply(gf_table, b[i], y[i] ^ y[j]);
-			}
-
-			c[i] = gf16_multiply(gf_table, c[i], x[i] ^ y[j]);
-			d[i] = gf16_multiply(gf_table, d[i], y[i] ^ x[j]);
-		}
-
-		// Print progress percent
-		if (par3_ctx->noise_level >= 0){
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				// Complexity is "lost_count * block_count * 2".
-				// Because lost_count is 16-bit value, "int" (32-bit signed integer) is enough.
-				progress_now = (i * 1000) / (block_count + lost_count);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-	}
-
-/*
-	if (par3_ctx->noise_level >= 3){
-		printf("\n fast inversion (%d * 6):\n", block_count);
-		printf("y =");
-		for (i = 0; i < block_count; i++){
-			printf(" %4x", y[i]);
-		}
-		printf("\n");
-		printf("x =");
-		for (i = 0; i < block_count; i++){
-			printf(" %4x", x[i]);
-		}
-		printf("\n");
-		printf("a =");
-		for (i = 0; i < block_count; i++){
-			printf(" %4x", a[i]);
-		}
-		printf("\n");
-		printf("b =");
-		for (i = 0; i < block_count; i++){
-			printf(" %4x", b[i]);
-		}
-		printf("\n");
-		printf("c =");
-		for (i = 0; i < block_count; i++){
-			printf(" %4x", c[i]);
-		}
-		printf("\n");
-		printf("d =");
-		for (i = 0; i < block_count; i++){
-			printf(" %4x", d[i]);
-		}
-		printf("\n");
-	}
-*/
-
-	for (i = 0; i < lost_count; i++){
-		for (j = 0; j < block_count; j++){
-			k = gf16_multiply(gf_table, a[j], b[i]);
-			k = gf16_reciprocal(gf_table, gf16_multiply(gf_table, k, x[j] ^ y[i]));
-			k = gf16_multiply(gf_table, gf16_multiply(gf_table, c[j], d[i]), k);
-			matrix[ block_count * i + y[j] ] = k;
-		}
-
-		// Print progress percent
-		if (par3_ctx->noise_level >= 0){
-			time_now = time(NULL);
-			if (time_now != time_old){
-				time_old = time_now;
-				// Complexity is "lost_count * block_count * 2".
-				// Because lost_count is 16-bit value, "int" (32-bit signed integer) is enough.
-				progress_now = ((block_count + i) * 1000) / (block_count + lost_count);
-				if (progress_now != progress_old){
-					progress_old = progress_now;
-					printf("%d.%d%%\r", progress_now / 10, progress_now % 10);	// 0.0% ~ 100.0%
-				}
-			}
-		}
-	}
-	if (par3_ctx->noise_level >= 0){
-		clock_now = clock() - clock_now;
-		printf("done in %.1f seconds.\n", (double)clock_now / CLOCKS_PER_SEC);
-	}
-
-	if (par3_ctx->noise_level >= 3){
-		printf("\n recovery matrix (%d * %d):\n", block_count, lost_count);
-		for (i = 0; i < lost_count; i++){
-			printf("recv%5d -> lost%5d =", recv_id[i], lost_id[i]);
-			for (j = 0; j < block_count; j++){
-				printf(" %4x", matrix[block_count * i + j]);
-			}
-			printf("\n");
-		}
-	}
-
-	// Deallocate working buffer
-	free(a);
-
-	return 0;
-}
-
diff --git a/windows/src/reedsolomon8.c b/windows/src/reedsolomon8.c
deleted file mode 100644
index fa7d1c7..0000000
--- a/windows/src/reedsolomon8.c
+++ /dev/null
@@ -1,255 +0,0 @@
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-#include "galois.h"
-
-
-// Gaussian elimination of matrix for Cauchy Reed-Solomon
-int rs8_gaussian_elimination(PAR3_CTX *par3_ctx, int lost_count)
-{
-	uint8_t *gf_table, *matrix;
-	int x, y, y_R, y2;
-	int *lost_id, *recv_id;
-	int block_count;
-	int pivot, factor, factor2;
-
-	if (lost_count == 0)
-		return 0;
-
-	block_count = (int)(par3_ctx->block_count);
-	gf_table = par3_ctx->galois_table;
-	recv_id = par3_ctx->recv_id_list;
-	lost_id = recv_id + lost_count;
-
-	// Allocate matrix on memory
-	matrix = malloc(block_count * lost_count);
-	if (matrix == NULL){
-		printf("Failed to allocate memory for matrix\n");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->matrix = matrix;
-
-	// Set matrix elements
-	for (y = 0; y < lost_count; y++){	// per each recovery block
-		// These are elements of generator matrix.
-		y_R = 255 - recv_id[y];	// y_R = MAX - y_index
-		for (x = 0; x < block_count; x++){
-			// inv( x_index ^ y_R )
-			matrix[block_count * y + x] = gf8_reciprocal(gf_table, x ^ y_R);
-		}
-
-		// No need to set values for recovery blocks,
-		// because they will be put in positions of lost blocks.
-	}
-
-	if (par3_ctx->noise_level >= 3){
-		printf("\n generator matrix (%d * %d):\n", block_count, lost_count);
-		for (y = 0; y < lost_count; y++){
-			printf("lost%3d <- recv%3d =", lost_id[y], recv_id[y]);
-			for (x = 0; x < block_count; x++){
-				printf(" %2x", matrix[block_count * y + x]);
-			}
-			printf("\n");
-		}
-	}
-
-	// Gaussian elimination
-	for (y = 0; y < lost_count; y++){
-		// Let pivot value to be 1.
-		pivot = lost_id[y];
-		factor = matrix[block_count * y + pivot];
-		if (factor == 0){
-			printf("Failed to invert matrix\n");
-			return RET_LOGIC_ERROR;
-		}
-		factor = gf8_reciprocal(gf_table, factor);
-		gf8_region_multiply(gf_table, matrix + block_count * y, factor, block_count, NULL, 0);
-
-		// Erase values of same pivot on other rows.
-		for (y2 = 0; y2 < lost_count; y2++){
-			if (y2 == y)
-				continue;
-
-			factor2 = matrix[block_count * y2 + pivot];
-			gf8_region_multiply(gf_table, matrix + block_count * y, factor2, block_count, matrix + block_count * y2, 1);
-
-			// After eliminate the pivot value, store "factor * factor2" value on the pivot.
-			matrix[block_count * y2 + pivot] = gf8_multiply(gf_table, factor, factor2);
-		}
-
-		// After eliminate the pivot columun, store "factor" value on the pivot.
-		matrix[block_count * y + pivot] = factor;
-	}
-
-	if (par3_ctx->noise_level >= 3){
-		printf("\n recovery matrix (%d * %d):\n", block_count, lost_count);
-		for (y = 0; y < lost_count; y++){
-			printf("recv%3d -> lost%3d =", recv_id[y], lost_id[y]);
-			for (x = 0; x < block_count; x++){
-				printf(" %2x", matrix[block_count * y + x]);
-			}
-			printf("\n");
-		}
-	}
-
-	return 0;
-}
-
-
-/*
-
-Fast inversion for Cauchy matrix
-This method is based on sample code of persicum's RSC32.
-
-The inverting theory may be described in these pages;
-
-Cauchy matrix
-https://en.wikipedia.org/wiki/Cauchy_matrix
-
-Inverse of Cauchy Matrix
-https://proofwiki.org/wiki/Inverse_of_Cauchy_Matrix
-
-*/
-int rs8_invert_matrix_cauchy(PAR3_CTX *par3_ctx, int lost_count)
-{
-	uint8_t *gf_table, *matrix;
-	int *x, *y, *a, *b, *c, *d;
-	int i, j, k;
-	int *lost_id, *recv_id;
-	int block_count;
-
-	if (lost_count == 0)
-		return 0;
-
-	block_count = (int)(par3_ctx->block_count);
-	gf_table = par3_ctx->galois_table;
-	recv_id = par3_ctx->recv_id_list;
-	lost_id = recv_id + lost_count;
-
-	// Allocate matrix on memory
-	matrix = malloc(block_count * lost_count);
-	if (matrix == NULL){
-		printf("Failed to allocate memory for matrix\n");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->matrix = matrix;
-
-	// Allocate working buffer on memory
-	a = calloc(block_count * 6, sizeof(int));
-	if (a == NULL){
-		printf("Failed to allocate memory for inversion\n");
-		return RET_MEMORY_ERROR;
-	}
-	b = a + block_count;
-	c = b + block_count;
-	d = c + block_count;
-	x = d + block_count;
-	y = x + block_count;
-
-	// Set index of lost input blocks
-	for (i = 0; i < lost_count; i++){
-		y[i] = lost_id[i];
-	}
-	// Set index of existing input blocks after
-	k = 0;
-	for (j = 0; j < block_count; j++){
-		if (k < lost_count && j == lost_id[k]){
-			k++;
-			continue;
-		}
-		y[i] = j;
-		i++;
-	}
-
-	// Set index of using recovery blocks
-	for (i = 0; i < lost_count; i++){
-		x[i] = 255 - recv_id[i];	// y_R = MAX - y_index
-	}
-	// Set index of existing input blocks after
-	for (; i < block_count; i++){
-		x[i] = y[i];
-	}
-
-	for (i = 0; i < block_count; i++){
-		a[i] = 1;
-		b[i] = 1;
-		c[i] = 1;
-		d[i] = 1;
-
-		for (j = 0; j < lost_count; j++){
-			if (i != j){
-				a[i] = gf8_multiply(gf_table, a[i], x[i] ^ x[j]);
-				b[i] = gf8_multiply(gf_table, b[i], y[i] ^ y[j]);
-			}
-
-			c[i] = gf8_multiply(gf_table, c[i], x[i] ^ y[j]);
-			d[i] = gf8_multiply(gf_table, d[i], y[i] ^ x[j]);
-		}
-	}
-
-/*
-	if (par3_ctx->noise_level >= 3){
-		printf("\n fast inversion (%d * 6):\n", block_count);
-		printf("y =");
-		for (i = 0; i < block_count; i++){
-			printf(" %2x", y[i]);
-		}
-		printf("\n");
-		printf("x =");
-		for (i = 0; i < block_count; i++){
-			printf(" %2x", x[i]);
-		}
-		printf("\n");
-		printf("a =");
-		for (i = 0; i < block_count; i++){
-			printf(" %2x", a[i]);
-		}
-		printf("\n");
-		printf("b =");
-		for (i = 0; i < block_count; i++){
-			printf(" %2x", b[i]);
-		}
-		printf("\n");
-		printf("c =");
-		for (i = 0; i < block_count; i++){
-			printf(" %2x", c[i]);
-		}
-		printf("\n");
-		printf("d =");
-		for (i = 0; i < block_count; i++){
-			printf(" %2x", d[i]);
-		}
-		printf("\n");
-	}
-*/
-
-	for (i = 0; i < lost_count; i++){
-		for (j = 0; j < block_count; j++){
-			k = gf8_multiply(gf_table, a[j], b[i]);
-			k = gf8_reciprocal(gf_table, gf8_multiply(gf_table, k, x[j] ^ y[i]));
-			k = gf8_multiply(gf_table, gf8_multiply(gf_table, c[j], d[i]), k);
-			matrix[ block_count * i + y[j] ] = k;
-		}
-	}
-
-	if (par3_ctx->noise_level >= 3){
-		printf("\n recovery matrix (%d * %d):\n", block_count, lost_count);
-		for (i = 0; i < lost_count; i++){
-			printf("recv%3d -> lost%3d =", recv_id[i], lost_id[i]);
-			for (j = 0; j < block_count; j++){
-				printf(" %2x", matrix[block_count * i + j]);
-			}
-			printf("\n");
-		}
-	}
-
-	// Deallocate working buffer
-	free(a);
-
-	return 0;
-}
-
diff --git a/windows/src/repair.c b/windows/src/repair.c
deleted file mode 100644
index 7b133c5..0000000
--- a/windows/src/repair.c
+++ /dev/null
@@ -1,777 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#define _stat64 stat
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-
-#include <sys/stat.h>
-
-// default permissions on directory is read, write and search by owner
-#define _mkdir(dirname) mkdir(dirname, S_IRUSR | S_IWUSR | S_IXUSR)
-
-#elif _WIN32
-
-// MSVC headers
-#include <direct.h>
-#include <sys/stat.h>
-
-#define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
-
-#endif
-
-#include "libpar3.h"
-#include "common.h"
-#include "file.h"
-#include "inside.h"
-#include "verify.h"
-
-// It will restore permissions or attributes after files are repaired.
-// return 0 = no need repair, 1 = restored successfully, 2 = failed
-// 0x8000 = not directory
-static int restore_directory(char *path)
-{
-	struct _stat64 stat_buf;
-
-	if (_stat64(path, &stat_buf) != 0){	// Missing directory
-		// Create the directory
-		if (_mkdir(path) == 0){
-			return 1;	// Made directory
-		} else {
-			return 2;	// Failed
-		}
-
-	} else {
-		if (!S_ISDIR(stat_buf.st_mode))
-			return 0x8000;
-	}
-
-	return 0;
-}
-
-
-// Reconstruct directory tree of input set
-uint32_t reconstruct_directory_tree(PAR3_CTX *par3_ctx)
-{
-	int ret, flag_show = 0;
-	uint32_t num, failed_dir_count;
-	PAR3_DIR_CTX *dir_p;
-
-	if (par3_ctx->input_dir_count == 0)
-		return 0;
-
-	failed_dir_count = 0;
-	num = par3_ctx->input_dir_count;
-	dir_p = par3_ctx->input_dir_list;
-	while (num > 0){
-		ret = restore_directory(dir_p->name);
-		if (ret != 0){
-			if (par3_ctx->noise_level >= 1){
-				if (flag_show == 0){
-					flag_show++;
-					printf("\nReconstructing input directories:\n\n");
-				}
-			}
-			if (ret == 1){
-				if (par3_ctx->noise_level >= 1){
-					printf("Target: \"%s\" - made.\n", dir_p->name);
-				}
-			} else {
-				failed_dir_count++;
-				if (par3_ctx->noise_level >= 1){
-					printf("Target: \"%s\" - failed.\n", dir_p->name);
-				}
-			}
-		}
-
-		dir_p++;
-		num--;
-	}
-
-	return failed_dir_count;
-}
-
-// Create temporary files for lost input files
-int create_temp_file(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	uint32_t file_count, file_index;
-	PAR3_FILE_CTX *file_list;
-	FILE *fp;
-
-	if (par3_ctx->input_file_count == 0)
-		return 0;
-
-	file_count = par3_ctx->input_file_count;
-	file_list = par3_ctx->input_file_list;
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	for (file_index = 0; file_index < file_count; file_index++){
-		// The input file is missing or damaged.
-		if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-			sprintf(temp_path + 22, "%u.tmp", file_index);
-			//fp = fopen(temp_path, "wbx");	// Error at over writing temporary file
-			fp = fopen(temp_path, "wb");	// There is a risk of over writing existing file of same name.
-			if (fp == NULL){
-				perror("Failed to create temporary file");
-				return RET_FILE_IO_ERROR;
-			}
-
-			if (fclose(fp) != 0){
-				perror("Failed to close temporary file");
-				return RET_FILE_IO_ERROR;
-			}
-		}
-	}
-
-	if ( (file_count == 1) && (file_list[0].state & 0x80000000) ){	// Copy PAR3 packets in unprotected chunks
-		int ret;
-		sprintf(temp_path + 22, "%u.tmp", 0);
-		ret = copy_inside_data(par3_ctx, temp_path);
-		if (ret != 0)
-			return ret;
-	}
-
-	return 0;
-}
-
-// Restore content of input files
-int restore_input_file(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	char *name_prev, *find_name;
-	uint8_t *work_buf, buf_tail[40];
-	uint32_t file_count, file_index;
-	uint32_t chunk_index, chunk_num;
-	size_t slice_size;
-	int64_t slice_index, file_offset;
-	uint64_t block_size, chunk_size, file_size;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_FILE_CTX *file_list;
-	FILE *fp_write, *fp_read;
-
-	if (par3_ctx->input_file_count == 0)
-		return 0;
-
-	file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-	file_list = par3_ctx->input_file_list;
-
-	// Allocate memory to read one input block
-	work_buf = malloc(block_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	if (par3_ctx->noise_level >= 1){
-		printf("\nRestoring input files:\n\n");
-	}
-
-	name_prev = NULL;
-	fp_read = NULL;
-	for (file_index = 0; file_index < file_count; file_index++){
-		// The input file is missing or damaged.
-		if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0) ){
-			sprintf(temp_path + 22, "%u.tmp", file_index);
-			fp_write = fopen(temp_path, "r+b");
-			if (fp_write == NULL){
-				perror("Failed to open temporary file");
-				return RET_FILE_IO_ERROR;
-			}
-
-			file_size = 0;
-			chunk_index = file_list[file_index].chunk;		// index of the first chunk
-			chunk_num = file_list[file_index].chunk_num;	// number of chunk descriptions
-			slice_index = file_list[file_index].slice;		// index of the first slice
-			//printf("chunk = %u+%u, %s\n", chunk_index, chunk_num, file_list[file_index].name);
-			while (chunk_num > 0){
-				chunk_size = chunk_list[chunk_index].size;
-				if (chunk_size == 0){	// Unprotected Chunk Description
-					// Unprotected chunk will be filled by zeros after repair.
-					file_size += chunk_list[chunk_index].block;
-					if (_fseeki64(fp_write, file_size, SEEK_SET) != 0){
-						perror("Failed to seek temporary file");
-						fclose(fp_read);
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-
-				} else {	// Protected Chunk Description
-					file_size += chunk_size;
-					while ( (chunk_size >= block_size) || (chunk_size >= 40) ){	// full size slice or chunk tail slice
-						slice_size = slice_list[slice_index].size;
-						file_offset = slice_list[slice_index].find_offset;
-						find_name = slice_list[slice_index].find_name;
-						if (find_name == NULL){
-							printf("Input slice[%"PRId64"] was not found.\n", slice_index);
-							if (fp_read != NULL)
-								fclose(fp_read);
-							fclose(fp_write);
-							return RET_LOGIC_ERROR;
-						}
-
-						// Read input file slice from another file.
-						if ( (fp_read == NULL) || (find_name != name_prev) ){
-							if (fp_read != NULL){	// Close previous another file.
-								fclose(fp_read);
-								fp_read = NULL;
-							}
-							fp_read = fopen(find_name, "rb");
-							if (fp_read == NULL){
-								perror("Failed to open another file");
-								fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-							name_prev = find_name;
-						}
-						if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek another file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fread(work_buf, 1, slice_size, fp_read) != slice_size){
-							perror("Failed to read full slice on input file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-
-						// Write input file slice on temporary file.
-						if (fwrite(work_buf, 1, slice_size, fp_write) != slice_size){
-							perror("Failed to write slice on temporary file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-
-						slice_index++;
-						chunk_size -= slice_size;
-					}
-
-					if (chunk_size > 0){	// tiny chunk tail
-						slice_size = chunk_size;	// Tiny chunk tail was stored in File Packet.
-
-						// copy 1 ~ 39 bytes
-						memcpy(buf_tail, &(chunk_list[chunk_index].tail_crc), 8);
-						memcpy(buf_tail + 8, chunk_list[chunk_index].tail_hash, 16);
-						memcpy(buf_tail + 24, &(chunk_list[chunk_index].tail_block), 8);
-						memcpy(buf_tail + 32, &(chunk_list[chunk_index].tail_offset), 8);
-
-						// Write input file slice on temporary file.
-						if (fwrite(buf_tail, 1, slice_size, fp_write) != slice_size){
-							perror("Failed to write tiny slice on temporary file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-
-				}
-
-				chunk_index++;
-				chunk_num--;
-			}
-
-			if (fclose(fp_write) != 0){
-				perror("Failed to close temporary file");
-				return RET_FILE_IO_ERROR;
-			}
-
-			if (file_size != file_list[file_index].size){
-				if (par3_ctx->noise_level >= 1){
-					printf("Target: \"%s\" - failed.\n", temp_path);
-				}
-				if (fp_read != NULL)
-					fclose(fp_read);
-				return RET_LOGIC_ERROR;
-			} else {
-				file_list[file_index].state |= 0x100;
-				if (par3_ctx->noise_level >= 1){
-					printf("Target: \"%s\" - restored temporary.\n", temp_path);
-				}
-			}
-		}
-	}
-
-	if (fp_read != NULL)
-		fclose(fp_read);
-
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	return 0;
-}
-
-// Try to restore content of input files
-int try_restore_input_file(PAR3_CTX *par3_ctx, char *temp_path)
-{
-	char *name_prev, *find_name;
-	uint8_t *work_buf, buf_tail[40];
-	uint32_t file_count, file_index;
-	uint32_t chunk_index, chunk_num;
-	size_t slice_size;
-	int64_t slice_index, file_offset;
-	uint64_t block_size, chunk_size, file_size;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_FILE_CTX *file_list;
-	FILE *fp_write, *fp_read;
-
-	if (par3_ctx->input_file_count == 0)
-		return 0;
-
-	file_count = par3_ctx->input_file_count;
-	block_size = par3_ctx->block_size;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-	file_list = par3_ctx->input_file_list;
-
-	// Allocate memory to read one input block
-	work_buf = malloc(block_size);
-	if (work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-	par3_ctx->work_buf = work_buf;
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	if (par3_ctx->noise_level >= 1){
-		printf("\nRestoring input files:\n\n");
-	}
-
-	name_prev = NULL;
-	fp_read = NULL;
-	for (file_index = 0; file_index < file_count; file_index++){
-		// The input file is missing or damaged.
-		if ( ((file_list[file_index].state & 3) != 0) && ((file_list[file_index].state & 4) == 0)
-				&& ((file_list[file_index].state & 0x200) != 0) ){	// Checked repairable already
-			sprintf(temp_path + 22, "%u.tmp", file_index);
-			fp_write = fopen(temp_path, "wb");	// There is a risk of over writing existing file of same name.
-			if (fp_write == NULL){
-				perror("Failed to open temporary file");
-				return RET_FILE_IO_ERROR;
-			}
-
-			file_size = 0;
-			chunk_index = file_list[file_index].chunk;		// index of the first chunk
-			chunk_num = file_list[file_index].chunk_num;	// number of chunk descriptions
-			slice_index = file_list[file_index].slice;		// index of the first slice
-			//printf("chunk = %u+%u, %s\n", chunk_index, chunk_num, file_list[file_index].name);
-			while (chunk_num > 0){
-				chunk_size = chunk_list[chunk_index].size;
-				if (chunk_size == 0){	// Unprotected Chunk Description
-					// Unprotected chunk will be filled by zeros after repair.
-					file_size += chunk_list[chunk_index].block;
-					if (_fseeki64(fp_write, file_size, SEEK_SET) != 0){
-						perror("Failed to seek temporary file");
-						fclose(fp_read);
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-
-				} else {	// Protected Chunk Description
-
-					file_size += chunk_size;
-					while ( (chunk_size >= block_size) || (chunk_size >= 40) ){	// full size slice or chunk tail slice
-						slice_size = slice_list[slice_index].size;
-						file_offset = slice_list[slice_index].find_offset;
-						find_name = slice_list[slice_index].find_name;
-						if (find_name == NULL){
-							printf("Input slice[%"PRId64"] was not found.\n", slice_index);
-							if (fp_read != NULL)
-								fclose(fp_read);
-							fclose(fp_write);
-							return RET_LOGIC_ERROR;
-						}
-
-						// Read input file slice from another file.
-						if ( (fp_read == NULL) || (find_name != name_prev) ){
-							if (fp_read != NULL){	// Close previous another file.
-								fclose(fp_read);
-								fp_read = NULL;
-							}
-							fp_read = fopen(find_name, "rb");
-							if (fp_read == NULL){
-								perror("Failed to open another file");
-								fclose(fp_write);
-								return RET_FILE_IO_ERROR;
-							}
-							name_prev = find_name;
-						}
-						if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-							perror("Failed to seek another file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						if (fread(work_buf, 1, slice_size, fp_read) != slice_size){
-							perror("Failed to read full slice on input file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-
-						// Write input file slice on temporary file.
-						if (fwrite(work_buf, 1, slice_size, fp_write) != slice_size){
-							perror("Failed to write slice on temporary file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-
-						slice_index++;
-						chunk_size -= slice_size;
-					}
-
-					if (chunk_size > 0){	// tiny chunk tail
-						slice_size = chunk_size;	// Tiny chunk tail was stored in File Packet.
-
-						// copy 1 ~ 39 bytes
-						memcpy(buf_tail, &(chunk_list[chunk_index].tail_crc), 8);
-						memcpy(buf_tail + 8, chunk_list[chunk_index].tail_hash, 16);
-						memcpy(buf_tail + 24, &(chunk_list[chunk_index].tail_block), 8);
-						memcpy(buf_tail + 32, &(chunk_list[chunk_index].tail_offset), 8);
-
-						// Write input file slice on temporary file.
-						if (fwrite(buf_tail, 1, slice_size, fp_write) != slice_size){
-							perror("Failed to write tiny slice on temporary file");
-							fclose(fp_read);
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-					}
-				}
-
-				chunk_index++;
-				chunk_num--;
-			}
-
-			if (fclose(fp_write) != 0){
-				perror("Failed to close temporary file");
-				return RET_FILE_IO_ERROR;
-			}
-
-			if (file_size != file_list[file_index].size){
-				if (par3_ctx->noise_level >= 1){
-					printf("Target: \"%s\" - failed.\n", temp_path);
-				}
-				if (fp_read != NULL)
-					fclose(fp_read);
-				return RET_LOGIC_ERROR;
-			} else {
-				file_list[file_index].state |= 0x100;
-				if (par3_ctx->noise_level >= 1){
-					printf("Target: \"%s\" - restored temporary.\n", temp_path);
-				}
-			}
-		}
-	}
-
-	if (fp_read != NULL)
-		fclose(fp_read);
-
-	free(work_buf);
-	par3_ctx->work_buf = NULL;
-
-	return 0;
-}
-
-// Backup damaged file by adding number at the last
-static int backup_file(char *filename)
-{
-	char backup_name[_MAX_PATH + 8];
-	int num;
-	size_t len;
-
-	strcpy(backup_name, filename);
-	len = strlen(backup_name);
-	if (len + 2 >= _MAX_PATH)
-		return 1;
-
-	for (num = 1; num < 10000; num++){
-		sprintf(backup_name + len, ".%d", num);
-		if (strlen(backup_name) >= _MAX_PATH){
-			break;	// Filename became too long by added number.
-		}
-		if (rename(filename, backup_name) == 0){
-			return 0;	// backup OK
-		}
-	}
-
-	return 2;
-}
-
-// Verify repaired file and rename to original name
-int verify_repaired_file(PAR3_CTX *par3_ctx, char *temp_path,
-		uint32_t *missing_file_count, uint32_t *damaged_file_count, uint32_t *misnamed_file_count, uint32_t *bad_file_count)
-{
-	int flag_show = 0;
-	char *file_name;
-	int ret;
-	uint32_t file_count, file_index;
-	PAR3_FILE_CTX *file_list;
-
-	if (par3_ctx->input_file_count == 0)
-		return 0;
-
-	file_count = par3_ctx->input_file_count;
-	file_list = par3_ctx->input_file_list;
-
-	// Allocate buffer to store file data temporary.
-	par3_ctx->work_buf = malloc(par3_ctx->block_size);
-	if (par3_ctx->work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-
-	// Base name of temporary file
-	sprintf(temp_path, "par3_%02X%02X%02X%02X%02X%02X%02X%02X_",
-			par3_ctx->set_id[0], par3_ctx->set_id[1], par3_ctx->set_id[2], par3_ctx->set_id[3],
-			par3_ctx->set_id[4], par3_ctx->set_id[5], par3_ctx->set_id[6], par3_ctx->set_id[7]);
-
-	*missing_file_count = 0;
-	*damaged_file_count = 0;
-	*misnamed_file_count = 0;
-	*bad_file_count = 0;
-	for (file_index = 0; file_index < file_count; file_index++){
-		// This input file is misnamed.
-		if (file_list[file_index].state & 4){
-			if (par3_ctx->noise_level >= 0){
-				if (flag_show == 0){
-					flag_show++;
-					printf("\nVerifying repaired files:\n\n");
-				}
-			}
-
-			//printf("state = 0x%08X\n", file_list[file_index].state);
-			if (file_list[file_index].state & 2){	// The original file is damaged.
-				// Backup damaged file
-				backup_file(file_list[file_index].name);
-
-				// Or delete damaged file by purge option ?
-				// Deleting level, such like: -p, -p1, -p2
-			}
-
-			// Get wrong filename
-			ret = file_list[file_index].state >> 3;	// Index of extra file
-			file_name = namez_get(par3_ctx->extra_file_name, par3_ctx->extra_file_name_len, ret);
-
-			// Correct to original filename
-			if (rename(file_name, file_list[file_index].name) != 0){
-				perror("Failed to rename misnamed file");
-
-				// No need to return backup file.
-				*misnamed_file_count += 1;
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - failed.\n", file_list[file_index].name);
-				}
-
-			} else if (par3_ctx->file_system & 0x10003){	// test property
-				ret = test_file_system_option(par3_ctx, 1, file_list[file_index].offset, file_list[file_index].name);
-				if (ret == 0){
-					if (par3_ctx->noise_level >= 0){
-						printf("Target: \"%s\" - repaired.\n", file_list[file_index].name);
-					}
-				} else {
-					*bad_file_count += 1;	// Though file data was repaired, property is different.
-					if (par3_ctx->noise_level >= 0){
-						printf("Target: \"%s\" - failed.\n", file_list[file_index].name);
-					}
-				}
-
-			} else {
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - repaired.\n", file_list[file_index].name);
-				}
-			}
-
-		// This input file is missing or damaged.
-		} else if ((file_list[file_index].state & 0x104) == 0x100){	// This missing or damaged file was repaired.
-			if (par3_ctx->noise_level >= 0){
-				if (flag_show == 0){
-					flag_show++;
-					printf("\nVerifying repaired files:\n\n");
-				}
-			}
-
-			sprintf(temp_path + 22, "%u.tmp", file_index);
-			ret = check_complete_file(par3_ctx, temp_path, file_index, file_list[file_index].size, NULL);
-			if (ret > 0)
-				return ret;	// error
-			if (ret == 0){
-				if (file_list[file_index].state & 2){
-					// Backup damaged file
-					backup_file(file_list[file_index].name);
-
-					// Or delete damaged file by purge option ?
-					// Deleting level, such like: -p, -p1, -p2
-				}
-
-				// Return to original filename
-				if (rename(temp_path, file_list[file_index].name) != 0){
-					perror("Failed to rename temporary file");
-
-					// Delete the temporary file
-					if (remove(temp_path) != 0){
-						perror("Failed to delete temporary file");
-					}
-					if (file_list[file_index].state & 2){
-						*damaged_file_count += 1;
-					} else if (file_list[file_index].state & 1){
-						*missing_file_count += 1;
-					}
-					if (par3_ctx->noise_level >= 0){
-						printf("Target: \"%s\" - failed.\n", file_list[file_index].name);
-					}
-
-				} else if (par3_ctx->file_system & 0x10003){	// test property
-					ret = test_file_system_option(par3_ctx, 1, file_list[file_index].offset, file_list[file_index].name);
-					if (ret == 0){
-						if (par3_ctx->noise_level >= 0){
-							printf("Target: \"%s\" - repaired.\n", file_list[file_index].name);
-						}
-					} else {
-						*bad_file_count += 1;	// Though file data was repaired, property is different.
-						if (par3_ctx->noise_level >= 0){
-							printf("Target: \"%s\" - failed.\n", file_list[file_index].name);
-						}
-					}
-
-				} else {
-					if (par3_ctx->noise_level >= 0){
-						if (file_list[file_index].state & 0x80000000){	// Completeness of unprotected chunks is unknown.
-							printf("Target: \"%s\" - protected data was repaired.\n", file_list[file_index].name);
-						} else {
-							printf("Target: \"%s\" - repaired.\n", file_list[file_index].name);
-						}
-					}
-				}
-
-			} else {	// Repaired file is bad.
-				// Delete the temporary file
-				if (remove(temp_path) != 0){
-					perror("Failed to delete temporary file");
-				}
-				if (file_list[file_index].state & 2){
-					*damaged_file_count += 1;
-				} else if (file_list[file_index].state & 1){
-					*missing_file_count += 1;
-				}
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - failed.\n", file_list[file_index].name);
-				}
-			}
-
-		// Not repaired files.
-		} else if (file_list[file_index].state & 4){
-			*misnamed_file_count += 1;
-		} else if (file_list[file_index].state & 2){
-			*damaged_file_count += 1;
-		} else if (file_list[file_index].state & 1){
-			*missing_file_count += 1;
-
-		// Complete, but different property
-		} else if ( ((file_list[file_index].state & 0x7FFF0000) != 0) && ((par3_ctx->file_system & 0x10003) != 0) ){
-			if (par3_ctx->noise_level >= 0){
-				if (flag_show == 0){
-					flag_show++;
-					printf("\nVerifying repaired files:\n\n");
-				}
-			}
-
-			ret = test_file_system_option(par3_ctx, 1, file_list[file_index].offset, file_list[file_index].name);
-			if (ret == 0){
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - repaired.\n", file_list[file_index].name);
-				}
-			} else {
-				*bad_file_count += 1;
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - failed.\n", file_list[file_index].name);
-				}
-			}
-		}
-	}
-
-	free(par3_ctx->work_buf);
-	par3_ctx->work_buf = NULL;
-
-	return 0;
-}
-
-// Reset option of directories
-uint32_t reset_directory_option(PAR3_CTX *par3_ctx)
-{
-	int ret, flag_show = 0;
-	uint32_t num, failed_dir_count;
-	PAR3_DIR_CTX *dir_p;
-
-	if (par3_ctx->input_dir_count == 0)
-		return 0;
-
-	if ( ((par3_ctx->file_system & 4) == 0) || ((par3_ctx->file_system & 3) == 0) )
-		return 0;
-
-	failed_dir_count = 0;
-	num = par3_ctx->input_dir_count;
-	dir_p = par3_ctx->input_dir_list;
-	while (num > 0){
-		ret = check_directory(par3_ctx, dir_p->name, dir_p->offset);
-		if (ret & 0xFFFF0000){
-			//printf("check_directory = 0x%x\n", ret);
-
-			if (par3_ctx->noise_level >= 0){
-				if (flag_show == 0){
-					flag_show++;
-					printf("\nReseting input directories:\n\n");
-				}
-			}
-
-			ret = test_file_system_option(par3_ctx, 2, dir_p->offset, dir_p->name);
-			//printf("test_file_system_option = 0x%x\n", ret);
-			if (ret == 0){
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - repaired.\n", dir_p->name);
-				}
-			} else {
-				failed_dir_count++;
-				if (par3_ctx->noise_level >= 0){
-					printf("Target: \"%s\" - failed.\n", dir_p->name);
-				}
-			}
-		}
-
-		dir_p++;
-		num--;
-	}
-
-	return failed_dir_count;
-}
diff --git a/windows/src/repair.h b/windows/src/repair.h
deleted file mode 100644
index 45101fe..0000000
--- a/windows/src/repair.h
+++ /dev/null
@@ -1,17 +0,0 @@
-
-uint32_t reconstruct_directory_tree(PAR3_CTX *par3_ctx);
-
-// When there are enough input blocks after verification, no need Recovery Codes.
-int create_temp_file(PAR3_CTX *par3_ctx, char *temp_path);
-int restore_input_file(PAR3_CTX *par3_ctx, char *temp_path);
-
-// For partial repair, when there are not enough blocks.
-int try_restore_input_file(PAR3_CTX *par3_ctx, char *temp_path);
-
-// Confirm input files after repair
-int verify_repaired_file(PAR3_CTX *par3_ctx, char *temp_path,
-		uint32_t *missing_file_count, uint32_t *damaged_file_count, uint32_t *misnamed_file_count, uint32_t *bad_file_count);
-
-// Reset option of directories
-uint32_t reset_directory_option(PAR3_CTX *par3_ctx);
-
diff --git a/windows/src/verify.c b/windows/src/verify.c
deleted file mode 100644
index ca93f12..0000000
--- a/windows/src/verify.c
+++ /dev/null
@@ -1,434 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _stat64 stat
-#elif _WIN32
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __linux__
-#include <sys/stat.h>
-#elif _WIN32
-// MSVC headers
-#include <sys/stat.h>
-
-// _S_IFDIR = 0x4000
-#define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
-// _S_IFREG = 0x8000
-#define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)
-#endif
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "common.h"
-#include "hash.h"
-#include "file.h"
-#include "verify.h"
-
-
-// This will check permission and attributes in future.
-// return 0 = exist, 1 = missing
-// 0x8000 = not directory
-// 0x****0000 = different property (timestamp, permission, or attribute)
-int check_directory(PAR3_CTX *par3_ctx, char *path, int64_t offset)
-{
-	struct _stat64 stat_buf;
-
-	// Check infomation, only when scuucess.
-	if (_stat64(path, &stat_buf) != 0)
-		return 1;
-
-	if (!S_ISDIR(stat_buf.st_mode))
-		return 0x8000;
-
-	if ( (offset >= 0) && ((par3_ctx->file_system & 4) != 0) && ((par3_ctx->file_system & 3) != 0) ){
-		//printf("offset of Directory Packet = %"PRId64"\n", offset);
-		return check_file_system_option(par3_ctx, 2, offset, &stat_buf);
-	}
-
-	return 0;
-}
-
-
-// Check existense of each input directory.
-// Return number of missing directories.
-void check_input_directory(PAR3_CTX *par3_ctx, uint32_t *missing_dir_count, uint32_t *bad_dir_count)
-{
-	int ret;
-	uint32_t num;
-	PAR3_DIR_CTX *dir_p;
-
-	if (par3_ctx->input_dir_count == 0)
-		return;
-
-	if (par3_ctx->noise_level >= -1){
-		printf("\nVerifying input directories:\n\n");
-	}
-
-	num = par3_ctx->input_dir_count;
-	dir_p = par3_ctx->input_dir_list;
-	while (num > 0){
-		if (par3_ctx->noise_level >= -1){
-			printf("Target: \"%s\"", dir_p->name);
-		}
-		ret = check_directory(par3_ctx, dir_p->name, dir_p->offset);
-		//printf("\n check_directory = 0x%x\n", ret);
-		if (ret == 0){
-			if (par3_ctx->noise_level >= -1){
-				printf(" - found.\n");
-			}
-		} else if (ret == 1){
-			*missing_dir_count += 1;
-			if (par3_ctx->noise_level >= -1){
-				printf(" - missing.\n");
-			}
-		} else if (ret == 0x8000){
-			*missing_dir_count += 1;
-			if (par3_ctx->noise_level >= -1){
-				printf(" - not directory.\n");
-			}
-		} else if (ret & 0xFFFF0000){
-			*bad_dir_count += 1;
-			if (par3_ctx->noise_level >= -1){
-				if ((ret & 0xFFFF0000) == 0x10000){
-					printf(" - different timestamp.\n");
-				} else if ((ret & 0xFFFF0000) == 0x20000){
-					printf(" - different permissions.\n");
-				} else {
-					printf(" - different property.\n");
-				}
-			}
-		} else {
-			if (par3_ctx->noise_level >= -1){
-				printf(" - unknown.\n");
-			}
-		}
-
-		dir_p++;
-		num--;
-	}
-}
-
-
-
-// This will check permission and attributes, only when you set an option.
-// return 0 = exist, 1 = missing
-// 0x8000 = not file
-// 0x****0000 = different property (timestamp, permission, or attribute)
-static int check_file(PAR3_CTX *par3_ctx, char *path, uint64_t *current_size, int64_t offset)
-{
-	struct _stat64 stat_buf;
-
-	// Check infomation, only when scuucess.
-	if (_stat64(path, &stat_buf) != 0)
-		return 1;
-
-	// Get size of existing file.
-	*current_size = stat_buf.st_size;	// This may be different from original size.
-
-	if (!S_ISREG(stat_buf.st_mode))
-		return 0x8000;
-
-	if ( (offset >= 0) && (par3_ctx->file_system & 0x10003) ){
-		//printf("offset of File Packet = %"PRId64"\n", offset);
-		return check_file_system_option(par3_ctx, 1, offset, &stat_buf);
-	}
-
-	return 0;
-}
-
-
-// Check existense and content of each input file.
-int verify_input_file(PAR3_CTX *par3_ctx, uint32_t *missing_file_count, uint32_t *damaged_file_count, uint32_t *bad_file_count)
-{
-	int ret;
-	uint32_t num;
-	uint64_t current_size, file_offset, file_damage;
-	PAR3_FILE_CTX *file_p;
-
-	if (par3_ctx->input_file_count == 0)
-		return 0;
-
-	// Remove input files from extra files
-	if (par3_ctx->extra_file_name_len > 0){
-		char *list_name;
-		size_t len, off, list_len;
-
-		list_name = par3_ctx->extra_file_name;
-		list_len = par3_ctx->extra_file_name_len;
-		off = 0;
-		while (off < list_len){
-			//printf("extra file = \"%s\"\n", list_name + off);
-			len = strlen(list_name + off);
-
-			// check name in list, and ignore if exist
-			if (namez_search(par3_ctx->input_file_name, par3_ctx->input_file_name_len, list_name + off) != NULL){
-				//printf("extra file = \"%s\" is an input file.\n", list_name + off);
-
-				// remove from list of extra files
-				len += 1;	// add the last null string
-				memmove(list_name + off, list_name + off + len, list_len - off - len);
-				list_len -= len;
-
-			} else {	// goto next filename
-				off += len + 1;
-			}
-		}
-		par3_ctx->extra_file_name_len = list_len;
-
-		if (list_len == 0){	// When all extra files were par files
-			free(par3_ctx->extra_file_name);
-			par3_ctx->extra_file_name = NULL;
-			par3_ctx->extra_file_name_max = 0;
-		}
-
-		// Decrease memory for extra files.
-		if (par3_ctx->extra_file_name_len < par3_ctx->extra_file_name_max){
-			//printf("extra_file_name_len = %zu, extra_file_name_max = %zu\n", par3_ctx->extra_file_name_len, par3_ctx->extra_file_name_max);
-			list_name = realloc(par3_ctx->extra_file_name, par3_ctx->extra_file_name_len);
-			if (list_name == NULL){
-				perror("Failed to allocate memory for extra file");
-				return RET_MEMORY_ERROR;
-			}
-			par3_ctx->extra_file_name = list_name;
-			par3_ctx->extra_file_name_max = par3_ctx->extra_file_name_len;
-		}
-	}
-
-	// Table setup for slide window search
-	init_crc_slide_table(par3_ctx, 3);
-	ret = crc_list_make(par3_ctx);
-	if (ret != 0)
-		return ret;
-	if (par3_ctx->noise_level >= 2){
-		printf("Number of full size block = %"PRIu64", chunk tail = %"PRIu64"\n", par3_ctx->crc_count, par3_ctx->tail_count);
-/*
-		// for debug
-		for (uint64_t i = 0; i < par3_ctx->crc_count; i++){
-			printf("crc_list[%2"PRIu64"] = 0x%016"PRIx64" , block = %"PRIu64"\n", i, par3_ctx->crc_list[i].crc, par3_ctx->crc_list[i].index);
-		}
-		for (uint64_t i = 0; i < par3_ctx->tail_count; i++){
-			printf("tail_list[%2"PRIu64"] = 0x%016"PRIx64" , slice = %"PRIu64"\n", i, par3_ctx->tail_list[i].crc, par3_ctx->tail_list[i].index);
-		}
-*/
-	}
-
-	if (par3_ctx->noise_level >= 0){
-		printf("\nVerifying input files:\n\n");
-	}
-
-	// Allocate buffer to store file data temporary.
-	par3_ctx->work_buf = malloc(par3_ctx->block_size * 2);
-	if (par3_ctx->work_buf == NULL){
-		perror("Failed to allocate memory for temporary file data");
-		return RET_MEMORY_ERROR;
-	}
-
-	file_p = par3_ctx->input_file_list;
-	for (num = 0; num < par3_ctx->input_file_count; num++){
-		ret = check_file(par3_ctx, file_p->name, &current_size, file_p->offset);
-		//printf("check_file = 0x%x, size = %"PRIu64"\n", ret, current_size);
-		file_p->state |= ret;
-		if ( ((ret & 0xFFFF) == 0) && ( (file_p->size > 0) || (current_size > 0) ) ){
-			if (par3_ctx->noise_level >= 0){
-				printf("Opening: \"%s\"\n", file_p->name);
-			}
-			file_offset = 0;
-			ret = check_complete_file(par3_ctx, file_p->name, num, current_size, &file_offset);
-			//printf("ret = %d, size = %"PRIu64", offset = %"PRIu64"\n", ret, current_size, file_offset);
-			if (ret > 0)
-				return ret;	// error
-			if (ret == 0){
-				if (file_p->state & 0x7FFF0000){
-					*bad_file_count += 1;
-					if (par3_ctx->noise_level >= -1){
-						if ((file_p->state & 0x7FFF0000) == 0x10000){
-							printf("Target: \"%s\" - different timestamp.\n", file_p->name);
-						} else if ((file_p->state & 0x7FFF0000) == 0x20000){
-							printf("Target: \"%s\" - different permissions.\n", file_p->name);
-						} else {
-							printf("Target: \"%s\" - different property.\n", file_p->name);
-						}
-					}
-				} else {
-					// While file data is complete, file name may be different case on Windows PC.
-					// Because Windows OS is case insensitive, I ignore the case, too.
-					if (par3_ctx->noise_level >= -1){
-						if (file_p->state & 0x80000000){	// Completeness of unprotected chunks is unknown.
-							printf("Target: \"%s\" - protected data is complete.\n", file_p->name);
-						} else {
-							printf("Target: \"%s\" - complete.\n", file_p->name);
-						}
-					}
-				}
-			} else {
-				file_p->state |= 2;
-				*damaged_file_count += 1;
-
-				// Start slide search after the last found block position.
-				ret = check_damaged_file(par3_ctx, file_p->name, current_size, file_offset, &file_damage, NULL);
-				//printf("ret = %d, size = %"PRIu64", offset = %"PRIu64", damage = %"PRIu64"\n",
-				//		ret, current_size, file_offset, file_damage);
-				if (ret != 0)
-					return ret;
-				if (par3_ctx->noise_level >= -1){
-					printf("Target: \"%s\" - damaged. %"PRIu64" of %"PRIu64" bytes available.\n",
-							file_p->name, current_size - file_damage, current_size);
-				}
-			}
-
-		} else {
-			if (par3_ctx->noise_level >= -1){
-				printf("Target: \"%s\"", file_p->name);
-			}
-			if (ret == 0){
-				if (par3_ctx->noise_level >= -1){
-					printf(" - found.\n");
-				}
-			} else if (ret == 1){
-				*missing_file_count += 1;
-				if (par3_ctx->noise_level >= -1){
-					printf(" - missing.\n");
-				}
-			} else if (ret == 0x8000){
-				*missing_file_count += 1;
-				if (par3_ctx->noise_level >= -1){
-					printf(" - not file.\n");
-				}
-			} else {
-				if (par3_ctx->noise_level >= -1){
-					printf(" - unknown.\n");
-				}
-			}
-		}
-
-		file_p++;
-	}
-
-	return 0;
-}
-
-// Check extra files and misnamed files.
-int verify_extra_file(PAR3_CTX *par3_ctx, uint32_t *missing_file_count, uint32_t *damaged_file_count, uint32_t *misnamed_file_count)
-{
-	int ret, flag_show = 0;
-	char *list_name;
-	size_t len, off, list_len;
-	uint8_t buf_hash[16], *tmp_p;
-	uint32_t num, extra_id;
-	uint64_t current_size, file_damage;
-	PAR3_FILE_CTX *file_p;
-
-	if (par3_ctx->extra_file_name_len == 0)
-		return 0;
-
-	extra_id = 0;
-	list_name = par3_ctx->extra_file_name;
-	list_len = par3_ctx->extra_file_name_len;
-	off = 0;
-	while (off < list_len){
-		len = strlen(list_name + off);
-
-		if (par3_ctx->noise_level >= 0){
-			if (flag_show == 0){
-				flag_show++;
-				printf("\nScanning extra files:\n\n");
-			}
-
-			printf("Opening: \"%s\"\n", list_name + off);
-		}
-
-		// Get file size
-		ret = check_file(par3_ctx, list_name + off, &current_size, -1);
-		if ((ret & 0xFFFF) != 0){
-			if (par3_ctx->noise_level >= -1){
-				printf("Target: \"%s\" - unknown.\n", list_name + off);
-			}
-			extra_id++;
-			off += len + 1;	// goto next filename
-			break;
-		}
-
-		// Check possibility of misnamed file
-		tmp_p = NULL;
-		file_p = par3_ctx->input_file_list;
-		num = par3_ctx->input_file_count;
-		while (num > 0){
-			// No need to compare to compelete input files.
-			if (file_p->state & (1 | 2)){	// missing or damaged
-				if (file_p->size == current_size){
-					//printf("Calculate file hash to check misnamed file later.\n");
-					tmp_p = buf_hash;
-					break;
-				}
-			}
-
-			file_p++;
-			num--;
-		}
-
-		// Calculate file hash to find misnamed file later.
-		ret = check_damaged_file(par3_ctx, list_name + off, current_size, 0, &file_damage, tmp_p);
-		//printf("ret = %d, size = %"PRIu64", damage = %"PRIu64"\n", ret, current_size, file_damage);
-		if (ret != 0)
-			return ret;
-
-		if (tmp_p != NULL){	// Check misnamed file here
-/*
-// for debug
-printf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-	buf_hash[0], buf_hash[1], buf_hash[2], buf_hash[3],
-	buf_hash[4], buf_hash[5], buf_hash[6], buf_hash[7],
-	buf_hash[8], buf_hash[9], buf_hash[10], buf_hash[11],
-	buf_hash[12], buf_hash[13], buf_hash[14], buf_hash[15]);
-*/
-
-			// Compare size and hash to find misnamed file.
-			file_p = par3_ctx->input_file_list;
-			num = par3_ctx->input_file_count;
-			while (num > 0){
-				// No need to compare to compelete input files.
-				if (file_p->state & (1 | 2)){	// missing or damaged
-					if (file_p->size == current_size){
-						if (memcmp(file_p->hash, buf_hash, 16) == 0){
-							*misnamed_file_count += 1;
-							if (file_p->state & 1){	// When this was missing file.
-								*missing_file_count -= 1;
-							} else if (file_p->state & 2){	// When this was damaged file.
-								*damaged_file_count -= 1;
-							}
-							file_p->state |= (extra_id << 3) | 4;
-
-							//printf("Extra file[%u] is misnamed file of \"%s\".\n", extra_id, file_p->name);
-							ret = 4;
-							break;
-						}
-					}
-				}
-
-				file_p++;
-				num--;
-			}
-		}
-
-		if (par3_ctx->noise_level >= -1){
-			if (ret & 4){
-				printf("Target: \"%s\" - is a match for \"%s\".\n", list_name + off, file_p->name);
-			} else {
-				printf("Target: \"%s\" - %"PRIu64" of %"PRIu64" bytes available.\n",
-						list_name + off, current_size - file_damage, current_size);
-			}
-		}
-
-		extra_id++;
-		off += len + 1;	// goto next filename
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/verify.h b/windows/src/verify.h
deleted file mode 100644
index a502164..0000000
--- a/windows/src/verify.h
+++ /dev/null
@@ -1,15 +0,0 @@
-
-int check_directory(PAR3_CTX *par3_ctx, char *path, int64_t offset);
-void check_input_directory(PAR3_CTX *par3_ctx, uint32_t *missing_dir_count, uint32_t *bad_dir_count);
-
-int verify_input_file(PAR3_CTX *par3_ctx, uint32_t *missing_file_count, uint32_t *damaged_file_count, uint32_t *bad_file_count);
-int verify_extra_file(PAR3_CTX *par3_ctx, uint32_t *missing_file_count, uint32_t *damaged_file_count, uint32_t *misnamed_file_count);
-
-
-// Find available slices in an input file
-int check_complete_file(PAR3_CTX *par3_ctx, char *filename, uint32_t file_id,
-	uint64_t current_size, uint64_t *offset_next);
-
-int check_damaged_file(PAR3_CTX *par3_ctx, char *filename,
-	uint64_t file_size, uint64_t file_offset, uint64_t *file_damage, uint8_t *file_hash);
-
diff --git a/windows/src/verify_check.c b/windows/src/verify_check.c
deleted file mode 100644
index f18d232..0000000
--- a/windows/src/verify_check.c
+++ /dev/null
@@ -1,1013 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#define _fileno fileno
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#ifdef __linux__
-#elif _WIN32
-// MSVC headers
-#include <io.h>
-#endif
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "common.h"
-#include "hash.h"
-
-
-/*
-offset_next = File data is complete until here.
-When checking after repair, offset_next should be NULL.
-
-return 0 = complete, -1 = not enough data, -2 = too many data
- -3 = CRC of the first 16 KB is different, -4 = block data is different
- -5 = chunk tail is different, -6 = tiny chunk tail is different
- -7 = file hash is different
-*/
-int check_complete_file(PAR3_CTX *par3_ctx, char *filename, uint32_t file_id,
-	uint64_t current_size, uint64_t *offset_next)
-{
-	uint8_t *work_buf, buf_tail[40], buf_hash[16];
-	uint32_t chunk_index, chunk_num, flag_unknown;
-	int64_t block_index;
-	uint64_t block_size, slice_index;
-	uint64_t chunk_size, tail_size;
-	uint64_t file_size, file_offset;
-	uint64_t size16k, crc16k, crc;
-	PAR3_FILE_CTX *file_p;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-	FILE *fp;
-	blake3_hasher hasher;
-
-	if (filename == NULL){
-		printf("File name is bad.\n");
-		return RET_LOGIC_ERROR;
-	}
-	if (file_id >= par3_ctx->input_file_count){
-		printf("File ID is bad. %u\n", file_id);
-		return RET_LOGIC_ERROR;
-	}
-
-	file_p = par3_ctx->input_file_list + file_id;
-	file_size = file_p->size;
-	chunk_num = file_p->chunk_num;
-	if (offset_next != NULL){	// Don't show this after repair.
-		if (par3_ctx->noise_level >= 1){
-			printf("chunk count = %u, current file size = %"PRIu64", original size = %"PRIu64"\n", chunk_num, current_size, file_size);
-		}
-		if ( (file_p->state & 0x80000000) && (par3_ctx->noise_level >= 2) ){
-			chunk_list = par3_ctx->chunk_list;
-			block_index = 0;
-			chunk_index = file_p->chunk;
-			while (chunk_num > 0){
-				if (chunk_list[chunk_index].size == 0)
-					block_index++;
-				chunk_index++;
-				chunk_num--;
-			}
-			printf("Number of unprotected chunk = %"PRId64"\n", block_index);
-			chunk_num = file_p->chunk_num;
-		}
-	}
-	if ( (file_size == 0) && (current_size > 0) ){
-		// If original file size was 0, no need to check file data.
-		return -2;
-	}
-
-	// Copy variables from context to local.
-	block_size = par3_ctx->block_size;
-	work_buf = par3_ctx->work_buf;
-	chunk_list = par3_ctx->chunk_list;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-
-	fp = fopen(filename, "rb");
-	if (fp == NULL){
-		perror("Failed to open input file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (offset_next == NULL){	// Check file size after repair
-		int file_no = _fileno(fp);
-		if (file_no >= 0){
-			current_size = _filelengthi64(file_no);
-			//printf("file_size = %"PRIu64", current_size = %"PRIu64"\n", file_size, current_size);
-			if (current_size < file_size){
-				fclose(fp);
-				return -1;
-			} else if (current_size > file_size){
-				fclose(fp);
-				return -2;
-			}
-		}
-	}
-
-	// Only when stored CRC-64 is valid, check the first 16 KB.
-	crc16k = 0;
-	if (file_p->state & 0x80000000){	// There is Unprotected Chunk Description. Such like "PAR inside".
-		size16k = 0;
-	} else if (file_size < 16384){
-		size16k = file_size;
-	} else {
-		size16k = 16384;
-	}
-
-	chunk_index = file_p->chunk;	// First chunk in this file
-	slice_index = file_p->slice;
-	chunk_size = chunk_list[chunk_index].size;
-	block_index = chunk_list[chunk_index].block;
-	if (par3_ctx->noise_level >= 3){
-		printf("first chunk = %u, size = %"PRIu64", block index = %"PRId64"\n", chunk_index, chunk_size, block_index);
-	}
-	blake3_hasher_init(&hasher);
-	flag_unknown = 0;
-
-	file_offset = 0;
-	while (chunk_size == 0){	// zeros if not protected
-		// Seek to end of unprotected chunk
-		if (_fseeki64(fp, block_index, SEEK_CUR) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-		file_offset += block_index;
-		chunk_num--;
-		if (chunk_num > 0){
-			chunk_index++;
-			chunk_size = chunk_list[chunk_index].size;
-			block_index = chunk_list[chunk_index].block;
-			if (par3_ctx->noise_level >= 3){
-				printf("next chunk = %u, size = %"PRIu64", block index = %"PRId64"\n", chunk_index, chunk_size, block_index);
-			}
-		}
-	}
-	while ( (file_offset < current_size) && (file_offset < file_size) ){
-		if (chunk_size > 0){	// read chunk data
-			if (chunk_size >= block_size){
-				if (file_offset + block_size > current_size){	// Not enough data
-					fclose(fp);
-					return -1;
-				}
-				if ( (file_offset == 0) && (size16k > 0) && (size16k < block_size) ){
-					// When block size is larger than 16 KB, check the first 16 KB at first.
-					if (fread(work_buf, 1, (size_t)size16k, fp) != size16k){
-						perror("Failed to read the first 16 KB of input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					crc16k = crc64(work_buf, (size_t)size16k, 0);
-					if (crc16k != file_p->crc){
-						fclose(fp);
-						return -3;
-					}
-					if (fread(work_buf + size16k, 1, (size_t)(block_size - size16k), fp) != block_size - size16k){
-						perror("Failed to read the first block of input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					size16k = 0;
-
-				} else {
-					if (fread(work_buf, 1, (size_t)block_size, fp) != block_size){
-						perror("Failed to read a block on input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-				}
-
-				// Check CRC-64 of the first 16 KB
-				if (size16k > 0){
-					if (size16k <= block_size){
-						crc16k = crc64(work_buf, (size_t)size16k, crc16k);
-						size16k = 0;
-						if (crc16k != file_p->crc){
-							fclose(fp);
-							return -3;
-						}
-					} else {	// need more data
-						crc16k = crc64(work_buf, (size_t)block_size, crc16k);
-						size16k -= block_size;
-					}
-				}
-
-				// Comparison is possible, only when checksum exists.
-				if (block_list[block_index].state & 64){
-
-					// Check CRC-64 at first
-					crc = crc64(work_buf, (size_t)block_size, 0);
-					//printf("crc = 0x%016"PRIx64", 0x%016"PRIx64"\n", crc, block_list[block_index].crc);
-					if (crc == block_list[block_index].crc){
-						blake3(work_buf, (size_t)block_size, buf_hash);
-						if (memcmp(buf_hash, block_list[block_index].hash, 16) == 0){
-							if (par3_ctx->noise_level >= 3){
-								printf("full block[%2"PRId64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset = %"PRIu64"\n",
-										block_index, slice_index, chunk_index, file_id, file_offset);
-							}
-							slice_list[slice_index].find_name = file_p->name;
-							slice_list[slice_index].find_offset = file_offset;
-							block_list[block_index].state |= 4;
-						} else {	// BLAKE3 hash is different.
-							block_index = -1;
-						}
-					} else {	// CRC-64 is different.
-						block_index = -1;
-					}
-					if (block_index == -1){	// Block data is different.
-						fclose(fp);
-						return -4;
-					}
-
-				} else {	// This block's checksum is unknown.
-					// When current file size is smaller than original size, it's impossible to check file's hash value.
-					if (current_size < file_size){
-						fclose(fp);
-						return -1;
-					}
-
-					// set this checksum temporary
-					block_list[block_index].crc = crc64(work_buf, (size_t)block_size, 0);
-					blake3(work_buf, (size_t)block_size, block_list[block_index].hash);
-
-					flag_unknown = 1;	// sign of unknown checksum
-				}
-
-				blake3_hasher_update(&hasher, work_buf, (size_t)block_size);
-				block_index++;
-				slice_index++;
-				chunk_size -= block_size;
-				file_offset += block_size;
-				if ( (flag_unknown == 0) && (offset_next != NULL) )
-					*offset_next = file_offset;
-
-			} else if (chunk_size >= 40){
-				tail_size = chunk_size;
-				if (file_offset + tail_size > current_size){	// Not enough data
-					fclose(fp);
-					return -1;
-				}
-				if ( (file_offset == 0) && (size16k > 0) && (size16k < tail_size) ){
-					// When block size is larger than 16 KB, check the first 16 KB at first.
-					if (fread(work_buf, 1, (size_t)size16k, fp) != size16k){
-						perror("Failed to read the first 16 KB of input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					crc16k = crc64(work_buf, (size_t)size16k, 0);
-					if (crc16k != file_p->crc){
-						fclose(fp);
-						return -3;
-					}
-					if (fread(work_buf + size16k, 1, (size_t)(tail_size - size16k), fp) != tail_size - size16k){
-						perror("Failed to read the first tail of input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-					size16k = 0;
-
-				} else {
-					if (fread(work_buf, 1, (size_t)tail_size, fp) != tail_size){
-						perror("Failed to read a tail on input file");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-				}
-
-				// Check CRC-64 of the first 16 KB
-				if (size16k > 0){
-					if (size16k <= tail_size){
-						crc16k = crc64(work_buf, (size_t)size16k, crc16k);
-						size16k = 0;
-						if (crc16k != file_p->crc){
-							fclose(fp);
-							return -3;
-						}
-					} else {	// need more data
-						crc16k = crc64(work_buf, (size_t)tail_size, crc16k);
-						size16k -= tail_size;
-					}
-				}
-
-				// Check CRC-64 at first
-				block_index = chunk_list[chunk_index].tail_block;
-				crc = crc64(work_buf, 40, 0);	// Check the first 40-bytes only.
-				if (crc == chunk_list[chunk_index].tail_crc){
-					blake3(work_buf, (size_t)tail_size, buf_hash);
-					if (memcmp(buf_hash, chunk_list[chunk_index].tail_hash, 16) == 0){
-						if (par3_ctx->noise_level >= 3){
-							printf("tail block[%2"PRId64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset = %"PRIu64", size = %"PRIu64"\n",
-									block_index, slice_index, chunk_index, file_id, file_offset, tail_size);
-						}
-						slice_list[slice_index].find_name = file_p->name;
-						slice_list[slice_index].find_offset = file_offset;
-						block_list[block_index].state |= 8;
-					} else {	// BLAKE3 hash is different.
-						block_index = -1;
-					}
-				} else {	// CRC-64 is different.
-					block_index = -1;
-				}
-
-				if (block_index == -1){	// Tail data is different.
-					fclose(fp);
-					return -5;
-				}
-				blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-				slice_index++;
-				chunk_size -= tail_size;
-				file_offset += tail_size;
-				if ( (flag_unknown == 0) && (offset_next != NULL) )
-					*offset_next = file_offset;
-
-			} else if (chunk_size > 0){	// 1 ~ 39 bytes
-				tail_size = chunk_size;
-				if (file_offset + tail_size > current_size){	// Not enough data
-					fclose(fp);
-					return -1;
-				}
-				if (fread(work_buf, 1, (size_t)tail_size, fp) != tail_size){
-					perror("Failed to read tail data on input file");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-
-				// Check CRC-64 of the first 16 KB
-				if (size16k > 0){
-					if (size16k <= tail_size){
-						crc16k = crc64(work_buf, (size_t)size16k, crc16k);
-						size16k = 0;
-						if (crc16k != file_p->crc){
-							fclose(fp);
-							return -3;
-						}
-					} else {	// need more data
-						crc16k = crc64(work_buf, (size_t)tail_size, crc16k);
-						size16k -= tail_size;
-					}
-				}
-
-				// copy tail bytes
-				memcpy(buf_tail, &(chunk_list[chunk_index].tail_crc), 8);
-				memcpy(buf_tail + 8, chunk_list[chunk_index].tail_hash, 16);
-				memcpy(buf_tail + 24, &(chunk_list[chunk_index].tail_block), 8);
-				memcpy(buf_tail + 32, &(chunk_list[chunk_index].tail_offset), 8);
-
-				// Compare bytes directly.
-				if (memcmp(work_buf, buf_tail, (size_t)tail_size) == 0){
-					if (par3_ctx->noise_level >= 3){
-						printf("tail block no  : slice no  chunk[%2u] file %d, offset = %"PRIu64", size = %"PRIu64"\n",
-								chunk_index, file_id, file_offset, tail_size);
-					}
-					// Tiny chunk tail isn't counted as searching slice.
-				} else {	// Tail data is different.
-					fclose(fp);
-					return -6;
-				}
-
-				blake3_hasher_update(&hasher, work_buf, (size_t)tail_size);
-				chunk_size -= tail_size;
-				file_offset += tail_size;
-				if ( (flag_unknown == 0) && (offset_next != NULL) )
-					*offset_next = file_offset;
-			}
-
-		} else {	// goto next chunk
-			chunk_num--;
-			if (chunk_num == 0)
-				break;	// When there is no chunk description anymore, exit from loop.
-			chunk_index++;
-
-			chunk_size = chunk_list[chunk_index].size;
-			block_index = chunk_list[chunk_index].block;
-			if (par3_ctx->noise_level >= 3){
-				printf("next chunk = %u, size = %"PRIu64", block index = %"PRId64"\n", chunk_index, chunk_size, block_index);
-			}
-			if (chunk_size == 0){	// zeros if not protected
-				// Seek to end of unprotected chunk
-				if (_fseeki64(fp, block_index, SEEK_CUR) != 0){
-					perror("Failed to seek Outside file");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				file_offset += block_index;
-			}
-		}
-	}
-
-	if (fclose(fp) != 0){
-		perror("Failed to close input file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (file_offset < file_size){
-		// File size is smaller than original size.
-		return -1;
-	}
-
-	// Check file's hash at the last.
-	blake3_hasher_finalize(&hasher, buf_hash, 16);
-	if (memcmp(buf_hash, file_p->hash, 16) != 0){
-		if (mem_or16(file_p->hash) != 0){	// Ignore case of zero bytes, as it was not calculated.
-			// File hash is different.
-			return -7;
-		}
-
-	} else if (flag_unknown != 0){
-		// Even when checksum is unknown, file data is complete.
-		if (offset_next != NULL)
-			*offset_next = file_size;
-		file_offset = 0;
-
-		// Set block info
-		chunk_index = file_p->chunk;
-		chunk_num = file_p->chunk_num;
-		slice_index = file_p->slice;
-		while (chunk_num > 0){
-			chunk_size = chunk_list[chunk_index].size;
-			block_index = chunk_list[chunk_index].block;
-
-			if (chunk_size == 0){	// Unprotected Chunk Description
-				file_offset += block_index;
-
-			} else {	// Protected Chunk Description
-				// Check all blocks in the chunk
-				while (chunk_size >= block_size){
-					if (slice_list[slice_index].find_name == NULL){	// When this slice was not found.
-						if (par3_ctx->noise_level >= 3){
-							printf("full block[%2"PRId64"] : slice[%2"PRIu64"] chunk[%2u] file %d, offset = %"PRIu64", no checksum\n",
-									block_index, slice_index, chunk_index, file_id, file_offset);
-						}
-						slice_list[slice_index].find_name = file_p->name;
-						slice_list[slice_index].find_offset = file_offset;	// Set slice at ordinary position.
-
-						if ((block_list[block_index].state & 64) == 0){	// There was no checksum for this block.
-							block_list[block_index].state |= (4 | 64);	// Found block and calculated its checksum.
-
-							// It's possible to use this checksum for later search.
-							crc_list_replace(par3_ctx, block_list[block_index].crc, block_index);
-						}
-					}
-
-					slice_index++;
-					block_index++;
-					file_offset += block_size;
-					chunk_size -= block_size;
-				}
-				if (chunk_size >= 40)
-					slice_index++;
-				file_offset += chunk_size;
-			}
-
-			chunk_index++;	// goto next chunk
-			chunk_num--;
-		}
-	}
-
-	// At last, file data of original size is complete.
-	if (file_offset > file_size){
-		// But, file size is larger than original size.
-		return -2;
-	}
-
-	return 0;
-}
-
-#define CHECK_SLIDE_INTERVAL 8
-#define CHECK_SLIDE_RANGE 10
-
-// This checks available slices in the file.
-// This uses pointer of filename, instead of file ID.
-int check_damaged_file(PAR3_CTX *par3_ctx, char *filename,
-	uint64_t file_size, uint64_t file_offset, uint64_t *file_damage, uint8_t *file_hash)
-{
-	uint8_t *work_buf, buf_hash[16], buf_hash2[16];
-	int flag_slide, hash_counter;
-	int64_t find_index, block_index, slice_index;
-	int64_t crc_count, tail_count;
-	int64_t next_offset, next_slice, slice_count;
-	uint64_t block_size, read_size, slide_offset, slide_start;
-	uint64_t crc, crc40, tail_size, temp_crc;
-	uint64_t uniform_start, uniform_end, hash_offset;
-	uint64_t window_mask, *window_table, window_mask40, *window_table40;
-	uint64_t damage_size, find_last, find_min, find_max;
-	PAR3_BLOCK_CTX *block_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_CHUNK_CTX *chunk_list;
-	PAR3_CMP_CTX *crc_list, *tail_list;
-	FILE *fp;
-	clock_t time_slide, time_limit;
-	blake3_hasher hasher;
-
-	if (filename == NULL){
-		printf("File name is bad.\n");
-		return RET_LOGIC_ERROR;
-	}
-	if (file_offset >= file_size){
-		if (file_damage != NULL)
-			*file_damage = 0;
-		return 0;
-	}
-	if (par3_ctx->noise_level >= 1){
-		printf("current file size = %"PRIu64", start = %"PRIu64", \"%s\"\n", file_size, file_offset, filename);
-	}
-
-	// File data till file_offset is available.
-	damage_size = 0;
-	find_last = find_max = file_offset;
-
-	// Copy variables from context to local.
-	block_size = par3_ctx->block_size;
-	slice_count = par3_ctx->slice_count;
-	work_buf = par3_ctx->work_buf;
-	block_list = par3_ctx->block_list;
-	slice_list = par3_ctx->slice_list;
-	chunk_list = par3_ctx->chunk_list;
-
-	// Set time limit for slide search
-	if (par3_ctx->search_limit != 0){
-		time_limit = par3_ctx->search_limit;
-	} else {
-		time_limit = 100;	// Time out is 100 ms by default.
-	}
-
-	// Prepare to search blocks.
-	window_mask = par3_ctx->window_mask;
-	window_table = par3_ctx->window_table;
-	window_mask40 = par3_ctx->window_mask40;
-	window_table40 = par3_ctx->window_table40;
-
-	// Copy CRC-list for local usage.
-	crc_count = par3_ctx->crc_count;
-	crc_list = par3_ctx->crc_list + crc_count; // Allocated memory for CRC-list was double size.
-	memcpy(crc_list, par3_ctx->crc_list, sizeof(PAR3_CMP_CTX) * crc_count);
-	tail_count = par3_ctx->tail_count;
-	tail_list = par3_ctx->tail_list + tail_count;
-	memcpy(tail_list, par3_ctx->tail_list, sizeof(PAR3_CMP_CTX) * tail_count);
-	// It's possible to remove items from the list, when a slice was found in this file.
-
-	fp = fopen(filename, "rb");
-	if (fp == NULL){
-		perror("Failed to open input file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Move file pinter
-	if (file_offset > 0){
-		if (_fseeki64(fp, file_offset, SEEK_SET) != 0){
-			perror("Failed to seek input file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Read two blocks at first.
-	read_size = block_size * 2;
-	if (read_size > file_size - file_offset)
-		read_size = file_size - file_offset;
-	if (fread(work_buf, 1, (size_t)read_size, fp) != read_size){
-		perror("Failed to read first blocks on input file");
-		fclose(fp);
-		return RET_FILE_IO_ERROR;
-	}
-	//printf("file_offset = %"PRIu64", read_size = %"PRIu64"\n", file_offset, read_size);
-	if (file_hash != NULL){
-		blake3_hasher_init(&hasher);
-		blake3_hasher_update(&hasher, work_buf, (size_t)read_size);
-	}
-
-	// Calculate CRC-64 of the first block.
-	if ( (crc_count > 0) && (read_size >= block_size) )
-		crc = crc64(work_buf, block_size, 0);
-	if ( (tail_count > 0) && (read_size >= 40) )
-		crc40 = crc64(work_buf, 40, 0);
-	//printf("block crc = 0x%016"PRIx64", tail crc = 0x%016"PRIx64"\n", crc, crc40);
-
-	next_offset = -1;
-	while (file_offset < file_size){
-		// Prepare to check range of found slices
-		find_min = file_size;
-
-		// Prepare to check range of uniform bytes.
-		uniform_start = 0xFFFFFFFFFFFFFFFF;
-		uniform_end = 0;
-
-		// Check predicted slice in orderly position at first.
-		flag_slide = 0;
-		slide_start = 0;
-		if (next_offset >= 0){
-			slide_offset = next_offset;
-			slice_index = next_slice;
-			next_offset = -1;	// Copied values and reset
-			tail_size = slice_list[slice_index].size;
-			//printf("Check at first, offset = %"PRId64", slice = %"PRId64", size = %"PRIu64"\n", slide_offset, slice_index, tail_size);
-			if (tail_size == block_size){	// Full size slice
-				block_index = slice_list[slice_index].block;	// index of block
-				temp_crc = crc64(work_buf + slide_offset, block_size, 0);
-				if (temp_crc == block_list[block_index].crc){
-					blake3(work_buf + slide_offset, block_size, buf_hash);
-					if (memcmp(buf_hash, block_list[block_index].hash, 16) == 0){
-						if (par3_ctx->noise_level >= 3){
-							printf("p fu block[%2"PRId64"] : slice[%2"PRId64"] offset = %"PRIu64" + %"PRIu64"\n",
-									block_index, slice_index, file_offset, slide_offset);
-						}
-						if ((block_list[block_index].state & 4) == 0){	// When this block was not found yet.
-							// Store filename & position of this slice for later reading.
-							slice_list[slice_index].find_name = filename;
-							slice_list[slice_index].find_offset = file_offset + slide_offset;
-							block_list[block_index].state |= 4;
-						}
-						if (find_min > file_offset + slide_offset)
-							find_min = file_offset + slide_offset;
-						if (find_max < file_offset + slide_offset + block_size)
-							find_max = file_offset + slide_offset + block_size;
-
-						// When CRC and BLAKE3 match, remove this item from crc_list.
-						find_index = cmp_list_search_index(par3_ctx, temp_crc, block_index, crc_list, crc_count);
-						if (find_index >= 0){
-							if (find_index + 1 < crc_count)
-								memmove(crc_list + find_index, crc_list + find_index + 1, sizeof(PAR3_CMP_CTX) * (crc_count - find_index - 1));
-							crc_count--;
-							//printf("Remove item[%"PRId64"] : block[%"PRIu64"] from crc_list. crc_count = %"PRIu64"\n", find_index, block_index, crc_count);
-						}
-
-						// When predicted slice was found, cancel slide search.
-						if (slice_list[slice_index].next == -1){	// There is only one slice for the found block.
-							if (slice_index + 1 < slice_count){	// There is next slice
-								if (slice_list[slice_index + 1].chunk == slice_list[slice_index].chunk){	// Belong to same chunk
-									next_offset = slide_offset;
-									next_slice = slice_index + 1;
-									flag_slide |= 7;	// Cancel slide and calculate CRC-64 after reading next block.
-								}
-							}
-						}
-					}
-				}
-
-			} else {	// Chunk tail slice
-				temp_crc = crc64(work_buf + slide_offset, 40, 0);
-				if (temp_crc == chunk_list[slice_list[slice_index].chunk].tail_crc){
-					blake3(work_buf + slide_offset, tail_size, buf_hash);
-					if (memcmp(buf_hash, chunk_list[slice_list[slice_index].chunk].tail_hash, 16) == 0){
-						block_index = slice_list[slice_index].block;	// index of block
-						if (par3_ctx->noise_level >= 3){
-							printf("p ta block[%2"PRId64"] : slice[%2"PRId64"] offset = %"PRIu64" + %"PRIu64", tail size = %"PRIu64", offset = %"PRIu64"\n",
-									block_index, slice_index, file_offset, slide_offset, tail_size, slice_list[slice_index].tail_offset);
-						}
-						if (slice_list[slice_index].find_name == NULL){	// When this slice was not found yet.
-							// Store filename & position of this slice for later reading.
-							slice_list[slice_index].find_name = filename;
-							slice_list[slice_index].find_offset = file_offset + slide_offset;
-							block_list[block_index].state |= 8;
-						}
-						if (find_min > file_offset + slide_offset)
-							find_min = file_offset + slide_offset;
-						if (find_max < file_offset + slide_offset + tail_size)
-							find_max = file_offset + slide_offset + tail_size;
-
-						// When CRC and BLAKE3 match, remove this item from tail_list.
-						find_index = cmp_list_search_index(par3_ctx, temp_crc, slice_index, tail_list, tail_count);
-						if (find_index >= 0){
-							if (find_index + 1 < tail_count)
-								memmove(tail_list + find_index, tail_list + find_index + 1, sizeof(PAR3_CMP_CTX) * (tail_count - find_index - 1));
-							tail_count--;
-							//printf("Remove item[%"PRId64"] : block[%"PRIu64"] from tail_list. tail_count = %"PRIu64"\n", find_index, block_index, tail_count);
-						}
-
-						// Even when predicted slice was found, continue slide search.
-						slide_start = slide_offset + tail_size;	// Set starting offset to the last of chunk tail.
-						if (slide_start >= block_size){	// When slide will be canceled.
-							flag_slide |= 7;	// Cancel slide and calculate CRC-64 after reading next block.
-						} else {
-							// Calculate CRC-64 to slide later.
-							if ( (crc_count > 0) && (file_offset + slide_start + block_size <= file_size) ){
-								//printf("Calculate CRC-64 of next block.\n");
-								crc = crc64(work_buf + slide_start, block_size, 0);
-							}
-							if ( (tail_count > 0) && (file_offset + slide_start + 40 <= file_size) ){
-								//printf("Calculate CRC-64 of next tail.\n");
-								crc40 = crc64(work_buf + slide_start, 40, 0);
-							}
-						}
-					}
-				}
-			}
-		} else {
-			next_offset = -1;
-		}
-
-		// Compare current CRC-64 with full size blocks.
-		if ( ((flag_slide & 4) == 0) && (crc_count > 0) && (file_offset + slide_start + block_size <= file_size) ){
-			//printf("slide: offset = %"PRIu64" + %"PRIu64", block crc = 0x%016"PRIx64"\n", file_offset, slide_start, crc);
-			hash_counter = 0;
-			hash_offset = 0;
-			time_slide = clock();	// Store starting time of slide search.
-			slide_offset = slide_start;
-			while ( (slide_offset < block_size) && (file_offset + slide_offset + block_size <= file_size) ){
-				tail_size = 0;
-				// find_index is the first index of the matching CRC-64. There may be multiple items.
-				find_index = cmp_list_search(par3_ctx, crc, crc_list, crc_count);
-				while (find_index >= 0){	// When CRC-64 is same.
-					block_index = crc_list[find_index].index;	// index of block
-					if (tail_size == 0){	// When it didn't hash the block data yet.
-						tail_size++;
-						blake3(work_buf + slide_offset, block_size, buf_hash);
-
-						// Count number of hashing.
-						if (hash_counter == 0)
-							hash_offset = slide_offset;
-						hash_counter++;
-						//printf("block[%"PRIu64"], hashing = %d, offset = %"PRIu64" + %"PRIu64".\n", block_index, hash_counter, file_offset, slide_offset);
-					}
-					if (memcmp(buf_hash, block_list[block_index].hash, 16) == 0){
-						slice_index = block_list[block_index].slice;
-						if (par3_ctx->noise_level >= 3){
-							printf("full block[%2"PRId64"] : slice[%2"PRId64"] offset = %"PRIu64" + %"PRIu64"\n",
-									block_index, slice_index, file_offset, slide_offset);
-						}
-						if ((block_list[block_index].state & 4) == 0){	// When this block was not found yet.
-							// Store filename & position of this slice for later reading.
-							slice_list[slice_index].find_name = filename;
-							slice_list[slice_index].find_offset = file_offset + slide_offset;
-							block_list[block_index].state |= 4;
-						}
-						if (find_min > file_offset + slide_offset)
-							find_min = file_offset + slide_offset;
-						if (find_max < file_offset + slide_offset + block_size)
-							find_max = file_offset + slide_offset + block_size;
-
-						// Store offset of found block to check at first in next loop.
-						if (next_offset == -1){
-							next_offset = slide_offset;
-							next_slice = slice_index;
-						} else {
-							next_offset = -2;
-						}
-
-						// When CRC and BLAKE3 match, remove this item from crc_list.
-						if (find_index + 1 < crc_count)
-							memmove(crc_list + find_index, crc_list + find_index + 1, sizeof(PAR3_CMP_CTX) * (crc_count - find_index - 1));
-						crc_count--;
-						// The same block won't be found in this file anymore.
-						// It may be found in another damaged or extra file.
-						//printf("Remove item[%"PRId64"] : block[%"PRIu64"] from crc_list. crc_count = %"PRIu64"\n", find_index, block_index, crc_count);
-						// If the block was found in another file already, find and remove the item again.
-
-					} else {	// Goto next item
-						find_index++;
-					}
-
-					if (find_index == crc_count)
-						break;
-					if (crc_list[find_index].crc != crc)
-						break;
-				}
-
-				temp_crc = crc;	// Save previous CRC-64 to compare later
-				crc = window_mask ^ crc_slide_byte(window_mask ^ crc,
-						work_buf[slide_offset + block_size], work_buf[slide_offset], window_table);
-				slide_offset++;
-
-				if (hash_counter >= CHECK_SLIDE_INTERVAL){	// Check freeze after sliding several bytes each.
-					// When hashing over than 8 times per 1 KB range.
-					if (slide_offset - hash_offset <= ((uint64_t)CHECK_SLIDE_INTERVAL << CHECK_SLIDE_RANGE)){
-						// When sliding over than time limit.
-						if (clock() - time_slide >= time_limit){
-							if (par3_ctx->noise_level >= 1){
-								printf("Interrupt slide block by time out. offset = %"PRIu64" + %"PRIu64".\n", file_offset, slide_offset);
-							}
-							flag_slide |= 1;
-							break;
-						}
-					}
-					hash_counter = 0;
-					hash_offset = slide_offset;
-				}
-				if (crc == temp_crc){	// When CRC-64 is same after sliding 1 byte.
-					if (tail_size == 0)
-						blake3(work_buf + slide_offset - 1, block_size, buf_hash);
-					blake3(work_buf + slide_offset, block_size, buf_hash2);
-					if (memcmp(buf_hash, buf_hash2, 16) == 0){	// If BLAKE3 hash is same also, the data is uniform.
-						uniform_start = slide_offset - 1;
-						while ( (slide_offset < block_size) && (file_offset + slide_offset + block_size <= file_size)
-								&& (crc == temp_crc) ){	// Skip the area of uniform data.
-							crc = window_mask ^ crc_slide_byte(window_mask ^ crc,
-									work_buf[slide_offset + block_size], work_buf[slide_offset], window_table);
-							slide_offset++;
-						}
-						uniform_end = slide_offset;	// Offset of the last byte of uniform data
-						if (par3_ctx->noise_level >= 3){
-							printf("Because data is uniform, skip from %"PRIu64" to %"PRIu64".\n", uniform_start, slide_offset);
-						}
-					}
-				}
-			}
-
-			// When one block was found while sliding search.
-			if (next_offset >= 0){
-				if (slice_list[next_slice].next == -1){	// There is only one slice for the found block.
-					slice_index = next_slice + 1;
-					if (slice_index < slice_count){	// There is next slice
-						if (slice_list[next_slice].chunk == slice_list[slice_index].chunk){	// Belong to same chunk
-							next_slice = slice_index;
-						} else {
-							next_offset = -2;
-						}
-					} else {
-						next_offset = -2;
-					}
-				} else {
-					next_offset = -2;
-				}
-			}
-		}
-
-		// Compare current CRC-64 with chunk tails.
-		if ( ((flag_slide & 4) == 0) && (tail_count > 0) && (file_offset + slide_start + 40 <= file_size) ){
-			//printf("slide: offset = %"PRIu64" + %"PRIu64", tail crc = 0x%016"PRIx64"\n", file_offset, slide_start, crc40);
-			hash_counter = 0;
-			hash_offset = 0;
-			time_slide = clock();	// Store starting time of slide search.
-			slide_offset = slide_start;
-			while ( (slide_offset < block_size) && (file_offset + slide_offset + 40 <= file_size) ){
-				// Because CRC-64 for chunk tails is a range of the first 40-bytes, total data may be different.
-				tail_size = 0;
-				// find_index is the first index of the matching CRC-64. There may be multiple items.
-				find_index = cmp_list_search(par3_ctx, crc40, tail_list, tail_count);
-				while (find_index >= 0){	// When CRC-64 is same.
-					slice_index = tail_list[find_index].index;	// index of slice
-					if (tail_size != slice_list[slice_index].size){
-						tail_size = slice_list[slice_index].size;
-
-						if ( (uniform_end > 0) && (slide_offset + tail_size < uniform_end + block_size) ){
-							// Don't compare hash value, while uniform data.
-							//printf("Skip slice[%"PRIu64"] in uniform data. offset = %"PRIu64" + %"PRIu64".\n", slice_index, file_offset, slide_offset);
-							memset(buf_hash, 0, 16);
-
-						} else if (file_offset + slide_offset + tail_size <= file_size){
-							blake3(work_buf + slide_offset, tail_size, buf_hash);
-
-							// Count number of hashing.
-							if (hash_counter == 0)
-								hash_offset = slide_offset;
-							hash_counter++;
-							//printf("slice[%"PRIu64"], hashing = %d, offset = %"PRIu64" + %"PRIu64".\n", slice_index, hash_counter, file_offset, slide_offset);
-
-						} else {
-							// When chunk tail exceeds file data, hash value becomes zero.
-							memset(buf_hash, 0, 16);
-						}
-					}
-					if (memcmp(buf_hash, chunk_list[slice_list[slice_index].chunk].tail_hash, 16) == 0){
-						// When a chunk tail was found while slide search.
-						block_index = slice_list[slice_index].block;
-						if (par3_ctx->noise_level >= 3){
-							printf("tail block[%2"PRIu64"] : slice[%2"PRId64"] offset = %"PRIu64" + %"PRIu64", tail size = %"PRIu64", offset = %"PRIu64"\n",
-									block_index, slice_index, file_offset, slide_offset, tail_size, slice_list[slice_index].tail_offset);
-						}
-						if (slice_list[slice_index].find_name == NULL){	// When this slice was not found yet.
-							// Store filename & position of this slice for later reading.
-							slice_list[slice_index].find_name = filename;
-							slice_list[slice_index].find_offset = file_offset + slide_offset;
-							block_list[block_index].state |= 8;
-						}
-						if (find_min > file_offset + slide_offset)
-							find_min = file_offset + slide_offset;
-						if (find_max < file_offset + slide_offset + tail_size)
-							find_max = file_offset + slide_offset + tail_size;
-
-						// When only one full size slice was found
-						if (next_offset >= 0){
-							if (slide_offset + tail_size > (uint64_t)next_offset)	// Check overlap of found slices
-								next_offset = -2;
-						}
-
-						// When CRC and BLAKE3 match, remove this item from tail_list.
-						if (find_index + 1 < tail_count)
-							memmove(tail_list + find_index, tail_list + find_index + 1, sizeof(PAR3_CMP_CTX) * (tail_count - find_index - 1));
-						tail_count--;
-						//printf("Remove item[%"PRId64"] : block[%"PRIu64"] from tail_list. tail_count = %"PRIu64"\n", find_index, block_index, tail_count);
-
-					} else {	// Goto next item
-						find_index++;
-					}
-
-					if (find_index == tail_count)
-						break;
-					if (tail_list[find_index].crc != crc40)
-						break;
-				}
-
-				temp_crc = crc40;	// Save previous CRC-64 to compare later
-				crc40 = window_mask40 ^ crc_slide_byte(window_mask40 ^ crc40,
-						work_buf[slide_offset + 40], work_buf[slide_offset], window_table40);
-				slide_offset++;
-
-				if (hash_counter >= CHECK_SLIDE_INTERVAL){	// Check freeze after sliding several bytes each.
-					// When hashing over than 8 times in 8 KB range. (average >= 1 time / 1 KB)
-					if (slide_offset - hash_offset <= ((uint64_t)CHECK_SLIDE_INTERVAL << CHECK_SLIDE_RANGE)){
-						// When sliding over than time limit.
-						if (clock() - time_slide >= time_limit){
-							if (par3_ctx->noise_level >= 1){
-								printf("Interrupt slide tail by time out. offset = %"PRIu64" + %"PRIu64".\n", file_offset, slide_offset);
-							}
-							flag_slide |= 2;
-							break;
-						}
-					}
-					hash_counter = 0;
-					hash_offset = slide_offset;
-				}
-				if (crc40 == temp_crc){	// When CRC-64 is same after sliding 1 byte.
-					// When offset is inside of uniform data.
-					if ( (slide_offset >= uniform_start) && (slide_offset < uniform_end) ){
-						// Skip the area of uniform data.
-						slide_offset = uniform_end;
-						if (par3_ctx->noise_level >= 3){
-							printf("While data is uniform, skip from %"PRIu64" to %"PRIu64".\n", uniform_start, uniform_end);
-						}
-						// No need to re-calculate CRC-64 after skip, because the value is same for uniform data.
-						// Chunk tail is smaller than block size always.
-					}
-				}
-			}
-		}
-		//printf("block crc = 0x%016"PRIx64", tail crc = 0x%016"PRIx64"\n", crc, crc40);
-
-		// Check range of found slices
-		if ( (find_min < file_size) && (find_min > find_last) )
-			damage_size += find_min - find_last;
-		find_last = find_max;
-		//printf("file_offset = %"PRIu64", find_min = %"PRIu64", find_max %"PRIu64", damage_size = %"PRIu64"\n",
-		//		file_offset, find_min, find_max, damage_size);
-
-		// Read next block on second position.
-		file_offset += block_size;
-		if (file_offset >= file_size){
-			//printf("file_offset = %"PRIu64", file_size = %"PRIu64", EOF\n", file_offset, file_size);
-			break;
-		}
-		read_size = block_size;
-		if (file_offset + block_size >= file_size){
-			// Slide block of second position to former position.
-			memcpy(work_buf, work_buf + block_size, (size_t)(file_size - file_offset));
-			read_size = 0;
-		} else if (file_offset + block_size * 2 > file_size){
-			read_size = file_size - file_offset - block_size;
-		}
-		//printf("file_offset = %"PRIu64", read_size = %"PRIu64"\n", file_offset, read_size);
-		if (read_size > 0){
-			// Slide block of second position to former position.
-			memcpy(work_buf, work_buf + block_size, (size_t)block_size);
-
-			if (fread(work_buf + block_size, 1, (size_t)read_size, fp) != read_size){
-				perror("Failed to read next block on input file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			if (file_hash != NULL)
-				blake3_hasher_update(&hasher, work_buf + block_size, (size_t)read_size);
-		}
-
-
-		// Only when skipping slide, calculate CRC-64 of next block.
-		if ( ((flag_slide & 1) != 0) && (file_offset + block_size <= file_size) ){
-			//printf("Calculate CRC-64 of next block.\n");
-			crc = crc64(work_buf, block_size, 0);
-		}
-		if ( ((flag_slide & 2) != 0) && (file_offset + 40 <= file_size) ){
-			//printf("Calculate CRC-64 of next tail.\n");
-			crc40 = crc64(work_buf, 40, 0);
-		}
-	}
-
-	// Check the last damaged area in this file
-	if (find_max < file_size)
-		damage_size += file_size - find_max;
-	if (file_damage != NULL)
-		*file_damage = damage_size;
-
-	if (fclose(fp) != 0){
-		perror("Failed to close input file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Calculate file hash to compare with misnamed files.
-	if (file_hash != NULL)
-		blake3_hasher_finalize(&hasher, file_hash, 16);
-
-	return 0;
-}
-
diff --git a/windows/src/write.c b/windows/src/write.c
deleted file mode 100644
index 39c0706..0000000
--- a/windows/src/write.c
+++ /dev/null
@@ -1,926 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#define _ftelli64 ftello
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "common.h"
-#include "galois.h"
-#include "hash.h"
-#include "packet.h"
-#include "write.h"
-
-
-// Write Index File
-int write_index_file(PAR3_CTX *par3_ctx)
-{
-	size_t write_size;
-	FILE *fp;
-
-	fp = fopen(par3_ctx->par_filename, "wb");
-	if (fp == NULL){
-		perror("Failed to open Index File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Creator Packet
-	write_size = par3_ctx->creator_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->creator_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Creator Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Start Packet
-	write_size = par3_ctx->start_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->start_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Start Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Matrix Packet
-	write_size = par3_ctx->matrix_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->matrix_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Matrix Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// File Packet
-	write_size = par3_ctx->file_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->file_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write File Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Directory Packet
-	write_size = par3_ctx->dir_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->dir_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Directory Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Root Packet
-	write_size = par3_ctx->root_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->root_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Root Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// External Data Packet
-	write_size = par3_ctx->ext_data_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->ext_data_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write External Data Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// File System Specific Packets
-	write_size = par3_ctx->file_system_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->file_system_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write File System Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Comment Packet
-	write_size = par3_ctx->comment_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->comment_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Comment Packet on Index File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	if (fclose(fp) != 0){
-		perror("Failed to close Index File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= -1)
-		printf("Wrote index file, %s\n", offset_file_name(par3_ctx->par_filename));
-
-	return 0;
-}
-
-/*
-Redundancy of critical packets;
-number of blocks = 0 ~ 1 : number of copies = 1
-number of blocks = 2 ~ 3 : number of copies = 2
-number of blocks = 4 ~ 7 : number of copies = 3
-number of blocks = 8 ~ 15 : number of copies = 4
-number of blocks = 16 ~ 31 : number of copies = 5
-number of blocks = 32 ~ 63 : number of copies = 6
-number of blocks = 64 ~ 127 : number of copies = 7
-number of blocks = 128 ~ 255 : number of copies = 8
-number of blocks = 256 ~ 511 : number of copies = 9
-number of blocks = 512 ~ 1023 : number of copies = 10
-number of blocks = 1024 ~ 2047 : number of copies = 11
-number of blocks = 2048 ~ 4095 : number of copies = 12
-number of blocks = 4096 ~ 8191 : number of copies = 13
-number of blocks = 8192 ~ 16383 : number of copies = 14
-number of blocks = 16384 ~ 32767 : number of copies = 15
-number of blocks = 32768 ~ 65535 : number of copies = 16
-*/
-static int write_data_packet(PAR3_CTX *par3_ctx, char *file_name, uint64_t each_start, uint64_t each_count)
-{
-	uint8_t *work_buf, *common_packet, packet_header[56];
-	uint32_t file_index, file_prev;
-	uint32_t cohort_count;
-	int64_t slice_index;
-	uint64_t num, file_offset;
-	uint64_t block_count, block_index, block_max;
-	size_t block_size, read_size, tail_offset;
-	size_t write_size, write_size2;
-	size_t packet_count, packet_to, packet_from;
-	size_t common_packet_size, packet_size, packet_offset;
-	PAR3_FILE_CTX *file_list;
-	PAR3_SLICE_CTX *slice_list;
-	PAR3_BLOCK_CTX *block_list;
-	FILE *fp_write, *fp_read;
-	blake3_hasher hasher;
-
-	block_size = par3_ctx->block_size;
-	work_buf = par3_ctx->work_buf;
-	file_list = par3_ctx->input_file_list;
-	slice_list = par3_ctx->slice_list;
-	block_list = par3_ctx->block_list;
-	common_packet = par3_ctx->common_packet;
-	common_packet_size = par3_ctx->common_packet_size;
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = par3_ctx->block_count;
-		cohort_count = par3_ctx->interleave + 1;
-	}
-
-	// How many repetition of common packet.
-	packet_count = 0;	// reduce 1, because put 1st copy at first.
-	for (num = 2; num <= each_count; num *= 2)	// log2(each_count)
-		packet_count++;
-	if (par3_ctx->repetition_limit > 0){	// Limit repetition of packets in each file.
-		size_t limit_count = par3_ctx->repetition_limit - 1;	// Additional copies
-		if (packet_count > limit_count)
-			packet_count = limit_count;
-	}
-	//printf("each_count = %"PRIu64", repetition = %zu\n", each_count, packet_count);
-	packet_count *= par3_ctx->common_packet_count;
-	//printf("number of repeated packets = %zu\n", packet_count);
-
-	fp_write = fopen(file_name, "wb");
-	if (fp_write == NULL){
-		perror("Failed to open Archive File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Creator Packet
-	write_size = par3_ctx->creator_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->creator_packet, 1, write_size, fp_write) != write_size){
-			perror("Failed to write Creator Packet on Archive File");
-			fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// First common packets
-	write_size = common_packet_size;
-	if (fwrite(common_packet, 1, write_size, fp_write) != write_size){
-		perror("Failed to write first common packets on Archive File");
-		fclose(fp_write);
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Data Packet and repeated common packets
-	file_prev = 0xFFFFFFFF;
-	fp_read = NULL;
-	packet_from = 0;
-	packet_offset = 0;
-	for (num = each_start; num < each_start + each_count; num++){
-		if (par3_ctx->interleave == 0){
-			block_index = num;
-			block_max = block_index + 1;
-		} else {	// Write multiple blocks at interleaving
-			block_index = num * cohort_count;	// Starting index of the block
-			block_max = block_index + cohort_count;	// How many blocks in the volume
-			if (block_max > block_count)
-				block_max = block_count;
-		}
-		//printf("block_index = %"PRIu64", block_max = %"PRIu64"\n", block_index, block_max);
-
-		while (block_index < block_max){
-			// data size in the block
-			write_size = block_list[block_index].size;
-
-			// packet header
-			make_packet_header(packet_header, 56 + write_size, par3_ctx->set_id, "PAR DAT\0", 0);
-
-			// The index of the input block
-			memcpy(packet_header + 48, &block_index, 8);
-
-			// Read block data from file.
-			if (block_list[block_index].state & 1){	// including full size data
-				slice_index = block_list[block_index].slice;
-				while (slice_index != -1){
-					if (slice_list[slice_index].size == block_size)
-						break;
-					slice_index = slice_list[slice_index].next;
-				}
-				if (slice_index == -1){	// When there is no valid slice.
-					printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-					fclose(fp_write);
-					if (fp_read != NULL)
-						fclose(fp_read);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Read one slice from a file.
-				file_index = slice_list[slice_index].file;
-				file_offset = slice_list[slice_index].offset;
-				read_size = slice_list[slice_index].size;
-				//printf("Reading %zu bytes of slice[%"PRId64"] on file[%u] for block[%"PRIu64"].\n", read_size, slice_index, file_index, block_index);
-				if ( (fp_read == NULL) || (file_index != file_prev) ){
-					if (fp_read != NULL){	// Close previous input file.
-						fclose(fp_read);
-						fp_read = NULL;
-					}
-					fp_read = fopen(file_list[file_index].name, "rb");
-					if (fp_read == NULL){
-						perror("Failed to open input file");
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-					file_prev = file_index;
-				}
-				if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-					perror("Failed to seek input file");
-					fclose(fp_read);
-					fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fread(work_buf, 1, read_size, fp_read) != read_size){
-					perror("Failed to read full slice on input file");
-					fclose(fp_read);
-					fclose(fp_write);
-					return RET_FILE_IO_ERROR;
-				}
-
-			} else {	// tail data only (one tail or packed tails)
-				//printf("Reading %zu bytes for block[%"PRIu64"].\n", read_size, block_index);
-				tail_offset = 0;
-				while (tail_offset < write_size){	// Read tails until data end.
-					slice_index = block_list[block_index].slice;
-					while (slice_index != -1){
-						//printf("block = %"PRIu64", size = %zu, offset = %zu, slice = %"PRId64"\n", block_index, write_size, tail_offset, slice_index);
-						// Even when chunk tails are overlaped, it will find tail slice of next position.
-						if ( (slice_list[slice_index].tail_offset + slice_list[slice_index].size > tail_offset)
-								&& (slice_list[slice_index].tail_offset <= tail_offset) ){
-							break;
-						}
-						slice_index = slice_list[slice_index].next;
-					}
-					if (slice_index == -1){	// When there is no valid slice.
-						printf("Mapping information for block[%"PRIu64"] is wrong.\n", block_index);
-						fclose(fp_write);
-						if (fp_read != NULL)
-							fclose(fp_read);
-						return RET_LOGIC_ERROR;
-					}
-
-					// Read one slice from a file.
-					file_index = slice_list[slice_index].file;
-					file_offset = slice_list[slice_index].offset;
-					read_size = slice_list[slice_index].size;
-					if ( (fp_read == NULL) || (file_index != file_prev) ){
-						if (fp_read != NULL){	// Close previous input file.
-							fclose(fp_read);
-							fp_read = NULL;
-						}
-						fp_read = fopen(file_list[file_index].name, "rb");
-						if (fp_read == NULL){
-							perror("Failed to open input file");
-							fclose(fp_write);
-							return RET_FILE_IO_ERROR;
-						}
-						file_prev = file_index;
-					}
-					if (_fseeki64(fp_read, file_offset, SEEK_SET) != 0){
-						perror("Failed to seek input file");
-						fclose(fp_read);
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-					if (fread(work_buf + tail_offset, 1, read_size, fp_read) != read_size){
-						perror("Failed to read tail slice on input file");
-						fclose(fp_read);
-						fclose(fp_write);
-						return RET_FILE_IO_ERROR;
-					}
-					tail_offset += read_size;
-				}
-
-				// Zero fill rest bytes
-				//if (write_size < block_size)
-				//	memset(work_buf + write_size, 0, block_size - write_size);
-			}
-
-			// At creating time, CRC of a block was set, even when the block includes multiple chunk tails.
-			// It appends chunk tails as tail packing, and calculates their total CRC for the block.
-			// But, after verification, a block without full size data doesn't have valid CRC value.
-			if (block_list[block_index].state & 64){
-				// Calculate checksum of block to confirm that input file was not changed.
-				if (crc64(work_buf, write_size, 0) != block_list[block_index].crc){
-					printf("Checksum of block[%"PRIu64"] is different.\n", block_index);
-					fclose(fp_read);
-					fclose(fp_write);
-					return RET_LOGIC_ERROR;
-				}
-			}
-
-			// Calculate checksum of packet here.
-			blake3_hasher_init(&hasher);
-			blake3_hasher_update(&hasher, packet_header + 24, 24 + 8);
-			blake3_hasher_update(&hasher, work_buf, write_size);
-			blake3_hasher_finalize(&hasher, packet_header + 8, 16);
-
-			// Write packet header and data on file.
-			if (fwrite(packet_header, 1, 56, fp_write) != 56){
-				perror("Failed to write Data Packet on Archive File");
-				fclose(fp_write);
-				fclose(fp_read);
-				return RET_FILE_IO_ERROR;
-			}
-			if (fwrite(work_buf, 1, write_size, fp_write) != write_size){
-				perror("Failed to write Data Packet on Archive File");
-				fclose(fp_write);
-				fclose(fp_read);
-				return RET_FILE_IO_ERROR;
-			}
-
-			block_index++;	// Goto next block
-		}
-
-		// How many common packets to write here.
-		write_size = 0;
-		write_size2 = 0;
-		packet_to = packet_count * (num - each_start + 1) / each_count;
-		//printf("write from %zu to %zu\n", packet_from, packet_to);
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from packet_offset, and add them.
-			memcpy(&packet_size, common_packet + packet_offset + write_size + 24, 8);
-			write_size += packet_size;
-			packet_from++;
-			if (packet_offset + write_size >= common_packet_size)
-				break;
-		}
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from the first, and add them.
-			memcpy(&packet_size, common_packet + write_size2 + 24, 8);
-			write_size2 += packet_size;
-			packet_from++;
-		}
-
-		// Write common packets
-		if (write_size > 0){
-			//printf("packet_offset = %zu, write_size = %zu, total = %zu\n", packet_offset, write_size, packet_offset + write_size);
-			if (fwrite(common_packet + packet_offset, 1, write_size, fp_write) != write_size){
-				perror("Failed to write repeated common packet on Archive File");
-				fclose(fp_write);
-				fclose(fp_read);
-				return RET_FILE_IO_ERROR;
-			}
-			// This offset doesn't exceed common_packet_size.
-			packet_offset += write_size;
-			if (packet_offset >= common_packet_size)
-				packet_offset -= common_packet_size;
-		}
-		if (write_size2 > 0){
-			//printf("write_size2 = %zu = packet_offset\n", write_size2);
-			if (fwrite(common_packet, 1, write_size2, fp_write) != write_size2){
-				perror("Failed to write repeated common packet on Archive File");
-				fclose(fp_write);
-				fclose(fp_read);
-				return RET_FILE_IO_ERROR;
-			}
-			// Current offset is saved.
-			packet_offset = write_size2;
-		}
-	}
-
-	// Comment Packet
-	write_size = par3_ctx->comment_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->comment_packet, 1, write_size, fp_write) != write_size){
-			perror("Failed to write Comment Packet on Archive File");
-			fclose(fp_write);
-			if (fp_read != NULL)
-				fclose(fp_read);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	if (fp_read != NULL){
-		if (fclose(fp_read) != 0){
-			perror("Failed to close input file");
-			fclose(fp_write);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-	if (fclose(fp_write) != 0){
-		perror("Failed to close Archive File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	return 0;
-}
-
-// Write PAR3 files with Data packets (input blocks)
-int write_archive_file(PAR3_CTX *par3_ctx, char *file_name)
-{
-	int digit_num1, digit_num2;
-	uint32_t file_count;
-	size_t len, region_size;
-	int64_t recovery_file_scheme;
-	uint64_t block_count, base_num;
-	uint64_t each_start, each_count, max_count;
-
-	block_count = par3_ctx->block_count;
-	if (block_count == 0)
-		return 0;
-	recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-
-	// Allocate memory to read one input block and parity.
-	region_size = (par3_ctx->block_size + 4 + 3) & ~3;
-	par3_ctx->work_buf = malloc(region_size);
-	if (par3_ctx->work_buf == NULL){
-		perror("Failed to allocate memory for input data");
-		return RET_MEMORY_ERROR;
-	}
-
-	// Remove the last ".par3" from base PAR3 filename.
-	strcpy(file_name, par3_ctx->par_filename);
-	len = strlen(file_name);
-	if (strcmp(file_name + len - 5, ".par3") == 0){
-		len -= 5;
-		file_name[len] = 0;
-		//printf("len = %zu, base name = %s\n", len, file_name);
-	}
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = (block_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);	// round up
-	}
-
-	// Calculate block count and digits max.
-	file_count = calculate_digit_max(par3_ctx, 56, block_count, 0, &base_num, &max_count, &digit_num1, &digit_num2);
-	if (len + 11 + digit_num1 + digit_num2 >= _MAX_PATH){	// .part#+#.par3
-		printf("PAR filename will be too long.\n");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		show_sizing_scheme(par3_ctx, file_count, base_num, max_count);
-	}
-
-	// Write each PAR3 file.
-	each_start = 0;
-	while (block_count > 0){
-		if (file_count > 0){
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = max_count;
-				if (base_num > 0){
-					base_num--;
-					if (base_num == 0)
-						max_count--;
-				}
-
-			} else {	// Variable (base number * power of 2)
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-
-		} else {
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = block_count;
-
-			} else if (recovery_file_scheme > 0){	// Limit size
-				each_count = base_num;
-				if (each_count > max_count){
-					each_count = max_count;
-				} else {
-					base_num *= 2;
-				}
-				if (each_count > block_count)
-					each_count = block_count;
-
-			} else {	// Power of 2
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-		}
-
-		sprintf(file_name + len, ".part%0*"PRIu64"+%0*"PRIu64".par3", digit_num1, each_start, digit_num2, each_count);
-		if (write_data_packet(par3_ctx, file_name, each_start, each_count) != 0){
-			return RET_FILE_IO_ERROR;
-		}
-		if (par3_ctx->noise_level >= -1)
-			printf("Wrote archive file, %s\n", offset_file_name(file_name));
-
-		each_start += each_count;
-		block_count -= each_count;
-	}
-
-	free(par3_ctx->work_buf);
-	par3_ctx->work_buf = NULL;
-
-	return 0;
-}
-
-
-// Recovery Data packet with dummy recovery block
-static int write_recovery_packet(PAR3_CTX *par3_ctx, char *file_name, uint64_t each_start, uint64_t each_count)
-{
-	uint8_t *buf_p, *common_packet, packet_header[88];
-	uint8_t gf_size;
-	int galois_poly, ret;
-	uint32_t cohort_count;
-	uint64_t num, first_num;
-	uint64_t block_index, block_max;
-	size_t block_size, region_size;
-	size_t write_size, write_size2;
-	size_t packet_count, packet_to, packet_from;
-	size_t common_packet_size, packet_size, packet_offset;
-	PAR3_POS_CTX *position_list;
-	FILE *fp;
-	blake3_hasher hasher;
-
-	block_size = par3_ctx->block_size;
-	first_num = par3_ctx->first_recovery_block;
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	common_packet = par3_ctx->common_packet;
-	common_packet_size = par3_ctx->common_packet_size;
-	position_list = par3_ctx->position_list;
-	cohort_count = par3_ctx->interleave + 1;
-
-	region_size = (block_size + 4 + 3) & ~3;
-	buf_p = par3_ctx->block_data;
-	if (par3_ctx->ecc_method & 0x8000){
-		// Move to the position of starting recovery block
-		buf_p += (each_start - par3_ctx->first_recovery_block) * region_size;
-	}
-
-	// How many repetition of common packet.
-	packet_count = 0;	// reduce 1, because put 1st copy at first.
-	for (num = 2; num <= each_count; num *= 2)	// log2(each_count)
-		packet_count++;
-	if (par3_ctx->repetition_limit > 0){	// Limit repetition of packets in each file.
-		size_t limit_count = par3_ctx->repetition_limit - 1;	// Additional copies
-		if (packet_count > limit_count)
-			packet_count = limit_count;
-	}
-	//printf("each_count = %"PRIu64", repetition = %zu\n", each_count, packet_count);
-	packet_count *= par3_ctx->common_packet_count;
-	//printf("number of repeated packets = %zu\n", packet_count);
-
-	fp = fopen(file_name, "wb");
-	if (fp == NULL){
-		perror("Failed to open Recovery File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Creator Packet
-	write_size = par3_ctx->creator_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->creator_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Creator Packet on Recovery File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// First common packets
-	write_size = common_packet_size;
-	if (fwrite(common_packet, 1, write_size, fp) != write_size){
-		perror("Failed to write first common packets on Recovery File");
-		fclose(fp);
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Common items in packet header of Recovery Data Packets
-	memset(packet_header + 8, 0, 16);	// Zero fill checksum of packet as a sign of not calculated yet
-	memcpy(packet_header + 48, par3_ctx->root_packet + 8, 16);	// The checksum from the Root packet
-	memcpy(packet_header + 64, par3_ctx->matrix_packet + 8, 16);
-
-	// Recovery Data Packet and repeated common packets
-	packet_from = 0;
-	packet_offset = 0;
-	for (num = each_start; num < each_start + each_count; num++){
-		if (par3_ctx->interleave == 0){
-			block_index = num;
-			block_max = block_index + 1;
-		} else {	// Write multiple blocks at interleaving
-			block_index = num * cohort_count;	// Starting index of the block
-			block_max = block_index + cohort_count;	// How many blocks in the volume
-		}
-		//printf("block_index = %"PRIu64", block_max = %"PRIu64"\n", block_index, block_max);
-
-		while (block_index < block_max){
-			// packet header
-			make_packet_header(packet_header, 88 + block_size, par3_ctx->set_id, "PAR REC\0", 0);
-
-			// The index of the recovery block
-			memcpy(packet_header + 80, &block_index, 8);
-
-			// When there is enough memory to keep all recovery blocks,
-			// recovery blocks were created already.
-			if (par3_ctx->ecc_method & 0x8000){
-				// Check parity of recovery block to confirm that calculation was correct.
-				if (gf_size == 2){
-					ret = gf16_region_check_parity(galois_poly, buf_p, region_size);
-				} else if (gf_size == 1){
-					ret = gf8_region_check_parity(galois_poly, buf_p, region_size);
-				} else {
-					ret = region_check_parity(buf_p, region_size);
-				}
-				if (ret != 0){
-					printf("Parity of recovery block[%"PRIu64"] is different.\n", block_index);
-					fclose(fp);
-					return RET_LOGIC_ERROR;
-				}
-
-				// Calculate checksum of packet here.
-				blake3_hasher_init(&hasher);
-				blake3_hasher_update(&hasher, packet_header + 24, 24 + 40);
-				blake3_hasher_update(&hasher, buf_p, block_size);
-				blake3_hasher_finalize(&hasher, packet_header + 8, 16);
-
-				// Write packet header and recovery data on file.
-				if (fwrite(packet_header, 1, 88, fp) != 88){
-					perror("Failed to write Recovery Data Packet on Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				if (fwrite(buf_p, 1, block_size, fp) != block_size){
-					perror("Failed to write Recovery Data Packet on Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				buf_p += region_size;
-
-			// When there isn't enough memory to keep all blocks, zero fill the block area.
-			} else {
-				// Save position of each recovery block for later wariting.
-				position_list[block_index - first_num].name = par3_ctx->par_file_name + par3_ctx->par_file_name_len - strlen(file_name) - 1;
-				position_list[block_index - first_num].offset = _ftelli64(fp);
-				if (position_list[block_index - first_num].offset < 0){
-					perror("Failed to get current position of Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				//printf("block[%"PRIu64"] offset = %"PRId64", %s\n", block_index, position_list[block_index - first_num].offset, position_list[block_index - first_num].name);
-
-				// Calculate CRC of packet data to check error, because state of BLAKE3 hash is too large.
-				position_list[block_index - first_num].crc = crc64(packet_header + 24, 64, 0);
-
-				// Write packet header and dummy data on file.
-				if (fwrite(packet_header, 1, 88, fp) != 88){
-					perror("Failed to write Recovery Data Packet on Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-				// Write zero bytes as dummy
-				if (block_size > 1){
-					if (_fseeki64(fp, block_size - 1, SEEK_CUR) != 0){
-						perror("Failed to seek Recovery File");
-						fclose(fp);
-						return RET_FILE_IO_ERROR;
-					}
-				}
-				if (fwrite(packet_header + 8, 1, 1, fp) != 1){	// Write the last 1 byte of zero.
-					perror("Failed to write Recovery Data Packet on Recovery File");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-			}
-
-			block_index++;	// Goto next block
-		}
-
-		// How many common packets to write here.
-		write_size = 0;
-		write_size2 = 0;
-		packet_to = packet_count * (num - each_start + 1) / each_count;
-		//printf("write from %zu to %zu\n", packet_from, packet_to);
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from packet_offset, and add them.
-			memcpy(&packet_size, common_packet + packet_offset + write_size + 24, 8);
-			write_size += packet_size;
-			packet_from++;
-			if (packet_offset + write_size >= common_packet_size)
-				break;
-		}
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from the first, and add them.
-			memcpy(&packet_size, common_packet + write_size2 + 24, 8);
-			write_size2 += packet_size;
-			packet_from++;
-		}
-
-		// Write common packets
-		if (write_size > 0){
-			//printf("packet_offset = %zu, write_size = %zu, total = %zu\n", packet_offset, write_size, packet_offset + write_size);
-			if (fwrite(common_packet + packet_offset, 1, write_size, fp) != write_size){
-				perror("Failed to write repeated common packet on Recovery File");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			// This offset doesn't exceed common_packet_size.
-			packet_offset += write_size;
-			if (packet_offset >= common_packet_size)
-				packet_offset -= common_packet_size;
-		}
-		if (write_size2 > 0){
-			//printf("write_size2 = %zu = packet_offset\n", write_size2);
-			if (fwrite(common_packet, 1, write_size2, fp) != write_size2){
-				perror("Failed to write repeated common packet on Recovery File");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			// Current offset is saved.
-			packet_offset = write_size2;
-		}
-	}
-
-	// Comment Packet
-	write_size = par3_ctx->comment_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->comment_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Comment Packet on Recovery File");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	if (fclose(fp) != 0){
-		perror("Failed to close Recovery File");
-		return RET_FILE_IO_ERROR;
-	}
-
-	return 0;
-}
-
-// Write PAR3 files with Recovery Data packets (recovery blocks are not written yet)
-int write_recovery_file(PAR3_CTX *par3_ctx, char *file_name)
-{
-	int digit_num1, digit_num2;
-	uint32_t file_count;
-	int64_t recovery_file_scheme;
-	uint64_t block_count, base_num, first_num;
-	uint64_t each_start, each_count, max_count;
-	size_t len;
-
-	block_count = par3_ctx->recovery_block_count;
-	if (block_count == 0)
-		return 0;
-	recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-	first_num = par3_ctx->first_recovery_block;
-
-	// Remove the last ".par3" from base PAR3 filename.
-	strcpy(file_name, par3_ctx->par_filename);
-	len = strlen(file_name);
-	if (strcmp(file_name + len - 5, ".par3") == 0){
-		len -= 5;
-		file_name[len] = 0;
-		//printf("len = %zu, base name = %s\n", len, file_name);
-	}
-
-	// When recovery blocks were not created yet, allocate memory to store packet position.
-	if ((par3_ctx->ecc_method & 0x8000) == 0){
-		par3_ctx->position_list = malloc(sizeof(PAR3_POS_CTX) * block_count);
-		if (par3_ctx->position_list == NULL){
-			perror("Failed to allocate memory for position list");
-			return RET_MEMORY_ERROR;
-		}
-	}
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = (block_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);	// round up
-		first_num = (first_num + par3_ctx->interleave) / (par3_ctx->interleave + 1);
-	}
-
-	// Calculate block count and digits max.
-	file_count = calculate_digit_max(par3_ctx, 88, block_count, first_num, &base_num, &max_count, &digit_num1, &digit_num2);
-	if (len + 10 + digit_num1 + digit_num2 >= _MAX_PATH){	// .vol#+#.par3
-		printf("PAR filename will be too long.\n");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		show_sizing_scheme(par3_ctx, file_count, base_num, max_count);
-	}
-
-	// Write each PAR3 file.
-	each_start = first_num;
-	while (block_count > 0){
-		if (file_count > 0){
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = max_count;
-				if (base_num > 0){
-					base_num--;
-					if (base_num == 0)
-						max_count--;
-				}
-
-			} else {	// Variable (base number * power of 2)
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-
-		} else {
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = block_count;
-
-			} else if (recovery_file_scheme > 0){	// Limit size
-				each_count = base_num;
-				if (each_count > max_count){
-					each_count = max_count;
-				} else {
-					base_num *= 2;
-				}
-				if (each_count > block_count)
-					each_count = block_count;
-
-			} else {	// Power of 2
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-		}
-
-		sprintf(file_name + len, ".vol%0*"PRIu64"+%0*"PRIu64".par3", digit_num1, each_start, digit_num2, each_count);
-		if ((par3_ctx->ecc_method & 0x8000) == 0){
-			// When recovery blocks were not created yet, keep list of PAR filename.
-			if ( namez_add(&(par3_ctx->par_file_name), &(par3_ctx->par_file_name_len), &(par3_ctx->par_file_name_max), file_name) != 0){
-				perror("Failed to allocate memory for PAR filename");
-				return RET_MEMORY_ERROR;
-			}
-		}
-		if (write_recovery_packet(par3_ctx, file_name, each_start, each_count) != 0){
-			return RET_FILE_IO_ERROR;
-		}
-		if (par3_ctx->noise_level >= -1)
-			printf("Wrote recovery file, %s\n", offset_file_name(file_name));
-
-		each_start += each_count;
-		block_count -= each_count;
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/write.h b/windows/src/write.h
deleted file mode 100644
index 6935bbb..0000000
--- a/windows/src/write.h
+++ /dev/null
@@ -1,33 +0,0 @@
-
-void show_sizing_scheme(PAR3_CTX *par3_ctx,
-		uint32_t file_count, uint64_t base_num, uint64_t max_count);
-
-uint32_t calculate_digit_max(PAR3_CTX *par3_ctx,
-		uint64_t header_size, uint64_t block_count, uint64_t first_num,
-		uint64_t *p_base_num, uint64_t *p_max_count,
-		int *p_digit_num1, int *p_digit_num2);
-
-void remove_recovery_file(PAR3_CTX *par3_ctx, char *file_name);
-
-
-// for Create
-
-int write_index_file(PAR3_CTX *par3_ctx);
-
-int write_archive_file(PAR3_CTX *par3_ctx, char *file_name);
-
-int write_recovery_file(PAR3_CTX *par3_ctx, char *file_name);
-
-
-// for Trial
-
-uint64_t try_index_file(PAR3_CTX *par3_ctx);
-
-int try_archive_file(PAR3_CTX *par3_ctx, char *file_name, uint64_t *recovery_file_size);
-
-int try_recovery_file(PAR3_CTX *par3_ctx, char *file_name, uint64_t *recovery_file_size);
-
-
-// for PAR inside
-int insert_space_zip(PAR3_CTX *par3_ctx, int footer_size, int repeat_count);
-
diff --git a/windows/src/write_inside.c b/windows/src/write_inside.c
deleted file mode 100644
index 1a32b9d..0000000
--- a/windows/src/write_inside.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Redefinition of _FILE_OFFSET_BITS must happen BEFORE including stdio.h */
-#ifdef __linux__
-#define _FILE_OFFSET_BITS 64
-#define _fseeki64 fseeko
-#define _ftelli64 ftello
-#elif _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "blake3/blake3.h"
-#include "libpar3.h"
-#include "galois.h"
-#include "hash.h"
-#include "packet.h"
-#include "write.h"
-
-
-// Insert space into outside ZIP file
-int insert_space_zip(PAR3_CTX *par3_ctx, int footer_size, int repeat_count)
-{
-	uint8_t *buf_p, *common_packet, packet_header[88];
-	uint8_t gf_size;
-	int galois_poly, ret;
-	uint64_t block_count, block_index;
-	uint64_t each_count, each_max;
-	size_t block_size, region_size;
-	size_t write_size;
-	size_t common_packet_size;
-	PAR3_POS_CTX *position_list;
-	FILE *fp;
-	blake3_hasher hasher;
-
-	block_count = par3_ctx->recovery_block_count;
-	block_size = par3_ctx->block_size;
-	gf_size = par3_ctx->gf_size;
-	galois_poly = par3_ctx->galois_poly;
-	common_packet = par3_ctx->common_packet;
-	common_packet_size = par3_ctx->common_packet_size;
-	region_size = (block_size + 4 + 3) & ~3;
-	buf_p = par3_ctx->block_data;
-
-	// When recovery blocks were not created yet, allocate memory to store packet position.
-	position_list = NULL;
-	if ((par3_ctx->ecc_method & 0x8000) == 0){
-		position_list = malloc(sizeof(PAR3_POS_CTX) * block_count);
-		if (position_list == NULL){
-			perror("Failed to allocate memory for position list");
-			return RET_MEMORY_ERROR;
-		}
-		par3_ctx->position_list = position_list;
-	}
-
-	if (repeat_count <= 2){
-		// Put mass of common packets in front & rear of Recovery Data
-		// [ common packets ] [ Recovery data packets ] [ common packets ] [ Creator packet ]
-		each_max = block_count;
-	} else {
-		// Insert mass of common packets between Recovery Data
-		each_max = (block_count + repeat_count - 2) / (repeat_count - 1);
-	}
-	//printf("block_count = %"PRIu64", repeat_count = %d, each_max = %"PRIu64"\n", block_count, repeat_count, each_max);
-
-	fp = fopen(par3_ctx->par_filename, "r+b");
-	if (fp == NULL){
-		perror("Failed to open Outside file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Put the first mass of common packets
-	//printf("first common packets\n");
-	if (_fseeki64(fp, 0, SEEK_END) != 0){
-		perror("Failed to seek Outside file");
-		fclose(fp);
-		return RET_FILE_IO_ERROR;
-	}
-	write_size = common_packet_size;
-	if (fwrite(common_packet, 1, write_size, fp) != write_size){
-		perror("Failed to write first common packets on Outside file");
-		fclose(fp);
-		return RET_FILE_IO_ERROR;
-	}
-
-	// Common items in packet header of Recovery Data Packets
-	memset(packet_header + 8, 0, 16);	// Zero fill checksum of packet as a sign of not calculated yet
-	memcpy(packet_header + 48, par3_ctx->root_packet + 8, 16);	// The checksum from the Root packet
-	memcpy(packet_header + 64, par3_ctx->matrix_packet + 8, 16);
-
-	// Recovery Data Packet and repeated common packets
-	each_count = 0;
-	for (block_index = 0; block_index < block_count; block_index++){
-		//printf("block_index = %"PRIu64"\n", block_index);
-
-		// packet header
-		make_packet_header(packet_header, 88 + block_size, par3_ctx->set_id, "PAR REC\0", 0);
-
-		// The index of the recovery block
-		memcpy(packet_header + 80, &block_index, 8);
-
-		// When there is enough memory to keep all recovery blocks, recovery blocks were created already.
-		if (par3_ctx->ecc_method & 0x8000){
-			// Check parity of recovery block to confirm that calculation was correct.
-			if (gf_size == 2){
-				ret = gf16_region_check_parity(galois_poly, buf_p, region_size);
-			} else if (gf_size == 1){
-				ret = gf8_region_check_parity(galois_poly, buf_p, region_size);
-			} else {
-				ret = region_check_parity(buf_p, region_size);
-			}
-			if (ret != 0){
-				printf("Parity of recovery block[%"PRIu64"] is different.\n", block_index);
-				fclose(fp);
-				return RET_LOGIC_ERROR;
-			}
-
-			// Calculate checksum of packet here.
-			blake3_hasher_init(&hasher);
-			blake3_hasher_update(&hasher, packet_header + 24, 24 + 40);
-			blake3_hasher_update(&hasher, buf_p, block_size);
-			blake3_hasher_finalize(&hasher, packet_header + 8, 16);
-
-			// Write packet header and recovery data on file.
-			if (fwrite(packet_header, 1, 88, fp) != 88){
-				perror("Failed to write Recovery Data Packet on Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			if (fwrite(buf_p, 1, block_size, fp) != block_size){
-				perror("Failed to write Recovery Data Packet on Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			buf_p += region_size;
-
-		// When there isn't enough memory to keep all blocks, zero fill the block area.
-		} else {
-			// Save position of each recovery block for later wariting.
-			position_list[block_index].name = par3_ctx->par_filename;
-			position_list[block_index].offset = _ftelli64(fp);
-			if (position_list[block_index].offset < 0){
-				perror("Failed to get current position of Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			//printf("block[%"PRIu64"] offset = %"PRId64", %s\n", block_index, position_list[block_index].offset, position_list[block_index].name);
-
-			// Calculate CRC of packet data to check error, because state of BLAKE3 hash is too large.
-			position_list[block_index].crc = crc64(packet_header + 24, 64, 0);
-
-			// Write packet header and dummy data on file.
-			if (fwrite(packet_header, 1, 88, fp) != 88){
-				perror("Failed to write Recovery Data Packet on Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			// Write zero bytes as dummy
-			if (block_size > 1){
-				if (_fseeki64(fp, block_size - 1, SEEK_CUR) != 0){
-					perror("Failed to seek Outside file");
-					fclose(fp);
-					return RET_FILE_IO_ERROR;
-				}
-			}
-			if (fwrite(packet_header + 8, 1, 1, fp) != 1){	// Write the last 1 byte of zero.
-				perror("Failed to write Recovery Data Packet on Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-		}
-
-		// Put mass of common packets between Recovery Data Packets
-		each_count++;
-		if (each_count == each_max){
-			//printf("common packets\n");
-			write_size = common_packet_size;
-			if (fwrite(common_packet, 1, write_size, fp) != write_size){
-				perror("Failed to write common packets on Outside file");
-				fclose(fp);
-				return RET_FILE_IO_ERROR;
-			}
-			each_count = 0;
-		}
-	}
-
-	// Put the last mass of common packets
-	if (each_count > 0){
-		//printf("last common packets\n");
-		write_size = common_packet_size;
-		if (fwrite(common_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write last common packets on Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Creator Packet
-	write_size = par3_ctx->creator_packet_size;
-	if (write_size > 0){
-		if (fwrite(par3_ctx->creator_packet, 1, write_size, fp) != write_size){
-			perror("Failed to write Creator Packet on Outside file");
-			fclose(fp);
-			return RET_FILE_IO_ERROR;
-		}
-	}
-
-	// Copy footer at the last
-	if (footer_size > 0){
-		buf_p = malloc(footer_size);
-		if (buf_p == NULL){
-			perror("Failed to allocate memory for footer");
-			fclose(fp);
-			return RET_MEMORY_ERROR;
-		}
-
-		// Read footer from the last of original ZIP file
-		if (_fseeki64(fp, par3_ctx->total_file_size - footer_size, SEEK_SET) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			free(buf_p);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fread(buf_p, 1, footer_size, fp) != footer_size){
-			perror("Failed to read Outside file");
-			fclose(fp);
-			free(buf_p);
-			return RET_FILE_IO_ERROR;
-		}
-
-		// Write footer at the last of outside ZIP file
-		if (_fseeki64(fp, 0, SEEK_END) != 0){
-			perror("Failed to seek Outside file");
-			fclose(fp);
-			free(buf_p);
-			return RET_FILE_IO_ERROR;
-		}
-		if (fwrite(buf_p, 1, footer_size, fp) != footer_size){
-			perror("Failed to write footer on Outside file");
-			fclose(fp);
-			free(buf_p);
-			return RET_FILE_IO_ERROR;
-		}
-
-		free(buf_p);
-	}
-
-	if (fclose(fp) != 0){
-		perror("Failed to close Outside file");
-		return RET_FILE_IO_ERROR;
-	}
-
-	return 0;
-}
-
diff --git a/windows/src/write_trial.c b/windows/src/write_trial.c
deleted file mode 100644
index 3710175..0000000
--- a/windows/src/write_trial.c
+++ /dev/null
@@ -1,798 +0,0 @@
-#ifdef _WIN32
-// avoid error of MSVC
-#define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "libpar3.h"
-#include "common.h"
-
-
-// Try Index File
-uint64_t try_index_file(PAR3_CTX *par3_ctx)
-{
-	uint64_t file_size;
-
-	file_size = 0;
-
-	// Creator Packet
-	file_size += par3_ctx->creator_packet_size;
-
-	// Start Packet
-	file_size += par3_ctx->start_packet_size;
-
-	// Matrix Packet
-	file_size += par3_ctx->matrix_packet_size;
-
-	// File Packet
-	file_size += par3_ctx->file_packet_size;
-
-	// Directory Packet
-	file_size += par3_ctx->dir_packet_size;
-
-	// Root Packet
-	file_size += par3_ctx->root_packet_size;
-
-	// External Data Packet
-	file_size += par3_ctx->ext_data_packet_size;
-
-	// File System Specific Packets
-	file_size += par3_ctx->file_system_packet_size;
-
-	// Comment Packet
-	file_size += par3_ctx->comment_packet_size;
-
-	if (par3_ctx->noise_level >= -1)
-		printf("Size of index file = %"PRIu64", %s\n", file_size, offset_file_name(par3_ctx->par_filename));
-
-	return file_size;
-}
-
-
-// Print sizing scheme
-void show_sizing_scheme(PAR3_CTX *par3_ctx,
-		uint32_t file_count, uint64_t base_num, uint64_t max_count)
-{
-	uint32_t cohort_count = par3_ctx->interleave + 1;
-	int64_t recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-
-	if (file_count > 0){
-		if (recovery_file_scheme == -1){	// Uniform
-			if (cohort_count > 1){
-				if (base_num > 0){
-					printf("Put [%"PRIu64" ~ %"PRIu64"] * %u blocks each on %u files.\n", max_count - 1, max_count, cohort_count, file_count);
-				} else {
-					printf("Put %"PRIu64" * %u blocks each on %u files.\n", max_count, cohort_count, file_count);
-				}
-			} else {
-				if (base_num > 0){
-					printf("Put [%"PRIu64" ~ %"PRIu64"] blocks each on %u files.\n", max_count - 1, max_count, file_count);
-				} else {
-					printf("Put %"PRIu64" blocks each on %u files.\n", max_count, file_count);
-				}
-			}
-		} else {	// Variable (base number * power of 2)
-			if (cohort_count > 1){
-				printf("Put [%"PRIu64" ~ %"PRIu64"] * %u blocks each on %u files.\n", base_num, max_count, cohort_count, file_count);
-			} else {
-				printf("Put [%"PRIu64" ~ %"PRIu64"] blocks each on %u files.\n", base_num, max_count, file_count);
-			}
-		}
-	} else {
-		if (recovery_file_scheme == -1){	// Uniform
-			if (cohort_count > 1){
-				printf("Put %"PRIu64" * %u blocks on a single file.\n", max_count, cohort_count);
-			} else {
-				printf("Put %"PRIu64" blocks on a single file.\n", max_count);
-			}
-		} else if (recovery_file_scheme > 0){	// Limit size
-			if (cohort_count > 1){
-				printf("Put \"power of 2\" * %u blocks on files incrementaly, until %"PRIu64" * %u blocks each.\n", cohort_count, max_count, cohort_count);
-			} else {
-				printf("Put \"power of 2\" blocks on files incrementaly, until %"PRIu64" blocks each.\n", max_count);
-			}
-		} else {	// Power of 2
-			if (cohort_count > 1){
-				printf("Put \"power of 2\" * %u blocks on files incrementaly.\n", cohort_count);
-			} else {
-				printf("Put \"power of 2\" blocks on files incrementaly.\n");
-			}
-		}
-	}
-}
-
-// Try to calculate sum of all packets in a file.
-uint64_t try_total_packet_size(PAR3_CTX *par3_ctx,
-		uint64_t packet_size, uint64_t packet_count)
-{
-	uint64_t file_size, num, repeat_count;
-
-	// How many repetition of common packet.
-	repeat_count = 1;
-	for (num = 2; num <= packet_count; num *= 2)	// log2(packet_count)
-		repeat_count++;
-	if (par3_ctx->repetition_limit > 0){	// Limit repetition of packets in each file.
-		uint64_t limit_count = par3_ctx->repetition_limit - 1;	// Additional copies
-		if (repeat_count > limit_count)
-			repeat_count = limit_count;
-	}
-	//printf("packet_count = %"PRIu64", repetition = %zu\n", packet_count, repeat_count);
-
-	// Creator Packet
-	file_size = par3_ctx->creator_packet_size;
-
-	// Common packets
-	file_size += par3_ctx->common_packet_size * repeat_count;
-
-	// Specified packets
-	file_size += packet_size * packet_count;
-
-	return file_size;
-}
-
-// Try how many blocks in each file, and calculate how many digits for each block count.
-// Return number of output files.
-uint32_t calculate_digit_max(PAR3_CTX *par3_ctx,
-		uint64_t header_size, uint64_t block_count, uint64_t first_num,
-		uint64_t *p_base_num, uint64_t *p_max_count,
-		int *p_digit_num1, int *p_digit_num2)
-{
-	int digit_num1, digit_num2;
-	uint32_t file_count;
-	int64_t recovery_file_scheme;
-	uint64_t num, base_num;
-	uint64_t each_start, each_count, max_count;
-
-	recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-
-	// Check max number of digits.
-	file_count = par3_ctx->recovery_file_count;
-	if (file_count > block_count)
-		file_count = (uint32_t)block_count;	// Number of file cannot exceed number of blocks.
-	if (file_count > 0){	// When writing archive file, number of input block files will be same as recovery block files.
-		if (recovery_file_scheme == -1){	// Uniform
-			max_count = block_count / file_count;
-			base_num = block_count % file_count;
-			each_start = block_count - max_count;
-			if (base_num > 0)
-				max_count++;
-
-		} else {	// Variable (base number * power of 2)
-			max_count = 1;
-			for (num = 1; num < file_count; num++){
-				max_count = max_count * 2 + 1;
-				if (max_count >= block_count){
-					file_count = (uint32_t)(num + 1);
-					break;
-				}
-			}
-			//printf("file_count = %u, (2 pow file_count) - 1 = %"PRIu64"\n", file_count, max_count);
-			if (max_count < block_count){	// Multiply by 2
-				base_num = (block_count + max_count - 1) / max_count;	// round up
-			} else {	// Number of file is reduced.
-				base_num = 1;
-			}
-
-			num = base_num;
-			max_count = 0;
-			each_start = 0;
-			each_count = 0;
-			while (block_count > 0){
-				each_start += each_count;
-				each_count = num;
-				num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-				if (max_count < each_count)
-					max_count = each_count;
-
-				block_count -= each_count;
-			}
-		}
-
-	} else {	// Set number of files automatically.
-		if (recovery_file_scheme == -1){	// Uniform
-			// Put all blocks on a single file.
-			base_num = 0;
-			max_count = block_count;
-			each_start = 0;
-
-		} else if (recovery_file_scheme > 0){	// Limit size
-			uint64_t packet_size, total_size;
-			uint64_t limit_size, min_count, upper_count, next_count;
-
-			packet_size = par3_ctx->block_size + header_size;	// Size of Recovery Data Packet or Data Packet
-			if (par3_ctx->interleave > 0)	// Multipy at interleaving
-				packet_size *= par3_ctx->interleave + 1;
-
-			// Calculate limit number of blocks
-			limit_size = recovery_file_scheme;
-			upper_count = (limit_size + packet_size - 1) / packet_size;
-			total_size = try_total_packet_size(par3_ctx, packet_size, upper_count);
-			if (total_size <= limit_size){
-				min_count = upper_count;
-			} else {
-				min_count = 1;
-				next_count = (1 + upper_count) / 2;
-			}
-			while (upper_count > min_count){
-				//printf("min_count = %"PRIu64", upper_count = %"PRIu64", next_count = %"PRIu64"\n", min_count, upper_count, next_count);
-				total_size = try_total_packet_size(par3_ctx, packet_size, next_count);
-				//printf("total_size = %"PRIu64" (%"PRIu64")\n", total_size, next_count);
-				if (total_size > limit_size){
-					upper_count = next_count;
-					next_count = (min_count + upper_count) / 2;
-				} else if (total_size < limit_size){
-					min_count = next_count;
-					next_count = (min_count + upper_count) / 2;
-				} else {
-					min_count = next_count;
-				}
-				if (next_count == min_count)
-					break;
-			}
-			num = min_count;
-			//printf("limit_size = %"PRIu64", limit_count = %"PRIu64"\n", limit_size, num);
-
-			base_num = 1;
-			each_start = 0;
-			each_count = 0;
-			max_count = 0;
-			while (block_count > 0){
-				each_start += each_count;
-				each_count = base_num;
-				if (each_count > num){	// When containing blocks exceeds limit count
-					each_count = num;
-				} else {
-					base_num *= 2;
-				}
-				if (each_count > block_count)
-					each_count = block_count;
-				if (max_count < each_count)
-					max_count = each_count;
-
-				block_count -= each_count;
-			}
-			base_num = 1;
-
-		} else {	// Power of 2
-			base_num = 1;
-			each_start = 0;
-			each_count = 0;
-			max_count = 0;
-			while (block_count > 0){
-				each_start += each_count;
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-				if (max_count < each_count)
-					max_count = each_count;
-
-				block_count -= each_count;
-			}
-			base_num = 1;
-		}
-	}
-
-	// Calculate how many digits for each block count.
-	//printf("max_start = %"PRIu64", max_count = %"PRIu64"\n", each_start, max_count);
-	digit_num1 = 1;
-	num = each_start + first_num;
-	while (num >= 10){
-		num /= 10;
-		digit_num1++;
-	}
-	digit_num2 = 1;
-	num = max_count;
-	while (num >= 10){
-		num /= 10;
-		digit_num2++;
-	}
-
-	// Return values
-	*p_base_num = base_num;
-	*p_max_count = max_count;
-	*p_digit_num1 = digit_num1;
-	*p_digit_num2 = digit_num2;
-
-	return file_count;
-}
-
-
-static uint64_t try_data_packet(PAR3_CTX *par3_ctx, char *file_name, uint64_t each_start, uint64_t each_count)
-{
-	uint8_t *common_packet;
-	uint32_t cohort_count, write_count;
-	uint64_t file_size, num;
-	uint64_t block_count, block_index, block_max;
-	size_t write_size, write_size2;
-	size_t packet_count, packet_to, packet_from;
-	size_t common_packet_size, packet_size, packet_offset;
-	PAR3_BLOCK_CTX *block_list;
-
-	block_list = par3_ctx->block_list;
-	common_packet = par3_ctx->common_packet;
-	common_packet_size = par3_ctx->common_packet_size;
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = par3_ctx->block_count;
-		cohort_count = par3_ctx->interleave + 1;
-	}
-
-	// How many repetition of common packet.
-	packet_count = 0;	// reduce 1, because put 1st copy at first.
-	for (num = 2; num <= each_count; num *= 2)	// log2(each_count)
-		packet_count++;
-	if (par3_ctx->repetition_limit > 0){	// Limit repetition of packets in each file.
-		size_t limit_count = par3_ctx->repetition_limit - 1;	// Additional copies
-		if (packet_count > limit_count)
-			packet_count = limit_count;
-	}
-	//printf("each_count = %"PRIu64", repetition = %zu\n", each_count, packet_count);
-	packet_count *= par3_ctx->common_packet_count;
-	//printf("number of repeated packets = %zu\n", packet_count);
-
-	file_size = 0;
-
-	// Creator Packet
-	file_size += par3_ctx->creator_packet_size;
-
-	// First common packets
-	file_size += common_packet_size;
-
-	// Data Packet and repeated common packets
-	packet_from = 0;
-	packet_offset = 0;
-	for (num = each_start; num < each_start + each_count; num++){
-		// data size in the block
-		if (par3_ctx->interleave == 0){
-			write_size = block_list[num].size;
-			write_count = 1;
-		} else {	// Write multiple blocks at interleaving
-			write_size = 0;
-			write_count = cohort_count;
-			block_index = num * cohort_count;	// Starting index of the block
-			block_max = block_index + cohort_count;	// How many blocks in the volume
-			if (block_max > block_count){
-				block_max = block_count;
-				write_count = (uint32_t)(block_max - block_index);
-			}
-			//printf("block_index = %"PRIu64", block_max = %"PRIu64"\n", block_index, block_max);
-			while (block_index < block_max){
-				write_size += block_list[block_index].size;
-				block_index++;
-			}
-		}
-		//printf("write_size = %zu, write_count = %u\n", write_size, write_count);
-
-		// Write packet header and data on file.
-		file_size += (48 + 8) * write_count;
-		file_size += write_size;
-
-		// How many common packets to write here.
-		write_size = 0;
-		write_size2 = 0;
-		packet_to = packet_count * (num - each_start + 1) / each_count;
-		//printf("write from %zu to %zu\n", packet_from, packet_to);
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from packet_offset, and add them.
-			memcpy(&packet_size, common_packet + packet_offset + write_size + 24, 8);
-			write_size += packet_size;
-			packet_from++;
-			if (packet_offset + write_size >= common_packet_size)
-				break;
-		}
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from the first, and add them.
-			memcpy(&packet_size, common_packet + write_size2 + 24, 8);
-			write_size2 += packet_size;
-			packet_from++;
-		}
-
-		// Write common packets
-		if (write_size > 0){
-			//printf("packet_offset = %zu, write_size = %zu, total = %zu\n", packet_offset, write_size, packet_offset + write_size);
-			file_size += write_size;
-			// This offset doesn't exceed common_packet_size.
-			packet_offset += write_size;
-			if (packet_offset >= common_packet_size)
-				packet_offset -= common_packet_size;
-		}
-		if (write_size2 > 0){
-			//printf("write_size2 = %zu = packet_offset\n", write_size2);
-			file_size += write_size2;
-			// Current offset is saved.
-			packet_offset = write_size2;
-		}
-	}
-
-	// Comment Packet
-	file_size += par3_ctx->comment_packet_size;
-
-	if (par3_ctx->noise_level >= -1)
-		printf("Size of archive file = %"PRIu64", %s\n", file_size, offset_file_name(file_name));
-
-	return file_size;
-}
-
-// Write PAR3 files with Data packets (input blocks)
-int try_archive_file(PAR3_CTX *par3_ctx, char *file_name, uint64_t *recovery_file_size)
-{
-	int digit_num1, digit_num2;
-	uint32_t file_count;
-	int64_t recovery_file_scheme;
-	uint64_t block_count, base_num;
-	uint64_t each_start, each_count, max_count;
-	size_t len;
-
-	block_count = par3_ctx->block_count;
-	if (block_count == 0)
-		return 0;
-	recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-
-	// Remove the last ".par3" from base PAR3 filename.
-	strcpy(file_name, par3_ctx->par_filename);
-	len = strlen(file_name);
-	if (strcmp(file_name + len - 5, ".par3") == 0){
-		len -= 5;
-		file_name[len] = 0;
-		//printf("len = %zu, base name = %s\n", len, file_name);
-	}
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = (block_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);	// round up
-	}
-
-	// Calculate block count and digits max.
-	file_count = calculate_digit_max(par3_ctx, 56, block_count, 0, &base_num, &max_count, &digit_num1, &digit_num2);
-	if (len + 11 + digit_num1 + digit_num2 >= _MAX_PATH){	// .part#+#.par3
-		printf("PAR filename will be too long.\n");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		show_sizing_scheme(par3_ctx, file_count, base_num, max_count);
-	}
-
-	// Write each PAR3 file.
-	each_start = 0;
-	while (block_count > 0){
-		if (file_count > 0){
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = max_count;
-				if (base_num > 0){
-					base_num--;
-					if (base_num == 0)
-						max_count--;
-				}
-
-			} else {	// Variable (base number * power of 2)
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-
-		} else {
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = block_count;
-
-			} else if (recovery_file_scheme > 0){	// Limit size
-				each_count = base_num;
-				if (each_count > max_count){
-					each_count = max_count;
-				} else {
-					base_num *= 2;
-				}
-				if (each_count > block_count)
-					each_count = block_count;
-
-			} else {	// Power of 2
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-		}
-
-		sprintf(file_name + len, ".part%0*"PRIu64"+%0*"PRIu64".par3", digit_num1, each_start, digit_num2, each_count);
-		*recovery_file_size += try_data_packet(par3_ctx, file_name, each_start, each_count);
-
-		each_start += each_count;
-		block_count -= each_count;
-	}
-
-	return 0;
-}
-
-
-static uint64_t try_recovery_packet(PAR3_CTX *par3_ctx, char *file_name, uint64_t each_start, uint64_t each_count)
-{
-	uint8_t *common_packet;
-	uint32_t cohort_count;
-	uint64_t file_size, block_size, num;
-	size_t write_size, write_size2;
-	size_t packet_count, packet_to, packet_from;
-	size_t common_packet_size, packet_size, packet_offset;
-
-	block_size = par3_ctx->block_size;
-	common_packet = par3_ctx->common_packet;
-	common_packet_size = par3_ctx->common_packet_size;
-	cohort_count = par3_ctx->interleave + 1;
-
-	// How many repetition of common packet.
-	packet_count = 0;	// reduce 1, because put 1st copy at first.
-	for (num = 2; num <= each_count; num *= 2)	// log2(each_count)
-		packet_count++;
-	if (par3_ctx->repetition_limit > 0){	// Limit repetition of packets in each file.
-		size_t limit_count = par3_ctx->repetition_limit - 1;	// Additional copies
-		if (packet_count > limit_count)
-			packet_count = limit_count;
-	}
-	//printf("each_count = %"PRIu64", repetition = %zu\n", each_count, packet_count);
-	packet_count *= par3_ctx->common_packet_count;
-	//printf("number of repeated packets = %zu\n", packet_count);
-
-	file_size = 0;
-
-	// Creator Packet
-	file_size += par3_ctx->creator_packet_size;
-
-	// First common packets
-	file_size += common_packet_size;
-
-	// Recovery Data Packet and repeated common packets
-	packet_from = 0;
-	packet_offset = 0;
-	for (num = each_start; num < each_start + each_count; num++){
-		// Write packet header and dummy data on file.
-		// It will write recovery block later.
-		file_size += (48 + 40) * cohort_count;
-		file_size += block_size * cohort_count;
-
-		// How many common packets to write here.
-		write_size = 0;
-		write_size2 = 0;
-		packet_to = packet_count * (num - each_start + 1) / each_count;
-		//printf("write from %zu to %zu\n", packet_from, packet_to);
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from packet_offset, and add them.
-			memcpy(&packet_size, common_packet + packet_offset + write_size + 24, 8);
-			write_size += packet_size;
-			packet_from++;
-			if (packet_offset + write_size >= common_packet_size)
-				break;
-		}
-		while (packet_to - packet_from > 0){
-			// Read packet size of each packet from the first, and add them.
-			memcpy(&packet_size, common_packet + write_size2 + 24, 8);
-			write_size2 += packet_size;
-			packet_from++;
-		}
-
-		// Write common packets
-		if (write_size > 0){
-			//printf("packet_offset = %zu, write_size = %zu, total = %zu\n", packet_offset, write_size, packet_offset + write_size);
-			file_size += write_size;
-			// This offset doesn't exceed common_packet_size.
-			packet_offset += write_size;
-			if (packet_offset >= common_packet_size)
-				packet_offset -= common_packet_size;
-		}
-		if (write_size2 > 0){
-			//printf("write_size2 = %zu = packet_offset\n", write_size2);
-			file_size += write_size2;
-			// Current offset is saved.
-			packet_offset = write_size2;
-		}
-	}
-
-	// Comment Packet
-	file_size += par3_ctx->comment_packet_size;
-
-	if (par3_ctx->noise_level >= -1)
-		printf("Size of recovery file = %"PRIu64", %s\n", file_size, offset_file_name(file_name));
-
-	return file_size;
-}
-
-// Write PAR3 files with Recovery Data packets (recovery blocks are not written yet)
-int try_recovery_file(PAR3_CTX *par3_ctx, char *file_name, uint64_t *recovery_file_size)
-{
-	int digit_num1, digit_num2;
-	uint32_t file_count;
-	int64_t recovery_file_scheme;
-	uint64_t block_count, base_num, first_num;
-	uint64_t each_start, each_count, max_count;
-	size_t len;
-
-	block_count = par3_ctx->recovery_block_count;
-	if (block_count == 0)
-		return 0;
-	recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-	first_num = par3_ctx->first_recovery_block;
-
-	// Remove the last ".par3" from base PAR3 filename.
-	strcpy(file_name, par3_ctx->par_filename);
-	len = strlen(file_name);
-	if (strcmp(file_name + len - 5, ".par3") == 0){
-		len -= 5;
-		file_name[len] = 0;
-		//printf("len = %zu, base name = %s\n", len, file_name);
-	}
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = (block_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);	// round up
-		first_num = (first_num + par3_ctx->interleave) / (par3_ctx->interleave + 1);
-	}
-
-	// Calculate block count and digits max.
-	file_count = calculate_digit_max(par3_ctx, 88, block_count, first_num, &base_num, &max_count, &digit_num1, &digit_num2);
-	if (len + 10 + digit_num1 + digit_num2 >= _MAX_PATH){	// .vol#+#.par3
-		printf("PAR filename will be too long.\n");
-		return RET_FILE_IO_ERROR;
-	}
-
-	if (par3_ctx->noise_level >= 1){
-		show_sizing_scheme(par3_ctx, file_count, base_num, max_count);
-	}
-
-	// Write each PAR3 file.
-	each_start = first_num;
-	while (block_count > 0){
-		if (file_count > 0){
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = max_count;
-				if (base_num > 0){
-					base_num--;
-					if (base_num == 0)
-						max_count--;
-				}
-
-			} else {	// Variable (base number * power of 2)
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-
-		} else {
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = block_count;
-
-			} else if (recovery_file_scheme > 0){	// Limit size
-				each_count = base_num;
-				if (each_count > max_count){
-					each_count = max_count;
-				} else {
-					base_num *= 2;
-				}
-				if (each_count > block_count)
-					each_count = block_count;
-
-			} else {	// Power of 2
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-		}
-
-		sprintf(file_name + len, ".vol%0*"PRIu64"+%0*"PRIu64".par3", digit_num1, each_start, digit_num2, each_count);
-		*recovery_file_size += try_recovery_packet(par3_ctx, file_name, each_start, each_count);
-
-		each_start += each_count;
-		block_count -= each_count;
-	}
-
-	return 0;
-}
-
-// Erase created PAR3 files, when error occured.
-void remove_recovery_file(PAR3_CTX *par3_ctx, char *file_name)
-{
-	int digit_num1, digit_num2;
-	uint32_t file_count;
-	int64_t recovery_file_scheme;
-	uint64_t block_count, base_num, first_num;
-	uint64_t each_start, each_count, max_count;
-	size_t len;
-
-/*
-	// Do you remove Index file, too ?
-	if (remove(par3_ctx->par_filename) != 0){
-		if (errno != ENOENT)
-			return;	// Failed to remove Index file
-	}
-*/
-
-	block_count = par3_ctx->recovery_block_count;
-	recovery_file_scheme = par3_ctx->recovery_file_scheme;
-	if (recovery_file_scheme == -2)
-		recovery_file_scheme = par3_ctx->max_file_size;
-	first_num = par3_ctx->first_recovery_block;
-
-	// Remove the last ".par3" from base PAR3 filename.
-	strcpy(file_name, par3_ctx->par_filename);
-	len = strlen(file_name);
-	if (strcmp(file_name + len - 5, ".par3") == 0){
-		len -= 5;
-		file_name[len] = 0;
-		//printf("len = %zu, base name = %s\n", len, file_name);
-	}
-
-	// Set count for each cohort
-	if (par3_ctx->interleave > 0){
-		block_count = (block_count + par3_ctx->interleave) / (par3_ctx->interleave + 1);	// round up
-	}
-
-	// Calculate block count and digits max.
-	file_count = calculate_digit_max(par3_ctx, 88, block_count, first_num, &base_num, &max_count, &digit_num1, &digit_num2);
-
-	// Remove each PAR3 file.
-	each_start = first_num;
-	while (block_count > 0){
-		if (file_count > 0){
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = max_count;
-				if (base_num > 0){
-					base_num--;
-					if (base_num == 0)
-						max_count--;
-				}
-
-			} else {	// Variable (base number * power of 2)
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-
-		} else {
-			if (recovery_file_scheme == -1){	// Uniform
-				each_count = block_count;
-
-			} else if (recovery_file_scheme > 0){	// Limit size
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > max_count)
-					each_count = max_count;
-				if (each_count > block_count)
-					each_count = block_count;
-
-			} else {	// Power of 2
-				each_count = base_num;
-				base_num *= 2;
-				if (each_count > block_count)
-					each_count = block_count;
-			}
-		}
-
-		sprintf(file_name + len, ".vol%0*"PRIu64"+%0*"PRIu64".par3", digit_num1, each_start, digit_num2, each_count);
-		if (remove(file_name) != 0){
-			if (errno != ENOENT)
-				return;	// Failed to remove PAR3 file
-		}
-
-		each_start += each_count;
-		block_count -= each_count;
-	}
-}
-