v6.16.0

Merge branch 'dev'
Update CHANGELOG.md
2025-12-06 23:52:38 -05:00 · 2021-11-26 18:57:07 +07:00 · 2021-11-26 18:51:53 +07:00 · 2021-11-26 18:50:34 +07:00 · 2021-11-25 19:21:50 +07:00 · 2021-11-25 13:19:01 +01:00
120 changed files with 75194 additions and 2455 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,22 @@
+# v6.16.0
+- [#2712](https://github.com/xmrig/xmrig/pull/2712) **GhostRider algorithm (Raptoreum) support**: read the [RELEASE NOTES](src/crypto/ghostrider/README.md) for a quick start guide and performance comparisons.
+- [#2682](https://github.com/xmrig/xmrig/pull/2682) Fixed: use cn-heavy optimization only for Vermeer CPUs.
+- [#2684](https://github.com/xmrig/xmrig/pull/2684) MSR mod: fix for error 183.
+
+# v6.15.3
+- [#2614](https://github.com/xmrig/xmrig/pull/2614) OpenCL fixes for non-AMD platforms.
+- [#2623](https://github.com/xmrig/xmrig/pull/2623) Fixed compiling without kawpow.
+- [#2636](https://github.com/xmrig/xmrig/pull/2636) [#2639](https://github.com/xmrig/xmrig/pull/2639) AstroBWT speedup (up to +35%).
+- [#2646](https://github.com/xmrig/xmrig/pull/2646) Fixed MSVC compilation error.
+
+# v6.15.2
+- [#2606](https://github.com/xmrig/xmrig/pull/2606) Fixed: AstroBWT auto-config ignored `max-threads-hint`.
+- Fixed possible crash on Windows (regression in v6.15.1).
+
+# v6.15.1
+- [#2586](https://github.com/xmrig/xmrig/pull/2586) Fixed Windows 7 compatibility.
+- [#2594](https://github.com/xmrig/xmrig/pull/2594) Added Windows taskbar icon colors.
+
 # v6.15.0
 - [#2548](https://github.com/xmrig/xmrig/pull/2548) Added automatic coin detection for daemon mining.
 - [#2563](https://github.com/xmrig/xmrig/pull/2563) Added new algorithm RandomX Graft (`rx/graft`).
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ option(WITH_RANDOMX         "Enable RandomX algorithms family" ON)
 option(WITH_ARGON2          "Enable Argon2 algorithms family" ON)
 option(WITH_ASTROBWT        "Enable AstroBWT algorithms family" ON)
 option(WITH_KAWPOW          "Enable KawPow algorithms family" ON)
+option(WITH_GHOSTRIDER      "Enable GhostRider algorithm" ON)
 option(WITH_HTTP            "Enable HTTP protocol support (client/server)" ON)
 option(WITH_DEBUG_LOG       "Enable debug log output" OFF)
 option(WITH_TLS             "Enable OpenSSL support" ON)
@@ -18,6 +19,8 @@ option(WITH_MSR             "Enable MSR mod & 1st-gen Ryzen fix" ON)
 option(WITH_ENV_VARS        "Enable environment variables support in config file" ON)
 option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
 option(WITH_OPENCL          "Enable OpenCL backend" ON)
+set(WITH_OPENCL_VERSION 200 CACHE STRING "Target OpenCL version")
+set_property(CACHE WITH_OPENCL_VERSION PROPERTY STRINGS 120 200 210 220)
 option(WITH_CUDA            "Enable CUDA backend" ON)
 option(WITH_NVML            "Enable NVML (NVIDIA Management Library) support (only if CUDA backend enabled)" ON)
 option(WITH_ADL             "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
@@ -56,6 +59,7 @@ set(HEADERS
    src/core/config/usage.h
    src/core/Controller.h
    src/core/Miner.h
+    src/core/Taskbar.h
    src/net/interfaces/IJobResultListener.h
    src/net/JobResult.h
    src/net/JobResults.h
@@ -104,6 +108,7 @@ set(SOURCES
    src/core/config/ConfigTransform.cpp
    src/core/Controller.cpp
    src/core/Miner.cpp
+    src/core/Taskbar.cpp
    src/net/JobResults.cpp
    src/net/Network.cpp
    src/net/strategies/DonateStrategy.cpp
@@ -124,6 +129,10 @@ set(SOURCES_CRYPTO
    src/crypto/common/VirtualMemory.cpp
   )

+if (CMAKE_C_COMPILER_ID MATCHES GNU)
+    set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
+endif()
+
 if (WITH_HWLOC)
    list(APPEND HEADERS_CRYPTO
        src/crypto/common/NUMAMemoryPool.h
@@ -182,6 +191,7 @@ include(cmake/randomx.cmake)
 include(cmake/argon2.cmake)
 include(cmake/astrobwt.cmake)
 include(cmake/kawpow.cmake)
+include(cmake/ghostrider.cmake)
 include(cmake/OpenSSL.cmake)
 include(cmake/asm.cmake)

@@ -217,7 +227,7 @@ if (WITH_DEBUG_LOG)
 endif()

 add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
-target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})
+target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY})

 if (WIN32)
    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
@@ -225,6 +235,7 @@ if (WIN32)
    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_10M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/pool_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
+    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/rtm_ghostrider_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
 endif()

 if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode)
--- a/cmake/ghostrider.cmake
+++ b/cmake/ghostrider.cmake
@@ -0,0 +1,8 @@
+if (NOT WITH_GHOSTRIDER)    # GhostRider disabled: drop the define and link nothing
+    remove_definitions(/DXMRIG_ALGO_GHOSTRIDER)
+    set(GHOSTRIDER_LIBRARY "")
+else()                      # GhostRider enabled: define the feature macro before descending into the subdirectory
+    add_definitions(/DXMRIG_ALGO_GHOSTRIDER)
+    add_subdirectory(src/crypto/ghostrider)
+    set(GHOSTRIDER_LIBRARY ghostrider)    # name handed to target_link_libraries() by the including CMakeLists.txt
+endif()
--- a/scripts/rtm_ghostrider_example.cmd
+++ b/scripts/rtm_ghostrider_example.cmd
@@ -0,0 +1,20 @@
+:: Example batch file for mining Raptoreum at a pool
+::
+:: Format:
+::      xmrig.exe -a gr -o <pool address>:<pool port> -u <pool username/wallet> -p <pool password>
+::
+:: Fields:
+::      pool address            The host name of the pool stratum or its IP address, for example raptoreumemporium.com
+::      pool port               The port of the pool's stratum to connect to, for example 3333. Check your pool's getting started page.
+::      pool username/wallet    For most pools, this is the wallet address you want to mine to. Some pools require a username instead.
+::      pool password           For most pools this can be just 'x'. For pools using usernames, you may need to provide a password as configured on the pool.
+::
+:: List of Raptoreum mining pools:
+::      https://miningpoolstats.stream/raptoreum
+::
+:: Choose pools outside of the top 5 to help the Raptoreum network be more decentralized!
+:: Smaller pools also often have smaller fees/payout limits.
+:: Switch to this script's own folder, drive included (/d), so the xmrig.exe next to it is found; quotes guard special characters in the path
+cd /d "%~dp0"
+xmrig.exe -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS -p x
+pause
--- a/src/3rdparty/getopt/getopt.h
+++ b/src/3rdparty/getopt/getopt.h
@@ -109,11 +109,7 @@ char    *optarg;		/* argument associated with option */
 extern char __declspec(dllimport) *__progname;
 #endif

-#ifdef __CYGWIN__
 static char EMSG[] = "";
-#else
-#define	EMSG		""
-#endif

 static int getopt_internal(int, char * const *, const char *,
 			   const struct option *, int *, int);
--- a/src/backend/common/Workers.cpp
+++ b/src/backend/common/Workers.cpp
@@ -239,6 +239,9 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han

    case 5:
        return new CpuWorker<5>(handle->id(), handle->config());
+
+    case 8:
+        return new CpuWorker<8>(handle->id(), handle->config());
    }

    return nullptr;
--- a/src/backend/cpu/CpuConfig.cpp
+++ b/src/backend/cpu/CpuConfig.cpp
@@ -122,8 +122,15 @@ std::vector<xmrig::CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, cons
    const size_t count = threads.count();
    out.reserve(count);

+    std::vector<int64_t> affinities;
+    affinities.reserve(count);
+
+    for (const auto& thread : threads.data()) {
+        affinities.emplace_back(thread.affinity());
+    }
+
    for (const auto &thread : threads.data()) {
-        out.emplace_back(miner, algorithm, *this, thread, count);
+        out.emplace_back(miner, algorithm, *this, thread, count, affinities);
    }

    return out;
@@ -200,6 +207,7 @@ void xmrig::CpuConfig::generate()
    count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit);
    count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
    count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
+    count += xmrig::generate<Algorithm::GHOSTRIDER>(m_threads, m_limit);

    m_shouldSave |= count > 0;
 }
--- a/src/backend/cpu/CpuConfig_gen.h
+++ b/src/backend/cpu/CpuConfig_gen.h
@@ -161,6 +161,15 @@ size_t inline generate<Algorithm::ASTROBWT>(Threads<CpuThreads>& threads, uint32
 }
 #endif

+#ifdef XMRIG_ALGO_GHOSTRIDER
+template<> // generate() specialization for the GhostRider family; invoked from CpuConfig::generate()
+size_t inline generate<Algorithm::GHOSTRIDER>(Threads<CpuThreads>& threads, uint32_t limit)
+{
+    return generate(Algorithm::kGHOSTRIDER, threads, Algorithm::GHOSTRIDER_RTM, limit); // forwards to the four-argument generate() overload and returns its result unchanged
+}
+#endif
+
+
 } /* namespace xmrig */


--- a/src/backend/cpu/CpuLaunchData.cpp
+++ b/src/backend/cpu/CpuLaunchData.cpp
@@ -32,7 +32,7 @@
 #include <algorithm>


-xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads) :
+xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector<int64_t>& affinities) :
    algorithm(algorithm),
    assembly(config.assembly()),
    astrobwtAVX2(config.astrobwtAVX2()),
@@ -44,7 +44,8 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
    affinity(thread.affinity()),
    miner(miner),
    threads(threads),
-    intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()))
+    intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity())),
+    affinities(affinities)
 {
 }

--- a/src/backend/cpu/CpuLaunchData.h
+++ b/src/backend/cpu/CpuLaunchData.h
@@ -44,7 +44,7 @@ class Miner;
 class CpuLaunchData
 {
 public:
-    CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads);
+    CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector<int64_t>& affinities);

    bool isEqual(const CpuLaunchData &other) const;
    CnHash::AlgoVariant av() const;
@@ -68,6 +68,7 @@ public:
    const Miner *miner;
    const size_t threads;
    const uint32_t intensity;
+    const std::vector<int64_t> affinities;
 };


--- a/src/backend/cpu/CpuThread.h
+++ b/src/backend/cpu/CpuThread.h
@@ -41,7 +41,7 @@ public:
    CpuThread(const rapidjson::Value &value);

    inline bool isEqual(const CpuThread &other) const       { return other.m_affinity == m_affinity && other.m_intensity == m_intensity; }
-    inline bool isValid() const                             { return m_intensity <= 5; }
+    inline bool isValid() const                             { return m_intensity <= 8; }
    inline int64_t affinity() const                         { return m_affinity; }
    inline uint32_t intensity() const                       { return m_intensity == 0 ? 1 : m_intensity; }

--- a/src/backend/cpu/CpuWorker.cpp
+++ b/src/backend/cpu/CpuWorker.cpp
@@ -34,6 +34,7 @@
 #include "crypto/rx/Rx.h"
 #include "crypto/rx/RxDataset.h"
 #include "crypto/rx/RxVm.h"
+#include "crypto/ghostrider/ghostrider.h"
 #include "net/JobResults.h"


@@ -82,7 +83,8 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
 {
 #   ifdef XMRIG_ALGO_CN_HEAVY
    // cn-heavy optimization for Zen3 CPUs
-    if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (m_assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
+    const bool is_vermeer = (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21);
+    if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (m_assembly != Assembly::NONE) && is_vermeer) {
        std::lock_guard<std::mutex> lock(cn_heavyZen3MemoryMutex);
        if (!cn_heavyZen3Memory) {
            // Round up number of threads to the multiple of 8
@@ -96,6 +98,10 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
    {
        m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
    }
+
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    m_ghHelper = ghostrider::create_helper_thread(affinity(), data.affinities);
+#   endif
 }


@@ -114,6 +120,10 @@ xmrig::CpuWorker<N>::~CpuWorker()
    {
        delete m_memory;
    }
+
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    ghostrider::destroy_helper_thread(m_ghHelper);
+#   endif
 }


@@ -151,6 +161,12 @@ bool xmrig::CpuWorker<N>::selfTest()
    }
 #   endif

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (m_algorithm.family() == Algorithm::GHOSTRIDER) {
+        return N == 8;
+    }
+#   endif
+
    allocateCnCtx();

    if (m_algorithm.family() == Algorithm::CN) {
@@ -274,9 +290,9 @@ void xmrig::CpuWorker<N>::start()
            bool valid = true;

            uint8_t miner_signature_saved[64];
-            uint8_t* miner_signature_ptr = m_job.blob() + m_job.nonceOffset() + m_job.nonceSize();

 #           ifdef XMRIG_ALGO_RANDOMX
+            uint8_t* miner_signature_ptr = m_job.blob() + m_job.nonceOffset() + m_job.nonceSize();
            if (job.algorithm().family() == Algorithm::RANDOM_X) {
                if (first) {
                    first = false;
@@ -299,16 +315,30 @@ void xmrig::CpuWorker<N>::start()
            else
 #           endif
            {
+                switch (job.algorithm().family()) {
+
 #               ifdef XMRIG_ALGO_ASTROBWT
-                if (job.algorithm().family() == Algorithm::ASTROBWT) {
+                case Algorithm::ASTROBWT:
                    if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize, m_astrobwtAVX2)) {
                        valid = false;
                    }
-                }
-                else
+                    break;
 #               endif
-                {
+
+#               ifdef XMRIG_ALGO_GHOSTRIDER
+                case Algorithm::GHOSTRIDER:
+                    if (N == 8) {
+                        ghostrider::hash_octa(m_job.blob(), job.size(), m_hash, m_ctx, m_ghHelper);
+                    }
+                    else {
+                        valid = false;
+                    }
+                    break;
+#               endif
+
+                default:
                    fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
+                    break;
                }

                if (!nextRound()) {
@@ -483,6 +513,7 @@ template class CpuWorker<2>;
 template class CpuWorker<3>;
 template class CpuWorker<4>;
 template class CpuWorker<5>;
+template class CpuWorker<8>;

 } // namespace xmrig

--- a/src/backend/cpu/CpuWorker.h
+++ b/src/backend/cpu/CpuWorker.h
@@ -38,6 +38,11 @@ namespace xmrig {
 class RxVm;


+#ifdef XMRIG_ALGO_GHOSTRIDER
+namespace ghostrider { struct HelperThread; }
+#endif
+
+
 template<size_t N>
 class CpuWorker : public Worker
 {
@@ -87,6 +92,10 @@ private:
    randomx_vm *m_vm        = nullptr;
 #   endif

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    ghostrider::HelperThread* m_ghHelper = nullptr;
+#   endif
+
 #   ifdef XMRIG_FEATURE_BENCHMARK
    uint32_t m_benchSize    = 0;
 #   endif
@@ -102,6 +111,7 @@ extern template class CpuWorker<2>;
 extern template class CpuWorker<3>;
 extern template class CpuWorker<4>;
 extern template class CpuWorker<5>;
+extern template class CpuWorker<8>;


 } // namespace xmrig
--- a/src/backend/cpu/interfaces/ICpuInfo.h
+++ b/src/backend/cpu/interfaces/ICpuInfo.h
@@ -111,6 +111,7 @@ public:
    virtual size_t packages() const                                                 = 0;
    virtual size_t threads() const                                                  = 0;
    virtual Vendor vendor() const                                                   = 0;
+    virtual uint32_t model() const                                                  = 0;
 };


--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@@ -361,6 +361,12 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
    }
 #   endif

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (f == Algorithm::GHOSTRIDER) {
+        return CpuThreads(std::max<size_t>(count / 2, 1), 8);
+    }
+#   endif
+
    return CpuThreads(std::max<size_t>(count / 2, 1), 1);
 }

--- a/src/backend/cpu/platform/BasicCpuInfo.h
+++ b/src/backend/cpu/platform/BasicCpuInfo.h
@@ -62,6 +62,14 @@ protected:
    inline size_t packages() const override                     { return 1; }
    inline size_t threads() const override                      { return m_threads; }
    inline Vendor vendor() const override                       { return m_vendor; }
+    inline uint32_t model() const override
+    {
+#   ifdef XMRIG_ARM
+        return 0;           // ARM builds always report model 0
+#   else
+        return m_model;     // non-ARM builds report the value stored in m_model
+#   endif
+    }

 protected:
    Arch m_arch             = ARCH_UNKNOWN;
--- a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp
@@ -99,8 +99,14 @@ const char *xmrig::BasicCpuInfo::backend() const
 }


-xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &, uint32_t) const
+xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
 {
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (algorithm.family() == Algorithm::GHOSTRIDER) {
+        return CpuThreads(threads(), 8);
+    }
+#   endif
+
    return CpuThreads(threads());
 }

--- a/src/backend/cpu/platform/HwlocCpuInfo.cpp
+++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp
@@ -216,12 +216,6 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset)

 xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
 {
-#   ifdef XMRIG_ALGO_ASTROBWT
-    if (algorithm == Algorithm::ASTROBWT_DERO) {
-        return allThreads(algorithm, limit);
-    }
-#   endif
-
 #   ifndef XMRIG_ARM
    if (L2() == 0 && L3() == 0) {
        return BasicCpuInfo::threads(algorithm, limit);
@@ -275,8 +269,10 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui
    CpuThreads threads;
    threads.reserve(m_threads);

+    const uint32_t intensity = (algorithm.family() == Algorithm::GHOSTRIDER) ? 8 : 0;
+
    for (const int32_t pu : m_units) {
-        threads.add(pu, 0);
+        threads.add(pu, intensity);
    }

    if (threads.isEmpty()) {
@@ -302,14 +298,33 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
    cores.reserve(m_cores);
    findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
+        // Don't use E-cores on Alder Lake
+        cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
+
+        // This shouldn't happen, but check it anyway
+        if (cores.empty()) {
+            findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
+        }
+    }
+#   endif
+
    size_t L3               = cache->attr->cache.size;
    const bool L3_exclusive = isCacheExclusive(cache);
    size_t L2               = 0;
    int L2_associativity    = 0;
    size_t extra            = 0;
-    const size_t scratchpad = algorithm.l3();
+    size_t scratchpad       = algorithm.l3();
    uint32_t intensity      = algorithm.maxIntensity() == 1 ? 0 : 1;

+#   ifdef XMRIG_ALGO_ASTROBWT
+    if (algorithm == Algorithm::ASTROBWT_DERO) {
+        // Use fake low value to force usage of all available cores for AstroBWT (taking 'limit' into account)
+        scratchpad = 16 * 1024;
+    }
+#   endif
+
    if (cache->attr->cache.depth == 3) {
        for (size_t i = 0; i < cache->arity; ++i) {
            hwloc_obj_t l2 = cache->children[i];
@@ -350,6 +365,15 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
        cacheHashes = std::min(cacheHashes, limit);
    }

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (algorithm == Algorithm::GHOSTRIDER_RTM) {
+        // GhostRider implementation runs 8 hashes at a time
+        intensity = 8;
+        // Always 1 thread per core (it uses additional helper thread when possible)
+        cacheHashes = std::min(cacheHashes, cores.size());
+    }
+#   endif
+
    if (cacheHashes >= PUs) {
        for (hwloc_obj_t core : cores) {
            const std::vector<hwloc_obj_t> units = findByType(core, HWLOC_OBJ_PU);
--- a/src/backend/opencl/cl/cn/blake256.cl
+++ b/src/backend/opencl/cl/cn/blake256.cl
@@ -27,7 +27,7 @@
 *
 * @author   djm34
 */
-__constant static const int sigma[16][16] = {
+__constant STATIC const int sigma[16][16] = {
 		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
 		{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
 		{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
@@ -47,7 +47,7 @@ __constant static const int sigma[16][16] = {
 };


-__constant static const sph_u32  c_IV256[8] = {
+__constant STATIC const sph_u32  c_IV256[8] = {
 	0x6A09E667, 0xBB67AE85,
 	0x3C6EF372, 0xA54FF53A,
 	0x510E527F, 0x9B05688C,
@@ -55,13 +55,13 @@ __constant static const sph_u32  c_IV256[8] = {
 };

 /* Second part (64-80) msg never change, store it */
-__constant static const sph_u32  c_Padding[16] = {
+__constant STATIC const sph_u32  c_Padding[16] = {
 	0, 0, 0, 0,
 	0x80000000, 0, 0, 0,
 	0, 0, 0, 0,
 	0, 1, 0, 640,
 };
-__constant static const sph_u32  c_u256[16] = {
+__constant STATIC const sph_u32  c_u256[16] = {
 	0x243F6A88, 0x85A308D3,
 	0x13198A2E, 0x03707344,
 	0xA4093822, 0x299F31D0,
--- a/src/backend/opencl/cl/cn/cryptonight.cl
+++ b/src/backend/opencl/cl/cn/cryptonight.cl
@@ -22,6 +22,15 @@
 *   along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

+#ifdef STATIC
+#   undef STATIC
+#endif
+#ifdef cl_amd_media_ops
+#   define STATIC static
+#else
+#   define STATIC
+#endif
+
 /* For Mesa clover support */
 #ifdef cl_clang_storage_class_specifiers
 #   pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
@@ -39,7 +48,7 @@
 #include "keccak.cl"


-#if defined(__NV_CL_C_VERSION) && STRIDED_INDEX != 0
+#if (defined(__NV_CL_C_VERSION) || defined(__APPLE__)) && STRIDED_INDEX != 0
 #   undef STRIDED_INDEX
 #   define STRIDED_INDEX 0
 #endif
@@ -755,7 +764,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u

 __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();

    // do not use early return here
    if(idx < BranchBuf[Threads]) {
@@ -800,9 +809,9 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
        // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
        // and expect an accurate result for target > 32-bit without implementing carries
        if (p.s3 <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
            if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
            }
        }
    }
@@ -838,7 +847,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u

 __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();

    // do not use early return here
    if (idx < BranchBuf[Threads]) {
@@ -872,9 +881,9 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
        // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
        // and expect an accurate result for target > 32-bit without implementing carries
        if (h7l <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
            if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
            }
        }
    }
@@ -886,7 +895,7 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint

 __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();

    // do not use early return here
    if (idx < BranchBuf[Threads]) {
@@ -973,9 +982,9 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
        // and expect an accurate result for target > 32-bit without implementing carries
        uint2 t = (uint2)(h[6],h[7]);
        if (as_ulong(t) <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
            if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
            }
        }
    }
@@ -987,7 +996,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u

 __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();

    // do not use early return here
    if (idx < BranchBuf[Threads]) {
@@ -1073,9 +1082,9 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
        // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
        // and expect an accurate result for target > 32-bit without implementing carries
        if (State[7] <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
            if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
            }
        }
    }
--- a/src/backend/opencl/cl/cn/cryptonight_cl.h
+++ b/src/backend/opencl/cl/cn/cryptonight_cl.h
--- a/src/backend/opencl/cl/cn/cryptonight_r_cl.h
+++ b/src/backend/opencl/cl/cn/cryptonight_r_cl.h
@@ -2,7 +2,7 @@

 namespace xmrig {

-static const char cryptonight_r_defines_cl[7709] = {
+static const char cryptonight_r_defines_cl[7703] = {
    0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x5f,0x5f,0x4e,0x56,0x5f,0x43,0x4c,0x5f,0x43,0x5f,0x56,0x45,0x52,0x53,0x49,0x4f,0x4e,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,
    0x20,0x53,0x43,0x52,0x41,0x54,0x43,0x48,0x50,0x41,0x44,0x5f,0x43,0x48,0x55,0x4e,0x4b,0x28,0x4e,0x29,0x20,0x28,0x2a,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,
    0x75,0x69,0x6e,0x74,0x34,0x2a,0x29,0x28,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x63,0x68,0x61,0x72,0x2a,0x29,0x28,0x73,0x63,0x72,0x61,0x74,0x63,0x68,
@@ -26,17 +26,17 @@ static const char cryptonight_r_defines_cl[7709] = {
    0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x4f,0x54,0x5f,0x42,0x49,0x54,0x53,0x20,0x33,0x32,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x4d,0x45,0x4d,0x5f,
    0x43,0x48,0x55,0x4e,0x4b,0x20,0x28,0x31,0x20,0x3c,0x3c,0x20,0x4d,0x45,0x4d,0x5f,0x43,0x48,0x55,0x4e,0x4b,0x5f,0x45,0x58,0x50,0x4f,0x4e,0x45,0x4e,0x54,0x29,0x0a,
    0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x57,0x4f,0x4c,0x46,0x5f,0x41,0x45,0x53,0x5f,0x43,0x4c,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x57,0x4f,0x4c,0x46,
-    0x5f,0x41,0x45,0x53,0x5f,0x43,0x4c,0x0a,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x61,0x6d,0x64,0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,0x6f,0x70,0x73,0x32,
-    0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x61,0x6d,0x64,
-    0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,0x6f,0x70,0x73,0x32,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x78,0x6d,0x72,
-    0x69,0x67,0x5f,0x61,0x6d,0x64,0x5f,0x62,0x66,0x65,0x28,0x73,0x72,0x63,0x30,0x2c,0x20,0x73,0x72,0x63,0x31,0x2c,0x20,0x73,0x72,0x63,0x32,0x29,0x20,0x61,0x6d,0x64,
-    0x5f,0x62,0x66,0x65,0x28,0x73,0x72,0x63,0x30,0x2c,0x20,0x73,0x72,0x63,0x31,0x2c,0x20,0x73,0x72,0x63,0x32,0x29,0x0a,0x23,0x65,0x6c,0x73,0x65,0x0a,0x69,0x6e,0x6c,
-    0x69,0x6e,0x65,0x20,0x69,0x6e,0x74,0x20,0x78,0x6d,0x72,0x69,0x67,0x5f,0x61,0x6d,0x64,0x5f,0x62,0x66,0x65,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,
+    0x5f,0x41,0x45,0x53,0x5f,0x43,0x4c,0x0a,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x53,0x54,0x41,0x54,0x49,0x43,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x54,0x41,
+    0x54,0x49,0x43,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x61,0x6d,0x64,0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,0x6f,
+    0x70,0x73,0x32,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x53,0x54,0x41,0x54,0x49,0x43,0x20,0x73,0x74,0x61,0x74,0x69,0x63,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,
+    0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x61,0x6d,0x64,0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,
+    0x6f,0x70,0x73,0x32,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6c,0x73,0x65,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x53,0x54,0x41,0x54,
+    0x49,0x43,0x0a,0x69,0x6e,0x6c,0x69,0x6e,0x65,0x20,0x69,0x6e,0x74,0x20,0x61,0x6d,0x64,0x5f,0x62,0x66,0x65,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,
    0x20,0x73,0x72,0x63,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,
    0x6e,0x74,0x20,0x77,0x69,0x64,0x74,0x68,0x29,0x0a,0x7b,0x0a,0x69,0x66,0x28,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x2b,0x77,0x69,0x64,0x74,0x68,0x29,0x3c,0x33,0x32,
    0x75,0x29,0x20,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x28,0x73,0x72,0x63,0x30,0x3c,0x3c,0x28,0x33,0x32,0x75,0x2d,0x6f,0x66,0x66,0x73,0x65,0x74,0x2d,0x77,
    0x69,0x64,0x74,0x68,0x29,0x29,0x3e,0x3e,0x28,0x33,0x32,0x75,0x2d,0x77,0x69,0x64,0x74,0x68,0x29,0x3b,0x0a,0x7d,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x73,0x72,
-    0x63,0x30,0x3e,0x3e,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0x0a,0x7d,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x63,0x6f,0x6e,0x73,
+    0x63,0x30,0x3e,0x3e,0x6f,0x66,0x66,0x73,0x65,0x74,0x3b,0x0a,0x7d,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x53,0x54,0x41,0x54,0x49,0x43,0x20,0x63,0x6f,0x6e,0x73,
    0x74,0x20,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x41,0x45,0x53,0x30,0x5f,0x43,0x5b,0x32,0x35,0x36,0x5d,0x20,0x3d,0x0a,
    0x7b,0x0a,0x30,0x78,0x41,0x35,0x36,0x33,0x36,0x33,0x43,0x36,0x55,0x2c,0x30,0x78,0x38,0x34,0x37,0x43,0x37,0x43,0x46,0x38,0x55,0x2c,0x30,0x78,0x39,0x39,0x37,0x37,
    0x37,0x37,0x45,0x45,0x55,0x2c,0x30,0x78,0x38,0x44,0x37,0x42,0x37,0x42,0x46,0x36,0x55,0x2c,0x0a,0x30,0x78,0x30,0x44,0x46,0x32,0x46,0x32,0x46,0x46,0x55,0x2c,0x30,
@@ -136,114 +136,114 @@ static const char cryptonight_r_defines_cl[7709] = {
    0x30,0x78,0x43,0x33,0x34,0x31,0x34,0x31,0x38,0x32,0x55,0x2c,0x30,0x78,0x42,0x30,0x39,0x39,0x39,0x39,0x32,0x39,0x55,0x2c,0x30,0x78,0x37,0x37,0x32,0x44,0x32,0x44,
    0x35,0x41,0x55,0x2c,0x30,0x78,0x31,0x31,0x30,0x46,0x30,0x46,0x31,0x45,0x55,0x2c,0x0a,0x30,0x78,0x43,0x42,0x42,0x30,0x42,0x30,0x37,0x42,0x55,0x2c,0x30,0x78,0x46,
    0x43,0x35,0x34,0x35,0x34,0x41,0x38,0x55,0x2c,0x30,0x78,0x44,0x36,0x42,0x42,0x42,0x42,0x36,0x44,0x55,0x2c,0x30,0x78,0x33,0x41,0x31,0x36,0x31,0x36,0x32,0x43,0x55,
-    0x0a,0x7d,0x3b,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x42,0x59,0x54,0x45,0x28,0x78,0x2c,0x20,0x79,0x29,0x20,0x28,0x78,0x6d,0x72,0x69,0x67,0x5f,0x61,0x6d,
-    0x64,0x5f,0x62,0x66,0x65,0x28,0x28,0x78,0x29,0x2c,0x20,0x28,0x79,0x29,0x20,0x3c,0x3c,0x20,0x33,0x55,0x2c,0x20,0x38,0x55,0x29,0x29,0x0a,0x23,0x69,0x66,0x20,0x28,
-    0x41,0x4c,0x47,0x4f,0x20,0x3d,0x3d,0x20,0x41,0x4c,0x47,0x4f,0x5f,0x43,0x4e,0x5f,0x48,0x45,0x41,0x56,0x59,0x5f,0x54,0x55,0x42,0x45,0x29,0x0a,0x69,0x6e,0x6c,0x69,
-    0x6e,0x65,0x20,0x75,0x69,0x6e,0x74,0x34,0x20,0x41,0x45,0x53,0x5f,0x52,0x6f,0x75,0x6e,0x64,0x5f,0x62,0x69,0x74,0x74,0x75,0x62,0x65,0x32,0x28,0x63,0x6f,0x6e,0x73,
-    0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,
-    0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x31,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x78,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x6b,0x29,0x0a,0x7b,
-    0x0a,0x78,0x3d,0x7e,0x78,0x3b,0x0a,0x6b,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x30,0x29,
-    0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,
-    0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x32,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x33,0x29,
-    0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x78,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x6b,0x2e,0x73,0x30,0x3b,0x0a,0x6b,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x41,0x45,
-    0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x32,0x2c,
-    0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,
-    0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x78,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x6b,
-    0x2e,0x73,0x31,0x3b,0x0a,0x6b,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x32,0x2c,0x30,0x29,0x5d,0x5e,
-    0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,
-    0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x33,0x29,0x5d,0x2c,
-    0x31,0x36,0x55,0x29,0x3b,0x0a,0x78,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x6b,0x2e,0x73,0x32,0x3b,0x0a,0x6b,0x2e,0x73,0x33,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,
-    0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x31,0x29,
-    0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,
-    0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x32,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x6b,0x3b,0x0a,0x7d,
-    0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x41,0x45,0x53,0x5f,0x52,0x6f,0x75,0x6e,0x64,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,
-    0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,
-    0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x31,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,
-    0x53,0x32,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x33,0x2c,0x63,0x6f,0x6e,0x73,
-    0x74,0x20,0x75,0x69,0x6e,0x74,0x34,0x20,0x58,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x6b,0x65,0x79,0x29,0x0a,0x7b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x30,0x20,0x5e,0x3d,
-    0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,
-    0x73,0x31,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,
-    0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,
-    0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,
-    0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,
-    0x33,0x29,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x30,0x29,
-    0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,
-    0x2e,0x73,0x30,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x2e,
-    0x73,0x33,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,
-    0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x32,0x29,0x5d,0x5e,0x41,
-    0x45,0x53,0x33,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x6b,0x65,0x79,0x3b,0x0a,0x7d,
-    0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x41,0x45,0x53,0x5f,0x52,0x6f,0x75,0x6e,0x64,0x5f,0x54,0x77,0x6f,0x5f,0x54,0x61,0x62,0x6c,0x65,0x73,0x28,0x63,0x6f,0x6e,0x73,
-    0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,
-    0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x31,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x34,0x20,0x58,0x2c,0x75,0x69,0x6e,0x74,
-    0x34,0x20,0x6b,0x65,0x79,0x29,0x0a,0x7b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,
-    0x30,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,
-    0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,
-    0x33,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,
-    0x58,0x2e,0x73,0x31,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,
-    0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,
-    0x58,0x2e,0x73,0x30,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,
-    0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x31,0x29,0x5d,0x5e,0x72,
-    0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,
-    0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x33,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,
-    0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x31,0x29,
-    0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,
-    0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x6b,0x65,0x79,0x3b,
-    0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x75,0x63,0x68,0x61,0x72,
-    0x20,0x72,0x63,0x6f,0x6e,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x78,0x38,0x64,0x2c,0x30,0x78,0x30,0x31,0x2c,0x30,0x78,0x30,0x32,0x2c,0x30,0x78,0x30,0x34,0x2c,0x30,
-    0x78,0x30,0x38,0x2c,0x30,0x78,0x31,0x30,0x2c,0x30,0x78,0x32,0x30,0x2c,0x30,0x78,0x34,0x30,0x20,0x7d,0x3b,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x63,0x6f,0x6e,
-    0x73,0x74,0x20,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x75,0x63,0x68,0x61,0x72,0x20,0x73,0x62,0x6f,0x78,0x5b,0x32,0x35,0x36,0x5d,0x20,0x3d,0x0a,
-    0x7b,0x0a,0x30,0x78,0x36,0x33,0x2c,0x30,0x78,0x37,0x43,0x2c,0x30,0x78,0x37,0x37,0x2c,0x30,0x78,0x37,0x42,0x2c,0x30,0x78,0x46,0x32,0x2c,0x30,0x78,0x36,0x42,0x2c,
-    0x30,0x78,0x36,0x46,0x2c,0x30,0x78,0x43,0x35,0x2c,0x30,0x78,0x33,0x30,0x2c,0x30,0x78,0x30,0x31,0x2c,0x30,0x78,0x36,0x37,0x2c,0x30,0x78,0x32,0x42,0x2c,0x30,0x78,
-    0x46,0x45,0x2c,0x30,0x78,0x44,0x37,0x2c,0x30,0x78,0x41,0x42,0x2c,0x30,0x78,0x37,0x36,0x2c,0x0a,0x30,0x78,0x43,0x41,0x2c,0x30,0x78,0x38,0x32,0x2c,0x30,0x78,0x43,
-    0x39,0x2c,0x30,0x78,0x37,0x44,0x2c,0x30,0x78,0x46,0x41,0x2c,0x30,0x78,0x35,0x39,0x2c,0x30,0x78,0x34,0x37,0x2c,0x30,0x78,0x46,0x30,0x2c,0x30,0x78,0x41,0x44,0x2c,
-    0x30,0x78,0x44,0x34,0x2c,0x30,0x78,0x41,0x32,0x2c,0x30,0x78,0x41,0x46,0x2c,0x30,0x78,0x39,0x43,0x2c,0x30,0x78,0x41,0x34,0x2c,0x30,0x78,0x37,0x32,0x2c,0x30,0x78,
-    0x43,0x30,0x2c,0x0a,0x30,0x78,0x42,0x37,0x2c,0x30,0x78,0x46,0x44,0x2c,0x30,0x78,0x39,0x33,0x2c,0x30,0x78,0x32,0x36,0x2c,0x30,0x78,0x33,0x36,0x2c,0x30,0x78,0x33,
-    0x46,0x2c,0x30,0x78,0x46,0x37,0x2c,0x30,0x78,0x43,0x43,0x2c,0x30,0x78,0x33,0x34,0x2c,0x30,0x78,0x41,0x35,0x2c,0x30,0x78,0x45,0x35,0x2c,0x30,0x78,0x46,0x31,0x2c,
-    0x30,0x78,0x37,0x31,0x2c,0x30,0x78,0x44,0x38,0x2c,0x30,0x78,0x33,0x31,0x2c,0x30,0x78,0x31,0x35,0x2c,0x0a,0x30,0x78,0x30,0x34,0x2c,0x30,0x78,0x43,0x37,0x2c,0x30,
-    0x78,0x32,0x33,0x2c,0x30,0x78,0x43,0x33,0x2c,0x30,0x78,0x31,0x38,0x2c,0x30,0x78,0x39,0x36,0x2c,0x30,0x78,0x30,0x35,0x2c,0x30,0x78,0x39,0x41,0x2c,0x30,0x78,0x30,
-    0x37,0x2c,0x30,0x78,0x31,0x32,0x2c,0x30,0x78,0x38,0x30,0x2c,0x30,0x78,0x45,0x32,0x2c,0x30,0x78,0x45,0x42,0x2c,0x30,0x78,0x32,0x37,0x2c,0x30,0x78,0x42,0x32,0x2c,
-    0x30,0x78,0x37,0x35,0x2c,0x0a,0x30,0x78,0x30,0x39,0x2c,0x30,0x78,0x38,0x33,0x2c,0x30,0x78,0x32,0x43,0x2c,0x30,0x78,0x31,0x41,0x2c,0x30,0x78,0x31,0x42,0x2c,0x30,
-    0x78,0x36,0x45,0x2c,0x30,0x78,0x35,0x41,0x2c,0x30,0x78,0x41,0x30,0x2c,0x30,0x78,0x35,0x32,0x2c,0x30,0x78,0x33,0x42,0x2c,0x30,0x78,0x44,0x36,0x2c,0x30,0x78,0x42,
-    0x33,0x2c,0x30,0x78,0x32,0x39,0x2c,0x30,0x78,0x45,0x33,0x2c,0x30,0x78,0x32,0x46,0x2c,0x30,0x78,0x38,0x34,0x2c,0x0a,0x30,0x78,0x35,0x33,0x2c,0x30,0x78,0x44,0x31,
-    0x2c,0x30,0x78,0x30,0x30,0x2c,0x30,0x78,0x45,0x44,0x2c,0x30,0x78,0x32,0x30,0x2c,0x30,0x78,0x46,0x43,0x2c,0x30,0x78,0x42,0x31,0x2c,0x30,0x78,0x35,0x42,0x2c,0x30,
-    0x78,0x36,0x41,0x2c,0x30,0x78,0x43,0x42,0x2c,0x30,0x78,0x42,0x45,0x2c,0x30,0x78,0x33,0x39,0x2c,0x30,0x78,0x34,0x41,0x2c,0x30,0x78,0x34,0x43,0x2c,0x30,0x78,0x35,
-    0x38,0x2c,0x30,0x78,0x43,0x46,0x2c,0x0a,0x30,0x78,0x44,0x30,0x2c,0x30,0x78,0x45,0x46,0x2c,0x30,0x78,0x41,0x41,0x2c,0x30,0x78,0x46,0x42,0x2c,0x30,0x78,0x34,0x33,
-    0x2c,0x30,0x78,0x34,0x44,0x2c,0x30,0x78,0x33,0x33,0x2c,0x30,0x78,0x38,0x35,0x2c,0x30,0x78,0x34,0x35,0x2c,0x30,0x78,0x46,0x39,0x2c,0x30,0x78,0x30,0x32,0x2c,0x30,
-    0x78,0x37,0x46,0x2c,0x30,0x78,0x35,0x30,0x2c,0x30,0x78,0x33,0x43,0x2c,0x30,0x78,0x39,0x46,0x2c,0x30,0x78,0x41,0x38,0x2c,0x0a,0x30,0x78,0x35,0x31,0x2c,0x30,0x78,
-    0x41,0x33,0x2c,0x30,0x78,0x34,0x30,0x2c,0x30,0x78,0x38,0x46,0x2c,0x30,0x78,0x39,0x32,0x2c,0x30,0x78,0x39,0x44,0x2c,0x30,0x78,0x33,0x38,0x2c,0x30,0x78,0x46,0x35,
-    0x2c,0x30,0x78,0x42,0x43,0x2c,0x30,0x78,0x42,0x36,0x2c,0x30,0x78,0x44,0x41,0x2c,0x30,0x78,0x32,0x31,0x2c,0x30,0x78,0x31,0x30,0x2c,0x30,0x78,0x46,0x46,0x2c,0x30,
-    0x78,0x46,0x33,0x2c,0x30,0x78,0x44,0x32,0x2c,0x0a,0x30,0x78,0x43,0x44,0x2c,0x30,0x78,0x30,0x43,0x2c,0x30,0x78,0x31,0x33,0x2c,0x30,0x78,0x45,0x43,0x2c,0x30,0x78,
-    0x35,0x46,0x2c,0x30,0x78,0x39,0x37,0x2c,0x30,0x78,0x34,0x34,0x2c,0x30,0x78,0x31,0x37,0x2c,0x30,0x78,0x43,0x34,0x2c,0x30,0x78,0x41,0x37,0x2c,0x30,0x78,0x37,0x45,
-    0x2c,0x30,0x78,0x33,0x44,0x2c,0x30,0x78,0x36,0x34,0x2c,0x30,0x78,0x35,0x44,0x2c,0x30,0x78,0x31,0x39,0x2c,0x30,0x78,0x37,0x33,0x2c,0x0a,0x30,0x78,0x36,0x30,0x2c,
-    0x30,0x78,0x38,0x31,0x2c,0x30,0x78,0x34,0x46,0x2c,0x30,0x78,0x44,0x43,0x2c,0x30,0x78,0x32,0x32,0x2c,0x30,0x78,0x32,0x41,0x2c,0x30,0x78,0x39,0x30,0x2c,0x30,0x78,
-    0x38,0x38,0x2c,0x30,0x78,0x34,0x36,0x2c,0x30,0x78,0x45,0x45,0x2c,0x30,0x78,0x42,0x38,0x2c,0x30,0x78,0x31,0x34,0x2c,0x30,0x78,0x44,0x45,0x2c,0x30,0x78,0x35,0x45,
-    0x2c,0x30,0x78,0x30,0x42,0x2c,0x30,0x78,0x44,0x42,0x2c,0x0a,0x30,0x78,0x45,0x30,0x2c,0x30,0x78,0x33,0x32,0x2c,0x30,0x78,0x33,0x41,0x2c,0x30,0x78,0x30,0x41,0x2c,
-    0x30,0x78,0x34,0x39,0x2c,0x30,0x78,0x30,0x36,0x2c,0x30,0x78,0x32,0x34,0x2c,0x30,0x78,0x35,0x43,0x2c,0x30,0x78,0x43,0x32,0x2c,0x30,0x78,0x44,0x33,0x2c,0x30,0x78,
-    0x41,0x43,0x2c,0x30,0x78,0x36,0x32,0x2c,0x30,0x78,0x39,0x31,0x2c,0x30,0x78,0x39,0x35,0x2c,0x30,0x78,0x45,0x34,0x2c,0x30,0x78,0x37,0x39,0x2c,0x0a,0x30,0x78,0x45,
-    0x37,0x2c,0x30,0x78,0x43,0x38,0x2c,0x30,0x78,0x33,0x37,0x2c,0x30,0x78,0x36,0x44,0x2c,0x30,0x78,0x38,0x44,0x2c,0x30,0x78,0x44,0x35,0x2c,0x30,0x78,0x34,0x45,0x2c,
-    0x30,0x78,0x41,0x39,0x2c,0x30,0x78,0x36,0x43,0x2c,0x30,0x78,0x35,0x36,0x2c,0x30,0x78,0x46,0x34,0x2c,0x30,0x78,0x45,0x41,0x2c,0x30,0x78,0x36,0x35,0x2c,0x30,0x78,
-    0x37,0x41,0x2c,0x30,0x78,0x41,0x45,0x2c,0x30,0x78,0x30,0x38,0x2c,0x0a,0x30,0x78,0x42,0x41,0x2c,0x30,0x78,0x37,0x38,0x2c,0x30,0x78,0x32,0x35,0x2c,0x30,0x78,0x32,
-    0x45,0x2c,0x30,0x78,0x31,0x43,0x2c,0x30,0x78,0x41,0x36,0x2c,0x30,0x78,0x42,0x34,0x2c,0x30,0x78,0x43,0x36,0x2c,0x30,0x78,0x45,0x38,0x2c,0x30,0x78,0x44,0x44,0x2c,
-    0x30,0x78,0x37,0x34,0x2c,0x30,0x78,0x31,0x46,0x2c,0x30,0x78,0x34,0x42,0x2c,0x30,0x78,0x42,0x44,0x2c,0x30,0x78,0x38,0x42,0x2c,0x30,0x78,0x38,0x41,0x2c,0x0a,0x30,
-    0x78,0x37,0x30,0x2c,0x30,0x78,0x33,0x45,0x2c,0x30,0x78,0x42,0x35,0x2c,0x30,0x78,0x36,0x36,0x2c,0x30,0x78,0x34,0x38,0x2c,0x30,0x78,0x30,0x33,0x2c,0x30,0x78,0x46,
-    0x36,0x2c,0x30,0x78,0x30,0x45,0x2c,0x30,0x78,0x36,0x31,0x2c,0x30,0x78,0x33,0x35,0x2c,0x30,0x78,0x35,0x37,0x2c,0x30,0x78,0x42,0x39,0x2c,0x30,0x78,0x38,0x36,0x2c,
-    0x30,0x78,0x43,0x31,0x2c,0x30,0x78,0x31,0x44,0x2c,0x30,0x78,0x39,0x45,0x2c,0x0a,0x30,0x78,0x45,0x31,0x2c,0x30,0x78,0x46,0x38,0x2c,0x30,0x78,0x39,0x38,0x2c,0x30,
-    0x78,0x31,0x31,0x2c,0x30,0x78,0x36,0x39,0x2c,0x30,0x78,0x44,0x39,0x2c,0x30,0x78,0x38,0x45,0x2c,0x30,0x78,0x39,0x34,0x2c,0x30,0x78,0x39,0x42,0x2c,0x30,0x78,0x31,
-    0x45,0x2c,0x30,0x78,0x38,0x37,0x2c,0x30,0x78,0x45,0x39,0x2c,0x30,0x78,0x43,0x45,0x2c,0x30,0x78,0x35,0x35,0x2c,0x30,0x78,0x32,0x38,0x2c,0x30,0x78,0x44,0x46,0x2c,
-    0x0a,0x30,0x78,0x38,0x43,0x2c,0x30,0x78,0x41,0x31,0x2c,0x30,0x78,0x38,0x39,0x2c,0x30,0x78,0x30,0x44,0x2c,0x30,0x78,0x42,0x46,0x2c,0x30,0x78,0x45,0x36,0x2c,0x30,
-    0x78,0x34,0x32,0x2c,0x30,0x78,0x36,0x38,0x2c,0x30,0x78,0x34,0x31,0x2c,0x30,0x78,0x39,0x39,0x2c,0x30,0x78,0x32,0x44,0x2c,0x30,0x78,0x30,0x46,0x2c,0x30,0x78,0x42,
-    0x30,0x2c,0x30,0x78,0x35,0x34,0x2c,0x30,0x78,0x42,0x42,0x2c,0x30,0x78,0x31,0x36,0x0a,0x7d,0x3b,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x53,0x75,0x62,0x57,
-    0x6f,0x72,0x64,0x28,0x69,0x6e,0x77,0x29,0x20,0x28,0x28,0x73,0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x33,0x29,0x5d,0x20,0x3c,0x3c,
-    0x20,0x32,0x34,0x29,0x20,0x7c,0x20,0x28,0x73,0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x32,0x29,0x5d,0x20,0x3c,0x3c,0x20,0x31,0x36,
-    0x29,0x20,0x7c,0x20,0x28,0x73,0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x31,0x29,0x5d,0x20,0x3c,0x3c,0x20,0x38,0x29,0x20,0x7c,0x20,
-    0x73,0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x30,0x29,0x5d,0x29,0x0a,0x76,0x6f,0x69,0x64,0x20,0x41,0x45,0x53,0x45,0x78,0x70,0x61,
-    0x6e,0x64,0x4b,0x65,0x79,0x32,0x35,0x36,0x28,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6b,0x65,0x79,0x62,0x75,0x66,0x29,0x0a,0x7b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,
-    0x6e,0x74,0x20,0x63,0x3d,0x38,0x2c,0x69,0x3d,0x31,0x3b,0x20,0x63,0x3c,0x34,0x30,0x3b,0x20,0x2b,0x2b,0x63,0x29,0x20,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x74,0x3d,
-    0x28,0x28,0x21,0x28,0x63,0x26,0x37,0x29,0x29,0x7c,0x7c,0x28,0x28,0x63,0x26,0x37,0x29,0x3d,0x3d,0x34,0x29,0x29,0x3f,0x53,0x75,0x62,0x57,0x6f,0x72,0x64,0x28,0x6b,
-    0x65,0x79,0x62,0x75,0x66,0x5b,0x63,0x2d,0x31,0x5d,0x29,0x3a,0x6b,0x65,0x79,0x62,0x75,0x66,0x5b,0x63,0x2d,0x31,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x62,0x75,0x66,0x5b,
-    0x63,0x5d,0x3d,0x6b,0x65,0x79,0x62,0x75,0x66,0x5b,0x63,0x2d,0x38,0x5d,0x5e,0x28,0x28,0x21,0x28,0x63,0x26,0x37,0x29,0x29,0x3f,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,
-    0x74,0x2c,0x32,0x34,0x55,0x29,0x5e,0x61,0x73,0x5f,0x75,0x69,0x6e,0x74,0x28,0x28,0x75,0x63,0x68,0x61,0x72,0x34,0x29,0x28,0x72,0x63,0x6f,0x6e,0x5b,0x69,0x2b,0x2b,
-    0x5d,0x2c,0x30,0x55,0x2c,0x30,0x55,0x2c,0x30,0x55,0x29,0x29,0x3a,0x74,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x00
+    0x0a,0x7d,0x3b,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x42,0x59,0x54,0x45,0x28,0x78,0x2c,0x20,0x79,0x29,0x20,0x28,0x61,0x6d,0x64,0x5f,0x62,0x66,0x65,0x28,
+    0x28,0x78,0x29,0x2c,0x20,0x28,0x79,0x29,0x20,0x3c,0x3c,0x20,0x33,0x55,0x2c,0x20,0x38,0x55,0x29,0x29,0x0a,0x23,0x69,0x66,0x20,0x28,0x41,0x4c,0x47,0x4f,0x20,0x3d,
+    0x3d,0x20,0x41,0x4c,0x47,0x4f,0x5f,0x43,0x4e,0x5f,0x48,0x45,0x41,0x56,0x59,0x5f,0x54,0x55,0x42,0x45,0x29,0x0a,0x69,0x6e,0x6c,0x69,0x6e,0x65,0x20,0x75,0x69,0x6e,
+    0x74,0x34,0x20,0x41,0x45,0x53,0x5f,0x52,0x6f,0x75,0x6e,0x64,0x5f,0x62,0x69,0x74,0x74,0x75,0x62,0x65,0x32,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,
+    0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,
+    0x74,0x20,0x2a,0x41,0x45,0x53,0x31,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x78,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x6b,0x29,0x0a,0x7b,0x0a,0x78,0x3d,0x7e,0x78,0x3b,
+    0x0a,0x6b,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,
+    0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,
+    0x78,0x2e,0x73,0x32,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,
+    0x3b,0x0a,0x78,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x6b,0x2e,0x73,0x30,0x3b,0x0a,0x6b,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,
+    0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x32,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,
+    0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,
+    0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x78,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x6b,0x2e,0x73,0x31,0x3b,0x0a,0x6b,
+    0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x32,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,
+    0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x33,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,
+    0x73,0x30,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,
+    0x78,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x6b,0x2e,0x73,0x32,0x3b,0x0a,0x6b,0x2e,0x73,0x33,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,
+    0x78,0x2e,0x73,0x33,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x30,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,
+    0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x78,0x2e,0x73,0x31,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,
+    0x78,0x2e,0x73,0x32,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x6b,0x3b,0x0a,0x7d,0x0a,0x23,0x65,0x6e,0x64,0x69,
+    0x66,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x41,0x45,0x53,0x5f,0x52,0x6f,0x75,0x6e,0x64,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,
+    0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,
+    0x45,0x53,0x31,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x32,0x2c,0x63,0x6f,0x6e,
+    0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x33,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,
+    0x34,0x20,0x58,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x6b,0x65,0x79,0x29,0x0a,0x7b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,
+    0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x31,0x29,0x5d,
+    0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,
+    0x73,0x33,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,
+    0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,
+    0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x6b,
+    0x65,0x79,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,
+    0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x32,0x29,
+    0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x33,0x20,0x5e,0x3d,0x20,
+    0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,
+    0x30,0x2c,0x31,0x29,0x5d,0x5e,0x41,0x45,0x53,0x32,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x33,0x5b,0x42,0x59,
+    0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x33,0x29,0x5d,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x6b,0x65,0x79,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x34,
+    0x20,0x41,0x45,0x53,0x5f,0x52,0x6f,0x75,0x6e,0x64,0x5f,0x54,0x77,0x6f,0x5f,0x54,0x61,0x62,0x6c,0x65,0x73,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,
+    0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x41,0x45,0x53,0x30,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,
+    0x74,0x20,0x2a,0x41,0x45,0x53,0x31,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x34,0x20,0x58,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x6b,0x65,0x79,0x29,
+    0x0a,0x7b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x30,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x30,0x29,0x5d,0x5e,
+    0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,
+    0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x33,0x29,0x5d,0x2c,
+    0x31,0x36,0x55,0x29,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x31,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x30,
+    0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x32,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x41,0x45,0x53,
+    0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x33,
+    0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x32,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,
+    0x32,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x33,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,
+    0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,
+    0x31,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x6b,0x65,0x79,0x2e,0x73,0x33,0x20,0x5e,0x3d,0x20,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,
+    0x58,0x2e,0x73,0x33,0x2c,0x30,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x30,0x2c,0x31,0x29,0x5d,0x5e,0x72,0x6f,0x74,0x61,
+    0x74,0x65,0x28,0x41,0x45,0x53,0x30,0x5b,0x42,0x59,0x54,0x45,0x28,0x58,0x2e,0x73,0x31,0x2c,0x32,0x29,0x5d,0x5e,0x41,0x45,0x53,0x31,0x5b,0x42,0x59,0x54,0x45,0x28,
+    0x58,0x2e,0x73,0x32,0x2c,0x33,0x29,0x5d,0x2c,0x31,0x36,0x55,0x29,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x6b,0x65,0x79,0x3b,0x0a,0x7d,0x0a,0x53,0x54,0x41,
+    0x54,0x49,0x43,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x75,0x63,0x68,0x61,0x72,0x20,0x72,0x63,0x6f,0x6e,0x5b,
+    0x38,0x5d,0x3d,0x7b,0x20,0x30,0x78,0x38,0x64,0x2c,0x30,0x78,0x30,0x31,0x2c,0x30,0x78,0x30,0x32,0x2c,0x30,0x78,0x30,0x34,0x2c,0x30,0x78,0x30,0x38,0x2c,0x30,0x78,
+    0x31,0x30,0x2c,0x30,0x78,0x32,0x30,0x2c,0x30,0x78,0x34,0x30,0x20,0x7d,0x3b,0x0a,0x53,0x54,0x41,0x54,0x49,0x43,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x5f,0x5f,0x63,
+    0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x75,0x63,0x68,0x61,0x72,0x20,0x73,0x62,0x6f,0x78,0x5b,0x32,0x35,0x36,0x5d,0x20,0x3d,0x0a,0x7b,0x0a,0x30,0x78,0x36,0x33,
+    0x2c,0x30,0x78,0x37,0x43,0x2c,0x30,0x78,0x37,0x37,0x2c,0x30,0x78,0x37,0x42,0x2c,0x30,0x78,0x46,0x32,0x2c,0x30,0x78,0x36,0x42,0x2c,0x30,0x78,0x36,0x46,0x2c,0x30,
+    0x78,0x43,0x35,0x2c,0x30,0x78,0x33,0x30,0x2c,0x30,0x78,0x30,0x31,0x2c,0x30,0x78,0x36,0x37,0x2c,0x30,0x78,0x32,0x42,0x2c,0x30,0x78,0x46,0x45,0x2c,0x30,0x78,0x44,
+    0x37,0x2c,0x30,0x78,0x41,0x42,0x2c,0x30,0x78,0x37,0x36,0x2c,0x0a,0x30,0x78,0x43,0x41,0x2c,0x30,0x78,0x38,0x32,0x2c,0x30,0x78,0x43,0x39,0x2c,0x30,0x78,0x37,0x44,
+    0x2c,0x30,0x78,0x46,0x41,0x2c,0x30,0x78,0x35,0x39,0x2c,0x30,0x78,0x34,0x37,0x2c,0x30,0x78,0x46,0x30,0x2c,0x30,0x78,0x41,0x44,0x2c,0x30,0x78,0x44,0x34,0x2c,0x30,
+    0x78,0x41,0x32,0x2c,0x30,0x78,0x41,0x46,0x2c,0x30,0x78,0x39,0x43,0x2c,0x30,0x78,0x41,0x34,0x2c,0x30,0x78,0x37,0x32,0x2c,0x30,0x78,0x43,0x30,0x2c,0x0a,0x30,0x78,
+    0x42,0x37,0x2c,0x30,0x78,0x46,0x44,0x2c,0x30,0x78,0x39,0x33,0x2c,0x30,0x78,0x32,0x36,0x2c,0x30,0x78,0x33,0x36,0x2c,0x30,0x78,0x33,0x46,0x2c,0x30,0x78,0x46,0x37,
+    0x2c,0x30,0x78,0x43,0x43,0x2c,0x30,0x78,0x33,0x34,0x2c,0x30,0x78,0x41,0x35,0x2c,0x30,0x78,0x45,0x35,0x2c,0x30,0x78,0x46,0x31,0x2c,0x30,0x78,0x37,0x31,0x2c,0x30,
+    0x78,0x44,0x38,0x2c,0x30,0x78,0x33,0x31,0x2c,0x30,0x78,0x31,0x35,0x2c,0x0a,0x30,0x78,0x30,0x34,0x2c,0x30,0x78,0x43,0x37,0x2c,0x30,0x78,0x32,0x33,0x2c,0x30,0x78,
+    0x43,0x33,0x2c,0x30,0x78,0x31,0x38,0x2c,0x30,0x78,0x39,0x36,0x2c,0x30,0x78,0x30,0x35,0x2c,0x30,0x78,0x39,0x41,0x2c,0x30,0x78,0x30,0x37,0x2c,0x30,0x78,0x31,0x32,
+    0x2c,0x30,0x78,0x38,0x30,0x2c,0x30,0x78,0x45,0x32,0x2c,0x30,0x78,0x45,0x42,0x2c,0x30,0x78,0x32,0x37,0x2c,0x30,0x78,0x42,0x32,0x2c,0x30,0x78,0x37,0x35,0x2c,0x0a,
+    0x30,0x78,0x30,0x39,0x2c,0x30,0x78,0x38,0x33,0x2c,0x30,0x78,0x32,0x43,0x2c,0x30,0x78,0x31,0x41,0x2c,0x30,0x78,0x31,0x42,0x2c,0x30,0x78,0x36,0x45,0x2c,0x30,0x78,
+    0x35,0x41,0x2c,0x30,0x78,0x41,0x30,0x2c,0x30,0x78,0x35,0x32,0x2c,0x30,0x78,0x33,0x42,0x2c,0x30,0x78,0x44,0x36,0x2c,0x30,0x78,0x42,0x33,0x2c,0x30,0x78,0x32,0x39,
+    0x2c,0x30,0x78,0x45,0x33,0x2c,0x30,0x78,0x32,0x46,0x2c,0x30,0x78,0x38,0x34,0x2c,0x0a,0x30,0x78,0x35,0x33,0x2c,0x30,0x78,0x44,0x31,0x2c,0x30,0x78,0x30,0x30,0x2c,
+    0x30,0x78,0x45,0x44,0x2c,0x30,0x78,0x32,0x30,0x2c,0x30,0x78,0x46,0x43,0x2c,0x30,0x78,0x42,0x31,0x2c,0x30,0x78,0x35,0x42,0x2c,0x30,0x78,0x36,0x41,0x2c,0x30,0x78,
+    0x43,0x42,0x2c,0x30,0x78,0x42,0x45,0x2c,0x30,0x78,0x33,0x39,0x2c,0x30,0x78,0x34,0x41,0x2c,0x30,0x78,0x34,0x43,0x2c,0x30,0x78,0x35,0x38,0x2c,0x30,0x78,0x43,0x46,
+    0x2c,0x0a,0x30,0x78,0x44,0x30,0x2c,0x30,0x78,0x45,0x46,0x2c,0x30,0x78,0x41,0x41,0x2c,0x30,0x78,0x46,0x42,0x2c,0x30,0x78,0x34,0x33,0x2c,0x30,0x78,0x34,0x44,0x2c,
+    0x30,0x78,0x33,0x33,0x2c,0x30,0x78,0x38,0x35,0x2c,0x30,0x78,0x34,0x35,0x2c,0x30,0x78,0x46,0x39,0x2c,0x30,0x78,0x30,0x32,0x2c,0x30,0x78,0x37,0x46,0x2c,0x30,0x78,
+    0x35,0x30,0x2c,0x30,0x78,0x33,0x43,0x2c,0x30,0x78,0x39,0x46,0x2c,0x30,0x78,0x41,0x38,0x2c,0x0a,0x30,0x78,0x35,0x31,0x2c,0x30,0x78,0x41,0x33,0x2c,0x30,0x78,0x34,
+    0x30,0x2c,0x30,0x78,0x38,0x46,0x2c,0x30,0x78,0x39,0x32,0x2c,0x30,0x78,0x39,0x44,0x2c,0x30,0x78,0x33,0x38,0x2c,0x30,0x78,0x46,0x35,0x2c,0x30,0x78,0x42,0x43,0x2c,
+    0x30,0x78,0x42,0x36,0x2c,0x30,0x78,0x44,0x41,0x2c,0x30,0x78,0x32,0x31,0x2c,0x30,0x78,0x31,0x30,0x2c,0x30,0x78,0x46,0x46,0x2c,0x30,0x78,0x46,0x33,0x2c,0x30,0x78,
+    0x44,0x32,0x2c,0x0a,0x30,0x78,0x43,0x44,0x2c,0x30,0x78,0x30,0x43,0x2c,0x30,0x78,0x31,0x33,0x2c,0x30,0x78,0x45,0x43,0x2c,0x30,0x78,0x35,0x46,0x2c,0x30,0x78,0x39,
+    0x37,0x2c,0x30,0x78,0x34,0x34,0x2c,0x30,0x78,0x31,0x37,0x2c,0x30,0x78,0x43,0x34,0x2c,0x30,0x78,0x41,0x37,0x2c,0x30,0x78,0x37,0x45,0x2c,0x30,0x78,0x33,0x44,0x2c,
+    0x30,0x78,0x36,0x34,0x2c,0x30,0x78,0x35,0x44,0x2c,0x30,0x78,0x31,0x39,0x2c,0x30,0x78,0x37,0x33,0x2c,0x0a,0x30,0x78,0x36,0x30,0x2c,0x30,0x78,0x38,0x31,0x2c,0x30,
+    0x78,0x34,0x46,0x2c,0x30,0x78,0x44,0x43,0x2c,0x30,0x78,0x32,0x32,0x2c,0x30,0x78,0x32,0x41,0x2c,0x30,0x78,0x39,0x30,0x2c,0x30,0x78,0x38,0x38,0x2c,0x30,0x78,0x34,
+    0x36,0x2c,0x30,0x78,0x45,0x45,0x2c,0x30,0x78,0x42,0x38,0x2c,0x30,0x78,0x31,0x34,0x2c,0x30,0x78,0x44,0x45,0x2c,0x30,0x78,0x35,0x45,0x2c,0x30,0x78,0x30,0x42,0x2c,
+    0x30,0x78,0x44,0x42,0x2c,0x0a,0x30,0x78,0x45,0x30,0x2c,0x30,0x78,0x33,0x32,0x2c,0x30,0x78,0x33,0x41,0x2c,0x30,0x78,0x30,0x41,0x2c,0x30,0x78,0x34,0x39,0x2c,0x30,
+    0x78,0x30,0x36,0x2c,0x30,0x78,0x32,0x34,0x2c,0x30,0x78,0x35,0x43,0x2c,0x30,0x78,0x43,0x32,0x2c,0x30,0x78,0x44,0x33,0x2c,0x30,0x78,0x41,0x43,0x2c,0x30,0x78,0x36,
+    0x32,0x2c,0x30,0x78,0x39,0x31,0x2c,0x30,0x78,0x39,0x35,0x2c,0x30,0x78,0x45,0x34,0x2c,0x30,0x78,0x37,0x39,0x2c,0x0a,0x30,0x78,0x45,0x37,0x2c,0x30,0x78,0x43,0x38,
+    0x2c,0x30,0x78,0x33,0x37,0x2c,0x30,0x78,0x36,0x44,0x2c,0x30,0x78,0x38,0x44,0x2c,0x30,0x78,0x44,0x35,0x2c,0x30,0x78,0x34,0x45,0x2c,0x30,0x78,0x41,0x39,0x2c,0x30,
+    0x78,0x36,0x43,0x2c,0x30,0x78,0x35,0x36,0x2c,0x30,0x78,0x46,0x34,0x2c,0x30,0x78,0x45,0x41,0x2c,0x30,0x78,0x36,0x35,0x2c,0x30,0x78,0x37,0x41,0x2c,0x30,0x78,0x41,
+    0x45,0x2c,0x30,0x78,0x30,0x38,0x2c,0x0a,0x30,0x78,0x42,0x41,0x2c,0x30,0x78,0x37,0x38,0x2c,0x30,0x78,0x32,0x35,0x2c,0x30,0x78,0x32,0x45,0x2c,0x30,0x78,0x31,0x43,
+    0x2c,0x30,0x78,0x41,0x36,0x2c,0x30,0x78,0x42,0x34,0x2c,0x30,0x78,0x43,0x36,0x2c,0x30,0x78,0x45,0x38,0x2c,0x30,0x78,0x44,0x44,0x2c,0x30,0x78,0x37,0x34,0x2c,0x30,
+    0x78,0x31,0x46,0x2c,0x30,0x78,0x34,0x42,0x2c,0x30,0x78,0x42,0x44,0x2c,0x30,0x78,0x38,0x42,0x2c,0x30,0x78,0x38,0x41,0x2c,0x0a,0x30,0x78,0x37,0x30,0x2c,0x30,0x78,
+    0x33,0x45,0x2c,0x30,0x78,0x42,0x35,0x2c,0x30,0x78,0x36,0x36,0x2c,0x30,0x78,0x34,0x38,0x2c,0x30,0x78,0x30,0x33,0x2c,0x30,0x78,0x46,0x36,0x2c,0x30,0x78,0x30,0x45,
+    0x2c,0x30,0x78,0x36,0x31,0x2c,0x30,0x78,0x33,0x35,0x2c,0x30,0x78,0x35,0x37,0x2c,0x30,0x78,0x42,0x39,0x2c,0x30,0x78,0x38,0x36,0x2c,0x30,0x78,0x43,0x31,0x2c,0x30,
+    0x78,0x31,0x44,0x2c,0x30,0x78,0x39,0x45,0x2c,0x0a,0x30,0x78,0x45,0x31,0x2c,0x30,0x78,0x46,0x38,0x2c,0x30,0x78,0x39,0x38,0x2c,0x30,0x78,0x31,0x31,0x2c,0x30,0x78,
+    0x36,0x39,0x2c,0x30,0x78,0x44,0x39,0x2c,0x30,0x78,0x38,0x45,0x2c,0x30,0x78,0x39,0x34,0x2c,0x30,0x78,0x39,0x42,0x2c,0x30,0x78,0x31,0x45,0x2c,0x30,0x78,0x38,0x37,
+    0x2c,0x30,0x78,0x45,0x39,0x2c,0x30,0x78,0x43,0x45,0x2c,0x30,0x78,0x35,0x35,0x2c,0x30,0x78,0x32,0x38,0x2c,0x30,0x78,0x44,0x46,0x2c,0x0a,0x30,0x78,0x38,0x43,0x2c,
+    0x30,0x78,0x41,0x31,0x2c,0x30,0x78,0x38,0x39,0x2c,0x30,0x78,0x30,0x44,0x2c,0x30,0x78,0x42,0x46,0x2c,0x30,0x78,0x45,0x36,0x2c,0x30,0x78,0x34,0x32,0x2c,0x30,0x78,
+    0x36,0x38,0x2c,0x30,0x78,0x34,0x31,0x2c,0x30,0x78,0x39,0x39,0x2c,0x30,0x78,0x32,0x44,0x2c,0x30,0x78,0x30,0x46,0x2c,0x30,0x78,0x42,0x30,0x2c,0x30,0x78,0x35,0x34,
+    0x2c,0x30,0x78,0x42,0x42,0x2c,0x30,0x78,0x31,0x36,0x0a,0x7d,0x3b,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x53,0x75,0x62,0x57,0x6f,0x72,0x64,0x28,0x69,0x6e,
+    0x77,0x29,0x20,0x28,0x28,0x73,0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x33,0x29,0x5d,0x20,0x3c,0x3c,0x20,0x32,0x34,0x29,0x20,0x7c,
+    0x20,0x28,0x73,0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x32,0x29,0x5d,0x20,0x3c,0x3c,0x20,0x31,0x36,0x29,0x20,0x7c,0x20,0x28,0x73,
+    0x62,0x6f,0x78,0x5b,0x42,0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x31,0x29,0x5d,0x20,0x3c,0x3c,0x20,0x38,0x29,0x20,0x7c,0x20,0x73,0x62,0x6f,0x78,0x5b,0x42,
+    0x59,0x54,0x45,0x28,0x69,0x6e,0x77,0x2c,0x20,0x30,0x29,0x5d,0x29,0x0a,0x76,0x6f,0x69,0x64,0x20,0x41,0x45,0x53,0x45,0x78,0x70,0x61,0x6e,0x64,0x4b,0x65,0x79,0x32,
+    0x35,0x36,0x28,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6b,0x65,0x79,0x62,0x75,0x66,0x29,0x0a,0x7b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x63,0x3d,0x38,
+    0x2c,0x69,0x3d,0x31,0x3b,0x20,0x63,0x3c,0x34,0x30,0x3b,0x20,0x2b,0x2b,0x63,0x29,0x20,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x74,0x3d,0x28,0x28,0x21,0x28,0x63,0x26,
+    0x37,0x29,0x29,0x7c,0x7c,0x28,0x28,0x63,0x26,0x37,0x29,0x3d,0x3d,0x34,0x29,0x29,0x3f,0x53,0x75,0x62,0x57,0x6f,0x72,0x64,0x28,0x6b,0x65,0x79,0x62,0x75,0x66,0x5b,
+    0x63,0x2d,0x31,0x5d,0x29,0x3a,0x6b,0x65,0x79,0x62,0x75,0x66,0x5b,0x63,0x2d,0x31,0x5d,0x3b,0x0a,0x6b,0x65,0x79,0x62,0x75,0x66,0x5b,0x63,0x5d,0x3d,0x6b,0x65,0x79,
+    0x62,0x75,0x66,0x5b,0x63,0x2d,0x38,0x5d,0x5e,0x28,0x28,0x21,0x28,0x63,0x26,0x37,0x29,0x29,0x3f,0x72,0x6f,0x74,0x61,0x74,0x65,0x28,0x74,0x2c,0x32,0x34,0x55,0x29,
+    0x5e,0x61,0x73,0x5f,0x75,0x69,0x6e,0x74,0x28,0x28,0x75,0x63,0x68,0x61,0x72,0x34,0x29,0x28,0x72,0x63,0x6f,0x6e,0x5b,0x69,0x2b,0x2b,0x5d,0x2c,0x30,0x55,0x2c,0x30,
+    0x55,0x2c,0x30,0x55,0x29,0x29,0x3a,0x74,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x00
 };

 static const char cryptonight_r_cl[3424] = {
--- a/src/backend/opencl/cl/cn/groestl256.cl
+++ b/src/backend/opencl/cl/cn/groestl256.cl
@@ -55,7 +55,7 @@
 #define PC64(j, r)  ((sph_u64)((j) + (r)))
 #define QC64(j, r)  (((sph_u64)(r) << 56) ^ (~((sph_u64)(j) << 56)))

-static const __constant ulong T0_G[] =
+STATIC const __constant ulong T0_G[] =
 {
 	0xc6a597f4a5f432c6UL, 0xf884eb9784976ff8UL, 0xee99c7b099b05eeeUL, 0xf68df78c8d8c7af6UL, 
 	0xff0de5170d17e8ffUL, 0xd6bdb7dcbddc0ad6UL, 0xdeb1a7c8b1c816deUL, 0x915439fc54fc6d91UL, 
@@ -123,7 +123,7 @@ static const __constant ulong T0_G[] =
 	0x7bcbf646cb463d7bUL, 0xa8fc4b1ffc1fb7a8UL, 0x6dd6da61d6610c6dUL, 0x2c3a584e3a4e622cUL
 };

-static const __constant ulong T4_G[] =
+STATIC const __constant ulong T4_G[] =
 {
 	0xA5F432C6C6A597F4UL, 0x84976FF8F884EB97UL, 0x99B05EEEEE99C7B0UL, 0x8D8C7AF6F68DF78CUL, 
 	0x0D17E8FFFF0DE517UL, 0xBDDC0AD6D6BDB7DCUL, 0xB1C816DEDEB1A7C8UL, 0x54FC6D91915439FCUL, 
@@ -286,4 +286,3 @@ static const __constant ulong T4_G[] =
 		for (int r = 0; r < 10; r ++) \
 			ROUND_SMALL_Q(a, r); \
 		} while (0)
-
--- a/src/backend/opencl/cl/cn/jh.cl
+++ b/src/backend/opencl/cl/cn/jh.cl
@@ -103,7 +103,7 @@ typedef ulong sph_u64;
    x3 ^= x4; \
  } while (0)

-static const __constant ulong C[] =
+STATIC const __constant ulong C[] =
 {
 	0x67F815DFA2DED572UL, 0x571523B70A15847BUL, 0xF6875A4D90D6AB81UL, 0x402BD1C3C54F9F4EUL, 
 	0x9CFA455CE03A98EAUL, 0x9A99B26699D2C503UL, 0x8A53BBF2B4960266UL, 0x31A2DB881A1456B5UL, 
--- a/src/backend/opencl/cl/cn/keccak.cl
+++ b/src/backend/opencl/cl/cn/keccak.cl
@@ -2,7 +2,7 @@
 #define XMRIG_KECCAK_CL


-static const __constant ulong keccakf_rndc[24] =
+STATIC const __constant ulong keccakf_rndc[24] =
 {
    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
@@ -15,14 +15,14 @@ static const __constant ulong keccakf_rndc[24] =
 };


-static const __constant uint keccakf_rotc[24] =
+STATIC const __constant uint keccakf_rotc[24] =
 {
    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
 };


-static const __constant uint keccakf_piln[24] =
+STATIC const __constant uint keccakf_piln[24] =
 {
    10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
    15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
--- a/src/backend/opencl/cl/cn/wolf-aes.cl
+++ b/src/backend/opencl/cl/cn/wolf-aes.cl
@@ -1,11 +1,14 @@
 #ifndef WOLF_AES_CL
 #define WOLF_AES_CL

+#ifdef STATIC
+#   undef STATIC
+#endif
 #ifdef cl_amd_media_ops2
+#   define STATIC static
 #   pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
-
-#   define xmrig_amd_bfe(src0, src1, src2) amd_bfe(src0, src1, src2)
 #else
+#   define STATIC
 /* taken from: https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops2.txt
 *     Built-in Function:
 *     uintn amd_bfe (uintn src0, uintn src1, uintn src2)
@@ -21,7 +24,7 @@
 *         dst.s0 = src0.s0 >> offset;
 *     similar operation applied to other components of the vectors
 */
-inline int xmrig_amd_bfe(const uint src0, const uint offset, const uint width)
+inline int amd_bfe(const uint src0, const uint offset, const uint width)
 {
    /* casts are removed because we can implement everything as uint
     * int offset = src1;
@@ -41,10 +44,9 @@ inline int xmrig_amd_bfe(const uint src0, const uint offset, const uint width)
 }
 #endif

-
 // AES table - the other three are generated on the fly

-static const __constant uint AES0_C[256] =
+STATIC const __constant uint AES0_C[256] =
 {
    0xA56363C6U, 0x847C7CF8U, 0x997777EEU, 0x8D7B7BF6U,
    0x0DF2F2FFU, 0xBD6B6BD6U, 0xB16F6FDEU, 0x54C5C591U,
@@ -112,7 +114,7 @@ static const __constant uint AES0_C[256] =
    0xCBB0B07BU, 0xFC5454A8U, 0xD6BBBB6DU, 0x3A16162CU
 };

-#define BYTE(x, y) (xmrig_amd_bfe((x), (y) << 3U, 8U))
+#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))

 #if (ALGO == ALGO_CN_HEAVY_TUBE)
 inline uint4 AES_Round_bittube2(const __local uint *AES0, const __local uint *AES1, uint4 x, uint4 k)
@@ -150,10 +152,10 @@ uint4 AES_Round_Two_Tables(const __local uint *AES0, const __local uint *AES1, c
 }


-static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };
+STATIC const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };


-static const __constant uchar sbox[256] =
+STATIC const __constant uchar sbox[256] =
 {
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
--- a/src/backend/opencl/cl/cn/wolf-skein.cl
+++ b/src/backend/opencl/cl/cn/wolf-skein.cl
@@ -1,10 +1,14 @@
 #ifndef WOLF_SKEIN_CL
 #define WOLF_SKEIN_CL

+#ifdef STATIC
+#   undef STATIC
+#endif
 #ifdef cl_amd_media_ops
+#   define STATIC static
 #   pragma OPENCL EXTENSION cl_amd_media_ops : enable
-#   define xmrig_amd_bitalign(src0, src1, src2) amd_bitalign(src0, src1, src2)
 #else
+#   define STATIC
 /* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt
 * Build-in Function
 *     uintn  amd_bitalign (uintn src0, uintn src1, uintn src2)
@@ -15,7 +19,7 @@
 * The implemented function is modified because the last is in our case always a scalar.
 * We can ignore the bitwise AND operation.
 */
-inline uint2 xmrig_amd_bitalign(const uint2 src0, const uint2 src1, const uint src2)
+inline uint2 amd_bitalign(const uint2 src0, const uint2 src1, const uint src2)
 {
    uint2 result;
    result.s0 = (uint) (((((long)src0.s0) << 32) | (long)src1.s0) >> (src2));
@@ -28,7 +32,7 @@ inline uint2 xmrig_amd_bitalign(const uint2 src0, const uint2 src1, const uint s

 #define SKEIN_KS_PARITY 0x1BD11BDAA9FC1A22

-static const __constant ulong SKEIN256_IV[8] =
+STATIC const __constant ulong SKEIN256_IV[8] =
 {
    0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL,
    0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL,
@@ -36,7 +40,7 @@ static const __constant ulong SKEIN256_IV[8] =
    0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL
 };

-static const __constant ulong SKEIN512_256_IV[8] =
+STATIC const __constant ulong SKEIN512_256_IV[8] =
 {
    0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL,
    0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL,
@@ -54,10 +58,10 @@ static const __constant ulong SKEIN512_256_IV[8] =
 ulong SKEIN_ROT(const uint2 x, const uint y)
 {
    if (y < 32) {
-        return(as_ulong(xmrig_amd_bitalign(x, x.s10, 32 - y)));
+        return(as_ulong(amd_bitalign(x, x.s10, 32 - y)));
    }
    else {
-        return(as_ulong(xmrig_amd_bitalign(x.s10, x, 32 - (y - 32))));
+        return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32))));
    }
 }

--- a/src/backend/opencl/cl/kawpow/kawpow_dag.cl
+++ b/src/backend/opencl/cl/kawpow/kawpow_dag.cl
@@ -51,7 +51,7 @@ static uint2 ROL2(const uint2 a, const int offset)
    }
    return result;
 }
-#elif PLATFORM == OPENCL_PLATFORM_AMD
+#elif defined(cl_amd_media_ops)
 #pragma OPENCL EXTENSION cl_amd_media_ops : enable
 static uint2 ROL2(const uint2 vv, const int r)
 {
--- a/src/backend/opencl/cl/kawpow/kawpow_dag_cl.h
+++ b/src/backend/opencl/cl/kawpow/kawpow_dag_cl.h
@@ -2,7 +2,7 @@

 namespace xmrig {

-static const char kawpow_dag_cl[6068] = {
+static const char kawpow_dag_cl[6062] = {
    0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,
    0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
    0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
@@ -76,123 +76,123 @@ static const char kawpow_dag_cl[6068] = {
    0x6d,0x28,0x22,0x73,0x68,0x66,0x2e,0x6c,0x2e,0x77,0x72,0x61,0x70,0x2e,0x62,0x33,0x32,0x20,0x25,0x30,0x2c,0x25,0x31,0x2c,0x25,0x32,0x2c,0x25,0x33,0x3b,0x22,0x3a,
    0x22,0x3d,0x72,0x22,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x79,0x29,0x3a,0x22,0x72,0x22,0x28,0x61,0x2e,0x78,0x29,0x2c,0x22,0x72,0x22,0x28,0x61,0x2e,0x79,0x29,
    0x2c,0x22,0x72,0x22,0x28,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x29,0x3b,0x0a,0x7d,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,
-    0x7d,0x0a,0x23,0x65,0x6c,0x69,0x66,0x20,0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x20,0x3d,0x3d,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x5f,0x50,0x4c,0x41,0x54,0x46,
-    0x4f,0x52,0x4d,0x5f,0x41,0x4d,0x44,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,
-    0x20,0x63,0x6c,0x5f,0x61,0x6d,0x64,0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,0x6f,0x70,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x73,0x74,0x61,0x74,0x69,
-    0x63,0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x52,0x4f,0x4c,0x32,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x76,0x76,0x2c,0x63,0x6f,0x6e,0x73,
-    0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x29,0x0a,0x7b,0x0a,0x69,0x66,0x28,0x72,0x3c,0x3d,0x33,0x32,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x61,0x6d,
-    0x64,0x5f,0x62,0x69,0x74,0x61,0x6c,0x69,0x67,0x6e,0x28,0x28,0x76,0x76,0x29,0x2e,0x78,0x79,0x2c,0x28,0x76,0x76,0x29,0x2e,0x79,0x78,0x2c,0x33,0x32,0x2d,0x72,0x29,
-    0x3b,0x0a,0x7d,0x0a,0x65,0x6c,0x73,0x65,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x61,0x6d,0x64,0x5f,0x62,0x69,0x74,0x61,0x6c,0x69,0x67,0x6e,0x28,0x28,
-    0x76,0x76,0x29,0x2e,0x79,0x78,0x2c,0x28,0x76,0x76,0x29,0x2e,0x78,0x79,0x2c,0x36,0x34,0x2d,0x72,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x65,0x6c,0x73,0x65,0x0a,
-    0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x52,0x4f,0x4c,0x32,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x76,0x2c,
-    0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x6e,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x69,0x66,0x28,
-    0x6e,0x3c,0x3d,0x33,0x32,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x79,0x3d,0x28,0x28,0x76,0x2e,0x79,0x3c,0x3c,0x28,0x6e,0x29,0x29,0x7c,0x28,0x76,
-    0x2e,0x78,0x3e,0x3e,0x28,0x33,0x32,0x2d,0x6e,0x29,0x29,0x29,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x78,0x3d,0x28,0x28,0x76,0x2e,0x78,0x3c,0x3c,0x28,0x6e,
-    0x29,0x29,0x7c,0x28,0x76,0x2e,0x79,0x3e,0x3e,0x28,0x33,0x32,0x2d,0x6e,0x29,0x29,0x29,0x3b,0x0a,0x7d,0x0a,0x65,0x6c,0x73,0x65,0x0a,0x7b,0x0a,0x72,0x65,0x73,0x75,
-    0x6c,0x74,0x2e,0x79,0x3d,0x28,0x28,0x76,0x2e,0x78,0x3c,0x3c,0x28,0x6e,0x2d,0x33,0x32,0x29,0x29,0x7c,0x28,0x76,0x2e,0x79,0x3e,0x3e,0x28,0x36,0x34,0x2d,0x6e,0x29,
-    0x29,0x29,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x78,0x3d,0x28,0x28,0x76,0x2e,0x79,0x3c,0x3c,0x28,0x6e,0x2d,0x33,0x32,0x29,0x29,0x7c,0x28,0x76,0x2e,0x78,
-    0x3e,0x3e,0x28,0x36,0x34,0x2d,0x6e,0x29,0x29,0x29,0x3b,0x0a,0x7d,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x7d,0x0a,0x23,
-    0x65,0x6e,0x64,0x69,0x66,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x63,0x68,0x69,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x61,0x2c,0x63,
-    0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x6e,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x74,0x29,0x0a,0x7b,0x0a,0x61,0x5b,
-    0x6e,0x2b,0x30,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x30,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x2c,0x74,0x5b,0x6e,
-    0x2b,0x30,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x31,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x31,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,
-    0x6e,0x2b,0x31,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x33,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x31,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,
-    0x32,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x32,
-    0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x33,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x33,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,
-    0x33,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x30,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x33,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x34,0x5d,
-    0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x31,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x2c,
-    0x74,0x5b,0x6e,0x2b,0x30,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,
-    0x36,0x30,0x30,0x5f,0x72,0x6f,0x75,0x6e,0x64,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x61,0x2c,0x75,0x69,0x6e,0x74,0x20,0x72,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,
-    0x74,0x32,0x20,0x74,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x75,0x3b,0x0a,0x74,0x5b,0x30,0x5d,0x3d,0x61,0x5b,0x30,0x5d,0x5e,0x61,0x5b,0x35,
-    0x5d,0x5e,0x61,0x5b,0x31,0x30,0x5d,0x5e,0x61,0x5b,0x31,0x35,0x5d,0x5e,0x61,0x5b,0x32,0x30,0x5d,0x3b,0x0a,0x74,0x5b,0x31,0x5d,0x3d,0x61,0x5b,0x31,0x5d,0x5e,0x61,
-    0x5b,0x36,0x5d,0x5e,0x61,0x5b,0x31,0x31,0x5d,0x5e,0x61,0x5b,0x31,0x36,0x5d,0x5e,0x61,0x5b,0x32,0x31,0x5d,0x3b,0x0a,0x74,0x5b,0x32,0x5d,0x3d,0x61,0x5b,0x32,0x5d,
-    0x5e,0x61,0x5b,0x37,0x5d,0x5e,0x61,0x5b,0x31,0x32,0x5d,0x5e,0x61,0x5b,0x31,0x37,0x5d,0x5e,0x61,0x5b,0x32,0x32,0x5d,0x3b,0x0a,0x74,0x5b,0x33,0x5d,0x3d,0x61,0x5b,
-    0x33,0x5d,0x5e,0x61,0x5b,0x38,0x5d,0x5e,0x61,0x5b,0x31,0x33,0x5d,0x5e,0x61,0x5b,0x31,0x38,0x5d,0x5e,0x61,0x5b,0x32,0x33,0x5d,0x3b,0x0a,0x74,0x5b,0x34,0x5d,0x3d,
-    0x61,0x5b,0x34,0x5d,0x5e,0x61,0x5b,0x39,0x5d,0x5e,0x61,0x5b,0x31,0x34,0x5d,0x5e,0x61,0x5b,0x31,0x39,0x5d,0x5e,0x61,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x75,0x3d,0x74,
-    0x5b,0x34,0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x31,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x35,
-    0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x35,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,
-    0x61,0x5b,0x32,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x30,0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x32,0x5d,0x2c,0x31,0x29,0x3b,
-    0x0a,0x61,0x5b,0x31,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x31,0x5d,0x20,0x5e,0x3d,0x20,
-    0x75,0x3b,0x0a,0x61,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x31,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x31,
-    0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x33,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x37,0x5d,0x20,
-    0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x37,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,
-    0x32,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x32,0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x34,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,
-    0x5b,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x38,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,
-    0x0a,0x61,0x5b,0x31,0x38,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x33,0x5d,0x5e,
-    0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x30,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x34,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x39,0x5d,0x20,0x5e,0x3d,
-    0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x34,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x39,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x34,
-    0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x74,0x5b,0x30,0x5d,0x3d,0x61,0x5b,0x30,0x5d,0x3b,0x0a,0x74,0x5b,0x31,0x30,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,
-    0x31,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x30,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x5d,0x2c,0x36,0x32,0x29,0x3b,0x0a,0x74,0x5b,0x35,0x5d,
-    0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x33,0x5d,0x2c,0x32,0x38,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x35,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x34,0x5d,0x2c,
-    0x32,0x37,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x36,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x35,0x5d,0x2c,0x33,0x36,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x5d,0x3d,0x52,
-    0x4f,0x4c,0x32,0x28,0x61,0x5b,0x36,0x5d,0x2c,0x34,0x34,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x31,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x37,0x5d,0x2c,0x36,0x29,
-    0x3b,0x0a,0x74,0x5b,0x32,0x31,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x38,0x5d,0x2c,0x35,0x35,0x29,0x3b,0x0a,0x74,0x5b,0x36,0x5d,0x3d,0x52,0x4f,0x4c,0x32,
-    0x28,0x61,0x5b,0x39,0x5d,0x2c,0x32,0x30,0x29,0x3b,0x0a,0x74,0x5b,0x37,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x30,0x5d,0x2c,0x33,0x29,0x3b,0x0a,0x74,
-    0x5b,0x31,0x37,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x31,0x5d,0x2c,0x31,0x30,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,
-    0x5b,0x31,0x32,0x5d,0x2c,0x34,0x33,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x32,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x33,0x5d,0x2c,0x32,0x35,0x29,0x3b,0x0a,
-    0x74,0x5b,0x32,0x32,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x34,0x5d,0x2c,0x33,0x39,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x33,0x5d,0x3d,0x52,0x4f,0x4c,0x32,
-    0x28,0x61,0x5b,0x31,0x35,0x5d,0x2c,0x34,0x31,0x29,0x3b,0x0a,0x74,0x5b,0x38,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x36,0x5d,0x2c,0x34,0x35,0x29,0x3b,
-    0x0a,0x74,0x5b,0x31,0x38,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x37,0x5d,0x2c,0x31,0x35,0x29,0x3b,0x0a,0x74,0x5b,0x33,0x5d,0x3d,0x52,0x4f,0x4c,0x32,
-    0x28,0x61,0x5b,0x31,0x38,0x5d,0x2c,0x32,0x31,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x33,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x39,0x5d,0x2c,0x38,0x29,0x3b,
-    0x0a,0x74,0x5b,0x31,0x34,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x30,0x5d,0x2c,0x31,0x38,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x34,0x5d,0x3d,0x52,0x4f,0x4c,
-    0x32,0x28,0x61,0x5b,0x32,0x31,0x5d,0x2c,0x32,0x29,0x3b,0x0a,0x74,0x5b,0x39,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x32,0x5d,0x2c,0x36,0x31,0x29,0x3b,
-    0x0a,0x74,0x5b,0x31,0x39,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x33,0x5d,0x2c,0x35,0x36,0x29,0x3b,0x0a,0x74,0x5b,0x34,0x5d,0x3d,0x52,0x4f,0x4c,0x32,
-    0x28,0x61,0x5b,0x32,0x34,0x5d,0x2c,0x31,0x34,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x30,0x2c,0x74,0x29,0x3b,0x0a,0x61,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,
-    0x4b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,0x52,0x43,0x5b,0x72,0x5d,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x35,0x2c,0x74,0x29,0x3b,0x0a,
-    0x63,0x68,0x69,0x28,0x61,0x2c,0x31,0x30,0x2c,0x74,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x31,0x35,0x2c,0x74,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,
-    0x32,0x30,0x2c,0x74,0x29,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,
-    0x30,0x5f,0x6e,0x6f,0x5f,0x61,0x62,0x73,0x6f,0x72,0x62,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x61,0x2c,0x75,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x73,0x69,
-    0x7a,0x65,0x2c,0x75,0x69,0x6e,0x74,0x20,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x0a,0x7b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,
-    0x3b,0x20,0x72,0x3c,0x32,0x34,0x3b,0x29,0x0a,0x7b,0x0a,0x69,0x66,0x28,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x0a,0x7b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,
-    0x66,0x31,0x36,0x30,0x30,0x5f,0x72,0x6f,0x75,0x6e,0x64,0x28,0x61,0x2c,0x72,0x2b,0x2b,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,
-    0x65,0x20,0x63,0x6f,0x70,0x79,0x28,0x64,0x73,0x74,0x2c,0x20,0x73,0x72,0x63,0x2c,0x20,0x63,0x6f,0x75,0x6e,0x74,0x29,0x20,0x5c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,
-    0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x21,0x3d,0x63,0x6f,0x75,0x6e,0x74,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x5c,0x0a,0x7b,0x20,0x5c,0x0a,0x28,0x64,
-    0x73,0x74,0x29,0x5b,0x69,0x5d,0x3d,0x28,0x73,0x72,0x63,0x29,0x5b,0x69,0x5d,0x3b,0x20,0x5c,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,
-    0x20,0x66,0x6e,0x76,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x2c,0x75,0x69,0x6e,0x74,0x20,0x79,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x78,0x2a,0x46,
-    0x4e,0x56,0x5f,0x50,0x52,0x49,0x4d,0x45,0x5e,0x79,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,0x34,0x20,0x66,0x6e,0x76,0x34,0x28,
-    0x75,0x69,0x6e,0x74,0x34,0x20,0x78,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x79,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x78,0x2a,0x46,0x4e,0x56,0x5f,
-    0x50,0x52,0x49,0x4d,0x45,0x5e,0x79,0x3b,0x0a,0x7d,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x69,0x6f,0x6e,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,
-    0x77,0x6f,0x72,0x64,0x73,0x5b,0x36,0x34,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x75,0x69,
-    0x6e,0x74,0x32,0x73,0x5b,0x36,0x34,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x75,0x69,
-    0x6e,0x74,0x34,0x73,0x5b,0x36,0x34,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x34,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,0x36,0x34,
-    0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x69,0x6f,0x6e,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x5b,0x32,
-    0x30,0x30,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x75,0x69,0x6e,0x74,0x32,0x73,0x5b,0x32,
-    0x30,0x30,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,
-    0x32,0x30,0x30,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x34,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,0x32,0x30,0x30,0x5f,0x74,0x3b,
-    0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x31,
-    0x32,0x38,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x34,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,0x31,0x32,0x38,0x5f,0x74,0x3b,0x0a,
-    0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x53,0x48,0x41,0x33,0x5f,0x35,0x31,0x32,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x73,0x2c,0x75,0x69,
-    0x6e,0x74,0x20,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x0a,0x7b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x38,0x3b,0x20,0x69,0x21,0x3d,
-    0x32,0x35,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x73,0x5b,0x69,0x5d,0x3d,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x7b,0x30,0x2c,0x30,0x7d,0x3b,0x0a,0x7d,0x0a,
-    0x73,0x5b,0x38,0x5d,0x2e,0x78,0x3d,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x3b,0x0a,0x73,0x5b,0x38,0x5d,0x2e,0x79,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,
-    0x30,0x30,0x30,0x30,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,0x6e,0x6f,0x5f,0x61,0x62,0x73,0x6f,0x72,0x62,0x28,0x73,0x2c,0x38,
-    0x2c,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,0x20,0x66,0x61,0x73,0x74,0x5f,0x6d,0x6f,
-    0x64,0x28,0x75,0x69,0x6e,0x74,0x20,0x61,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x64,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,
-    0x74,0x3d,0x61,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x71,0x3d,0x28,0x28,0x74,0x2b,0x64,0x2e,0x79,0x29,0x2a,0x64,0x2e,0x78,0x29,0x3e,
-    0x3e,0x64,0x2e,0x7a,0x3b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x61,0x2d,0x71,0x2a,0x64,0x2e,0x77,0x3b,0x0a,0x7d,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,
-    0x20,0x76,0x6f,0x69,0x64,0x20,0x65,0x74,0x68,0x61,0x73,0x68,0x5f,0x63,0x61,0x6c,0x63,0x75,0x6c,0x61,0x74,0x65,0x5f,0x64,0x61,0x67,0x5f,0x69,0x74,0x65,0x6d,0x28,
-    0x75,0x69,0x6e,0x74,0x20,0x73,0x74,0x61,0x72,0x74,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x68,0x61,0x73,0x68,0x36,0x34,0x5f,0x74,0x20,0x63,0x6f,0x6e,
-    0x73,0x74,0x2a,0x20,0x67,0x5f,0x6c,0x69,0x67,0x68,0x74,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x68,0x61,0x73,0x68,0x36,0x34,0x5f,0x74,0x2a,0x20,0x67,
-    0x5f,0x64,0x61,0x67,0x2c,0x75,0x69,0x6e,0x74,0x20,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x2c,0x75,0x69,0x6e,0x74,0x20,0x64,0x61,0x67,0x5f,0x77,0x6f,0x72,0x64,0x73,
-    0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x6c,0x69,0x67,0x68,0x74,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,
-    0x20,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3d,0x73,0x74,0x61,0x72,0x74,0x2b,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,
-    0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3e,0x3d,0x64,0x61,0x67,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x0a,0x72,0x65,
-    0x74,0x75,0x72,0x6e,0x3b,0x0a,0x68,0x61,0x73,0x68,0x32,0x30,0x30,0x5f,0x74,0x20,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x3b,0x0a,0x63,0x6f,0x70,0x79,0x28,0x64,
-    0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x67,0x5f,0x6c,0x69,0x67,0x68,0x74,0x5b,0x66,0x61,0x73,0x74,0x5f,0x6d,0x6f,0x64,0x28,
-    0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x2c,0x6c,0x69,0x67,0x68,0x74,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,
-    0x34,0x29,0x3b,0x0a,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x77,0x6f,0x72,0x64,0x73,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,
-    0x64,0x65,0x78,0x3b,0x0a,0x53,0x48,0x41,0x33,0x5f,0x35,0x31,0x32,0x28,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x32,0x73,0x2c,0x69,0x73,
-    0x6f,0x6c,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x21,0x3d,0x45,0x54,0x48,0x41,0x53,0x48,
-    0x5f,0x44,0x41,0x54,0x41,0x53,0x45,0x54,0x5f,0x50,0x41,0x52,0x45,0x4e,0x54,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x70,0x61,
-    0x72,0x65,0x6e,0x74,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3d,0x66,0x61,0x73,0x74,0x5f,0x6d,0x6f,0x64,0x28,0x66,0x6e,0x76,0x28,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,
-    0x65,0x78,0x5e,0x69,0x2c,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x77,0x6f,0x72,0x64,0x73,0x5b,0x69,0x20,0x25,0x20,0x4e,0x4f,0x44,0x45,0x5f,0x57,0x4f,0x52,
-    0x44,0x53,0x5d,0x29,0x2c,0x6c,0x69,0x67,0x68,0x74,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x77,0x3d,0x30,
-    0x3b,0x20,0x77,0x21,0x3d,0x34,0x3b,0x20,0x2b,0x2b,0x77,0x29,0x0a,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x77,0x5d,0x3d,
-    0x66,0x6e,0x76,0x34,0x28,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x77,0x5d,0x2c,0x67,0x5f,0x6c,0x69,0x67,0x68,0x74,0x5b,
-    0x70,0x61,0x72,0x65,0x6e,0x74,0x5f,0x69,0x6e,0x64,0x65,0x78,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x77,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x53,0x48,0x41,0x33,
-    0x5f,0x35,0x31,0x32,0x28,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x32,0x73,0x2c,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x3b,0x0a,0x63,
-    0x6f,0x70,0x79,0x28,0x67,0x5f,0x64,0x61,0x67,0x5b,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x64,0x61,0x67,
-    0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x34,0x29,0x3b,0x0a,0x7d,0x0a,0x00
+    0x7d,0x0a,0x23,0x65,0x6c,0x69,0x66,0x20,0x64,0x65,0x66,0x69,0x6e,0x65,0x64,0x28,0x63,0x6c,0x5f,0x61,0x6d,0x64,0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,0x6f,0x70,0x73,
+    0x29,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,0x4e,0x20,0x63,0x6c,0x5f,0x61,0x6d,
+    0x64,0x5f,0x6d,0x65,0x64,0x69,0x61,0x5f,0x6f,0x70,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,
+    0x32,0x20,0x52,0x4f,0x4c,0x32,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x76,0x76,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,
+    0x72,0x29,0x0a,0x7b,0x0a,0x69,0x66,0x28,0x72,0x3c,0x3d,0x33,0x32,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x61,0x6d,0x64,0x5f,0x62,0x69,0x74,0x61,
+    0x6c,0x69,0x67,0x6e,0x28,0x28,0x76,0x76,0x29,0x2e,0x78,0x79,0x2c,0x28,0x76,0x76,0x29,0x2e,0x79,0x78,0x2c,0x33,0x32,0x2d,0x72,0x29,0x3b,0x0a,0x7d,0x0a,0x65,0x6c,
+    0x73,0x65,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x61,0x6d,0x64,0x5f,0x62,0x69,0x74,0x61,0x6c,0x69,0x67,0x6e,0x28,0x28,0x76,0x76,0x29,0x2e,0x79,0x78,
+    0x2c,0x28,0x76,0x76,0x29,0x2e,0x78,0x79,0x2c,0x36,0x34,0x2d,0x72,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x65,0x6c,0x73,0x65,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,
+    0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x52,0x4f,0x4c,0x32,0x28,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x32,0x20,0x76,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,
+    0x69,0x6e,0x74,0x20,0x6e,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x69,0x66,0x28,0x6e,0x3c,0x3d,0x33,0x32,0x29,
+    0x0a,0x7b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x79,0x3d,0x28,0x28,0x76,0x2e,0x79,0x3c,0x3c,0x28,0x6e,0x29,0x29,0x7c,0x28,0x76,0x2e,0x78,0x3e,0x3e,0x28,0x33,
+    0x32,0x2d,0x6e,0x29,0x29,0x29,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x78,0x3d,0x28,0x28,0x76,0x2e,0x78,0x3c,0x3c,0x28,0x6e,0x29,0x29,0x7c,0x28,0x76,0x2e,
+    0x79,0x3e,0x3e,0x28,0x33,0x32,0x2d,0x6e,0x29,0x29,0x29,0x3b,0x0a,0x7d,0x0a,0x65,0x6c,0x73,0x65,0x0a,0x7b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x2e,0x79,0x3d,0x28,
+    0x28,0x76,0x2e,0x78,0x3c,0x3c,0x28,0x6e,0x2d,0x33,0x32,0x29,0x29,0x7c,0x28,0x76,0x2e,0x79,0x3e,0x3e,0x28,0x36,0x34,0x2d,0x6e,0x29,0x29,0x29,0x3b,0x0a,0x72,0x65,
+    0x73,0x75,0x6c,0x74,0x2e,0x78,0x3d,0x28,0x28,0x76,0x2e,0x79,0x3c,0x3c,0x28,0x6e,0x2d,0x33,0x32,0x29,0x29,0x7c,0x28,0x76,0x2e,0x78,0x3e,0x3e,0x28,0x36,0x34,0x2d,
+    0x6e,0x29,0x29,0x29,0x3b,0x0a,0x7d,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x7d,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,
+    0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x63,0x68,0x69,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x61,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,
+    0x69,0x6e,0x74,0x20,0x6e,0x2c,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x74,0x29,0x0a,0x7b,0x0a,0x61,0x5b,0x6e,0x2b,0x30,0x5d,0x3d,0x62,
+    0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x30,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x30,0x5d,0x2c,0x74,0x5b,
+    0x6e,0x2b,0x31,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x31,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x31,0x5d,0x5e,0x74,
+    0x5b,0x6e,0x2b,0x33,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x31,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x32,0x5d,0x3d,0x62,0x69,0x74,
+    0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x32,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,
+    0x33,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x33,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x33,0x5d,0x5e,0x74,0x5b,0x6e,
+    0x2b,0x30,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x33,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x29,0x3b,0x0a,0x61,0x5b,0x6e,0x2b,0x34,0x5d,0x3d,0x62,0x69,0x74,0x73,0x65,
+    0x6c,0x65,0x63,0x74,0x28,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x5e,0x74,0x5b,0x6e,0x2b,0x31,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x34,0x5d,0x2c,0x74,0x5b,0x6e,0x2b,0x30,0x5d,
+    0x29,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,0x72,0x6f,
+    0x75,0x6e,0x64,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x61,0x2c,0x75,0x69,0x6e,0x74,0x20,0x72,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x74,0x5b,0x32,
+    0x35,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x75,0x3b,0x0a,0x74,0x5b,0x30,0x5d,0x3d,0x61,0x5b,0x30,0x5d,0x5e,0x61,0x5b,0x35,0x5d,0x5e,0x61,0x5b,0x31,0x30,
+    0x5d,0x5e,0x61,0x5b,0x31,0x35,0x5d,0x5e,0x61,0x5b,0x32,0x30,0x5d,0x3b,0x0a,0x74,0x5b,0x31,0x5d,0x3d,0x61,0x5b,0x31,0x5d,0x5e,0x61,0x5b,0x36,0x5d,0x5e,0x61,0x5b,
+    0x31,0x31,0x5d,0x5e,0x61,0x5b,0x31,0x36,0x5d,0x5e,0x61,0x5b,0x32,0x31,0x5d,0x3b,0x0a,0x74,0x5b,0x32,0x5d,0x3d,0x61,0x5b,0x32,0x5d,0x5e,0x61,0x5b,0x37,0x5d,0x5e,
+    0x61,0x5b,0x31,0x32,0x5d,0x5e,0x61,0x5b,0x31,0x37,0x5d,0x5e,0x61,0x5b,0x32,0x32,0x5d,0x3b,0x0a,0x74,0x5b,0x33,0x5d,0x3d,0x61,0x5b,0x33,0x5d,0x5e,0x61,0x5b,0x38,
+    0x5d,0x5e,0x61,0x5b,0x31,0x33,0x5d,0x5e,0x61,0x5b,0x31,0x38,0x5d,0x5e,0x61,0x5b,0x32,0x33,0x5d,0x3b,0x0a,0x74,0x5b,0x34,0x5d,0x3d,0x61,0x5b,0x34,0x5d,0x5e,0x61,
+    0x5b,0x39,0x5d,0x5e,0x61,0x5b,0x31,0x34,0x5d,0x5e,0x61,0x5b,0x31,0x39,0x5d,0x5e,0x61,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x34,0x5d,0x5e,0x52,0x4f,
+    0x4c,0x32,0x28,0x74,0x5b,0x31,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x35,0x5d,0x20,0x5e,0x3d,0x20,0x75,
+    0x3b,0x0a,0x61,0x5b,0x31,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x35,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x30,0x5d,0x20,
+    0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x30,0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x32,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x31,0x5d,0x20,
+    0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x31,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,
+    0x36,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x31,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x31,0x5d,0x5e,0x52,0x4f,0x4c,0x32,
+    0x28,0x74,0x5b,0x33,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x37,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,
+    0x61,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x37,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x32,0x5d,0x20,0x5e,0x3d,
+    0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x32,0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,0x5b,0x34,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x33,0x5d,0x20,0x5e,0x3d,
+    0x20,0x75,0x3b,0x0a,0x61,0x5b,0x38,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x38,0x5d,
+    0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x75,0x3d,0x74,0x5b,0x33,0x5d,0x5e,0x52,0x4f,0x4c,0x32,0x28,0x74,
+    0x5b,0x30,0x5d,0x2c,0x31,0x29,0x3b,0x0a,0x61,0x5b,0x34,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x39,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,
+    0x31,0x34,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x31,0x39,0x5d,0x20,0x5e,0x3d,0x20,0x75,0x3b,0x0a,0x61,0x5b,0x32,0x34,0x5d,0x20,0x5e,0x3d,0x20,0x75,
+    0x3b,0x0a,0x74,0x5b,0x30,0x5d,0x3d,0x61,0x5b,0x30,0x5d,0x3b,0x0a,0x74,0x5b,0x31,0x30,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x5d,0x2c,0x31,0x29,0x3b,
+    0x0a,0x74,0x5b,0x32,0x30,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x5d,0x2c,0x36,0x32,0x29,0x3b,0x0a,0x74,0x5b,0x35,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,
+    0x61,0x5b,0x33,0x5d,0x2c,0x32,0x38,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x35,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x34,0x5d,0x2c,0x32,0x37,0x29,0x3b,0x0a,0x74,
+    0x5b,0x31,0x36,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x35,0x5d,0x2c,0x33,0x36,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,
+    0x36,0x5d,0x2c,0x34,0x34,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x31,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x37,0x5d,0x2c,0x36,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x31,
+    0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x38,0x5d,0x2c,0x35,0x35,0x29,0x3b,0x0a,0x74,0x5b,0x36,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x39,0x5d,0x2c,
+    0x32,0x30,0x29,0x3b,0x0a,0x74,0x5b,0x37,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x30,0x5d,0x2c,0x33,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x37,0x5d,0x3d,0x52,
+    0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x31,0x5d,0x2c,0x31,0x30,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x32,0x5d,0x2c,0x34,
+    0x33,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x32,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x33,0x5d,0x2c,0x32,0x35,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x32,0x5d,0x3d,
+    0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x34,0x5d,0x2c,0x33,0x39,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x33,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x35,0x5d,
+    0x2c,0x34,0x31,0x29,0x3b,0x0a,0x74,0x5b,0x38,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x36,0x5d,0x2c,0x34,0x35,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x38,0x5d,
+    0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x37,0x5d,0x2c,0x31,0x35,0x29,0x3b,0x0a,0x74,0x5b,0x33,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x38,0x5d,
+    0x2c,0x32,0x31,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x33,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x31,0x39,0x5d,0x2c,0x38,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x34,0x5d,
+    0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x30,0x5d,0x2c,0x31,0x38,0x29,0x3b,0x0a,0x74,0x5b,0x32,0x34,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x31,
+    0x5d,0x2c,0x32,0x29,0x3b,0x0a,0x74,0x5b,0x39,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x32,0x5d,0x2c,0x36,0x31,0x29,0x3b,0x0a,0x74,0x5b,0x31,0x39,0x5d,
+    0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x33,0x5d,0x2c,0x35,0x36,0x29,0x3b,0x0a,0x74,0x5b,0x34,0x5d,0x3d,0x52,0x4f,0x4c,0x32,0x28,0x61,0x5b,0x32,0x34,0x5d,
+    0x2c,0x31,0x34,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x30,0x2c,0x74,0x29,0x3b,0x0a,0x61,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x4b,0x65,0x63,0x63,0x61,0x6b,
+    0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,0x52,0x43,0x5b,0x72,0x5d,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x35,0x2c,0x74,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,
+    0x31,0x30,0x2c,0x74,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x31,0x35,0x2c,0x74,0x29,0x3b,0x0a,0x63,0x68,0x69,0x28,0x61,0x2c,0x32,0x30,0x2c,0x74,0x29,0x3b,
+    0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x76,0x6f,0x69,0x64,0x20,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,0x6e,0x6f,0x5f,0x61,
+    0x62,0x73,0x6f,0x72,0x62,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x61,0x2c,0x75,0x69,0x6e,0x74,0x20,0x6f,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,0x69,0x6e,
+    0x74,0x20,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x0a,0x7b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x32,0x34,
+    0x3b,0x29,0x0a,0x7b,0x0a,0x69,0x66,0x28,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x0a,0x7b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,
+    0x72,0x6f,0x75,0x6e,0x64,0x28,0x61,0x2c,0x72,0x2b,0x2b,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x63,0x6f,0x70,0x79,
+    0x28,0x64,0x73,0x74,0x2c,0x20,0x73,0x72,0x63,0x2c,0x20,0x63,0x6f,0x75,0x6e,0x74,0x29,0x20,0x5c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,
+    0x30,0x3b,0x20,0x69,0x21,0x3d,0x63,0x6f,0x75,0x6e,0x74,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x5c,0x0a,0x7b,0x20,0x5c,0x0a,0x28,0x64,0x73,0x74,0x29,0x5b,0x69,0x5d,
+    0x3d,0x28,0x73,0x72,0x63,0x29,0x5b,0x69,0x5d,0x3b,0x20,0x5c,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,0x20,0x66,0x6e,0x76,0x28,0x75,
+    0x69,0x6e,0x74,0x20,0x78,0x2c,0x75,0x69,0x6e,0x74,0x20,0x79,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x78,0x2a,0x46,0x4e,0x56,0x5f,0x50,0x52,0x49,
+    0x4d,0x45,0x5e,0x79,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,0x34,0x20,0x66,0x6e,0x76,0x34,0x28,0x75,0x69,0x6e,0x74,0x34,0x20,
+    0x78,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x79,0x29,0x0a,0x7b,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x78,0x2a,0x46,0x4e,0x56,0x5f,0x50,0x52,0x49,0x4d,0x45,0x5e,
+    0x79,0x3b,0x0a,0x7d,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x69,0x6f,0x6e,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x5b,
+    0x36,0x34,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x75,0x69,0x6e,0x74,0x32,0x73,0x5b,0x36,
+    0x34,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x36,
+    0x34,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x34,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,0x36,0x34,0x5f,0x74,0x3b,0x0a,0x74,0x79,
+    0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x69,0x6f,0x6e,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x5b,0x32,0x30,0x30,0x2f,0x73,0x69,0x7a,
+    0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x75,0x69,0x6e,0x74,0x32,0x73,0x5b,0x32,0x30,0x30,0x2f,0x73,0x69,0x7a,
+    0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x32,0x30,0x30,0x2f,0x73,0x69,
+    0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x34,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,0x32,0x30,0x30,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,
+    0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x34,0x20,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x31,0x32,0x38,0x2f,0x73,0x69,0x7a,
+    0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x34,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,0x31,0x32,0x38,0x5f,0x74,0x3b,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,
+    0x20,0x76,0x6f,0x69,0x64,0x20,0x53,0x48,0x41,0x33,0x5f,0x35,0x31,0x32,0x28,0x75,0x69,0x6e,0x74,0x32,0x2a,0x20,0x73,0x2c,0x75,0x69,0x6e,0x74,0x20,0x69,0x73,0x6f,
+    0x6c,0x61,0x74,0x65,0x29,0x0a,0x7b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x38,0x3b,0x20,0x69,0x21,0x3d,0x32,0x35,0x3b,0x20,0x2b,0x2b,
+    0x69,0x29,0x0a,0x7b,0x0a,0x73,0x5b,0x69,0x5d,0x3d,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x7b,0x30,0x2c,0x30,0x7d,0x3b,0x0a,0x7d,0x0a,0x73,0x5b,0x38,0x5d,0x2e,0x78,
+    0x3d,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x3b,0x0a,0x73,0x5b,0x38,0x5d,0x2e,0x79,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x3b,0x0a,
+    0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x31,0x36,0x30,0x30,0x5f,0x6e,0x6f,0x5f,0x61,0x62,0x73,0x6f,0x72,0x62,0x28,0x73,0x2c,0x38,0x2c,0x69,0x73,0x6f,0x6c,0x61,
+    0x74,0x65,0x29,0x3b,0x0a,0x7d,0x0a,0x73,0x74,0x61,0x74,0x69,0x63,0x20,0x75,0x69,0x6e,0x74,0x20,0x66,0x61,0x73,0x74,0x5f,0x6d,0x6f,0x64,0x28,0x75,0x69,0x6e,0x74,
+    0x20,0x61,0x2c,0x75,0x69,0x6e,0x74,0x34,0x20,0x64,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x3d,0x61,0x3b,0x0a,0x63,
+    0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x71,0x3d,0x28,0x28,0x74,0x2b,0x64,0x2e,0x79,0x29,0x2a,0x64,0x2e,0x78,0x29,0x3e,0x3e,0x64,0x2e,0x7a,0x3b,0x0a,
+    0x72,0x65,0x74,0x75,0x72,0x6e,0x20,0x61,0x2d,0x71,0x2a,0x64,0x2e,0x77,0x3b,0x0a,0x7d,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,
+    0x65,0x74,0x68,0x61,0x73,0x68,0x5f,0x63,0x61,0x6c,0x63,0x75,0x6c,0x61,0x74,0x65,0x5f,0x64,0x61,0x67,0x5f,0x69,0x74,0x65,0x6d,0x28,0x75,0x69,0x6e,0x74,0x20,0x73,
+    0x74,0x61,0x72,0x74,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x68,0x61,0x73,0x68,0x36,0x34,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x2a,0x20,0x67,0x5f,
+    0x6c,0x69,0x67,0x68,0x74,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x68,0x61,0x73,0x68,0x36,0x34,0x5f,0x74,0x2a,0x20,0x67,0x5f,0x64,0x61,0x67,0x2c,0x75,
+    0x69,0x6e,0x74,0x20,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x2c,0x75,0x69,0x6e,0x74,0x20,0x64,0x61,0x67,0x5f,0x77,0x6f,0x72,0x64,0x73,0x2c,0x75,0x69,0x6e,0x74,0x34,
+    0x20,0x6c,0x69,0x67,0x68,0x74,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x6e,0x6f,0x64,0x65,0x5f,
+    0x69,0x6e,0x64,0x65,0x78,0x3d,0x73,0x74,0x61,0x72,0x74,0x2b,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,
+    0x28,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3e,0x3d,0x64,0x61,0x67,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0x0a,
+    0x68,0x61,0x73,0x68,0x32,0x30,0x30,0x5f,0x74,0x20,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x3b,0x0a,0x63,0x6f,0x70,0x79,0x28,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,
+    0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x67,0x5f,0x6c,0x69,0x67,0x68,0x74,0x5b,0x66,0x61,0x73,0x74,0x5f,0x6d,0x6f,0x64,0x28,0x6e,0x6f,0x64,0x65,0x5f,0x69,
+    0x6e,0x64,0x65,0x78,0x2c,0x6c,0x69,0x67,0x68,0x74,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x34,0x29,0x3b,0x0a,0x64,0x61,
+    0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x77,0x6f,0x72,0x64,0x73,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x3b,0x0a,0x53,
+    0x48,0x41,0x33,0x5f,0x35,0x31,0x32,0x28,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x32,0x73,0x2c,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,
+    0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x21,0x3d,0x45,0x54,0x48,0x41,0x53,0x48,0x5f,0x44,0x41,0x54,0x41,0x53,
+    0x45,0x54,0x5f,0x50,0x41,0x52,0x45,0x4e,0x54,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x70,0x61,0x72,0x65,0x6e,0x74,0x5f,0x69,
+    0x6e,0x64,0x65,0x78,0x3d,0x66,0x61,0x73,0x74,0x5f,0x6d,0x6f,0x64,0x28,0x66,0x6e,0x76,0x28,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x5e,0x69,0x2c,0x64,
+    0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x77,0x6f,0x72,0x64,0x73,0x5b,0x69,0x20,0x25,0x20,0x4e,0x4f,0x44,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x29,0x2c,0x6c,
+    0x69,0x67,0x68,0x74,0x5f,0x77,0x6f,0x72,0x64,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x77,0x3d,0x30,0x3b,0x20,0x77,0x21,0x3d,0x34,
+    0x3b,0x20,0x2b,0x2b,0x77,0x29,0x0a,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x77,0x5d,0x3d,0x66,0x6e,0x76,0x34,0x28,0x64,
+    0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x77,0x5d,0x2c,0x67,0x5f,0x6c,0x69,0x67,0x68,0x74,0x5b,0x70,0x61,0x72,0x65,0x6e,0x74,
+    0x5f,0x69,0x6e,0x64,0x65,0x78,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x5b,0x77,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x53,0x48,0x41,0x33,0x5f,0x35,0x31,0x32,0x28,0x64,
+    0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,0x75,0x69,0x6e,0x74,0x32,0x73,0x2c,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x29,0x3b,0x0a,0x63,0x6f,0x70,0x79,0x28,0x67,0x5f,
+    0x64,0x61,0x67,0x5b,0x6e,0x6f,0x64,0x65,0x5f,0x69,0x6e,0x64,0x65,0x78,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x64,0x61,0x67,0x5f,0x6e,0x6f,0x64,0x65,0x2e,
+    0x75,0x69,0x6e,0x74,0x34,0x73,0x2c,0x34,0x29,0x3b,0x0a,0x7d,0x0a,0x00
 };

 } // namespace xmrig
--- a/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
+++ b/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
@@ -39,6 +39,10 @@ static inline uint32_t getMaxThreads(const OclDevice &device, const Algorithm &a
        return 40000U;
    }

+    if (device.vendorId() == OCL_VENDOR_NVIDIA) {
+        return 4096U;
+    }
+
    const uint32_t ratio = (algorithm.l3() <= oneMiB) ? 2U : 1U;

    if (device.vendorId() == OCL_VENDOR_INTEL) {
--- a/src/backend/opencl/generators/ocl_generic_kawpow_generator.cpp
+++ b/src/backend/opencl/generators/ocl_generic_kawpow_generator.cpp
@@ -46,6 +46,7 @@ bool ocl_generic_kawpow_generator(const OclDevice &device, const Algorithm &algo
    case OclDevice::Navi_10:
    case OclDevice::Navi_12:
    case OclDevice::Navi_14:
+    case OclDevice::Navi_21:
        isNavi = true;
        break;

--- a/src/backend/opencl/generators/ocl_generic_rx_generator.cpp
+++ b/src/backend/opencl/generators/ocl_generic_rx_generator.cpp
@@ -53,6 +53,7 @@ bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorith

    switch (device.type()) {
    case OclDevice::Baffin:
+    case OclDevice::Ellesmere:
    case OclDevice::Polaris:
    case OclDevice::Lexa:
    case OclDevice::Vega_10:
@@ -67,6 +68,10 @@ bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorith
        isNavi = true;
        break;

+    case OclDevice::Navi_21:
+        isNavi = true;
+        break;
+
    default:
        break;
    }
--- a/src/backend/opencl/opencl.cmake
+++ b/src/backend/opencl/opencl.cmake
@@ -5,9 +5,13 @@ if (BUILD_STATIC AND XMRIG_OS_UNIX AND WITH_OPENCL)
 endif()

 if (WITH_OPENCL)
-    add_definitions(/DCL_TARGET_OPENCL_VERSION=200)
-    add_definitions(/DCL_USE_DEPRECATED_OPENCL_1_2_APIS)
    add_definitions(/DXMRIG_FEATURE_OPENCL)
+    add_definitions(/DCL_USE_DEPRECATED_OPENCL_1_2_APIS)
+    if (XMRIG_OS_APPLE)
+        add_definitions(/DCL_TARGET_OPENCL_VERSION=120)
+    elseif (WITH_OPENCL_VERSION)
+        add_definitions(/DCL_TARGET_OPENCL_VERSION=${WITH_OPENCL_VERSION})
+    endif()

    set(HEADERS_BACKEND_OPENCL
        src/backend/opencl/cl/OclSource.h
--- a/src/backend/opencl/runners/OclAstroBWTRunner.cpp
+++ b/src/backend/opencl/runners/OclAstroBWTRunner.cpp
@@ -46,6 +46,7 @@ xmrig::OclAstroBWTRunner::OclAstroBWTRunner(size_t index, const OclLaunchData &d
    switch (data.device.type())
    {
    case OclDevice::Baffin:
+    case OclDevice::Ellesmere:
    case OclDevice::Polaris:
    case OclDevice::Lexa:
    case OclDevice::Vega_10:
--- a/src/backend/opencl/runners/OclRxBaseRunner.cpp
+++ b/src/backend/opencl/runners/OclRxBaseRunner.cpp
@@ -55,7 +55,7 @@ xmrig::OclRxBaseRunner::OclRxBaseRunner(size_t index, const OclLaunchData &data)
        m_gcn_version = 14;
    }

-    if (data.device.type() == OclDevice::Navi_10 || data.device.type() == OclDevice::Navi_12 || data.device.type() == OclDevice::Navi_14) {
+    if (data.device.type() == OclDevice::Navi_10 || data.device.type() == OclDevice::Navi_12 || data.device.type() == OclDevice::Navi_14 || data.device.type() == OclDevice::Navi_21) {
        m_gcn_version = 15;
    }

--- a/src/backend/opencl/wrappers/OclDevice.cpp
+++ b/src/backend/opencl/wrappers/OclDevice.cpp
@@ -74,6 +74,28 @@ static ocl_gen_config_fun generators[] = {
 };


+// Maps an OpenCL platform to a vendor id using the platform vendor string and an extension string.
+// Vendors are probed in a fixed order (AMD, NVIDIA, Intel, Apple) and the first match is returned.
+static OclVendor getPlatformVendorId(const String &vendor, const String &extensions)
+{
+    OclVendor result = OCL_VENDOR_UNKNOWN;
+
+    if (extensions.contains("cl_amd_") || vendor.contains("Advanced Micro Devices") || vendor.contains("AMD")) {
+        result = OCL_VENDOR_AMD;
+    }
+    else if (extensions.contains("cl_nv_") || vendor.contains("NVIDIA")) {
+        result = OCL_VENDOR_NVIDIA;
+    }
+    else if (extensions.contains("cl_intel_") || vendor.contains("Intel")) {
+        result = OCL_VENDOR_INTEL;
+    }
+    else if (extensions.contains("cl_APPLE_") || vendor.contains("Apple")) {
+        result = OCL_VENDOR_APPLE;
+    }
+    return result;
+}
+
+
 static OclVendor getVendorId(const String &vendor)
 {
    if (vendor.contains("Advanced Micro Devices") || vendor.contains("AMD")) {
@@ -81,19 +103,76 @@ static OclVendor getVendorId(const String &vendor)
    }

    if (vendor.contains("NVIDIA")) {
-        return  OCL_VENDOR_NVIDIA;
+        return OCL_VENDOR_NVIDIA;
    }

    if (vendor.contains("Intel")) {
        return OCL_VENDOR_INTEL;
    }

+    if (vendor.contains("Apple")) {
+        return OCL_VENDOR_APPLE;
+    }
+
    return OCL_VENDOR_UNKNOWN;
 }


-static OclDevice::Type getType(const String &name)
+static OclDevice::Type getType(const String &name, const OclVendor platformVendorId)
 {
+    if (platformVendorId == OCL_VENDOR_APPLE) {
+        // Apple Platform: uses product names, not gfx# or codenames
+        if (name.contains("AMD Radeon")) {
+            if (name.contains(" 450 ") ||
+                name.contains(" 455 ") ||
+                name.contains(" 460 ")) {
+                return OclDevice::Baffin;
+            }
+
+            if (name.contains(" 555 ") || name.contains(" 555X ") ||
+                name.contains(" 560 ") || name.contains(" 560X ") ||
+                name.contains(" 570 ") || name.contains(" 570X ") ||
+                name.contains(" 575 ") || name.contains(" 575X ")) {
+                return OclDevice::Polaris;
+            }
+
+            if (name.contains(" 580 ") || name.contains(" 580X ")) {
+                return OclDevice::Ellesmere;
+            }
+
+            if (name.contains(" Vega ")) {
+                if (name.contains(" 48 ") ||
+                    name.contains(" 56 ") ||
+                    name.contains(" 64 ") ||
+                    name.contains(" 64X ")) {
+                    return OclDevice::Vega_10;
+                }
+                if (name.contains(" 16 ") ||
+                    name.contains(" 20 ") ||
+                    name.contains(" II ")) {
+                    return OclDevice::Vega_20;
+                }
+            }
+
+            if (name.contains(" 5700 ") || name.contains(" W5700X ")) {
+                return OclDevice::Navi_10;
+            }
+
+            if (name.contains(" 5600 ") || name.contains(" 5600M ")) {
+                return OclDevice::Navi_12;
+            }
+
+            if (name.contains(" 5300 ") || name.contains(" 5300M ") ||
+                name.contains(" 5500 ") || name.contains(" 5500M ")) {
+                return OclDevice::Navi_14;
+            }
+
+            if (name.contains(" W6800 ") || name.contains(" W6900X ")) {
+                return OclDevice::Navi_21;
+            }
+        }
+    }
+
    if (name == "gfx900" || name == "gfx901") {
        return OclDevice::Vega_10;
    }
@@ -118,6 +197,10 @@ static OclDevice::Type getType(const String &name)
        return OclDevice::Navi_14;
    }

+    if (name == "gfx1030") {
+        return OclDevice::Navi_21;
+    }
+
    if (name == "gfx804") {
        return OclDevice::Lexa;
    }
@@ -126,7 +209,11 @@ static OclDevice::Type getType(const String &name)
        return OclDevice::Baffin;
    }

-    if (name == "gfx803" || name.contains("polaris") || name == "Ellesmere") {
+    if (name.contains("Ellesmere")) {
+        return OclDevice::Ellesmere;
+    }
+
+    if (name == "gfx803" || name.contains("polaris")) {
        return OclDevice::Polaris;
    }

@@ -140,28 +227,31 @@ static OclDevice::Type getType(const String &name)
 xmrig::OclDevice::OclDevice(uint32_t index, cl_device_id id, cl_platform_id platform) :
    m_id(id),
    m_platform(platform),
-    m_board(OclLib::getString(id, 0x4038 /* CL_DEVICE_BOARD_NAME_AMD */)),
+    m_platformVendor(OclLib::getString(platform, CL_PLATFORM_VENDOR)),
    m_name(OclLib::getString(id, CL_DEVICE_NAME)),
    m_vendor(OclLib::getString(id, CL_DEVICE_VENDOR)),
+    m_extensions(OclLib::getString(id, CL_DEVICE_EXTENSIONS)),
    m_maxMemoryAlloc(OclLib::getUlong(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE)),
    m_globalMemory(OclLib::getUlong(id, CL_DEVICE_GLOBAL_MEM_SIZE)),
    m_computeUnits(OclLib::getUint(id, CL_DEVICE_MAX_COMPUTE_UNITS, 1)),
    m_index(index)
 {
    m_vendorId  = getVendorId(m_vendor);
-    m_type      = getType(m_name);
+    m_platformVendorId = getPlatformVendorId(m_platformVendor, m_extensions);
+    m_type      = getType(m_name, m_platformVendorId);

-    if (m_vendorId == OCL_VENDOR_AMD) {
+    if (m_extensions.contains("cl_amd_device_attribute_query")) {
        topology_amd topology;

-        if (OclLib::getDeviceInfo(id, 0x4037 /* CL_DEVICE_TOPOLOGY_AMD */, sizeof(topology), &topology, nullptr) == CL_SUCCESS && topology.raw.type == 1) {
+        if (OclLib::getDeviceInfo(id, CL_DEVICE_TOPOLOGY_AMD, sizeof(topology), &topology, nullptr) == CL_SUCCESS && topology.raw.type == CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD) {
            m_topology = PciTopology(static_cast<uint32_t>(topology.pcie.bus), static_cast<uint32_t>(topology.pcie.device), static_cast<uint32_t>(topology.pcie.function));
        }
+        m_board = OclLib::getString(id, CL_DEVICE_BOARD_NAME_AMD);
    }
-    else if (m_vendorId == OCL_VENDOR_NVIDIA) {
+    else if (m_extensions.contains("cl_nv_device_attribute_query")) {
        cl_uint bus = 0;
-        if (OclLib::getDeviceInfo(id, 0x4008 /* CL_DEVICE_PCI_BUS_ID_NV */, sizeof (bus), &bus, nullptr) == CL_SUCCESS) {
-            cl_uint slot  = OclLib::getUint(id, 0x4009 /* CL_DEVICE_PCI_SLOT_ID_NV */);
+        if (OclLib::getDeviceInfo(id, CL_DEVICE_PCI_BUS_ID_NV, sizeof (bus), &bus, nullptr) == CL_SUCCESS) {
+            cl_uint slot  = OclLib::getUint(id, CL_DEVICE_PCI_SLOT_ID_NV);
            m_topology = PciTopology(bus, (slot >> 3) & 0xff, slot & 7);
        }
    }
--- a/src/backend/opencl/wrappers/OclDevice.h
+++ b/src/backend/opencl/wrappers/OclDevice.h
@@ -45,6 +45,7 @@ public:
    enum Type {
        Unknown,
        Baffin,
+        Ellesmere,
        Polaris,
        Lexa,
        Vega_10,
@@ -52,7 +53,8 @@ public:
        Raven,
        Navi_10,
        Navi_12,
-        Navi_14
+        Navi_14,
+        Navi_21
    };

    OclDevice() = delete;
@@ -64,11 +66,14 @@ public:

    inline bool isValid() const                 { return m_id != nullptr && m_platform != nullptr; }
    inline cl_device_id id() const              { return m_id; }
+    inline const String &platformVendor() const { return m_platformVendor; }
+    inline OclVendor platformVendorId() const   { return m_platformVendorId; } // platform vendor id, distinct from the device vendor id returned by vendorId()
    inline const PciTopology &topology() const  { return m_topology; }
    inline const String &board() const          { return m_board.isNull() ? m_name : m_board; }
    inline const String &name() const           { return m_name; }
    inline const String &vendor() const         { return m_vendor; }
    inline OclVendor vendorId() const           { return m_vendorId; }
+    inline const String &extensions() const     { return m_extensions; }
    inline Type type() const                    { return m_type; }
    inline uint32_t computeUnits() const        { return m_computeUnits; }
    inline size_t freeMemSize() const           { return std::min(maxMemAllocSize(), globalMemSize()); }
@@ -83,13 +88,16 @@ public:
 private:
    cl_device_id m_id               = nullptr;
    cl_platform_id m_platform       = nullptr;
-    const String m_board;
+    const String m_platformVendor;
+    String m_board;
    const String m_name;
    const String m_vendor;
+    String m_extensions;
    const size_t m_maxMemoryAlloc   = 0;
    const size_t m_globalMemory     = 0;
    const uint32_t m_computeUnits   = 1;
    const uint32_t m_index          = 0;
+    OclVendor m_platformVendorId    = OCL_VENDOR_UNKNOWN;
    OclVendor m_vendorId            = OCL_VENDOR_UNKNOWN;
    PciTopology m_topology;
    Type m_type                     = Unknown;
--- a/src/backend/opencl/wrappers/OclLib.cpp
+++ b/src/backend/opencl/wrappers/OclLib.cpp
@@ -30,8 +30,6 @@

 #if defined(OCL_DEBUG_REFERENCE_COUNT)
 #   define LOG_REFS(x, ...) xmrig::Log::print(xmrig::Log::WARNING, x, ##__VA_ARGS__)
-#else
-#   define LOG_REFS(x, ...)
 #endif


@@ -405,7 +403,7 @@ cl_int xmrig::OclLib::getDeviceInfo(cl_device_id device, cl_device_info param_na
    assert(pGetDeviceInfo != nullptr);

    const cl_int ret = pGetDeviceInfo(device, param_name, param_value_size, param_value, param_value_size_ret);
-    if (ret != CL_SUCCESS && param_name != 0x4038) {
+    if (ret != CL_SUCCESS && param_name != CL_DEVICE_BOARD_NAME_AMD) {
        LOG_ERR("Error %s when calling %s, param 0x%04x", OclError::toString(ret), kGetDeviceInfo, param_name);
    }

@@ -476,7 +474,9 @@ cl_int xmrig::OclLib::release(cl_command_queue command_queue) noexcept
        return CL_SUCCESS;
    }

+#   if defined(OCL_DEBUG_REFERENCE_COUNT)
    LOG_REFS("%p %u ~queue", command_queue, getUint(command_queue, CL_QUEUE_REFERENCE_COUNT));
+#   endif

    finish(command_queue);

@@ -493,7 +493,9 @@ cl_int xmrig::OclLib::release(cl_context context) noexcept
 {
    assert(pReleaseContext != nullptr);

+#   if defined(OCL_DEBUG_REFERENCE_COUNT)
    LOG_REFS("%p %u ~context", context, getUint(context, CL_CONTEXT_REFERENCE_COUNT));
+#   endif

    const cl_int ret = pReleaseContext(context);
    if (ret != CL_SUCCESS) {
@@ -508,7 +510,9 @@ cl_int xmrig::OclLib::release(cl_device_id id) noexcept
 {
    assert(pReleaseDevice != nullptr);

+#   if defined(OCL_DEBUG_REFERENCE_COUNT)
    LOG_REFS("%p %u ~device", id, getUint(id, CL_DEVICE_REFERENCE_COUNT));
+#   endif

    const cl_int ret = pReleaseDevice(id);
    if (ret != CL_SUCCESS) {
@@ -527,7 +531,9 @@ cl_int xmrig::OclLib::release(cl_kernel kernel) noexcept
        return CL_SUCCESS;
    }

+#   if defined(OCL_DEBUG_REFERENCE_COUNT)
    LOG_REFS("%p %u ~kernel %s", kernel, getUint(kernel, CL_KERNEL_REFERENCE_COUNT), getString(kernel, CL_KERNEL_FUNCTION_NAME).data());
+#   endif

    const cl_int ret = pReleaseKernel(kernel);
    if (ret != CL_SUCCESS) {
@@ -546,7 +552,9 @@ cl_int xmrig::OclLib::release(cl_mem mem_obj) noexcept
        return CL_SUCCESS;
    }

+#   if defined(OCL_DEBUG_REFERENCE_COUNT)
    LOG_REFS("%p %u ~mem %zub", mem_obj, getUint(mem_obj, CL_MEM_REFERENCE_COUNT), getUlong(mem_obj, CL_MEM_SIZE));
+#   endif

    const cl_int ret = pReleaseMemObject(mem_obj);
    if (ret != CL_SUCCESS) {
@@ -565,7 +573,9 @@ cl_int xmrig::OclLib::release(cl_program program) noexcept
        return CL_SUCCESS;
    }

+#   if defined(OCL_DEBUG_REFERENCE_COUNT)
    LOG_REFS("%p %u ~program %s", program, getUint(program, CL_PROGRAM_REFERENCE_COUNT), getString(program, CL_PROGRAM_KERNEL_NAMES).data());
+#   endif

    const cl_int ret = pReleaseProgram(program);
    if (ret != CL_SUCCESS) {
--- a/src/backend/opencl/wrappers/OclLib.h
+++ b/src/backend/opencl/wrappers/OclLib.h
@@ -26,6 +26,22 @@
 #include "3rdparty/cl.h"
 #include "base/tools/String.h"

+#ifndef CL_DEVICE_TOPOLOGY_AMD
+#define CL_DEVICE_TOPOLOGY_AMD 0x4037
+#endif
+#ifndef CL_DEVICE_BOARD_NAME_AMD
+#define CL_DEVICE_BOARD_NAME_AMD 0x4038
+#endif
+#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
+#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
+#endif
+#ifndef CL_DEVICE_PCI_BUS_ID_NV
+#define CL_DEVICE_PCI_BUS_ID_NV 0x4008
+#endif
+#ifndef CL_DEVICE_PCI_SLOT_ID_NV
+#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
+#endif
+

 namespace xmrig {

--- a/src/backend/opencl/wrappers/OclVendor.h
+++ b/src/backend/opencl/wrappers/OclVendor.h
@@ -33,7 +33,8 @@ enum OclVendor : unsigned {
    OCL_VENDOR_UNKNOWN,
    OCL_VENDOR_AMD,
    OCL_VENDOR_NVIDIA,
-    OCL_VENDOR_INTEL
+    OCL_VENDOR_INTEL,
+    OCL_VENDOR_APPLE
 };


--- a/src/base/crypto/Algorithm.cpp
+++ b/src/base/crypto/Algorithm.cpp
@@ -101,6 +101,11 @@ const char *Algorithm::kKAWPOW          = "kawpow";
 const char *Algorithm::kKAWPOW_RVN      = "kawpow";
 #endif

+#ifdef XMRIG_ALGO_GHOSTRIDER
+const char* Algorithm::kGHOSTRIDER      = "ghostrider";
+const char* Algorithm::kGHOSTRIDER_RTM  = "ghostrider";
+#endif
+

 #define ALGO_NAME(ALGO)         { Algorithm::ALGO, Algorithm::k##ALGO }
 #define ALGO_ALIAS(ALGO, NAME)  { NAME, Algorithm::ALGO }
@@ -163,6 +168,10 @@ static const std::map<uint32_t, const char *> kAlgorithmNames = {
 #   ifdef XMRIG_ALGO_KAWPOW
    ALGO_NAME(KAWPOW_RVN),
 #   endif
+
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    ALGO_NAME(GHOSTRIDER_RTM),
+#   endif
 };


@@ -278,6 +287,11 @@ static const std::map<const char *, Algorithm::Id, aliasCompare> kAlgorithmAlias
 #   ifdef XMRIG_ALGO_KAWPOW
    ALGO_ALIAS_AUTO(KAWPOW_RVN),    ALGO_ALIAS(KAWPOW_RVN,      "kawpow/rvn"),
 #   endif
+
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    ALGO_ALIAS_AUTO(GHOSTRIDER_RTM), ALGO_ALIAS(GHOSTRIDER_RTM, "ghostrider/rtm"),
+                                     ALGO_ALIAS(GHOSTRIDER_RTM, "gr"),
+#   endif
 };


@@ -352,7 +366,8 @@ std::vector<xmrig::Algorithm> xmrig::Algorithm::all(const std::function<bool(con
        RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_KEVA,
        AR2_CHUKWA, AR2_CHUKWA_V2, AR2_WRKZ,
        ASTROBWT_DERO,
-        KAWPOW_RVN
+        KAWPOW_RVN,
+        GHOSTRIDER_RTM
    };

    Algorithms out;
--- a/src/base/crypto/Algorithm.h
+++ b/src/base/crypto/Algorithm.h
@@ -65,6 +65,13 @@ public:
        CN_PICO_0       = 0x63120200,   // "cn-pico"          CryptoNight-Pico
        CN_PICO_TLO     = 0x63120274,   // "cn-pico/tlo"      CryptoNight-Pico (TLO)
        CN_UPX2         = 0x63110200,   // "cn/upx2"          Uplexa (UPX2)
+        CN_GR_0         = 0x63130100,   // "cn/dark"          GhostRider
+        CN_GR_1         = 0x63130101,   // "cn/dark-lite"     GhostRider
+        CN_GR_2         = 0x63150102,   // "cn/fast"          GhostRider
+        CN_GR_3         = 0x63140103,   // "cn/lite"          GhostRider
+        CN_GR_4         = 0x63120104,   // "cn/turtle"        GhostRider
+        CN_GR_5         = 0x63120105,   // "cn/turtle-lite"   GhostRider
+        GHOSTRIDER_RTM  = 0x6c150000,   // "ghostrider"       GhostRider
        RX_0            = 0x72151200,   // "rx/0"             RandomX (reference configuration).
        RX_WOW          = 0x72141177,   // "rx/wow"           RandomWOW (Wownero).
        RX_ARQ          = 0x72121061,   // "rx/arq"           RandomARQ (Arqma).
@@ -89,7 +96,8 @@ public:
        RANDOM_X        = 0x72000000,
        ARGON2          = 0x61000000,
        ASTROBWT        = 0x41000000,
-        KAWPOW          = 0x6b000000
+        KAWPOW          = 0x6b000000,
+        GHOSTRIDER      = 0x6c000000
    };

    static const char *kINVALID;
@@ -157,6 +165,11 @@ public:
    static const char *kKAWPOW_RVN;
 #   endif

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    static const char* kGHOSTRIDER;
+    static const char* kGHOSTRIDER_RTM;
+#   endif
+
    inline Algorithm() = default;
    inline Algorithm(const char *algo) : m_id(parse(algo))  {}
    inline Algorithm(Id id) : m_id(id)                      {}
@@ -176,7 +189,7 @@ public:
    inline Id id() const                                    { return m_id; }
    inline size_t l2() const                                { return l2(m_id); }
    inline uint32_t family() const                          { return family(m_id); }
-    inline uint32_t maxIntensity() const                    { return isCN() ? 5 : 1; };
+    inline uint32_t maxIntensity() const                    { return isCN() ? 5 : ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };

    inline size_t l3() const
    {
--- a/src/base/io/Env.cpp
+++ b/src/base/io/Env.cpp
@@ -143,17 +143,10 @@ xmrig::String xmrig::Env::get(const String &name, const std::map<String, String>
 xmrig::String xmrig::Env::hostname()
 {
    char buf[UV_MAXHOSTNAMESIZE]{};
-    size_t size = sizeof(buf);

-#   if UV_VERSION_HEX >= 0x010c00
-    if (uv_os_gethostname(buf, &size) == 0) {
+    if (gethostname(buf, sizeof(buf)) == 0) {
        return static_cast<const char *>(buf);
    }
-#   else
-    if (gethostname(buf, size) == 0) {
-        return static_cast<const char *>(buf);
-    }
-#   endif

    return {};
 }
--- a/src/base/kernel/Entry.cpp
+++ b/src/base/kernel/Entry.cpp
@@ -141,7 +141,7 @@ xmrig::Entry::Id xmrig::Entry::get(const Process &process)
         return Usage;
    }

-    if (args.hasArg("-V") || args.hasArg("--version")) {
+    if (args.hasArg("-V") || args.hasArg("--version") || args.hasArg("--versions")) {
         return Version;
    }

--- a/src/base/kernel/Process.cpp
+++ b/src/base/kernel/Process.cpp
@@ -152,7 +152,7 @@ xmrig::String xmrig::Process::exepath()
 {
    size_t size = sizeof(pathBuf);

-    return uv_exepath(pathBuf, &size) < 0 ? "" : String(pathBuf, size);
+    return uv_exepath(pathBuf, &size) < 0 ? String("") : String(pathBuf, size);
 }


--- a/src/base/kernel/interfaces/IConfig.h
+++ b/src/base/kernel/interfaces/IConfig.h
@@ -87,6 +87,7 @@ public:
        SpendSecretKey       = 1055,
        DaemonZMQPortKey     = 1056,
        HugePagesJitKey      = 1057,
+        RotationKey          = 1058,

        // xmrig common
        CPUPriorityKey       = 1021,
--- a/src/base/net/dns/DnsUvBackend.cpp
+++ b/src/base/net/dns/DnsUvBackend.cpp
@@ -28,13 +28,19 @@

 namespace xmrig {

+static Storage<DnsUvBackend>* storage = nullptr;

 Storage<DnsUvBackend>& DnsUvBackend::getStorage()
 {
-    static Storage<DnsUvBackend>* storage = new Storage<DnsUvBackend>();
+    if (storage == nullptr) storage = new Storage<DnsUvBackend>();
    return *storage;
 }

+void DnsUvBackend::releaseStorage()
+{
+    delete storage; storage = nullptr; // reset: getStorage() only re-creates the storage when the pointer is nullptr, otherwise it would hand out freed memory
+}
+
 static addrinfo hints{};


@@ -56,6 +62,7 @@ xmrig::DnsUvBackend::DnsUvBackend()
 xmrig::DnsUvBackend::~DnsUvBackend()
 {
    getStorage().release(m_key);
+    releaseStorage();
 }


--- a/src/base/net/dns/DnsUvBackend.h
+++ b/src/base/net/dns/DnsUvBackend.h
@@ -62,6 +62,7 @@ private:
    uintptr_t m_key;

    static Storage<DnsUvBackend>& getStorage();
+    void releaseStorage();
 };


--- a/src/base/net/stratum/AutoClient.cpp
+++ b/src/base/net/stratum/AutoClient.cpp
@@ -50,7 +50,7 @@ bool xmrig::AutoClient::parseLogin(const rapidjson::Value &result, int *code)
    }

    const Algorithm algo(Json::getString(result, "algo"));
-    if (algo.family() != Algorithm::KAWPOW) {
+    if (algo.family() != Algorithm::KAWPOW && algo.family() != Algorithm::GHOSTRIDER) {
        *code = 6;
        return false;
    }
--- a/src/base/net/stratum/EthStratumClient.cpp
+++ b/src/base/net/stratum/EthStratumClient.cpp
@@ -34,6 +34,16 @@
 #include "base/kernel/interfaces/IClientListener.h"
 #include "net/JobResult.h"

+#ifdef XMRIG_ALGO_GHOSTRIDER
+#include <cmath>
+
+extern "C" {
+#include "crypto/ghostrider/sph_sha2.h"
+}
+
+#include "base/tools/Cvt.h"
+#endif
+


 xmrig::EthStratumClient::EthStratumClient(int id, const char *agent, IClientListener *listener) :
@@ -66,29 +76,53 @@ int64_t xmrig::EthStratumClient::submit(const JobResult& result)
    params.PushBack(m_pool.user().toJSON(), allocator);
    params.PushBack(result.jobId.toJSON(), allocator);

-    std::stringstream s;
-    s << "0x" << std::hex << std::setw(16) << std::setfill('0') << result.nonce;
-    params.PushBack(Value(s.str().c_str(), allocator), allocator);
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (m_pool.algorithm().id() == Algorithm::GHOSTRIDER_RTM) { // params pushed here: all-zero extranonce2, ntime, 32-bit nonce
+        params.PushBack(Value(std::string(m_extraNonce2Size * 2, '0').c_str(), allocator), allocator); // sized at runtime: m_extraNonce2Size comes from the pool, a fixed literal would be over-read
+        params.PushBack(Value(m_ntime.data(), allocator), allocator);

-    s.str(std::string());
-    s << "0x";
-    for (size_t i = 0; i < 32; ++i) {
-        const uint32_t k = result.headerHash()[i];
-        s << std::hex << std::setw(2) << std::setfill('0') << k;
+        std::stringstream s;
+        s << std::hex << std::setw(8) << std::setfill('0') << result.nonce;
+        params.PushBack(Value(s.str().c_str(), allocator), allocator);
    }
-    params.PushBack(Value(s.str().c_str(), allocator), allocator);
+    else
+#   endif
+    {
+        std::stringstream s;
+        s << "0x" << std::hex << std::setw(16) << std::setfill('0') << result.nonce;
+        params.PushBack(Value(s.str().c_str(), allocator), allocator);

-    s.str(std::string());
-    s << "0x";
-    for (size_t i = 0; i < 32; ++i) {
-        const uint32_t k = result.mixHash()[i];
-        s << std::hex << std::setw(2) << std::setfill('0') << k;
+        s.str(std::string());
+        s << "0x";
+        for (size_t i = 0; i < 32; ++i) {
+            const uint32_t k = result.headerHash()[i];
+            s << std::hex << std::setw(2) << std::setfill('0') << k;
+        }
+        params.PushBack(Value(s.str().c_str(), allocator), allocator);
+
+        s.str(std::string());
+        s << "0x";
+        for (size_t i = 0; i < 32; ++i) {
+            const uint32_t k = result.mixHash()[i];
+            s << std::hex << std::setw(2) << std::setfill('0') << k;
+        }
+        params.PushBack(Value(s.str().c_str(), allocator), allocator);
    }
-    params.PushBack(Value(s.str().c_str(), allocator), allocator);

    JsonRequest::create(doc, m_sequence, "mining.submit", params);

-    uint64_t actual_diff = ethash_swap_u64(*((uint64_t*)result.result()));
+    uint64_t actual_diff;
+
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (result.algorithm == Algorithm::GHOSTRIDER_RTM) {
+        actual_diff = reinterpret_cast<const uint64_t*>(result.result())[3];
+    }
+    else
+#   endif
+    {
+        actual_diff = ethash_swap_u64(*((uint64_t*)result.result()));
+    }
+
    actual_diff = actual_diff ? (uint64_t(-1) / actual_diff) : 0;

 #   ifdef XMRIG_PROXY_PROJECT
@@ -161,6 +195,33 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
        setExtraNonce(arr[0]);
    }

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (strcmp(method, "mining.set_difficulty") == 0) {
+        if (!params.IsArray()) {
+            LOG_ERR("%s " RED("invalid mining.set_difficulty notification: params is not an array"), tag());
+            return;
+        }
+
+        if (m_pool.algorithm().id() != Algorithm::GHOSTRIDER_RTM) {
+            return;
+        }
+
+        auto arr = params.GetArray();
+
+        if (arr.Empty()) {
+            LOG_ERR("%s " RED("invalid mining.set_difficulty notification: params array is empty"), tag());
+            return;
+        }
+        // IsNumber(), not IsDouble(): a JSON integer such as 1 is a valid difficulty and GetDouble() converts it
+        if (!arr[0].IsNumber()) {
+            LOG_ERR("%s " RED("invalid mining.set_difficulty notification: difficulty is not a number"), tag());
+            return;
+        }
+
+        m_nextDifficulty = static_cast<uint64_t>(ceil(arr[0].GetDouble() * 65536.0));
+    }
+#   endif
+
    if (strcmp(method, "mining.notify") == 0) {
        if (!params.IsArray()) {
            LOG_ERR("%s " RED("invalid mining.notify notification: params is not an array"), tag());
@@ -169,44 +230,152 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj

        auto arr = params.GetArray();

-        if (arr.Size() < 6) {
+        auto algo = m_pool.algorithm();
+        if (!algo.isValid()) {
+            algo = m_pool.coin().algorithm();
+        }
+
+        const size_t min_arr_size = (algo.id() == Algorithm::GHOSTRIDER_RTM) ? 8 : 6;
+
+        if (arr.Size() < min_arr_size) {
            LOG_ERR("%s " RED("invalid mining.notify notification: params array has wrong size"), tag());
            return;
        }

+        if (!arr[0].IsString()) {
+            LOG_ERR("%s " RED("invalid mining.notify notification: invalid job id"), tag());
+            return;
+        }
+
        Job job;
        job.setId(arr[0].GetString());

-        auto algo = m_pool.algorithm();
-        if (!algo.isValid()) {
-            algo = m_pool.coin().algorithm();
-        }
-
        job.setAlgorithm(algo);
        job.setExtraNonce(m_extraNonce.second);

        std::stringstream s;

-        // header hash (32 bytes)
-        s << arr[1].GetString();
+#       ifdef XMRIG_ALGO_GHOSTRIDER
+        if (algo.id() == Algorithm::GHOSTRIDER_RTM) {
+            // Raptoreum uses Bitcoin's Stratum protocol
+            // https://en.bitcoinwiki.org/wiki/Stratum_mining_protocol#mining.notify

-        // nonce template (8 bytes)
-        for (uint64_t i = 0, k = m_extraNonce.first; i < sizeof(m_extraNonce.first); ++i, k >>= 8) {
-            s << std::hex << std::setw(2) << std::setfill('0') << (k & 0xFF);
+            if (!arr[1].IsString() || !arr[2].IsString() || !arr[3].IsString() || !arr[4].IsArray() || !arr[5].IsString() || !arr[6].IsString() || !arr[7].IsString()) {
+                LOG_ERR("%s " RED("invalid mining.notify notification: invalid param array"), tag());
+                return;
+            }
+
+            // Version
+            s << arr[5].GetString();
+
+            // Previous block hash
+            s << arr[1].GetString();
+
+            // Merkle tree root
+            std::string blob = arr[2].GetString();
+            blob += m_extraNonce.second;
+            blob.append(m_extraNonce2Size * 2, '0');
+            blob += arr[3].GetString();
+
+            uint8_t merkle_root[64];
+
+            Buffer buf = Cvt::fromHex(blob.c_str(), blob.length());
+
+            // Get height from coinbase. The blob is built from pool-supplied hex, so every read below is kept inside buf.
+            {
+                uint8_t* const end = buf.data() + buf.size();
+                uint8_t* p = (buf.size() > 32) ? (buf.data() + 32) : end;
+                uint8_t* m = ((end - p) > 128) ? (p + 128) : end;
+                while ((p < m) && (*p != 0xff)) ++p;
+                while ((p < m) && (*p == 0xff)) ++p;
+                // p[1]..p[5] may be read below, so at least 6 bytes must remain in buf after the 0xff run
+                if ((p < m) && ((end - p) >= 6) && (*(p - 1) == 0xff) && (*(p - 2) == 0xff)) {
+                    uint32_t height = *reinterpret_cast<uint16_t*>(p + 2);
+                    switch (*(p + 1)) {
+                    case 4:
+                        height += *reinterpret_cast<uint16_t*>(p + 4) * 0x10000UL;
+                        break;
+                    case 3:
+                        height += *(p + 4) * 0x10000UL;
+                        break;
+                    }
+                    job.setHeight(height);
+                }
+                else {
+                    job.setHeight(0);
+                }
+            }
+
+            sha256d(merkle_root, buf.data(), static_cast<int>(buf.size()));
+
+            auto merkle_branches = arr[4].GetArray();
+            for (int i = 0, n = merkle_branches.Size(); i < n; ++i) {
+                auto& b = merkle_branches[i];
+                buf = b.IsString() ? Cvt::fromHex(b.GetString(), b.GetStringLength()) : Buffer();
+                if (buf.size() != 32) {
+                    LOG_ERR("%s " RED("invalid mining.notify notification: param 4 is invalid"), tag());
+                    return;
+                }
+                memcpy(merkle_root + 32, buf.data(), 32);
+                sha256d(merkle_root, merkle_root, 64);
+            }
+
+            s << Cvt::toHex(merkle_root, 32);
+
+            // ntime
+            m_ntime = arr[7].GetString();
+            s << m_ntime;
+
+            // nbits
+            s << arr[6].GetString();
+
+            blob = s.str();
+
+            if (blob.size() != 76 * 2) {
+                LOG_ERR("%s " RED("invalid mining.notify notification: invalid blob size"), tag());
+                return;
+            }
+
+            // zeros up to 80 bytes
+            blob.resize(80 * 2, '0');
+
+            // Invert byte order (no idea why, but it's done in Bitcoin's Stratum)
+            buf = Cvt::fromHex(blob.c_str(), blob.length());
+            for (size_t i = 0; i < 80; i += sizeof(uint32_t)) {
+                uint32_t& k = *reinterpret_cast<uint32_t*>(buf.data() + i);
+                if ((i < 36) || (i >= 68)) {
+                    k = ethash_swap_u32(k);
+                }
+            }
+            blob = Cvt::toHex(buf.data(), buf.size());
+
+            job.setBlob(blob.c_str());
+            job.setDiff(m_nextDifficulty);
        }
+        else
+#       endif
+        {
+            // header hash (32 bytes)
+            s << arr[1].GetString();

-        std::string blob = s.str();
+            // nonce template (8 bytes)
+            for (uint64_t i = 0, k = m_extraNonce.first; i < sizeof(m_extraNonce.first); ++i, k >>= 8) {
+                s << std::hex << std::setw(2) << std::setfill('0') << (k & 0xFF);
+            }

-        // zeros up to 76 bytes
-        blob.resize(76 * 2, '0');
-        job.setBlob(blob.c_str());
+            std::string blob = s.str();

-        std::string target_str = arr[3].GetString();
-        target_str.resize(16, '0');
-        const uint64_t target = strtoull(target_str.c_str(), nullptr, 16);
-        job.setDiff(Job::toDiff(target));
+            // zeros up to 76 bytes
+            blob.resize(76 * 2, '0');
+            job.setBlob(blob.c_str());

-        job.setHeight(arr[5].GetUint64());
+            std::string target_str = arr[3].GetString();
+            target_str.resize(16, '0');
+            const uint64_t target = strtoull(target_str.c_str(), nullptr, 16);
+            job.setDiff(Job::toDiff(target));
+
+            job.setHeight(arr[5].GetUint64());
+        }

        bool ok = true;
        m_listener->onVerifyAlgorithm(this, algo, &ok);
@@ -356,11 +525,19 @@ void xmrig::EthStratumClient::onSubscribeResponse(const rapidjson::Value &result
            throw std::runtime_error("invalid mining.subscribe response: result is not an array");
        }

-        if (result.GetArray().Size() <= 1) {
+        auto arr = result.GetArray();
+
+        if (arr.Size() <= 1) {
            throw std::runtime_error("invalid mining.subscribe response: result array is too short");
        }

-        setExtraNonce(result.GetArray()[1]);
+        setExtraNonce(arr[1]);
+
+#       ifdef XMRIG_ALGO_GHOSTRIDER
+        if ((arr.Size() > 2) && (arr[2].IsUint())) {
+            m_extraNonce2Size = arr[2].GetUint();
+        }
+#       endif

        if (m_pool.isNicehash()) {
            using namespace rapidjson;
--- a/src/base/net/stratum/EthStratumClient.h
+++ b/src/base/net/stratum/EthStratumClient.h
@@ -57,6 +57,12 @@ private:

    bool m_authorized   = false;
    std::pair<uint64_t, String> m_extraNonce{};
+
+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    uint64_t m_extraNonce2Size = 0;
+    uint64_t m_nextDifficulty = 0;
+    String m_ntime;
+#   endif
 };


--- a/src/base/net/stratum/Job.cpp
+++ b/src/base/net/stratum/Job.cpp
@@ -47,7 +47,7 @@ xmrig::Job::Job(bool nicehash, const Algorithm &algorithm, const String &clientI

 bool xmrig::Job::isEqual(const Job &other) const
 {
-    return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0;
+    return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0 && m_target == other.m_target;
 }


--- a/src/base/net/stratum/Job.h
+++ b/src/base/net/stratum/Job.h
@@ -76,7 +76,7 @@ public:
    inline const String &poolWallet() const             { return m_poolWallet; }
    inline const uint32_t *nonce() const                { return reinterpret_cast<const uint32_t*>(m_blob + nonceOffset()); }
    inline const uint8_t *blob() const                  { return m_blob; }
-    inline int32_t nonceOffset() const                  { return (algorithm().family() == Algorithm::KAWPOW) ? 32 : 39; }
+    inline int32_t nonceOffset() const                  { auto f = algorithm().family(); return (f == Algorithm::KAWPOW) ? 32 : ((f == Algorithm::GHOSTRIDER) ? 76 : 39); }
    inline size_t nonceSize() const                     { return (algorithm().family() == Algorithm::KAWPOW) ?  8 :  4; }
    inline size_t size() const                          { return m_size; }
    inline uint32_t *nonce()                            { return reinterpret_cast<uint32_t*>(m_blob + nonceOffset()); }
--- a/src/base/net/stratum/Pool.cpp
+++ b/src/base/net/stratum/Pool.cpp
@@ -219,7 +219,8 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con

    if (m_mode == MODE_POOL) {
 #       ifdef XMRIG_ALGO_KAWPOW
-        if ((m_algorithm.family() == Algorithm::KAWPOW) || (m_coin == Coin::RAVEN)) {
+        const uint32_t f = m_algorithm.family();
+        if ((f == Algorithm::KAWPOW) || (f == Algorithm::GHOSTRIDER) || (m_coin == Coin::RAVEN)) {
            client = new EthStratumClient(id, Platform::userAgent(), listener);
        }
        else
--- a/src/base/net/stratum/SelfSelectClient.cpp
+++ b/src/base/net/stratum/SelfSelectClient.cpp
@@ -72,7 +72,14 @@ int64_t xmrig::SelfSelectClient::submit(const JobResult &result)
        submitOriginDaemon(result);
    }

-    return m_client->submit(result);
+    const int64_t submit_result = m_client->submit(result); // signed, same type as this function's return value
+
+    if (m_submitToOrigin) {
+        // Ensure that the latest block template is available after block submission
+        getBlockTemplate();
+    }
+
+    return submit_result;
 }


@@ -285,9 +292,6 @@ void xmrig::SelfSelectClient::submitOriginDaemon(const JobResult& result)
    LOG_INFO("%s " GREEN_BOLD("submitted to origin daemon") " (%" PRId64 "/%" PRId64 ") " 
        " diff " WHITE("%" PRIu64) " vs. " WHITE("%" PRIu64),
        Tags::origin(), m_originSubmitted, m_originNotSubmitted, m_blockDiff, result.actualDiff(), result.diff);
-
-    // Ensure that the latest block template is available after block submission
-    getBlockTemplate();
 }

 void xmrig::SelfSelectClient::onHttpData(const HttpData &data)
--- a/src/base/net/stratum/benchmark/BenchClient.cpp
+++ b/src/base/net/stratum/benchmark/BenchClient.cpp
@@ -48,6 +48,39 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
    std::vector<char> blob(112 * 2 + 1, '0');
    blob.back() = '\0';

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (m_benchmark->algorithm() == Algorithm::GHOSTRIDER_RTM) {
+        const uint32_t r = benchmark->rotation() % 20;
+
+        static constexpr uint32_t indices[20][3] = {
+             { 0, 1, 2 },
+             { 0, 1, 3 },
+             { 0, 1, 4 },
+             { 0, 1, 5 },
+             { 0, 2, 3 },
+             { 0, 2, 4 },
+             { 0, 2, 5 },
+             { 0, 3, 4 },
+             { 0, 3, 5 },
+             { 0, 4, 5 },
+             { 1, 2, 3 },
+             { 1, 2, 4 },
+             { 1, 2, 5 },
+             { 1, 3, 4 },
+             { 1, 3, 5 },
+             { 1, 4, 5 },
+             { 2, 3, 4 },
+             { 2, 3, 5 },
+             { 2, 4, 5 },
+             { 3, 4, 5 },
+        };
+
+        blob[ 8] = '0' + indices[r][1];
+        blob[ 9] = '0' + indices[r][0];
+        blob[11] = '0' + indices[r][2];
+    }
+#   endif
+
    m_job.setAlgorithm(m_benchmark->algorithm());
    m_job.setBlob(blob.data());
    m_job.setDiff(std::numeric_limits<uint64_t>::max());
@@ -60,7 +93,7 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
    BenchState::init(this, m_benchmark->size());

 #   ifdef XMRIG_FEATURE_HTTP
-    if (m_benchmark->isSubmit()) {
+    if (m_benchmark->isSubmit() && (m_benchmark->algorithm().family() == Algorithm::RANDOM_X)) {
        m_mode  = ONLINE_BENCH;
        m_token = m_benchmark->token();

--- a/src/base/net/stratum/benchmark/BenchConfig.cpp
+++ b/src/base/net/stratum/benchmark/BenchConfig.cpp
@@ -39,6 +39,7 @@ const char *BenchConfig::kHash      = "hash";
 const char *BenchConfig::kId        = "id";
 const char *BenchConfig::kSeed      = "seed";
 const char *BenchConfig::kSize      = "size";
+const char *BenchConfig::kRotation  = "rotation";
 const char *BenchConfig::kSubmit    = "submit";
 const char *BenchConfig::kToken     = "token";
 const char *BenchConfig::kUser      = "user";
@@ -53,7 +54,7 @@ const char *BenchConfig::kApiHost   = "127.0.0.1";
 } // namespace xmrig


-xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi) :
+xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi, uint32_t rotation) :
    m_algorithm(Json::getString(object, kAlgo)),
    m_dmi(dmi),
    m_submit(Json::getBool(object, kSubmit)),
@@ -61,9 +62,15 @@ xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson
    m_seed(Json::getString(object, kSeed)),
    m_token(Json::getString(object, kToken)),
    m_user(Json::getString(object, kUser)),
-    m_size(size)
+    m_size(size),
+    m_rotation(rotation)
 {
-    if (!m_algorithm.isValid() || m_algorithm.family() != Algorithm::RANDOM_X) {
+    auto f = m_algorithm.family();
+    if (!m_algorithm.isValid() || (f != Algorithm::RANDOM_X
+#       ifdef XMRIG_ALGO_GHOSTRIDER
+        && f != Algorithm::GHOSTRIDER
+#       endif
+        )) {
        m_algorithm = Algorithm::RX_0;
    }

@@ -80,14 +87,17 @@ xmrig::BenchConfig *xmrig::BenchConfig::create(const rapidjson::Value &object, b
        return nullptr;
    }

-    const uint32_t size = getSize(Json::getString(object, kSize));
-    const String id     = Json::getString(object, kVerify);
+    const uint32_t size     = getSize(Json::getString(object, kSize));
+    const String id         = Json::getString(object, kVerify);
+
+    const char* rotation_str = Json::getString(object, kRotation);
+    const uint32_t rotation = rotation_str ? strtoul(rotation_str, nullptr, 10) : 0;

    if (size == 0 && id.isEmpty()) {
        return nullptr;
    }

-    return new BenchConfig(size, id, object, dmi);
+    return new BenchConfig(size, id, object, dmi, rotation);
 }


--- a/src/base/net/stratum/benchmark/BenchConfig.h
+++ b/src/base/net/stratum/benchmark/BenchConfig.h
@@ -37,6 +37,7 @@ public:
    static const char *kId;
    static const char *kSeed;
    static const char *kSize;
+    static const char* kRotation;
    static const char *kSubmit;
    static const char *kToken;
    static const char *kUser;
@@ -50,7 +51,7 @@ public:
    static constexpr const uint16_t kApiPort    = 18805;
 #   endif

-    BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi);
+    BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi, uint32_t rotation);

    static BenchConfig *create(const rapidjson::Value &object, bool dmi);

@@ -63,6 +64,7 @@ public:
    inline const String &user() const           { return m_user; }
    inline uint32_t size() const                { return m_size; }
    inline uint64_t hash() const                { return m_hash; }
+    inline uint32_t rotation() const            { return m_rotation; }

    rapidjson::Value toJSON(rapidjson::Document &doc) const;

@@ -77,6 +79,7 @@ private:
    String m_token;
    String m_user;
    uint32_t m_size;
+    uint32_t m_rotation;
    uint64_t m_hash = 0;
 };

--- a/src/base/tools/cryptonote/BlockTemplate.cpp
+++ b/src/base/tools/cryptonote/BlockTemplate.cpp
@@ -244,22 +244,27 @@ bool xmrig::BlockTemplate::parse(bool hashes)
    ar.skip(m_extraSize);

    while (ar_extra.index() < m_extraSize) {
-        uint64_t extra_tag = 0;
+        uint64_t extra_tag  = 0;
+        uint64_t size       = 0;
+
        ar_extra(extra_tag);

        switch (extra_tag) {
-        case 0x01: // TX_EXTRA_TAG_PUBKEY
+        case 0x01: // TX_EXTRA_TAG_PUBKEY        
            setOffset(TX_PUBKEY_OFFSET, offset(TX_EXTRA_OFFSET) + ar_extra.index());
            ar_extra.skip(kKeySize);
            break;

        case 0x02: // TX_EXTRA_NONCE
-            {
-                uint64_t size = 0;
-                ar_extra(size);
-                setOffset(TX_EXTRA_NONCE_OFFSET, offset(TX_EXTRA_OFFSET) + ar_extra.index());
-                ar_extra(m_txExtraNonce, size);
-            }
+            ar_extra(size);
+            setOffset(TX_EXTRA_NONCE_OFFSET, offset(TX_EXTRA_OFFSET) + ar_extra.index());
+            ar_extra(m_txExtraNonce, size);
+            break;
+
+        case 0x03: // TX_EXTRA_MERGE_MINING_TAG
+            ar_extra(size);
+            setOffset(TX_EXTRA_MERGE_MINING_TAG_OFFSET, offset(TX_EXTRA_OFFSET) + ar_extra.index());
+            ar_extra(m_txMergeMiningTag, size + kKeySize);
            break;

        default:
--- a/src/base/tools/cryptonote/BlockTemplate.h
+++ b/src/base/tools/cryptonote/BlockTemplate.h
@@ -54,6 +54,7 @@ public:
        TX_EXTRA_OFFSET,
        TX_PUBKEY_OFFSET,
        TX_EXTRA_NONCE_OFFSET,
+        TX_EXTRA_MERGE_MINING_TAG_OFFSET,
        OFFSET_COUNT
    };

@@ -86,6 +87,7 @@ public:
    inline uint64_t outputType() const                      { return m_outputType; }
    inline const Span &ephPublicKey() const                 { return m_ephPublicKey; }
    inline const Span &txExtraNonce() const                 { return m_txExtraNonce; }
+    inline const Span &txMergeMiningTag() const             { return m_txMergeMiningTag; }

    // Transaction hashes
    inline uint64_t numHashes() const                       { return m_numHashes; }
@@ -140,7 +142,7 @@ private:
    Span m_ephPublicKey;
    uint64_t m_extraSize    = 0;
    Span m_txExtraNonce;
-
+    Span m_txMergeMiningTag; // filled by parse() when tx_extra contains tag 0x03; default-constructed like the other Span members
    uint64_t m_numHashes    = 0;
    Buffer m_hashes;
    Buffer m_minerTxMerkleTreeBranch;
--- a/src/core/Miner.cpp
+++ b/src/core/Miner.cpp
@@ -22,6 +22,7 @@


 #include "core/Miner.h"
+#include "core/Taskbar.h"
 #include "3rdparty/rapidjson/document.h"
 #include "backend/common/Hashrate.h"
 #include "backend/cpu/Cpu.h"
@@ -66,6 +67,11 @@
 #endif


+#ifdef XMRIG_ALGO_GHOSTRIDER
+#   include "crypto/ghostrider/ghostrider.h"
+#endif
+
+
 namespace xmrig {


@@ -333,6 +339,11 @@ public:
 #   endif


+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    // Thin wrapper around ghostrider::benchmark(); Miner::setJob() calls it for jobs of the GHOSTRIDER family.
+    inline void initGhostRider() const { ghostrider::benchmark(); }
+#   endif
+
+
    Algorithm algorithm;
    Algorithms algorithms;
    bool active         = false;
@@ -348,6 +359,8 @@ public:
    String userJobId;
    Timer *timer        = nullptr;
    uint64_t ticks      = 0;
+
+    Taskbar m_taskbar;
 };


@@ -475,6 +488,7 @@ void xmrig::Miner::execCommand(char command)
 void xmrig::Miner::pause()
 {
    d_ptr->active = false;
+    d_ptr->m_taskbar.setActive(false);

    Nonce::pause(true);
    Nonce::touch();
@@ -494,6 +508,7 @@ void xmrig::Miner::setEnabled(bool enabled)
    }

    d_ptr->enabled = enabled;
+    d_ptr->m_taskbar.setEnabled(enabled);

    if (enabled) {
        LOG_INFO("%s " GREEN_BOLD("resumed"), Tags::miner());
@@ -548,9 +563,16 @@ void xmrig::Miner::setJob(const Job &job, bool donate)
    constexpr const bool ready = true;
 #   endif

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    if (job.algorithm().family() == Algorithm::GHOSTRIDER) {
+        d_ptr->initGhostRider();
+    }
+#   endif
+
    mutex.unlock();

    d_ptr->active = true;
+    d_ptr->m_taskbar.setActive(true);

    if (ready) {
        d_ptr->handleJobChange();
--- a/src/core/Taskbar.cpp
+++ b/src/core/Taskbar.cpp
@@ -0,0 +1,126 @@
+/* XMRig
+ * Copyright (c) 2018-2021 SChernykh   <https://github.com/SChernykh>
+ * Copyright (c) 2016-2021 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "core/Taskbar.h"
+
+#ifdef _WIN32
+
+
+#include <Shobjidl.h>
+#include <Objbase.h>
+
+
+namespace xmrig {
+
+
+// Owns the COM objects behind Taskbar: the ITaskbarList3 instance and the console window handle.
+// If any initialization step fails, m_taskbar stays null and Taskbar::updateTaskbarColor() does nothing.
+struct TaskbarPrivate
+{
+    TaskbarPrivate()
+    {
+        HRESULT hr = CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
+        if (hr < 0) {
+            return;
+        }
+
+        // Only a successful CoInitializeEx() may be balanced by CoUninitialize() in the destructor.
+        m_comInitialized = true;
+
+        hr = CoCreateInstance(CLSID_TaskbarList, NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&m_taskbar));
+        if (hr < 0) {
+            m_taskbar = nullptr;
+            return;
+        }
+
+        hr = m_taskbar->HrInit();
+        if (hr < 0) {
+            m_taskbar->Release();
+            m_taskbar = nullptr;
+            return;
+        }
+
+        m_consoleWnd = GetConsoleWindow();
+    }
+
+    ~TaskbarPrivate()
+    {
+        // Release the interface before tearing COM down.
+        if (m_taskbar) {
+            m_taskbar->Release();
+        }
+
+        if (m_comInitialized) {
+            CoUninitialize();
+        }
+    }
+
+    ITaskbarList3* m_taskbar = nullptr;
+    HWND m_consoleWnd = nullptr;
+    bool m_comInitialized = false; // true once CoInitializeEx() succeeded in the constructor
+};
+
+
+// Allocates the private COM-backed state; the taskbar itself is not touched until setActive()/setEnabled() is called.
+Taskbar::Taskbar() : d_ptr(new TaskbarPrivate())
+{
+}
+
+
+// Destroys the private state allocated in the constructor.
+Taskbar::~Taskbar()
+{
+    delete d_ptr;
+}
+
+
+// Stores the new "active" flag and immediately refreshes the taskbar indicator.
+void Taskbar::setActive(bool active)
+{
+    m_active = active;
+    updateTaskbarColor();
+}
+
+
+// Stores the new "enabled" flag and immediately refreshes the taskbar indicator.
+void Taskbar::setEnabled(bool enabled)
+{
+    m_enabled = enabled;
+    updateTaskbarColor();
+}
+
+
+// Maps (m_active, m_enabled) onto the console window's taskbar progress indicator:
+//   inactive          -> TBPF_ERROR, full bar
+//   active + disabled -> TBPF_PAUSED, full bar
+//   active + enabled  -> TBPF_NOPROGRESS (indicator cleared)
+// Does nothing when the ITaskbarList3 instance could not be created.
+void Taskbar::updateTaskbarColor()
+{
+    ITaskbarList3* taskbar = d_ptr->m_taskbar;
+    if (!taskbar) {
+        return;
+    }
+
+    const HWND wnd = d_ptr->m_consoleWnd;
+
+    if (!m_active) {
+        taskbar->SetProgressState(wnd, TBPF_ERROR);
+        taskbar->SetProgressValue(wnd, 1, 1);
+        return;
+    }
+
+    if (m_enabled) {
+        taskbar->SetProgressState(wnd, TBPF_NOPROGRESS);
+        taskbar->SetProgressValue(wnd, 0, 1);
+    }
+    else {
+        taskbar->SetProgressState(wnd, TBPF_PAUSED);
+        taskbar->SetProgressValue(wnd, 1, 1);
+    }
+}
+
+
+} // namespace xmrig
+
+
+#else // _WIN32
+
+
+namespace xmrig {
+
+
+// Non-Windows builds: no taskbar integration, so every public member is an empty stub and d_ptr stays null.
+Taskbar::Taskbar() {}
+Taskbar::~Taskbar() {}
+void Taskbar::setActive(bool) {}
+void Taskbar::setEnabled(bool) {}
+
+
+} // namespace xmrig
+
+
+#endif // _WIN32
--- a/src/core/Taskbar.h
+++ b/src/core/Taskbar.h
@@ -0,0 +1,51 @@
+/* XMRig
+ * Copyright (c) 2018-2021 SChernykh   <https://github.com/SChernykh>
+ * Copyright (c) 2016-2021 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_TASKBAR_H
+#define XMRIG_TASKBAR_H
+
+
+namespace xmrig {
+
+
+struct TaskbarPrivate;
+
+
+// Reflects two state flags ("active" and "enabled") in the taskbar indicator of the console window.
+// The real implementation is Windows-only; on other platforms the methods are empty stubs.
+class Taskbar
+{
+public:
+    Taskbar();
+    ~Taskbar();
+
+    // d_ptr is an owning raw pointer deleted in the destructor, so a copy would delete it twice.
+    Taskbar(const Taskbar &) = delete;
+    Taskbar &operator=(const Taskbar &) = delete;
+
+    // Each setter stores the flag and refreshes the indicator.
+    void setActive(bool active);
+    void setEnabled(bool enabled);
+
+private:
+    bool m_active = false;
+    bool m_enabled = true;
+
+    // Platform-specific state; remains nullptr on non-Windows builds.
+    TaskbarPrivate* d_ptr = nullptr;
+
+    void updateTaskbarColor();
+};
+
+
+} // namespace xmrig
+
+
+#endif /* XMRIG_TASKBAR_H */
--- a/src/core/config/ConfigTransform.cpp
+++ b/src/core/config/ConfigTransform.cpp
@@ -17,6 +17,7 @@
 */

 #include "core/config/ConfigTransform.h"
+#include "base/crypto/Algorithm.h"
 #include "base/kernel/interfaces/IConfig.h"
 #include "base/net/stratum/Pool.h"
 #include "base/net/stratum/Pools.h"
@@ -102,6 +103,9 @@ void xmrig::ConfigTransform::finalize(rapidjson::Document &doc)
        profile.AddMember(StringRef(kThreads),   m_threads, allocator);
        profile.AddMember(StringRef(kAffinity),  m_affinity, allocator);

+#       ifdef XMRIG_ALGO_KAWPOW
+        doc[CpuConfig::kField].AddMember(StringRef(Algorithm::kKAWPOW), false, doc.GetAllocator());
+#       endif
        doc[CpuConfig::kField].AddMember(StringRef(kAsterisk), profile, doc.GetAllocator());
    }

@@ -265,6 +269,7 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
    case IConfig::BenchSeedKey:     /* --seed */
    case IConfig::BenchHashKey:     /* --hash */
    case IConfig::UserKey:          /* --user */
+    case IConfig::RotationKey:      /* --rotation */
        return transformBenchmark(doc, key, arg);
 #   endif

@@ -354,6 +359,9 @@ void xmrig::ConfigTransform::transformBenchmark(rapidjson::Document &doc, int ke
    case IConfig::UserKey: /* --user */
        return set(doc, BenchConfig::kBenchmark, BenchConfig::kUser, arg);

+    case IConfig::RotationKey: /* --rotation */
+        return set(doc, BenchConfig::kBenchmark, BenchConfig::kRotation, arg);
+
    default:
        break;
    }
--- a/src/core/config/Config_platform.h
+++ b/src/core/config/Config_platform.h
@@ -71,6 +71,7 @@ static const option options[] = {
    { "hugepage-size",         1, nullptr, IConfig::HugePageSizeKey       },
    { "huge-pages-jit",        0, nullptr, IConfig::HugePagesJitKey       },
    { "hugepages-jit",         0, nullptr, IConfig::HugePagesJitKey       },
+    { "rotation",              1, nullptr, IConfig::RotationKey           },
    { "pass",                  1, nullptr, IConfig::PasswordKey           },
    { "print-time",            1, nullptr, IConfig::PrintTimeKey          },
    { "retries",               1, nullptr, IConfig::RetriesKey            },
--- a/src/crypto/astrobwt/AstroBWT.cpp
+++ b/src/crypto/astrobwt/AstroBWT.cpp
@@ -86,15 +86,54 @@ static void Salsa20_XORKeyStream_AVX256(const void* key, void* output, size_t si
 }
 #endif

-void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices)
+// Strict "less than" for two packed sort keys.
+// Bits 21 and up are compared first. On a tie the low 21 bits are taken as offsets into v and the
+// byte-swapped 64-bit words at v + offset + 5 decide; identical offsets compare as "not smaller".
+static inline bool smaller(const uint8_t* v, uint64_t a, uint64_t b)
+{
+	const uint64_t hi_a = a >> 21;
+	const uint64_t hi_b = b >> 21;
+
+	if (hi_a != hi_b) {
+		return hi_a < hi_b;
+	}
+
+	const uint64_t offset_mask = (1 << 21) - 1;
+	const uint64_t off_a = a & offset_mask;
+	const uint64_t off_b = b & offset_mask;
+
+	if (off_a == off_b) {
+		return false;
+	}
+
+	const uint64_t tail_a = bswap_64(*reinterpret_cast<const uint64_t*>(v + off_a + 5));
+	const uint64_t tail_b = bswap_64(*reinterpret_cast<const uint64_t*>(v + off_b + 5));
+	return tail_a < tail_b;
+}
+
+void sort_indices(uint32_t N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices)
 {
 	uint32_t counters[2][COUNTING_SORT_SIZE] = {};

-	for (int i = 0; i < N; ++i)
 	{
-		const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i));
-		++counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)];
-		++counters[1][k >> (64 - COUNTING_SORT_BITS)];
+#define ITER(X) \
+		do { \
+			const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i + X)); \
+			++counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]; \
+			++counters[1][k >> (64 - COUNTING_SORT_BITS)]; \
+		} while (0)
+
+		uint32_t i = 0;
+		const uint32_t n = N - 15;
+		for (; i < n; i += 16) {
+			ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
+			ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
+		}
+		for (; i < N; ++i) {
+			ITER(0);
+		}
+
+#undef ITER
 	}

 	uint32_t prev[2] = { counters[0][0], counters[1][0] };
@@ -109,41 +148,47 @@ void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indi
 		prev[1] = cur[1];
 	}

-	for (int i = N - 1; i >= 0; --i)
 	{
-		const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i));
-		tmp_indices[counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]--] = (k & (static_cast<uint64_t>(-1) << 21)) | i;
-	}
+#define ITER(X) \
+		do { \
+			const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + (i - X))); \
+			tmp_indices[counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]--] = (k & (static_cast<uint64_t>(-1) << 21)) | (i - X); \
+		} while (0)

-	for (int i = N - 1; i >= 0; --i)
-	{
-		const uint64_t data = tmp_indices[i];
-		indices[counters[1][data >> (64 - COUNTING_SORT_BITS)]--] = data;
-	}
-
-	auto smaller = [v](uint64_t a, uint64_t b)
-	{
-		const uint64_t value_a = a >> 21;
-		const uint64_t value_b = b >> 21;
-
-		if (value_a < value_b) {
-			return true;
+		uint32_t i = N;
+		for (; i >= 8; i -= 8) {
+			ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
+		}
+		for (; i > 0; --i) {
+			ITER(1);
 		}

-		if (value_a > value_b) {
-			return false;
+#undef ITER
+	}
+
+	{
+#define ITER(X) \
+		do { \
+			const uint64_t data = tmp_indices[i - X]; \
+			indices[counters[1][data >> (64 - COUNTING_SORT_BITS)]--] = data; \
+		} while (0)
+
+		uint32_t i = N;
+		for (; i >= 8; i -= 8) {
+			ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
+		}
+		for (; i > 0; --i) {
+			ITER(1);
 		}

-		const uint64_t data_a = bswap_64(*reinterpret_cast<const uint64_t*>(v + (a % (1 << 21)) + 5));
-		const uint64_t data_b = bswap_64(*reinterpret_cast<const uint64_t*>(v + (b % (1 << 21)) + 5));
-		return (data_a < data_b);
-	};
+#undef ITER
+	}

 	uint64_t prev_t = indices[0];
-	for (int i = 1; i < N; ++i)
+	for (uint32_t i = 1; i < N; ++i)
 	{
 		uint64_t t = indices[i];
-		if (smaller(t, prev_t))
+		if (smaller(v, t, prev_t))
 		{
 			const uint64_t t2 = prev_t;
 			int j = i - 1;
@@ -157,7 +202,7 @@ void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indi
 				}

 				prev_t = indices[j];
-			} while (smaller(t, prev_t));
+			} while (smaller(v, t, prev_t));
 			indices[j + 1] = t;
 			t = t2;
 		}
@@ -165,6 +210,144 @@ void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indi
 	}
 }

+// Fills indices[0..N) with one packed 64-bit key per offset i in [0, N) and orders them ascending.
+// Each key is the byte-swapped 64-bit word loaded at v + i with its low 21 bits replaced by i.
+//
+// Steps:
+//   1. histogram of the top COUNTING_SORT_BITS bits of every key;
+//   2. prefix sums turn the histogram into the last slot of each bucket (kept twice: counters is
+//      consumed by the scatter, counters2 preserves the bucket boundaries);
+//   3. scatter of the keys into indices, walking i from N down to 1;
+//   4. per bucket with more than one key: a second counting sort on the next COUNTING_SORT_BITS
+//      bits (using tmp_indices as scratch) followed by an insertion sort driven by smaller().
+//
+// NOTE(review): every load reads 8 bytes at v + i (and smaller() reads 8 bytes at v + i + 5), so v
+// must stay readable past offset N - 1 — confirm the caller pads the buffer.
+// NOTE(review): the offset is stored in the low 21 bits, so this presumably requires N <= (1 << 21).
+// NOTE(review): the unrolled prefix-sum loops step by 16 and 32, so (1 << COUNTING_SORT_BITS) is
+// assumed to be a multiple of 32 — confirm against the COUNTING_SORT_BITS definition.
+void sort_indices2(uint32_t N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices)
+{
+	alignas(16) uint32_t counters[1 << COUNTING_SORT_BITS] = {};
+	alignas(16) uint32_t counters2[1 << COUNTING_SORT_BITS];
+
+	// Step 1: count keys per top-bits bucket (unrolled by 32, remainder handled one at a time).
+	{
+#define ITER(X) { \
+			const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i + X)); \
+			++counters[k >> (64 - COUNTING_SORT_BITS)]; \
+		}
+
+		uint32_t i = 0;
+		const uint32_t n = (N / 32) * 32;
+		for (; i < n; i += 32) {
+			ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
+			ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
+			ITER(16); ITER(17); ITER(18); ITER(19); ITER(20); ITER(21); ITER(22); ITER(23);
+			ITER(24); ITER(25); ITER(26); ITER(27); ITER(28); ITER(29); ITER(30); ITER(31);
+		}
+		for (; i < N; ++i) {
+			ITER(0);
+		}
+
+#undef ITER
+	}
+
+	// Step 2: running sum starting at -1, so each counter becomes the index of its bucket's last slot.
+	uint32_t prev = static_cast<uint32_t>(-1);
+	for (uint32_t i = 0; i < (1 << COUNTING_SORT_BITS); i += 16)
+	{
+#define ITER(X) { \
+			const uint32_t cur = counters[i + X] + prev; \
+			counters[i + X] = cur; \
+			counters2[i + X] = cur; \
+			prev = cur; \
+		}
+		ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
+		ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
+#undef ITER
+	}
+
+	// Step 3: scatter keys into their bucket, filling each bucket from its last slot downwards.
+	// i counts down from N and ITER(X) handles offset i - X, so the loop never evaluates i - X below 0.
+	{
+#define ITER(X) \
+		do { \
+			const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + (i - X))); \
+			indices[counters[k >> (64 - COUNTING_SORT_BITS)]--] = (k & (static_cast<uint64_t>(-1) << 21)) | (i - X); \
+		} while (0)
+
+		uint32_t i = N;
+		for (; i >= 8; i -= 8) {
+			ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
+		}
+		for (; i > 0; --i) {
+			ITER(1);
+		}
+
+#undef ITER
+	}
+
+	// Step 4: refine every bucket [prev_i, i) that holds more than one key.
+	uint32_t prev_i = 0;
+	for (uint32_t i0 = 0; i0 < (1 << COUNTING_SORT_BITS); ++i0) {
+		const uint32_t i = counters2[i0] + 1;
+		const uint32_t n = i - prev_i;
+		if (n > 1) {
+			// counters is reused as the histogram for the second group of COUNTING_SORT_BITS bits.
+			memset(counters, 0, sizeof(uint32_t) * (1 << COUNTING_SORT_BITS));
+
+			const uint32_t n8 = (n / 8) * 8;
+			uint32_t j = 0;
+
+			// Count by the second bit group and copy the bucket into tmp_indices.
+#define ITER(X) { \
+				const uint64_t k = indices[prev_i + j + X]; \
+				++counters[(k >> (64 - COUNTING_SORT_BITS * 2)) & ((1 << COUNTING_SORT_BITS) - 1)]; \
+				tmp_indices[j + X] = k; \
+			}
+			for (; j < n8; j += 8) {
+				ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
+			}
+			for (; j < n; ++j) {
+				ITER(0);
+			}
+#undef ITER
+
+			// Same "last slot" prefix sum as step 2; this prev and the loop's j shadow the outer ones.
+			uint32_t prev = static_cast<uint32_t>(-1);
+			for (uint32_t j = 0; j < (1 << COUNTING_SORT_BITS); j += 32)
+			{
+#define ITER(X) { \
+					const uint32_t cur = counters[j + X] + prev; \
+					counters[j + X] = cur; \
+					prev = cur; \
+				}
+				ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
+				ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
+				ITER(16); ITER(17); ITER(18); ITER(19); ITER(20); ITER(21); ITER(22); ITER(23);
+				ITER(24); ITER(25); ITER(26); ITER(27); ITER(28); ITER(29); ITER(30); ITER(31);
+#undef ITER
+			}
+
+			// Scatter the bucket back from tmp_indices into indices[prev_i ...], walking j from n down to 1.
+#define ITER(X) { \
+				const uint64_t k = tmp_indices[j - X]; \
+				const uint32_t index = counters[(k >> (64 - COUNTING_SORT_BITS * 2)) & ((1 << COUNTING_SORT_BITS) - 1)]--; \
+				indices[prev_i + index] = k; \
+			}
+			for (j = n; j >= 8; j -= 8) {
+				ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
+			}
+			for (; j > 0; --j) {
+				ITER(1);
+			}
+#undef ITER
+
+			// Insertion sort over the bucket using smaller(); prev_t caches the element left of p.
+			uint64_t prev_t = indices[prev_i];
+			for (uint64_t* p = indices + prev_i + 1, *e = indices + i; p != e; ++p)
+			{
+				uint64_t t = *p;
+				if (smaller(v, t, prev_t))
+				{
+					const uint64_t t2 = prev_t;
+					uint64_t* p1 = p;
+					do
+					{
+						*p1 = prev_t;
+						--p1;
+
+						// Stop at the start of the bucket; never look left of indices[prev_i].
+						if (p1 <= indices + prev_i) {
+							break;
+						}
+
+						prev_t = *(p1 - 1);
+					} while (smaller(v, t, prev_t));
+					*p1 = t;
+					t = t2;
+				}
+				prev_t = t;
+			}
+		}
+		prev_i = i;
+	}
+}
+
 bool xmrig::astrobwt::astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size, bool avx2)
 {
 	alignas(8) uint8_t key[32];
@@ -219,7 +402,7 @@ bool xmrig::astrobwt::astrobwt_dero(const void* input_data, uint32_t input_size,
 		Salsa20_XORKeyStream(key, stage2_output, stage2_size);
 	}

-	sort_indices(stage2_size + 1, stage2_output, indices, tmp_indices);
+	sort_indices2(stage2_size + 1, stage2_output, indices, tmp_indices);

 	{
 		const uint8_t* tmp = stage2_output - 1;
--- a/src/crypto/cn/CnAlgo.h
+++ b/src/crypto/cn/CnAlgo.h
@@ -43,6 +43,7 @@ public:
    constexpr inline size_t memory() const       { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::l3(ALGO); }
    constexpr inline uint32_t iterations() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
    constexpr inline uint32_t mask() const       { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); }
+    constexpr inline uint32_t half_mem() const   { return mask() < memory() / 2; }

    inline static uint32_t iterations(Algorithm::Id algo)
    {
@@ -108,6 +109,16 @@ public:
        }
 #       endif

+#       ifdef XMRIG_ALGO_GHOSTRIDER
+        if (algo == Algorithm::CN_GR_1) {
+            return 0x3FFF0;
+        }
+
+        if (algo == Algorithm::CN_GR_5) {
+            return 0x1FFF0;
+        }
+#       endif
+
        return ((Algorithm::l3(algo) - 1) / 16) * 16;
    }

@@ -136,6 +147,18 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() co
 template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const             { return 0x1FFF0; }
 template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const               { return 0x1FFF0; }

+// GhostRider CryptoNight variants: each one runs a fraction of CN_ITER (1/4, 1/4, 1/2, 1/2, 1/8, 1/8
+// for CN_GR_0..CN_GR_5), and CN_GR_1 / CN_GR_5 additionally replace the generic mask() with a fixed constant.
+#ifdef XMRIG_ALGO_GHOSTRIDER
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_0>::iterations() const         { return CN_ITER / 4; }
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_1>::iterations() const         { return CN_ITER / 4; }
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_2>::iterations() const         { return CN_ITER / 2; }
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_3>::iterations() const         { return CN_ITER / 2; }
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_4>::iterations() const         { return CN_ITER / 8; }
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_5>::iterations() const         { return CN_ITER / 8; }
+
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_1>::mask() const               { return 0x3FFF0; }
+template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_5>::mask() const               { return 0x1FFF0; }
+#endif
+

 } /* namespace xmrig */

--- a/src/crypto/cn/CnHash.cpp
+++ b/src/crypto/cn/CnHash.cpp
@@ -310,12 +310,29 @@ xmrig::CnHash::CnHash()
    m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
 #   endif

+#   ifdef XMRIG_ALGO_GHOSTRIDER
+    ADD_FN(Algorithm::CN_GR_0);
+    ADD_FN(Algorithm::CN_GR_1);
+    ADD_FN(Algorithm::CN_GR_2);
+    ADD_FN(Algorithm::CN_GR_3);
+    ADD_FN(Algorithm::CN_GR_4);
+    ADD_FN(Algorithm::CN_GR_5);
+#   endif
+
 #   ifdef XMRIG_FEATURE_ASM
    patchAsmVariants();
 #   endif
 }


+// Releases every value pointer held in m_map.
+xmrig::CnHash::~CnHash()
+{
+    for (const auto &entry : m_map) {
+        delete entry.second;
+    }
+}
+
+
 xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av, Assembly::Id assembly)
 {
    assert(cnHash.m_map.count(algorithm));
--- a/src/crypto/cn/CnHash.h
+++ b/src/crypto/cn/CnHash.h
@@ -59,6 +59,7 @@ public:
    };

    CnHash();
+    virtual ~CnHash();

    static cn_hash_fun fn(const Algorithm &algorithm, AlgoVariant av, Assembly::Id assembly);

--- a/src/crypto/cn/CryptoNight.h
+++ b/src/crypto/cn/CryptoNight.h
@@ -58,6 +58,9 @@ struct cryptonight_ctx {

    cn_mainloop_fun_ms_abi generated_code;
    cryptonight_r_data generated_code_data;
+
+    alignas(16) uint8_t save_state[128];
+    bool first_half;
 };


--- a/src/crypto/cn/CryptoNight_arm.h
+++ b/src/crypto/cn/CryptoNight_arm.h
@@ -349,6 +349,9 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 }


+alignas(64) static const uint32_t tweak1_table[256] = { 268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,26843
5456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456 };
+
+
 namespace xmrig {


@@ -368,12 +371,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1

        uint64_t vh = vgetq_lane_u64(tmp, 1);

-        uint8_t x = vh >> 24;
-        static const uint16_t table = 0x7531;
-        const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
-        vh ^= ((table >> index) & 0x3) << 28;
-
-        mem_out[1] = vh;
+        mem_out[1] = vh ^ tweak1_table[static_cast<uint8_t>(vh >> 24)];
    }
 }

--- a/src/crypto/cn/CryptoNight_test.h
+++ b/src/crypto/cn/CryptoNight_test.h
@@ -100,7 +100,7 @@ const static uint8_t test_output_r[] = {


 // "cn/0"
-const static uint8_t test_output_v0[160] = {
+const static uint8_t test_output_v0[256] = {
    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
    0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
@@ -115,7 +115,7 @@ const static uint8_t test_output_v0[160] = {


 // "cn/1" Cryptonight variant 1 (Monero v7)
-const static uint8_t test_output_v1[160] = {
+const static uint8_t test_output_v1[256] = {
    0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
    0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
    0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
@@ -130,7 +130,7 @@ const static uint8_t test_output_v1[160] = {


 // "cn/2" Cryptonight variant 2 (Monero v8)
-const static uint8_t test_output_v2[160] = {
+const static uint8_t test_output_v2[256] = {
    0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
    0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
    0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
@@ -145,7 +145,7 @@ const static uint8_t test_output_v2[160] = {


 // "cn/half"
-const static uint8_t test_output_half[160] = {
+const static uint8_t test_output_half[256] = {
    0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD,
    0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7,
    0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B,
@@ -160,7 +160,7 @@ const static uint8_t test_output_half[160] = {


 // "cn/msr" Masari (MSR)
-const static uint8_t test_output_msr[160] = {
+const static uint8_t test_output_msr[256] = {
    0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C,
    0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5,
    0x69, 0xDF, 0x38, 0x51, 0x1B, 0xB3, 0xEB, 0x7D, 0xE7, 0x6B, 0x08, 0x8E, 0xB6, 0x7E, 0xB7, 0x1C,
@@ -175,7 +175,7 @@ const static uint8_t test_output_msr[160] = {


 // "cn/xao" Alloy (XAO)
-const static uint8_t test_output_xao[160] = {
+const static uint8_t test_output_xao[256] = {
    0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C,
    0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33,
    0xF1, 0xC9, 0x98, 0x40, 0x42, 0xDE, 0x39, 0xD1, 0xBA, 0x2D, 0xAD, 0xEC, 0xFE, 0xEA, 0xD8, 0x46,
@@ -190,7 +190,7 @@ const static uint8_t test_output_xao[160] = {


 // "cn/rto" Arto (RTO)
-const static uint8_t test_output_rto[160] = {
+const static uint8_t test_output_rto[256] = {
    0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19,
    0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5,
    0xB3, 0xFB, 0xF4, 0x3F, 0x2B, 0x6A, 0x3A, 0x39, 0xA3, 0x6E, 0x08, 0x33, 0x67, 0x90, 0x31, 0xB9,
@@ -204,7 +204,7 @@ const static uint8_t test_output_rto[160] = {
 };

 // "cn/rwz"
-const static uint8_t test_output_rwz[160] = {
+const static uint8_t test_output_rwz[256] = {
    0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
    0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
    0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
@@ -218,7 +218,7 @@ const static uint8_t test_output_rwz[160] = {
 };

 // "cn/zls"
-const static uint8_t test_output_zls[160] = {
+const static uint8_t test_output_zls[256] = {
    0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
    0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2,
    0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80,
@@ -232,7 +232,7 @@ const static uint8_t test_output_zls[160] = {
 };

 // "cn/ccx"
-const static uint8_t test_output_ccx[160] = {
+const static uint8_t test_output_ccx[256] = {
    0xB3, 0xA1, 0x67, 0x86, 0xD2, 0xC9, 0x85, 0xEC, 0xAD, 0xC4, 0x5F, 0x91, 0x05, 0x27, 0xC7, 0xA1,
    0x96, 0xF0, 0xE1, 0xE9, 0x7C, 0x87, 0x09, 0x38, 0x1D, 0x7D, 0x41, 0x93, 0x35, 0xF8, 0x16, 0x72,
    0xC3, 0xBD, 0x8D, 0xE8, 0xD5, 0xAE, 0xB8, 0x59, 0x0A, 0x6C, 0xCB, 0x7B, 0x41, 0x30, 0xF7, 0x04,
@@ -246,7 +246,7 @@ const static uint8_t test_output_ccx[160] = {
 };

 // "cn/double"
-const static uint8_t test_output_double[160] = {
+const static uint8_t test_output_double[256] = {
    0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
    0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21,
    0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8,
@@ -261,7 +261,7 @@ const static uint8_t test_output_double[160] = {

 #ifdef XMRIG_ALGO_CN_LITE
 // "cn-lite/0"
-const static uint8_t test_output_v0_lite[160] = {
+const static uint8_t test_output_v0_lite[256] = {
    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
    0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
@@ -276,7 +276,7 @@ const static uint8_t test_output_v0_lite[160] = {


 // "cn-lite/1" AEON v7
-const static uint8_t test_output_v1_lite[160] = {
+const static uint8_t test_output_v1_lite[256] = {
    0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
    0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
    0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
@@ -293,7 +293,7 @@ const static uint8_t test_output_v1_lite[160] = {

 #ifdef XMRIG_ALGO_CN_HEAVY
 // "cn-heavy/0"
-const static uint8_t test_output_v0_heavy[160] = {
+const static uint8_t test_output_v0_heavy[256] = {
    0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
    0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
    0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
@@ -308,7 +308,7 @@ const static uint8_t test_output_v0_heavy[160] = {


 // "cn-heavy/xhv"
-const static uint8_t test_output_xhv_heavy[160] = {
+const static uint8_t test_output_xhv_heavy[256] = {
    0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57,
    0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6,
    0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F,
@@ -323,7 +323,7 @@ const static uint8_t test_output_xhv_heavy[160] = {


 // "cn-heavy/tube"
-const static uint8_t test_output_tube_heavy[160] = {
+const static uint8_t test_output_tube_heavy[256] = {
    0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF,
    0xC3, 0x12, 0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35,
    0xCD, 0x3F, 0x29, 0xDF, 0x07, 0x4A, 0x14, 0xAD, 0x0B, 0x98, 0x99, 0x37, 0xCA, 0x14, 0x68, 0xA3,
@@ -340,7 +340,7 @@ const static uint8_t test_output_tube_heavy[160] = {

 #ifdef XMRIG_ALGO_CN_PICO
 // "cn-pico/trtl"
-const static uint8_t test_output_pico_trtl[160] = {
+const static uint8_t test_output_pico_trtl[256] = {
    0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69,
    0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF,
    0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E,
@@ -355,7 +355,7 @@ const static uint8_t test_output_pico_trtl[160] = {


 // "cn-pico/tlo"
-const static uint8_t test_output_pico_tlo[160] = {
+const static uint8_t test_output_pico_tlo[256] = {
    0x99, 0x75, 0xF2, 0xC1, 0xB3, 0xB4, 0x54, 0x34, 0xA4, 0x93, 0x86, 0x21, 0x30, 0x97, 0xF3, 0x1B,
    0xB4, 0xB9, 0xA6, 0x58, 0x6A, 0x7E, 0x81, 0xF4, 0x42, 0x9F, 0x6D, 0x5F, 0x65, 0xC3, 0x8D, 0x1A,
    0xFC, 0x67, 0xDF, 0xCC, 0xB5, 0xFC, 0x90, 0xD7, 0x85, 0x5A, 0xE9, 0x03, 0x36, 0x1E, 0xAB, 0xD7,
@@ -372,7 +372,7 @@ const static uint8_t test_output_pico_tlo[160] = {

 #ifdef XMRIG_ALGO_CN_FEMTO
 // "cn/upx2"
-const static uint8_t test_output_femto_upx2[160] = {
+const static uint8_t test_output_femto_upx2[256] = {
    0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0,
    0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61,
    0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22,
@@ -389,7 +389,7 @@ const static uint8_t test_output_femto_upx2[160] = {

 #ifdef XMRIG_ALGO_ARGON2
 // "argon2/chukwa"
-const static uint8_t argon2_chukwa_test_out[160] = {
+const static uint8_t argon2_chukwa_test_out[256] = {
    0xC1, 0x58, 0xA1, 0x05, 0xAE, 0x75, 0xC7, 0x56, 0x1C, 0xFD, 0x02, 0x90, 0x83, 0xA4, 0x7A, 0x87,
    0x65, 0x3D, 0x51, 0xF9, 0x14, 0x12, 0x8E, 0x21, 0xC1, 0x97, 0x1D, 0x8B, 0x10, 0xC4, 0x90, 0x34,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -403,7 +403,7 @@ const static uint8_t argon2_chukwa_test_out[160] = {
 };

 // "argon2/chukwav2"
-const static uint8_t argon2_chukwa_v2_test_out[160] = {
+const static uint8_t argon2_chukwa_v2_test_out[256] = {
    0x77, 0xCF, 0x69, 0x58, 0xB3, 0x53, 0x6E, 0x1F, 0x9F, 0x0D, 0x1E, 0xA1, 0x65, 0xF2, 0x28, 0x11,
    0xCA, 0x7B, 0xC4, 0x87, 0xEA, 0x9F, 0x52, 0x03, 0x0B, 0x50, 0x50, 0xC1, 0x7F, 0xCD, 0xD8, 0xF5,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -417,7 +417,7 @@ const static uint8_t argon2_chukwa_v2_test_out[160] = {
 };

 // "argon2/wrkz"
-const static uint8_t argon2_wrkz_test_out[160] = {
+const static uint8_t argon2_wrkz_test_out[256] = {
    0x35, 0xE0, 0x83, 0xD4, 0xB9, 0xC6, 0x4C, 0x2A, 0x68, 0x82, 0x0A, 0x43, 0x1F, 0x61, 0x31, 0x19,
    0x98, 0xA8, 0xCD, 0x18, 0x64, 0xDB, 0xA4, 0x07, 0x7E, 0x25, 0xB7, 0xF1, 0x21, 0xD5, 0x4B, 0xD1,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -434,7 +434,7 @@ const static uint8_t argon2_wrkz_test_out[160] = {

 #ifdef XMRIG_ALGO_ASTROBWT
 // "astrobwt"
-const static uint8_t astrobwt_dero_test_out[160] = {
+const static uint8_t astrobwt_dero_test_out[256] = {
    0x7E, 0x88, 0x44, 0xF2, 0xD6, 0xB7, 0xA4, 0x34, 0x98, 0xFE, 0x6D, 0x22, 0x65, 0x27, 0x68, 0x90,
    0x23, 0xDA, 0x8A, 0x52, 0xF9, 0xFC, 0x4E, 0xC6, 0x9E, 0x5A, 0xAA, 0xA6, 0x3E, 0xDC, 0xE1, 0xC1,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--- a/src/crypto/cn/CryptoNight_x86.h
+++ b/src/crypto/cn/CryptoNight_x86.h
@@ -285,23 +285,41 @@ inline constexpr uint64_t interleaved_index<0>(uint64_t k)


 template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
-static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
+static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
 {
    constexpr CnAlgo<ALGO> props;

+    constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
+
    __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

+    const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
+    __m128i* output = reinterpret_cast<__m128i*>(ctx->memory);
+
    aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

-    xin0 = _mm_load_si128(input + 4);
-    xin1 = _mm_load_si128(input + 5);
-    xin2 = _mm_load_si128(input + 6);
-    xin3 = _mm_load_si128(input + 7);
-    xin4 = _mm_load_si128(input + 8);
-    xin5 = _mm_load_si128(input + 9);
-    xin6 = _mm_load_si128(input + 10);
-    xin7 = _mm_load_si128(input + 11);
+    if (props.half_mem() && !ctx->first_half) {
+        const __m128i* p = reinterpret_cast<const __m128i*>(ctx->save_state);
+        xin0 = _mm_load_si128(p + 0);
+        xin1 = _mm_load_si128(p + 1);
+        xin2 = _mm_load_si128(p + 2);
+        xin3 = _mm_load_si128(p + 3);
+        xin4 = _mm_load_si128(p + 4);
+        xin5 = _mm_load_si128(p + 5);
+        xin6 = _mm_load_si128(p + 6);
+        xin7 = _mm_load_si128(p + 7);
+    }
+    else {
+        xin0 = _mm_load_si128(input + 4);
+        xin1 = _mm_load_si128(input + 5);
+        xin2 = _mm_load_si128(input + 6);
+        xin3 = _mm_load_si128(input + 7);
+        xin4 = _mm_load_si128(input + 8);
+        xin5 = _mm_load_si128(input + 9);
+        xin6 = _mm_load_si128(input + 10);
+        xin7 = _mm_load_si128(input + 11);
+    }

    if (props.isHeavy()) {
        for (size_t i = 0; i < 16; i++) {
@@ -320,50 +338,73 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
        }
    }

-    for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
-        if (interleave > 0) {
-            _mm_prefetch((const char*)(output), _MM_HINT_T0);
-            _mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
-        }
+    constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
+    constexpr int prefetch_dist = 2048 / sizeof(__m128i);

-        aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+    __m128i* e = output + N - prefetch_dist;
+    __m128i* prefetch_ptr = output + prefetch_dist;

-        _mm_store_si128(output + 0, xin0);
-        _mm_store_si128(output + 1, xin1);
-        _mm_store_si128(output + 2, xin2);
-        _mm_store_si128(output + 3, xin3);
+    for (int i = 0; i < 2; ++i) {
+        do {
+            _mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
+            _mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);

-        constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
+            aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

-        _mm_store_si128(output + output_increment + 0, xin4);
-        _mm_store_si128(output + output_increment + 1, xin5);
-        _mm_store_si128(output + output_increment + 2, xin6);
-        _mm_store_si128(output + output_increment + 3, xin7);
+            _mm_store_si128(output + 0, xin0);
+            _mm_store_si128(output + 1, xin1);
+            _mm_store_si128(output + 2, xin2);
+            _mm_store_si128(output + 3, xin3);

-        output += output_increment * 2;
+            _mm_store_si128(output + output_increment + 0, xin4);
+            _mm_store_si128(output + output_increment + 1, xin5);
+            _mm_store_si128(output + output_increment + 2, xin6);
+            _mm_store_si128(output + output_increment + 3, xin7);
+
+            output += output_increment * 2;
+            prefetch_ptr += output_increment * 2;
+        } while (output < e);
+        e += prefetch_dist;
+        prefetch_ptr = output;
+    }
+
+    if (props.half_mem() && ctx->first_half) {
+         __m128i* p = reinterpret_cast<__m128i*>(ctx->save_state);
+        _mm_store_si128(p + 0, xin0);
+        _mm_store_si128(p + 1, xin1);
+        _mm_store_si128(p + 2, xin2);
+        _mm_store_si128(p + 3, xin3);
+        _mm_store_si128(p + 4, xin4);
+        _mm_store_si128(p + 5, xin5);
+        _mm_store_si128(p + 6, xin6);
+        _mm_store_si128(p + 7, xin7);
    }
 }


 template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
-static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
+static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
 {
    constexpr CnAlgo<ALGO> props;

    constexpr bool IS_HEAVY = props.isHeavy();
+    constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);

    __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

+    const __m128i *input = reinterpret_cast<const __m128i*>(ctx->memory);
+    __m128i *output = reinterpret_cast<__m128i*>(ctx->state);
+
    aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    xout0 = _mm_load_si128(output + 4);
@@ -376,46 +417,54 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
    xout7 = _mm_load_si128(output + 11);

    const __m128i* input_begin = input;
-    for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
-        xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
-        xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
-        xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
-        xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
-
-        constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
-
-        xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
-        xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
-        xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
-        xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
-
-        input += input_increment * 2;
-        i += 8;
-
-        if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
-            _mm_prefetch((const char*)(input), _MM_HINT_T0);
-            _mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
+    for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
+        if (props.half_mem() && (part == 1)) {
+            input = input_begin;
+            ctx->first_half = false;
+            cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(ctx);
        }

-        aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+        for (size_t i = 0; i < N;) {
+            xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
+            xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
+            xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
+            xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);

-        if (IS_HEAVY) {
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
+
+            xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
+            xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
+            xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
+            xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
+
+            input += input_increment * 2;
+            i += 8;
+
+            if (i < N) {
+                _mm_prefetch((const char*)(input), _MM_HINT_T0);
+                _mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
+            }
+
+            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+
+            if (IS_HEAVY) {
+                mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            }
        }
    }

    if (IS_HEAVY) {
        input = input_begin;
-        for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
+        for (size_t i = 0; i < N;) {
            xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
            xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
            xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
@@ -523,6 +572,9 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
 void v4_soft_aes_compile_code(const V4_Instruction *code, int code_size, void *machine_code, xmrig::Assembly ASM);


+alignas(64) static const uint32_t tweak1_table[256] = { 268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,26843
5456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456 };
+
+
 namespace xmrig {


@@ -541,12 +593,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
        tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
        uint64_t vh = _mm_cvtsi128_si64(tmp);

-        uint8_t x = static_cast<uint8_t>(vh >> 24);
-        static const uint16_t table = 0x7531;
-        const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
-        vh ^= ((table >> index) & 0x3) << 28;
-
-        mem_out[1] = vh;
+        mem_out[1] = vh ^ tweak1_table[static_cast<uint32_t>(vh) >> 24];
    }
 }

@@ -587,7 +634,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
    }

    keccak(input, size, ctx[0]->state);
-    cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
+
+    if (props.half_mem()) {
+        ctx[0]->first_half = true;
+    }
+    cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(ctx[0]);

    uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
    uint8_t *l0   = ctx[0]->memory;
@@ -742,7 +793,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
    }
 #   endif

-    cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
+    cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(ctx[0]);
    keccakf(h0, 24);
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
 }
@@ -833,7 +884,11 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
    }

    keccak(input, size, ctx[0]->state);
-    cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
+
+    if (props.half_mem()) {
+        ctx[0]->first_half = true;
+    }
+    cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);

    if (ALGO == Algorithm::CN_2) {
        if (ASM == Assembly::INTEL) {
@@ -915,7 +970,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
        ctx[0]->generated_code(ctx);
    }

-    cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
+    cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
    keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
 }
@@ -937,8 +992,12 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
    keccak(input,        size, ctx[0]->state);
    keccak(input + size, size, ctx[1]->state);

-    cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
-    cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
+    if (props.half_mem()) {
+        ctx[0]->first_half = true;
+        ctx[1]->first_half = true;
+    }
+    cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
+    cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);

    if (ALGO == Algorithm::CN_2) {
        cnv2_double_mainloop_sandybridge_asm(ctx);
@@ -977,8 +1036,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
        ctx[0]->generated_code(ctx);
    }

-    cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
-    cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
+    cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
+    cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);

    keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
    keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@@ -1029,8 +1088,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
    VARIANT4_RANDOM_MATH_INIT(0);
    VARIANT4_RANDOM_MATH_INIT(1);

-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
+    if (props.half_mem()) {
+        ctx[0]->first_half = true;
+        ctx[1]->first_half = true;
+    }
+    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);

    uint64_t al0 = h0[0] ^ h0[4];
    uint64_t al1 = h1[0] ^ h1[4];
@@ -1225,8 +1288,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        bx10 = cx1;
    }

-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
+    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);

    keccakf(h0, 24);
    keccakf(h1, 24);
@@ -1236,6 +1299,198 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
 }


+static inline void cryptonight_monero_tweak_gr(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i cx)
+{
+    // Stores bx0 ^ cx at mem_out[0..1]; l, idx and ax0 are not used by this body.
+    const __m128i mixed = _mm_xor_si128(bx0, cx);
+    const __m128i upper = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(mixed), _mm_castsi128_ps(mixed)));
+    const uint64_t high = _mm_cvtsi128_si64(upper);
+    // Low qword goes out unchanged; the high qword is XORed with the table entry picked by its bits 24..31.
+    mem_out[0] = _mm_cvtsi128_si64(mixed);
+    mem_out[1] = high ^ tweak1_table[static_cast<uint32_t>(high) >> 24];
+}
+
+// Hashes four inputs in one pass: input i is read at input + size * i and its 32-byte result is written at output + 32 * i. Dispatched from cryptonight_quad_hash for CN_GR_* algorithms on Zen..Zen3 CPUs; `height` is not referenced in this body.
+template<Algorithm::Id ALGO, bool SOFT_AES>
+void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
+{
+    constexpr CnAlgo<ALGO> props;
+    constexpr size_t MASK = props.mask();
+    constexpr Algorithm::Id BASE = props.base();
+    // CN_1-based algorithms reject inputs shorter than 43 bytes: report all four 32-byte hashes as zero (4 * 32 bytes, not just the first two).
+    if (BASE == Algorithm::CN_1 && size < 43) {
+        memset(output, 0, 32 * 4);
+        return;
+    }
+
+    keccak(input + size * 0, size, ctx[0]->state);
+    keccak(input + size * 1, size, ctx[1]->state);
+    keccak(input + size * 2, size, ctx[2]->state);
+    keccak(input + size * 3, size, ctx[3]->state);
+
+    uint8_t* l0 = ctx[0]->memory;
+    uint8_t* l1 = ctx[1]->memory;
+    uint8_t* l2 = ctx[2]->memory;
+    uint8_t* l3 = ctx[3]->memory;
+
+    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
+    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
+    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
+    uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
+
+    VARIANT1_INIT(0);
+    VARIANT1_INIT(1);
+    VARIANT1_INIT(2);
+    VARIANT1_INIT(3);
+    // Half-memory algorithms: flag every context as being on the first scratchpad half before it is exploded.
+    if (props.half_mem()) {
+        ctx[0]->first_half = true;
+        ctx[1]->first_half = true;
+        ctx[2]->first_half = true;
+        ctx[3]->first_half = true;
+    }
+
+    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
+    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+
+    uint64_t al0 = h0[0] ^ h0[4];
+    uint64_t al1 = h1[0] ^ h1[4];
+    uint64_t al2 = h2[0] ^ h2[4];
+    uint64_t al3 = h3[0] ^ h3[4];
+
+    uint64_t ah0 = h0[1] ^ h0[5];
+    uint64_t ah1 = h1[1] ^ h1[5];
+    uint64_t ah2 = h2[1] ^ h2[5];
+    uint64_t ah3 = h3[1] ^ h3[5];
+
+    __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+    __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+    __m128i bx20 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+    __m128i bx30 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+
+    uint64_t idx0 = al0;
+    uint64_t idx1 = al1;
+    uint64_t idx2 = al2;
+    uint64_t idx3 = al3;
+
+    __m128i cx0, cx1, cx2, cx3;
+    // Hardware AES path: the first scratchpad line of each lane is loaded ahead of the loop; the soft-AES path reads it inside the loop instead.
+    if (!SOFT_AES) {
+        cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
+        cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
+        cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
+        cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
+    }
+
+    for (size_t i = 0; i < props.iterations(); i++) {
+        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+        const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+        const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+        const __m128i ax3 = _mm_set_epi64x(ah3, al3);
+
+        if (SOFT_AES) {
+            cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
+            cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
+            cx2 = soft_aesenc(&l2[idx2 & MASK], ax2, reinterpret_cast<const uint32_t*>(saes_table));
+            cx3 = soft_aesenc(&l3[idx3 & MASK], ax3, reinterpret_cast<const uint32_t*>(saes_table));
+        }
+        else {
+            cx0 = _mm_aesenc_si128(cx0, ax0);
+            cx1 = _mm_aesenc_si128(cx1, ax1);
+            cx2 = _mm_aesenc_si128(cx2, ax2);
+            cx3 = _mm_aesenc_si128(cx3, ax3);
+            if (MASK > 131072) {
+                _mm_prefetch((const char*)(&l0[_mm_cvtsi128_si32(cx0) & MASK]), _MM_HINT_T0);
+                _mm_prefetch((const char*)(&l1[_mm_cvtsi128_si32(cx1) & MASK]), _MM_HINT_T0);
+                _mm_prefetch((const char*)(&l2[_mm_cvtsi128_si32(cx2) & MASK]), _MM_HINT_T0);
+                _mm_prefetch((const char*)(&l3[_mm_cvtsi128_si32(cx3) & MASK]), _MM_HINT_T0);
+            }
+        }
+        // Write bx ^ cx (with the tweak on its high qword) back to the line that was just read.
+        cryptonight_monero_tweak_gr((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, cx0);
+        cryptonight_monero_tweak_gr((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, cx1);
+        cryptonight_monero_tweak_gr((uint64_t*)&l2[idx2 & MASK], l2, idx2 & MASK, ax2, bx20, cx2);
+        cryptonight_monero_tweak_gr((uint64_t*)&l3[idx3 & MASK], l3, idx3 & MASK, ax3, bx30, cx3);
+
+        idx0 = _mm_cvtsi128_si64(cx0);
+        idx1 = _mm_cvtsi128_si64(cx1);
+        idx2 = _mm_cvtsi128_si64(cx2);
+        idx3 = _mm_cvtsi128_si64(cx3);
+        // Per lane: 64x64 -> 128-bit multiply of idx by the loaded low qword, add into (al, ah), store, then XOR the loaded line into (al, ah).
+        uint64_t hi, lo, cl, ch;
+
+        cl = ((uint64_t*)&l0[idx0 & MASK])[0];
+        ch = ((uint64_t*)&l0[idx0 & MASK])[1];
+        lo = __umul128(idx0, cl, &hi);
+        al0 += hi;
+        ah0 += lo;
+        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
+        ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
+        al0 ^= cl;
+        ah0 ^= ch;
+        idx0 = al0;
+        bx00 = cx0;
+        if (!SOFT_AES) cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
+
+        cl = ((uint64_t*)&l1[idx1 & MASK])[0];
+        ch = ((uint64_t*)&l1[idx1 & MASK])[1];
+        lo = __umul128(idx1, cl, &hi);
+        al1 += hi;
+        ah1 += lo;
+        ((uint64_t*)&l1[idx1 & MASK])[0] = al1;
+        ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
+        al1 ^= cl;
+        ah1 ^= ch;
+        idx1 = al1;
+        bx10 = cx1;
+        if (!SOFT_AES) cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
+
+        cl = ((uint64_t*)&l2[idx2 & MASK])[0];
+        ch = ((uint64_t*)&l2[idx2 & MASK])[1];
+        lo = __umul128(idx2, cl, &hi);
+        al2 += hi;
+        ah2 += lo;
+        ((uint64_t*)&l2[idx2 & MASK])[0] = al2;
+        ((uint64_t*)&l2[idx2 & MASK])[1] = ah2 ^ tweak1_2_2;
+        al2 ^= cl;
+        ah2 ^= ch;
+        idx2 = al2;
+        bx20 = cx2;
+        if (!SOFT_AES) cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
+
+        cl = ((uint64_t*)&l3[idx3 & MASK])[0];
+        ch = ((uint64_t*)&l3[idx3 & MASK])[1];
+        lo = __umul128(idx3, cl, &hi);
+        al3 += hi;
+        ah3 += lo;
+        ((uint64_t*)&l3[idx3 & MASK])[0] = al3;
+        ((uint64_t*)&l3[idx3 & MASK])[1] = ah3 ^ tweak1_2_3;
+        al3 ^= cl;
+        ah3 ^= ch;
+        idx3 = al3;
+        bx30 = cx3;
+        if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
+    }
+
+    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
+    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+
+    keccakf(h0, 24);
+    keccakf(h1, 24);
+    keccakf(h2, 24);
+    keccakf(h3, 24);
+    // The finalizer for each lane is chosen by the low two bits of its state[0]; results land 32 bytes apart in output.
+    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
+    extra_hashes[ctx[2]->state[0] & 3](ctx[2]->state, 200, output + 64);
+    extra_hashes[ctx[3]->state[0] & 3](ctx[3]->state, 200, output + 96);
+}
+
+
 #define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
    c = _mm_load_si128(ptr);                          \
@@ -1371,7 +1626,10 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si

    for (size_t i = 0; i < 3; i++) {
        keccak(input + size * i, size, ctx[i]->state);
-        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
+        if (props.half_mem()) {
+            ctx[i]->first_half = true;
+        }
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
    }

    uint8_t* l0  = ctx[0]->memory;
@@ -1416,7 +1674,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
    }

    for (size_t i = 0; i < 3; i++) {
-        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
@@ -1426,6 +1684,14 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
 template<Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
+    const auto arch = Cpu::info()->arch();
+    if ((arch >= ICpuInfo::ARCH_ZEN) && (arch <= ICpuInfo::ARCH_ZEN3)) {
+        if ((ALGO == Algorithm::CN_GR_0) || (ALGO == Algorithm::CN_GR_1) || (ALGO == Algorithm::CN_GR_2) || (ALGO == Algorithm::CN_GR_3) || (ALGO == Algorithm::CN_GR_4) || (ALGO == Algorithm::CN_GR_5)) {
+            cryptonight_quad_hash_zen<ALGO, SOFT_AES>(input, size, output, ctx, height);
+            return;
+        }
+    }
+
    constexpr CnAlgo<ALGO> props;
    constexpr size_t MASK        = props.mask();
    constexpr Algorithm::Id BASE = props.base();
@@ -1445,7 +1711,10 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size

    for (size_t i = 0; i < 4; i++) {
        keccak(input + size * i, size, ctx[i]->state);
-        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
+        if (props.half_mem()) {
+            ctx[i]->first_half = true;
+        }
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
    }

    uint8_t* l0  = ctx[0]->memory;
@@ -1498,7 +1767,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
    }

    for (size_t i = 0; i < 4; i++) {
-        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
@@ -1527,7 +1796,10 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz

    for (size_t i = 0; i < 5; i++) {
        keccak(input + size * i, size, ctx[i]->state);
-        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
+        if (props.half_mem()) {
+            ctx[i]->first_half = true;
+        }
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
    }

    uint8_t* l0  = ctx[0]->memory;
@@ -1588,7 +1860,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
    }

    for (size_t i = 0; i < 5; i++) {
-        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
--- a/src/crypto/ghostrider/CMakeLists.txt
+++ b/src/crypto/ghostrider/CMakeLists.txt
@@ -0,0 +1,71 @@
+# Builds the GhostRider hash primitives (sph_*.c) and ghostrider.cpp as the
+# static library target `ghostrider`.
+#
+# The <min>...<max> form keeps CMake 2.8.12 as the minimum while letting newer
+# releases apply policies up to 3.10; CMake 4.0 removed compatibility with
+# policy versions older than 3.5, so a bare 2.8.12 fails to configure there.
+cmake_minimum_required(VERSION 2.8.12...3.10)
+project(GhostRider)
+
+set(HEADERS
+    sph_types.h
+    sph_blake.h
+    sph_bmw.h
+    sph_cubehash.h
+    sph_echo.h
+    sph_fugue.h
+    sph_groestl.h
+    sph_hamsi.h
+    sph_jh.h
+    sph_keccak.h
+    sph_luffa.h
+    sph_sha2.h
+    sph_shabal.h
+    sph_shavite.h
+    sph_simd.h
+    sph_skein.h
+    sph_whirlpool.h
+    ghostrider.h
+)
+
+set(SOURCES
+    sph_blake.c
+    sph_bmw.c
+    sph_cubehash.c
+    sph_echo.c
+    sph_fugue.c
+    sph_groestl.c
+    sph_hamsi.c
+    sph_jh.c
+    sph_keccak.c
+    sph_luffa.c
+    sph_shabal.c
+    sph_shavite.c
+    sph_simd.c
+    sph_sha2.c
+    sph_skein.c
+    sph_whirlpool.c
+    ghostrider.cpp
+)
+
+# Per-file flag overrides, applied only when the C compiler is GCC.
+if (CMAKE_C_COMPILER_ID MATCHES GNU)
+    # gcc 11.2.0 crashes with -ftree-vrp
+    set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vrp")
+
+    # gcc 11.2.0 creates incorrect code with -O3
+    set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-O2")
+
+    # suppress GCC's unused-const-variable warning for this file
+    set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Ofast -Wno-unused-const-variable")
+endif()
+
+add_library(ghostrider STATIC ${HEADERS} ${SOURCES})
+
+# Include paths are attached to the target rather than the whole directory:
+# this directory, the directory two levels up, and the path in UV_INCLUDE_DIR.
+target_include_directories(ghostrider PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../.."
+    ${UV_INCLUDE_DIR}
+)
--- a/src/crypto/ghostrider/README.md
+++ b/src/crypto/ghostrider/README.md
@@ -0,0 +1,39 @@
+# GhostRider (Raptoreum) release notes
+
+**XMRig** supports the GhostRider algorithm starting from version **v6.16.0**.
+
+No tuning is required - auto-config works well on most CPUs!
+
+**Note for Windows users: MSVC binary is ~5% faster than GCC binary!**
+
+### Sample command line (non-SSL port)
+```
+xmrig -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS
+```
+
+### Sample command line (SSL port)
+```
+xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS
+```
+
+You can use **rtm_ghostrider_example.cmd** as a template and put the pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).
+
+## Performance
+
+While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.
+
+For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system. Also, this is why it reports using only half the threads at startup - it's actually 2 threads for each reported thread.
+
+**Windows** (detailed results [here](https://imgur.com/a/GCjEWpl))
+CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (MSVC build), h/s|Speedup
+-|-|-|-
+AMD Ryzen 7 4700U|632.6|731|+15.5%
+Intel Core i7-2600|496.4|533.6|+7.5%
+AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2469.1|+0.65%
+AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2221.2|+5.1%
+
+**Linux** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
+CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (GCC build), h/s|Speedup
+-|-|-|-
+AMD Ryzen 9 3900X|3746.51|3604.89|-3.78%
+2xIntel Xeon E5-2698v3|2563.4|2638.38|+2.925%
--- a/src/crypto/ghostrider/aes_helper.c
+++ b/src/crypto/ghostrider/aes_helper.c
@@ -0,0 +1,392 @@
+/* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */
+/*
+ * AES tables. This file is not meant to be compiled by itself; it
+ * is included by some hash function implementations. It contains
+ * the precomputed tables and helper macros for evaluating an AES
+ * round, optionally with a final XOR with a subkey.
+ *
+ * By default, this file defines the tables and macros for little-endian
+ * processing (i.e. it is assumed that the input bytes have been read
+ * from memory and assembled with the little-endian convention). If
+ * the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value)
+ * when this file is included, then the tables and macros for big-endian
+ * processing are defined instead. The big-endian tables and macros have
+ * names distinct from the little-endian tables and macros, hence it is
+ * possible to have both simultaneously, by including this file twice
+ * (with and without the AES_BIG_ENDIAN macro).
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include "sph_types.h"
+#ifdef __cplusplus
+extern "C"{
+#endif
+#if AES_BIG_ENDIAN
+/* Big-endian variant: AESx() reverses the byte order of each 32-bit table constant. */
+#define AESx(x)   ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
+                  | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
+                  | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
+                  | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
+/* Map the generic table names to _BE names so both endian variants can coexist. */
+#define AES0      AES0_BE
+#define AES1      AES1_BE
+#define AES2      AES2_BE
+#define AES3      AES3_BE
+/* One table-driven AES round: X0..X3 in, subkey K0..K3 XORed in, Y0..Y3 out; Y must not alias X. */
+#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
+		(Y0) = AES0[((X0) >> 24) & 0xFF] \
+			^ AES1[((X1) >> 16) & 0xFF] \
+			^ AES2[((X2) >> 8) & 0xFF] \
+			^ AES3[(X3) & 0xFF] ^ (K0); \
+		(Y1) = AES0[((X1) >> 24) & 0xFF] \
+			^ AES1[((X2) >> 16) & 0xFF] \
+			^ AES2[((X3) >> 8) & 0xFF] \
+			^ AES3[(X0) & 0xFF] ^ (K1); \
+		(Y2) = AES0[((X2) >> 24) & 0xFF] \
+			^ AES1[((X3) >> 16) & 0xFF] \
+			^ AES2[((X0) >> 8) & 0xFF] \
+			^ AES3[(X1) & 0xFF] ^ (K2); \
+		(Y3) = AES0[((X3) >> 24) & 0xFF] \
+			^ AES1[((X0) >> 16) & 0xFF] \
+			^ AES2[((X1) >> 8) & 0xFF] \
+			^ AES3[(X2) & 0xFF] ^ (K3); \
+	} while (0)
+/* Same round with an all-zero subkey, i.e. no key XOR. */
+#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
+	AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
+
+#else
+/* Little-endian variant (the default): table constants are used as written, under _LE names. */
+#define AESx(x)   SPH_C32(x)
+#define AES0      AES0_LE
+#define AES1      AES1_LE
+#define AES2      AES2_LE
+#define AES3      AES3_LE
+/* One table-driven AES round: X0..X3 in, subkey K0..K3 XORed in, Y0..Y3 out; Y must not alias X. */
+#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
+		(Y0) = AES0[(X0) & 0xFF] \
+			^ AES1[((X1) >> 8) & 0xFF] \
+			^ AES2[((X2) >> 16) & 0xFF] \
+			^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
+		(Y1) = AES0[(X1) & 0xFF] \
+			^ AES1[((X2) >> 8) & 0xFF] \
+			^ AES2[((X3) >> 16) & 0xFF] \
+			^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
+		(Y2) = AES0[(X2) & 0xFF] \
+			^ AES1[((X3) >> 8) & 0xFF] \
+			^ AES2[((X0) >> 16) & 0xFF] \
+			^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
+		(Y3) = AES0[(X3) & 0xFF] \
+			^ AES1[((X0) >> 8) & 0xFF] \
+			^ AES2[((X1) >> 16) & 0xFF] \
+			^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
+	} while (0)
+/* Same round with an all-zero subkey, i.e. no key XOR. */
+#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
+	AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
+
+#endif
+
+/*
+ * The AES*[] tables allow us to perform a fast evaluation of an AES
+ * round; table AESi[] combines SubBytes for a byte at row i, and
+ * MixColumns for the column where that byte goes after ShiftRows.
+ */
+
+static const sph_u32 AES0[256] = {
+	AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
+	AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
+	AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
+	AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
+	AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
+	AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
+	AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
+	AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
+	AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
+	AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
+	AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
+	AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
+	AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
+	AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
+	AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
+	AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
+	AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
+	AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
+	AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
+	AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
+	AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
+	AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
+	AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
+	AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
+	AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
+	AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
+	AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
+	AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
+	AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
+	AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
+	AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
+	AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
+	AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
+	AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
+	AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
+	AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
+	AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
+	AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
+	AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
+	AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
+	AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
+	AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
+	AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
+	AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
+	AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
+	AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
+	AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
+	AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
+	AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
+	AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
+	AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
+	AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
+	AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
+	AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
+	AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
+	AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
+	AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
+	AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
+	AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
+	AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
+	AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
+	AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
+	AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
+	AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
+};
+
+static const sph_u32 AES1[256] = {
+	AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
+	AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
+	AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
+	AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
+	AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
+	AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
+	AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
+	AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
+	AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
+	AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
+	AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
+	AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
+	AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
+	AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
+	AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
+	AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
+	AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
+	AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
+	AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
+	AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
+	AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
+	AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
+	AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
+	AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
+	AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
+	AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
+	AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
+	AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
+	AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
+	AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
+	AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
+	AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
+	AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
+	AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
+	AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
+	AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
+	AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
+	AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
+	AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
+	AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
+	AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
+	AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
+	AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
+	AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
+	AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
+	AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
+	AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
+	AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
+	AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
+	AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
+	AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
+	AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
+	AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
+	AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
+	AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
+	AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
+	AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
+	AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
+	AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
+	AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
+	AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
+	AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
+	AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
+	AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
+};
+
+static const sph_u32 AES2[256] = {
+	AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
+	AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
+	AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
+	AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
+	AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
+	AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
+	AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
+	AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
+	AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
+	AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
+	AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
+	AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
+	AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
+	AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
+	AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
+	AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
+	AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
+	AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
+	AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
+	AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
+	AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
+	AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
+	AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
+	AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
+	AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
+	AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
+	AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
+	AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
+	AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
+	AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
+	AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
+	AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
+	AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
+	AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
+	AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
+	AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
+	AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
+	AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
+	AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
+	AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
+	AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
+	AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
+	AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
+	AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
+	AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
+	AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
+	AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
+	AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
+	AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
+	AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
+	AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
+	AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
+	AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
+	AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
+	AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
+	AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
+	AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
+	AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
+	AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
+	AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
+	AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
+	AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
+	AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
+	AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
+};
+
+static const sph_u32 AES3[256] = {
+	AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
+	AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
+	AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
+	AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
+	AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
+	AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
+	AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
+	AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
+	AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
+	AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
+	AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
+	AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
+	AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
+	AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
+	AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
+	AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
+	AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
+	AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
+	AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
+	AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
+	AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
+	AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
+	AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
+	AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
+	AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
+	AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
+	AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
+	AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
+	AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
+	AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
+	AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
+	AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
+	AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
+	AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
+	AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
+	AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
+	AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
+	AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
+	AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
+	AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
+	AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
+	AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
+	AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
+	AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
+	AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
+	AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
+	AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
+	AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
+	AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
+	AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
+	AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
+	AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
+	AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
+	AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
+	AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
+	AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
+	AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
+	AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
+	AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
+	AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
+	AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
+	AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
+	AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
+	AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
+};
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/crypto/ghostrider/ghostrider.cpp
+++ b/src/crypto/ghostrider/ghostrider.cpp
@@ -0,0 +1,830 @@
+/* XMRig
+ * Copyright 2018-2021 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2021 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "ghostrider.h"
+#include "sph_blake.h"
+#include "sph_bmw.h"
+#include "sph_groestl.h"
+#include "sph_jh.h"
+#include "sph_keccak.h"
+#include "sph_skein.h"
+#include "sph_luffa.h"
+#include "sph_cubehash.h"
+#include "sph_shavite.h"
+#include "sph_simd.h"
+#include "sph_echo.h"
+#include "sph_hamsi.h"
+#include "sph_fugue.h"
+#include "sph_shabal.h"
+#include "sph_whirlpool.h"
+
+#include "base/io/log/Log.h"
+#include "base/io/log/Tags.h"
+#include "backend/cpu/Cpu.h"
+#include "crypto/cn/CnHash.h"
+#include "crypto/cn/CnCtx.h"
+#include "crypto/cn/CryptoNight.h"
+#include "crypto/common/VirtualMemory.h"
+
+#include <thread>
+#include <atomic>
+#include <chrono>
+#include <uv.h>
+
+#ifdef XMRIG_FEATURE_HWLOC
+#include "base/kernel/Platform.h"
+#include "backend/cpu/platform/HwlocCpuInfo.h"
+#include <hwloc.h>
+#endif
+
+#if defined(XMRIG_ARM)
+#   include "crypto/cn/sse2neon.h"
+#elif defined(__GNUC__)
+#   include <x86intrin.h>
+#else
+#   include <intrin.h>
+#endif
+
+// Generates a static one-shot wrapper h<i>() around the sphlib function family sph_<x>:
+// it creates a fresh context, feeds `size` bytes from `data` and writes the digest to `output`.
+// NOTE(review): callers in this file reserve 64 bytes per digest - confirm every sph_<x> used
+// here produces at most 64 bytes.
+#define CORE_HASH(i, x) static void h##i(const uint8_t* data, size_t size, uint8_t* output) \
+{ \
+    sph_##x##_context ctx; \
+    sph_##x##_init(&ctx); \
+    sph_##x(&ctx, data, size); \
+    sph_##x##_close(&ctx, output); \
+}
+
+// The numeric id given here is the position of the function in the core_hash[] table below.
+CORE_HASH( 0, blake512   );
+CORE_HASH( 1, bmw512     );
+CORE_HASH( 2, groestl512 );
+CORE_HASH( 3, jh512      );
+CORE_HASH( 4, keccak512  );
+CORE_HASH( 5, skein512   );
+CORE_HASH( 6, luffa512   );
+CORE_HASH( 7, cubehash512);
+CORE_HASH( 8, shavite512 );
+CORE_HASH( 9, simd512    );
+CORE_HASH(10, echo512    );
+CORE_HASH(11, hamsi512   );
+CORE_HASH(12, fugue512   );
+CORE_HASH(13, shabal512  );
+CORE_HASH(14, whirlpool  );
+
+#undef CORE_HASH
+
+// Common signature of the wrappers above and the dispatch table indexed by core hash id (0..14).
+typedef void (*core_hash_func)(const uint8_t* data, size_t size, uint8_t* output);
+static const core_hash_func core_hash[15] = { h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, h14 };
+
+namespace xmrig
+{
+
+
+// The six CryptoNight variants GhostRider can select, indexed by the value produced by
+// select_indices() for the CryptoNight stage. cn_names[] and cn_sizes[] use the same index.
+static constexpr Algorithm::Id cn_hash[6] = {
+    Algorithm::CN_GR_0,
+    Algorithm::CN_GR_1,
+    Algorithm::CN_GR_2,
+    Algorithm::CN_GR_3,
+    Algorithm::CN_GR_4,
+    Algorithm::CN_GR_5,
+};
+
+// Human-readable labels used in log output, in the same order as cn_hash[].
+static constexpr const char* cn_names[6] = {
+    "cn/dark (512 KB)",
+    "cn/dark-lite (256 KB)",
+    "cn/fast (2 MB)",
+    "cn/lite (1 MB)",
+    "cn/turtle (256 KB)",
+    "cn/turtle-lite (128 KB)",
+};
+
+// Distance in bytes between consecutive scratchpads when several hashes of one variant are
+// computed in a single call (see the "Allocate scratchpads" loops in hash_octa).
+// NOTE(review): the two "/ 2" entries imply Algorithm::l3() reports twice the size named in
+// the trailing comment for those variants - confirm against Algorithm::l3().
+static constexpr size_t cn_sizes[6] = {
+    Algorithm::l3(Algorithm::CN_GR_0),     // 512 KB
+    Algorithm::l3(Algorithm::CN_GR_1) / 2, // 256 KB
+    Algorithm::l3(Algorithm::CN_GR_2),     // 2 MB
+    Algorithm::l3(Algorithm::CN_GR_3),     // 1 MB
+    Algorithm::l3(Algorithm::CN_GR_4),     // 256 KB
+    Algorithm::l3(Algorithm::CN_GR_5) / 2, // 128 KB
+};
+
+// Lookup from "hashes per call" (the step value; this file only uses 1, 2 and 4) to the matching
+// CnHash variant, with hardware AES and with software AES. Entry 0 duplicates entry 1 so the
+// step can be used directly as the index.
+static constexpr CnHash::AlgoVariant av_hw_aes[5] = { CnHash::AV_SINGLE, CnHash::AV_SINGLE, CnHash::AV_DOUBLE, CnHash::AV_TRIPLE, CnHash::AV_QUAD };
+static constexpr CnHash::AlgoVariant av_soft_aes[5] = { CnHash::AV_SINGLE_SOFT, CnHash::AV_SINGLE_SOFT, CnHash::AV_DOUBLE_SOFT, CnHash::AV_TRIPLE_SOFT, CnHash::AV_QUAD_SOFT };
+
+/**
+ * Fills `indices` with a permutation of 0..N-1 derived from a 32-byte seed.
+ *
+ * The seed is read as 64 nibbles (low nibble of each byte first). Every nibble is reduced
+ * modulo N and appended to the output the first time its value appears. Values that never
+ * appear are appended afterwards in ascending order.
+ *
+ * @param indices output array receiving the N distinct indices
+ * @param seed    pointer to at least 32 readable bytes
+ */
+template<size_t N>
+static inline void select_indices(uint32_t (&indices)[N], const uint8_t* seed)
+{
+    bool used[N] = {};
+    uint32_t count = 0;
+
+    // Seed-driven part: stops early as soon as all N values have been placed
+    for (uint32_t nibble = 0; (nibble < 64) && (count < N); ++nibble) {
+        const uint8_t byte = seed[nibble >> 1];
+        const uint8_t value = (nibble & 1) ? static_cast<uint8_t>(byte >> 4) : static_cast<uint8_t>(byte & 0xF);
+        const uint8_t candidate = value % N;
+
+        if (used[candidate]) {
+            continue;
+        }
+
+        used[candidate] = true;
+        indices[count++] = candidate;
+    }
+
+    // Deterministic tail: whatever the seed did not pick, in ascending order
+    for (uint32_t i = 0; i < N; ++i) {
+        if (!used[i]) {
+            indices[count++] = i;
+        }
+    }
+}
+
+
+namespace ghostrider
+{
+
+
+#ifdef XMRIG_FEATURE_HWLOC
+
+
+// Best configuration found by benchmark() for one CryptoNight variant (indexed like cn_hash[]).
+// tuneDefault only accepts configurations whose combined scratchpads stay below 8 MB;
+// tune8MB accepts every configuration that was benchmarked.
+static struct AlgoTune
+{
+    double hashrate = 0.0;  // best measured hashrate in h/s, 0 until benchmark() has run
+    uint32_t step = 1;      // hashes computed per CnHash call (used as index into av_hw_aes / av_soft_aes)
+    uint32_t threads = 1;   // 1 = mining thread only, 2 = mining thread plus helper thread
+} tuneDefault[6], tune8MB[6];
+
+
+/**
+ * Background worker that executes small closures on behalf of one mining thread.
+ *
+ * The owner queues work with launch_task() and then spins in wait() until the queue has
+ * been drained. Closures are stored in-place in a fixed buffer (no heap allocation):
+ * at most 4 closures of at most 128 bytes each may be queued between two wait() calls.
+ * The capacity is not checked at run time, so callers must respect it.
+ *
+ * Cross-thread flags are std::atomic with acquire/release ordering, so results written by
+ * a task are visible to the owner once wait() returns.
+ *
+ * The object takes ownership of `cpu_set` and frees it in the destructor.
+ */
+struct HelperThread
+{
+    /**
+     * Starts the worker thread and blocks until it is ready to accept tasks.
+     *
+     * @param cpu_set hwloc bitmap of logical CPUs the worker is bound to (no binding if empty)
+     * @param is8MB   stored in m_is8MB; hash_octa uses it to choose between tune8MB and tuneDefault
+     */
+    HelperThread(hwloc_bitmap_t cpu_set, bool is8MB) : m_cpuSet(cpu_set), m_is8MB(is8MB)
+    {
+        uv_mutex_init(&m_mutex);
+        uv_cond_init(&m_cond);
+
+        m_thread = new std::thread(&HelperThread::run, this);
+
+        // The worker holds m_mutex from the moment it sets m_ready until it blocks in
+        // uv_cond_wait(), so the first launch_task() cannot signal before the worker listens.
+        do {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        } while (!m_ready.load(std::memory_order_acquire));
+    }
+
+    // Owns a thread, a mutex, a condition variable and a bitmap: copying would double-free them.
+    HelperThread(const HelperThread&) = delete;
+    HelperThread& operator=(const HelperThread&) = delete;
+
+    /** Asks the worker to exit, joins it and releases all owned resources. */
+    ~HelperThread()
+    {
+        uv_mutex_lock(&m_mutex);
+        m_finished.store(true, std::memory_order_release);
+        uv_cond_signal(&m_cond);
+        uv_mutex_unlock(&m_mutex);
+
+        m_thread->join();
+        delete m_thread;
+
+        uv_mutex_destroy(&m_mutex);
+        uv_cond_destroy(&m_cond);
+
+        hwloc_bitmap_free(m_cpuSet);
+    }
+
+    /** Type-erased interface of a queued closure. */
+    struct TaskBase
+    {
+        virtual ~TaskBase() {}
+        virtual void run() = 0;
+    };
+
+    /** Concrete closure holder, constructed with placement new inside m_tasks. */
+    template<typename T>
+    struct Task : TaskBase
+    {
+        inline Task(T&& task) : m_task(std::move(task))
+        {
+            static_assert(sizeof(Task) <= 128, "Task struct is too large");
+        }
+
+        // Runs the closure, then destroys itself because nobody else will (placement new storage)
+        void run() override
+        {
+            m_task();
+            this->~Task();
+        }
+
+        T m_task;
+    };
+
+    /**
+     * Queues a closure and wakes the worker. Must be paired with wait() before the objects
+     * captured by reference go out of scope.
+     *
+     * @param task callable taking no arguments; pass it as an rvalue (e.g. a lambda expression)
+     */
+    template<typename T>
+    inline void launch_task(T&& task)
+    {
+        uv_mutex_lock(&m_mutex);
+        const uint32_t slot = m_numTasks.load(std::memory_order_relaxed);
+        new (&m_tasks[slot]) Task<T>(std::move(task));
+        m_numTasks.store(slot + 1, std::memory_order_release);
+        uv_cond_signal(&m_cond);
+        uv_mutex_unlock(&m_mutex);
+    }
+
+    /** Spins until the worker has executed every queued closure. */
+    inline void wait() const
+    {
+        while (m_numTasks.load(std::memory_order_acquire) != 0) {
+            _mm_pause();
+        }
+    }
+
+    /** Worker thread body: binds to m_cpuSet, then executes queued closures until m_finished is set. */
+    void run()
+    {
+        if (hwloc_bitmap_weight(m_cpuSet) > 0) {
+            hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
+            // Prefer a strict binding, fall back to a regular one if it is not supported
+            if (hwloc_set_cpubind(topology, m_cpuSet, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) < 0) {
+                hwloc_set_cpubind(topology, m_cpuSet, HWLOC_CPUBIND_THREAD);
+            }
+        }
+
+        uv_mutex_lock(&m_mutex);
+        m_ready.store(true, std::memory_order_release);
+
+        do {
+            uv_cond_wait(&m_cond, &m_mutex);
+
+            // n can be 0 on a spurious wake-up or when the destructor signalled
+            const uint32_t n = m_numTasks.load(std::memory_order_acquire);
+            if (n > 0) {
+                for (uint32_t i = 0; i < n; ++i) {
+                    reinterpret_cast<TaskBase*>(&m_tasks[i])->run();
+                }
+                // Release store publishes everything the tasks wrote to the thread spinning in wait()
+                m_numTasks.store(0, std::memory_order_release);
+            }
+        } while (!m_finished.load(std::memory_order_acquire));
+
+        uv_mutex_unlock(&m_mutex);
+    }
+
+    uv_mutex_t m_mutex;
+    uv_cond_t m_cond;
+
+    alignas(16) uint8_t m_tasks[4][128] = {};       // in-place storage for queued Task<T> objects
+    std::atomic<uint32_t> m_numTasks{ 0 };          // number of queued closures, reset to 0 by the worker
+    std::atomic<bool> m_ready{ false };             // set by the worker once it holds m_mutex
+    std::atomic<bool> m_finished{ false };          // set by the destructor to stop the worker
+    hwloc_bitmap_t m_cpuSet = {};
+    bool m_is8MB = false;
+
+    std::thread* m_thread = nullptr;
+};
+
+
+/**
+ * One-time GhostRider auto-tuning benchmark (does nothing on ARM builds).
+ *
+ * For each of the six CryptoNight variants it times 1, 2 and 4 hashes per call on one thread,
+ * then the same steps on two threads (the benchmark thread plus a HelperThread pinned to the
+ * closest logical CPU). The fastest configuration per variant is stored in tuneDefault / tune8MB.
+ * Configurations whose combined scratchpads exceed the cache budget derived from the L3 size
+ * and core count are skipped. Only the first call does any work; later calls return immediately.
+ *
+ * The two-thread pass is skipped when hwloc cannot provide a second logical CPU, in which case
+ * the tuning tables only contain single-thread results.
+ */
+void benchmark()
+{
+#ifndef XMRIG_ARM
+    static std::atomic<int> done{ 0 };
+    if (done.exchange(1)) {
+        return;
+    }
+
+    std::thread t([]() {
+        // Try to avoid CPU core 0 because many system threads use it and can interfere
+        uint32_t thread_index1 = (Cpu::info()->threads() > 2) ? 2 : 0;
+
+        hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
+        hwloc_obj_t pu = hwloc_get_pu_obj_by_os_index(topology, thread_index1);
+
+        // Both lookups can fail: the OS index may not exist in this topology, and a machine
+        // with a single logical CPU has no "closest" neighbour. Never dereference pu2 blindly.
+        hwloc_obj_t pu2 = nullptr;
+        const bool has_helper_pu = pu && (hwloc_get_closest_objs(topology, pu, &pu2, 1) > 0) && pu2;
+        uint32_t thread_index2 = has_helper_pu ? pu2->os_index : thread_index1;
+
+        if (thread_index2 < thread_index1) {
+            std::swap(thread_index1, thread_index2);
+        }
+
+        Platform::setThreadAffinity(thread_index1);
+
+        constexpr uint32_t N = 1U << 21;
+
+        VirtualMemory::init(0, N);
+        VirtualMemory* memory = new VirtualMemory(N * 8, true, false, false);
+
+        // 2 MB cache per core by default
+        size_t max_scratchpad_size = 1U << 21;
+
+        if ((Cpu::info()->L3() >> 22) > Cpu::info()->cores()) {
+            // At least 1 core can run with 8 MB cache
+            max_scratchpad_size = 1U << 23;
+        }
+        else if ((Cpu::info()->L3() >> 22) >= Cpu::info()->cores()) {
+            // All cores can run with 4 MB cache
+            max_scratchpad_size = 1U << 22;
+        }
+
+        LOG_VERBOSE("Running GhostRider benchmark on logical CPUs %u and %u (max scratchpad size %zu MB, huge pages %s)", thread_index1, thread_index2, max_scratchpad_size >> 20, memory->isHugePages() ? "on" : "off");
+
+        cryptonight_ctx* ctx[8];
+        CnCtx::create(ctx, memory->scratchpad(), N, 8);
+
+        const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
+
+        // Zero-filled input keeps the benchmark reproducible and avoids reading indeterminate bytes
+        uint8_t buf[80] = {};
+        uint8_t hash[32 * 8];
+
+        LOG_VERBOSE("%24s |  N  | Hashrate", "Algorithm");
+        LOG_VERBOSE("-------------------------|-----|-------------");
+
+        using namespace std::chrono;
+
+        // Pass 1: single thread, `step` hashes per call
+        for (uint32_t algo = 0; algo < 6; ++algo) {
+            for (uint64_t step : { 1, 2, 4}) {
+                const size_t cur_scratchpad_size = cn_sizes[algo] * step;
+                if (cur_scratchpad_size > max_scratchpad_size) {
+                    continue;
+                }
+
+                auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
+
+                const high_resolution_clock::time_point start_time = high_resolution_clock::now();
+
+                // The fastest single iteration is used, which filters out scheduling noise
+                double min_dt = 1e10;
+                for (uint32_t iter = 0;; ++iter) {
+                    const high_resolution_clock::time_point t1 = high_resolution_clock::now();
+
+                    // Stop after 15 milliseconds, but only if at least 10 iterations were done
+                    if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 15)) {
+                        break;
+                    }
+
+                    f(buf, sizeof(buf), hash, ctx, 0);
+
+                    const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
+                    if (dt < min_dt) {
+                        min_dt = dt;
+                    }
+                }
+
+                const double hashrate = step / min_dt;
+                LOG_VERBOSE("%24s | %" PRIu64 "x1 | %.2f h/s", cn_names[algo], step, hashrate);
+
+                if (hashrate > tune8MB[algo].hashrate) {
+                    tune8MB[algo].hashrate = hashrate;
+                    tune8MB[algo].step = static_cast<uint32_t>(step);
+                    tune8MB[algo].threads = 1;
+                }
+
+                if ((cur_scratchpad_size < (1U << 23)) && (hashrate > tuneDefault[algo].hashrate)) {
+                    tuneDefault[algo].hashrate = hashrate;
+                    tuneDefault[algo].step = static_cast<uint32_t>(step);
+                    tuneDefault[algo].threads = 1;
+                }
+            }
+        }
+
+        // Pass 2: two threads, `step` hashes per call on each of them
+        if (has_helper_pu) {
+            hwloc_bitmap_t helper_set = hwloc_bitmap_alloc();
+            hwloc_bitmap_set(helper_set, thread_index2);
+            HelperThread* helper = new HelperThread(helper_set, false);
+
+            for (uint32_t algo = 0; algo < 6; ++algo) {
+                for (uint64_t step : { 1, 2, 4}) {
+                    const size_t cur_scratchpad_size = cn_sizes[algo] * step * 2;
+                    if (cur_scratchpad_size > max_scratchpad_size) {
+                        continue;
+                    }
+
+                    auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
+
+                    const high_resolution_clock::time_point start_time = high_resolution_clock::now();
+
+                    double min_dt = 1e10;
+                    for (uint32_t iter = 0;; ++iter) {
+                        const high_resolution_clock::time_point t1 = high_resolution_clock::now();
+
+                        // Stop after 30 milliseconds, but only if at least 10 iterations were done
+                        if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 30)) {
+                            break;
+                        }
+
+                        // The helper works on the contexts and output slots following the ones used here
+                        helper->launch_task([&f, &buf, &hash, &ctx, &step]() { f(buf, sizeof(buf), hash + step * 32, ctx + step, 0); });
+                        f(buf, sizeof(buf), hash, ctx, 0);
+                        helper->wait();
+
+                        const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
+                        if (dt < min_dt) {
+                            min_dt = dt;
+                        }
+                    }
+
+                    // NOTE(review): 1.0075 looks like an empirical correction for hand-off overhead - origin not visible here
+                    const double hashrate = step * 2.0 / min_dt * 1.0075;
+                    LOG_VERBOSE("%24s | %" PRIu64 "x2 | %.2f h/s", cn_names[algo], step, hashrate);
+
+                    if (hashrate > tune8MB[algo].hashrate) {
+                        tune8MB[algo].hashrate = hashrate;
+                        tune8MB[algo].step = static_cast<uint32_t>(step);
+                        tune8MB[algo].threads = 2;
+                    }
+
+                    if ((cur_scratchpad_size < (1U << 23)) && (hashrate > tuneDefault[algo].hashrate)) {
+                        tuneDefault[algo].hashrate = hashrate;
+                        tuneDefault[algo].step = static_cast<uint32_t>(step);
+                        tuneDefault[algo].threads = 2;
+                    }
+                }
+            }
+
+            // The helper owns helper_set and frees it in its destructor
+            delete helper;
+        }
+
+        CnCtx::release(ctx, 8);
+        delete memory;
+    });
+
+    t.join();
+
+    LOG_VERBOSE("---------------------------------------------");
+    LOG_VERBOSE("|         GhostRider tuning results         |");
+    LOG_VERBOSE("---------------------------------------------");
+
+    for (int algo = 0; algo < 6; ++algo) {
+        LOG_VERBOSE("%24s | %ux%u | %.2f h/s", cn_names[algo], tuneDefault[algo].step, tuneDefault[algo].threads, tuneDefault[algo].hashrate);
+        // Print the 8 MB result only when it differs from the default one
+        if ((tune8MB[algo].step != tuneDefault[algo].step) || (tune8MB[algo].threads != tuneDefault[algo].threads)) {
+            LOG_VERBOSE("%24s | %ux%u | %.2f h/s", cn_names[algo], tune8MB[algo].step, tune8MB[algo].threads, tune8MB[algo].hashrate);
+        }
+    }
+#endif
+}
+
+
+/**
+ * Depth-first search of the hwloc object tree below `obj`.
+ *
+ * Every descendant of the requested type is passed to `lambda`; objects of that type are
+ * not descended into. The search stops as soon as the callback returns true.
+ *
+ * @param obj    root of the subtree to search (the root itself is never tested)
+ * @param type   hwloc object type to look for
+ * @param lambda callback taking hwloc_obj_t and returning true to stop the search
+ * @return true if the callback returned true for some object, false otherwise
+ */
+template <typename func>
+static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambda)
+{
+    for (size_t idx = 0; idx < obj->arity; ++idx) {
+        hwloc_obj_t child = obj->children[idx];
+
+        const bool stop = (child->type == type) ? static_cast<bool>(lambda(child))
+                                                : findByType(child, type, lambda);
+        if (stop) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+
+/**
+ * Creates a helper thread for the mining thread pinned to logical CPU `cpu_index`.
+ *
+ * The helper is bound to logical CPUs that are topologically close to `cpu_index` and are not
+ * used by any mining thread: candidates are searched in the same core first, then in the same
+ * L1, L2 and L3 cache domain.
+ *
+ * @param cpu_index  logical CPU of the mining thread; a negative value means "not pinned"
+ * @param affinities logical CPUs of all mining threads (negative entries are ignored)
+ * @return a new HelperThread (release it with destroy_helper_thread), or nullptr when
+ *         cpu_index is negative, no free logical CPU was found, or the build targets ARM
+ */
+HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities)
+{
+#ifndef XMRIG_ARM
+    hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
+    hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();
+
+    for (int64_t i : affinities) {
+        if (i >= 0) {
+            hwloc_bitmap_set(main_threads_set, i);
+        }
+    }
+
+    if (cpu_index >= 0) {
+        hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
+        hwloc_obj_t root = hwloc_get_root_obj(topology);
+
+        bool is8MB = false;
+
+        // Find the L3 cache that contains cpu_index and decide whether this core is one of
+        // those that can be given the larger cache budget
+        findByType(root, HWLOC_OBJ_L3CACHE, [cpu_index, &is8MB](hwloc_obj_t obj) {
+            if (!hwloc_bitmap_isset(obj->cpuset, cpu_index)) {
+                return false;
+            }
+
+            uint32_t num_cores = 0;
+            findByType(obj, HWLOC_OBJ_CORE, [&num_cores](hwloc_obj_t) { ++num_cores; return false; });
+
+            // With more 4 MB slices than cores, the surplus is assigned to the first cores
+            // of this L3 in enumeration order
+            if ((obj->attr->cache.size >> 22) > num_cores) {
+                uint32_t num_8MB_cores = (obj->attr->cache.size >> 22) - num_cores;
+
+                is8MB = findByType(obj, HWLOC_OBJ_CORE, [cpu_index, &num_8MB_cores](hwloc_obj_t obj2) {
+                    if (num_8MB_cores > 0) {
+                        --num_8MB_cores;
+                        if (hwloc_bitmap_isset(obj2->cpuset, cpu_index)) {
+                            return true;
+                        }
+                    }
+                    return false;
+                });
+            }
+            return true;
+        });
+
+        // Widen the search domain step by step until a logical CPU not used for mining is found
+        for (auto obj_type : { HWLOC_OBJ_CORE, HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L3CACHE }) {
+            findByType(root, obj_type, [cpu_index, helper_cpu_set, main_threads_set](hwloc_obj_t obj) {
+                const hwloc_cpuset_t& s = obj->cpuset;
+                if (hwloc_bitmap_isset(s, cpu_index)) {
+                    hwloc_bitmap_andnot(helper_cpu_set, s, main_threads_set);
+                    if (hwloc_bitmap_weight(helper_cpu_set) > 0) {
+                        return true;
+                    }
+                }
+                return false;
+            });
+
+            if (hwloc_bitmap_weight(helper_cpu_set) > 0) {
+                // helper_cpu_set is handed over to the HelperThread, which frees it on destruction
+                hwloc_bitmap_free(main_threads_set);
+                return new HelperThread(helper_cpu_set, is8MB);
+            }
+        }
+    }
+
+    // No helper was created: both temporary bitmaps are still owned by this function
+    hwloc_bitmap_free(helper_cpu_set);
+    hwloc_bitmap_free(main_threads_set);
+#endif
+
+    return nullptr;
+}
+
+
+// Stops and frees a helper created by create_helper_thread(). Passing nullptr is a no-op.
+void destroy_helper_thread(HelperThread* t)
+{
+    delete t;
+}
+
+
+// Computes 8 GhostRider hashes in one call (hwloc build).
+//
+// data   - 8 consecutive input blobs of `size` bytes each
+// output - receives 8 x 32 bytes, one hash per blob
+// ctx    - 8 CryptoNight contexts; their `memory` pointers are temporarily re-pointed while
+//          hashing and restored before returning
+// helper - optional helper thread; when present and the tuning tables ask for 2 threads,
+//          blobs 4..7 are processed on it while this thread processes blobs 0..3
+//
+// The hash consists of 3 parts, each being 5 core hashes followed by 1 CryptoNight variant.
+// Which functions are used, and in which order, is derived from the seed inside the blob.
+void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
+{
+    enum { N = 8 };
+
+    // Remember the original scratchpad pointers so they can be restored at the end
+    uint8_t* ctx_memory[N];
+    for (size_t i = 0; i < N; ++i) {
+        ctx_memory[i] = ctx[i]->memory;
+    }
+
+    // PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
+    uint32_t core_indices[15];
+    select_indices(core_indices, data + 4);
+
+    // Only the first 3 entries of cn_indices are used below
+    uint32_t cn_indices[6];
+    select_indices(cn_indices, data + 4);
+
+    // Log the CryptoNight selection whenever it changes.
+    // NOTE(review): this static is read and written without synchronization - confirm whether
+    // hash_octa can be called from several mining threads at once.
+    static uint32_t prev_indices[3];
+    if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
+        memcpy(prev_indices, cn_indices, sizeof(prev_indices));
+        for (int i = 0; i < 3; ++i) {
+            LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
+        }
+    }
+
+    const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
+    const AlgoTune* tune = (helper && helper->m_is8MB) ? tune8MB : tuneDefault;
+
+    // Intermediate state: one 64-byte slot per blob
+    uint8_t tmp[64 * N];
+
+    // Fast path: all 3 CryptoNight variants are tuned for 2 threads, so the helper can run the
+    // whole chain for blobs 4..7 as a single task and only one hand-off per call is needed
+    if (helper && (tune[cn_indices[0]].threads == 2) && (tune[cn_indices[1]].threads == 2) && (tune[cn_indices[2]].threads == 2)) {
+        const size_t n = N / 2;
+
+        helper->launch_task([n, av, data, size, &ctx_memory, ctx, &cn_indices, &core_indices, &tmp, output, tune]() {
+            const uint8_t* input = data;
+            size_t input_size = size;
+
+            for (size_t part = 0; part < 3; ++part) {
+                const AlgoTune& t = tune[cn_indices[part]];
+
+                // Allocate scratchpads
+                // Each group of t.step contexts starts again at ctx_memory[4]; within a group the
+                // scratchpads are packed back to back, moving to the next 2 MB block when one is full
+                {
+                    uint8_t* p = ctx_memory[4];
+
+                    for (size_t i = n, k = 4; i < N; ++i) {
+                        if ((i % t.step) == 0) {
+                            k = 4;
+                            p = ctx_memory[4];
+                        }
+                        else if (p - ctx_memory[k] >= (1 << 21)) {
+                            ++k;
+                            p = ctx_memory[k];
+                        }
+                        ctx[i]->memory = p;
+                        p += cn_sizes[cn_indices[part]];
+                    }
+                }
+
+                // 5 core hashes; after the first one the input is the 64-byte state in tmp
+                for (size_t i = 0; i < 5; ++i) {
+                    for (size_t j = n; j < N; ++j) {
+                        core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
+                    }
+                    input = tmp;
+                    input_size = 64;
+                }
+
+                // CryptoNight on t.step blobs per call; every call reuses the contexts starting at ctx[n]
+                auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
+                for (size_t j = n; j < N; j += t.step) {
+                    f(tmp + j * 64, 64, output + j * 32, ctx + n, 0);
+                }
+
+                // The 32-byte result, zero-padded to 64 bytes, is the input of the next part
+                for (size_t j = n; j < N; ++j) {
+                    memcpy(tmp + j * 64, output + j * 32, 32);
+                    memset(tmp + j * 64 + 32, 0, 32);
+                }
+            }
+        });
+
+        // Same chain for blobs 0..3 on the calling thread
+        const uint8_t* input = data;
+        size_t input_size = size;
+
+        for (size_t part = 0; part < 3; ++part) {
+            const AlgoTune& t = tune[cn_indices[part]];
+
+            // Allocate scratchpads
+            {
+                uint8_t* p = ctx_memory[0];
+
+                for (size_t i = 0, k = 0; i < n; ++i) {
+                    if ((i % t.step) == 0) {
+                        k = 0;
+                        p = ctx_memory[0];
+                    }
+                    else if (p - ctx_memory[k] >= (1 << 21)) {
+                        ++k;
+                        p = ctx_memory[k];
+                    }
+                    ctx[i]->memory = p;
+                    p += cn_sizes[cn_indices[part]];
+                }
+            }
+
+            for (size_t i = 0; i < 5; ++i) {
+                for (size_t j = 0; j < n; ++j) {
+                    core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
+                }
+                input = tmp;
+                input_size = 64;
+            }
+
+            auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
+            for (size_t j = 0; j < n; j += t.step) {
+                f(tmp + j * 64, 64, output + j * 32, ctx, 0);
+            }
+
+            for (size_t j = 0; j < n; ++j) {
+                memcpy(tmp + j * 64, output + j * 32, 32);
+                memset(tmp + j * 64 + 32, 0, 32);
+            }
+        }
+
+        // The helper captured locals by reference, so it must finish before they go out of scope
+        helper->wait();
+    }
+    else {
+        // General path: the thread count is decided per part, with one hand-off per 2-thread part
+        for (size_t part = 0; part < 3; ++part) {
+            const AlgoTune& t = tune[cn_indices[part]];
+
+            // Allocate scratchpads
+            {
+                uint8_t* p = ctx_memory[0];
+                const size_t n = N / t.threads;
+
+                // Thread 1
+                for (size_t i = 0, k = 0; i < n; ++i) {
+                    if ((i % t.step) == 0) {
+                        k = 0;
+                        p = ctx_memory[0];
+                    }
+                    else if (p - ctx_memory[k] >= (1 << 21)) {
+                        ++k;
+                        p = ctx_memory[k];
+                    }
+                    ctx[i]->memory = p;
+                    p += cn_sizes[cn_indices[part]];
+                }
+
+                // Thread 2
+                // Runs no iterations when t.threads == 1, because n == N in that case
+                for (size_t i = n, k = 4; i < N; ++i) {
+                    if ((i % t.step) == 0) {
+                        k = 4;
+                        p = ctx_memory[4];
+                    }
+                    else if (p - ctx_memory[k] >= (1 << 21)) {
+                        ++k;
+                        p = ctx_memory[k];
+                    }
+                    ctx[i]->memory = p;
+                    p += cn_sizes[cn_indices[part]];
+                }
+            }
+
+            // Number of blobs handled by the calling thread in this part
+            size_t n = N;
+
+            if (helper && (t.threads == 2)) {
+                n = N / 2;
+
+                // data and size are captured by value: for part > 0 they already refer to tmp / 64
+                helper->launch_task([data, size, n, &cn_indices, &core_indices, part, &tmp, av, &t, output, ctx]() {
+                    const uint8_t* input = data;
+                    size_t input_size = size;
+
+                    for (size_t i = 0; i < 5; ++i) {
+                        for (size_t j = n; j < N; ++j) {
+                            core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
+                        }
+                        input = tmp;
+                        input_size = 64;
+                    }
+
+                    auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
+                    for (size_t j = n; j < N; j += t.step) {
+                        f(tmp + j * 64, 64, output + j * 32, ctx + n, 0);
+                    }
+
+                    for (size_t j = n; j < N; ++j) {
+                        memcpy(tmp + j * 64, output + j * 32, 32);
+                        memset(tmp + j * 64 + 32, 0, 32);
+                    }
+                });
+            }
+
+            // The function parameters themselves are advanced to tmp / 64 after the first core hash
+            for (size_t i = 0; i < 5; ++i) {
+                for (size_t j = 0; j < n; ++j) {
+                    core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
+                }
+                data = tmp;
+                size = 64;
+            }
+
+            auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
+            for (size_t j = 0; j < n; j += t.step) {
+                f(tmp + j * 64, 64, output + j * 32, ctx, 0);
+            }
+
+            for (size_t j = 0; j < n; ++j) {
+                memcpy(tmp + j * 64, output + j * 32, 32);
+                memset(tmp + j * 64 + 32, 0, 32);
+            }
+
+            // Join before the next part re-assigns scratchpads and possibly reads tmp[4..7]
+            if (helper && (t.threads == 2)) {
+                helper->wait();
+            }
+        }
+    }
+
+    // Restore the scratchpad pointers saved at the top
+    for (size_t i = 0; i < N; ++i) {
+        ctx[i]->memory = ctx_memory[i];
+    }
+}
+
+
+#else // XMRIG_FEATURE_HWLOC
+
+
+// Builds without hwloc have no tuning benchmark and never create a helper thread:
+// the three functions below are stubs that keep the public interface identical.
+void benchmark() {}
+HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { return nullptr; }
+void destroy_helper_thread(HelperThread*) {}
+
+
+/**
+ * Computes 8 GhostRider hashes in one call (build without hwloc: single-threaded, fixed tuning).
+ *
+ * The hash consists of 3 parts, each being 5 core hashes followed by 1 CryptoNight variant.
+ * Which functions are used, and in which order, is derived from the seed inside the blob.
+ *
+ * @param data   8 consecutive input blobs of `size` bytes each
+ * @param size   size of one input blob in bytes
+ * @param output receives 8 x 32 bytes, one hash per blob
+ * @param ctx    CryptoNight contexts passed through to the CnHash functions
+ */
+void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
+{
+    constexpr uint32_t N = 8;
+
+    // PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
+    const uint8_t* seed = data + 4;
+
+    uint32_t core_indices[15];
+    select_indices(core_indices, seed);
+
+    // Only the first 3 entries of cn_indices are used below
+    uint32_t cn_indices[6];
+    select_indices(cn_indices, seed);
+
+    // Hashes per CnHash call for each CryptoNight variant (no benchmark is available in this build)
+#ifdef XMRIG_ARM
+    uint32_t step[6] = { 1, 1, 1, 1, 1, 1 };
+#else
+    uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
+#endif
+
+    // Log the CryptoNight selection whenever it changes
+    static uint32_t prev_indices[3];
+    if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
+        memcpy(prev_indices, cn_indices, sizeof(prev_indices));
+        for (int i = 0; i < 3; ++i) {
+            LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
+        }
+    }
+
+    const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
+
+    const cn_hash_fun f[3] = {
+        CnHash::fn(cn_hash[cn_indices[0]], av[step[cn_indices[0]]], Assembly::AUTO),
+        CnHash::fn(cn_hash[cn_indices[1]], av[step[cn_indices[1]]], Assembly::AUTO),
+        CnHash::fn(cn_hash[cn_indices[2]], av[step[cn_indices[2]]], Assembly::AUTO),
+    };
+
+    // Intermediate state: one 64-byte slot per blob
+    uint8_t tmp[64 * N];
+
+    for (uint64_t part = 0; part < 3; ++part) {
+        for (uint64_t i = 0; i < 5; ++i) {
+            for (uint64_t j = 0; j < N; ++j) {
+                core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
+            }
+
+            // Switch to the intermediate buffer only after ALL blobs went through the current
+            // hash: doing it inside the loop would make blobs 1..7 read tmp before it is filled
+            data = tmp;
+            size = 64;
+        }
+
+        // CryptoNight on k blobs per call
+        for (uint64_t j = 0, k = step[cn_indices[part]]; j < N; j += k) {
+            f[part](tmp + j * 64, 64, output + j * 32, ctx, 0);
+        }
+
+        // The 32-byte result, zero-padded to 64 bytes, is the input of the next part
+        for (uint64_t j = 0; j < N; ++j) {
+            memcpy(tmp + j * 64, output + j * 32, 32);
+            memset(tmp + j * 64 + 32, 0, 32);
+        }
+    }
+}
+
+
+#endif // XMRIG_FEATURE_HWLOC
+
+
+} // namespace ghostrider
+
+
+} // namespace xmrig
--- a/src/crypto/ghostrider/ghostrider.h
+++ b/src/crypto/ghostrider/ghostrider.h
@@ -0,0 +1,52 @@
+/* XMRig
+ * Copyright 2018-2021 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2021 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_GR_HASH_H
+#define XMRIG_GR_HASH_H
+
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+
+struct cryptonight_ctx;
+
+
+namespace xmrig
+{
+
+
+namespace ghostrider
+{
+
+
+// Opaque handle of an optional per-miner helper thread (defined in ghostrider.cpp)
+struct HelperThread;
+
+// Runs the one-time tuning benchmark; a no-op in builds without hwloc and on ARM
+void benchmark();
+
+// Creates a helper thread close to logical CPU `cpu_index` that avoids the CPUs listed in
+// `affinities`. Returns nullptr when no helper is available.
+HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
+
+// Destroys a helper returned by create_helper_thread(); nullptr is accepted
+void destroy_helper_thread(HelperThread* t);
+
+// Computes 8 GhostRider hashes: `data` holds 8 blobs of `size` bytes, `output` receives 8 x 32 bytes
+void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
+
+
+} // namespace ghostrider
+
+
+} // namespace xmrig
+
+#endif // XMRIG_GR_HASH_H
--- a/src/crypto/ghostrider/md_helper.c
+++ b/src/crypto/ghostrider/md_helper.c
@@ -0,0 +1,346 @@
+/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * This file contains some functions which implement the external data
+ * handling and padding for Merkle-Damgard hash functions which follow
+ * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
+ *
+ * API: this file is meant to be included, not compiled as a stand-alone
+ * file. Some macros must be defined:
+ *   RFUN   name for the round function
+ *   HASH   "short name" for the hash function
+ *   BE32   defined for big-endian, 32-bit based (e.g. SHA-1)
+ *   LE32   defined for little-endian, 32-bit based (e.g. MD5)
+ *   BE64   defined for big-endian, 64-bit based (e.g. SHA-512)
+ *   LE64   defined for little-endian, 64-bit based (no example yet)
+ *   PW01   if defined, append 0x01 instead of 0x80 (for Tiger)
+ *   BLEN   if defined, length of a message block (in bytes)
+ *   PLW1   if defined, length is defined on one 64-bit word only (for Tiger)
+ *   PLW4   if defined, length is defined on four 64-bit words (for WHIRLPOOL)
+ *   SVAL   if defined, reference to the context state information
+ *
+ * BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
+ * this is used for instance for Tiger, which works on 64-bit words but
+ * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
+ * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
+ * set, then only one word (64 bits) will be used to encode the input
+ * message length (in bits), otherwise two words will be used (as in
+ * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
+ * not PLW1), four 64-bit words will be used to encode the message length
+ * (in bits). Note that regardless of those settings, only 64-bit message
+ * lengths are supported (in bits): messages longer than 2 Exabytes will be
+ * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
+ * 2 million Terabytes, which is huge).
+ *
+ * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
+ * function. This is used for Tiger2, which is identical to Tiger except
+ * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
+ * of the 0x01 from original Tiger).
+ *
+ * The RFUN function is invoked with two arguments, the first pointing to
+ * aligned data (as a "const void *"), the second being state information
+ * from the context structure. By default, this state information is the
+ * "val" field from the context, and this field is assumed to be an array
+ * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
+ * The "val" field can have any type, except
+ * for the output encoding which assumes that it is an array of "sph_u32"
+ * values. By defining NO_OUTPUT, this last step is deactivated; the
+ * includer code is then responsible for writing out the hash result. When
+ * NO_OUTPUT is defined, the third parameter to the "close()" function is
+ * ignored.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifdef _MSC_VER
+/* C4146 is MSVC's "unary minus applied to unsigned type" warning; this
+ * file negates an unsigned value on purpose when building the padding
+ * byte mask. */
+#pragma warning (disable: 4146)
+#endif
+
+/* Two-level token pasting so that macro arguments (HASH, ...) are expanded
+ * before being concatenated. Re-defined on every inclusion. */
+#undef SPH_XCAT
+#define SPH_XCAT(a, b)     SPH_XCAT_(a, b)
+#undef SPH_XCAT_
+#define SPH_XCAT_(a, b)    a ## b
+
+/* SPH_BLEN: block length in bytes; SPH_WLEN: word length in bytes.
+ * 64-bit variants (BE64/LE64) default to 128-byte blocks of 8-byte words,
+ * everything else to 64-byte blocks of 4-byte words. */
+#undef SPH_BLEN
+#undef SPH_WLEN
+#if defined BE64 || defined LE64
+#define SPH_BLEN    128U
+#define SPH_WLEN      8U
+#else
+#define SPH_BLEN     64U
+#define SPH_WLEN      4U
+#endif
+
+/* An includer-supplied BLEN overrides the default block length. */
+#ifdef BLEN
+#undef SPH_BLEN
+#define SPH_BLEN    BLEN
+#endif
+
+/* SPH_MAXPAD: offset inside the block where the encoded message length
+ * starts, i.e. the block length minus one (PLW1), four (PLW4) or two
+ * (default) words.
+ * NOTE(review): PLW1/PLW4 are honoured here regardless of word size,
+ * although the file header says they are ignored for 32-bit words. */
+#undef SPH_MAXPAD
+#if defined PLW1
+#define SPH_MAXPAD   (SPH_BLEN - SPH_WLEN)
+#elif defined PLW4
+#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 2))
+#else
+#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 1))
+#endif
+
+/* SPH_VAL: the state expression handed to RFUN. It is the includer's SVAL
+ * when defined, otherwise "sc->val" -- which is why the functions below
+ * name their context pointer "sc". Defining SVAL also sets SPH_NO_OUTPUT,
+ * which disables the digest write-out in the close function; no separate
+ * NO_OUTPUT macro is consulted in this file. */
+#undef SPH_VAL
+#undef SPH_NO_OUTPUT
+#ifdef SVAL
+#define SPH_VAL         SVAL
+#define SPH_NO_OUTPUT   1
+#else
+#define SPH_VAL   sc->val
+#endif
+
+#ifndef CLOSE_ONLY
+
+/*
+ * Buffered update: copies "len" bytes from "data" into the context buffer,
+ * running RFUN on the buffer each time it holds a full SPH_BLEN-byte block,
+ * and advances the byte counter of the context accordingly.
+ *
+ * When SPH_UPTR is defined this is the private <HASH>_short() helper used
+ * by the block-wise update function; otherwise it is the public
+ * sph_<HASH>() entry point itself.
+ *
+ *   cc     the hash context (sph_<HASH>_context)
+ *   data   input bytes
+ *   len    number of input bytes (zero is accepted and does nothing)
+ */
+#ifdef SPH_UPTR
+static void
+SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
+#else
+void
+SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
+#endif
+{
+	/* Must be called "sc": the default SPH_VAL expands to "sc->val". */
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc = cc;
+	const unsigned char *src = data;
+	size_t fill;
+
+	/* Bytes already waiting in the buffer = counter modulo block length. */
+#if SPH_64
+	fill = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	fill = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+	while (len != 0) {
+		size_t room = SPH_BLEN - fill;
+		size_t take = (room > len) ? len : room;
+#if !SPH_64
+		sph_u32 prev_low, next_low;
+#endif
+
+		memcpy(sc->buf + fill, src, take);
+		src += take;
+		len -= take;
+		fill += take;
+		if (fill == SPH_BLEN) {
+			/* Buffer is full: compress it and start over. */
+			RFUN(sc->buf, SPH_VAL);
+			fill = 0;
+		}
+#if SPH_64
+		sc->count += take;
+#else
+		/* Split 32+32-bit counter: propagate the carry by hand. */
+		prev_low = sc->count_low;
+		next_low = SPH_T32(prev_low + take);
+		sc->count_low = next_low;
+		if (next_low < prev_low)
+			sc->count_high ++;
+#endif
+	}
+}
+
+#ifdef SPH_UPTR
+/*
+ * Public update function used when SPH_UPTR is available. Inputs shorter
+ * than two blocks, and (unless SPH_UNALIGNED is set) inputs that are not
+ * word-aligned once the pending buffer has been completed, are handed to
+ * <HASH>_short(). Otherwise whole blocks are fed to RFUN directly from the
+ * caller's memory and only the trailing partial block is copied into the
+ * context buffer.
+ *
+ *   cc     the hash context (sph_<HASH>_context)
+ *   data   input bytes
+ *   len    number of input bytes
+ */
+void
+SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
+{
+	/* Must be called "sc": the default SPH_VAL expands to "sc->val". */
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned buffered;
+	size_t bulk_len;
+#if !SPH_64
+	sph_u32 prev_low, next_low;
+#endif
+
+	if (len < (2 * SPH_BLEN)) {
+		SPH_XCAT(HASH, _short)(cc, data, len);
+		return;
+	}
+	sc = cc;
+#if SPH_64
+	buffered = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	buffered = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+	if (buffered != 0) {
+		/* Complete the pending block through the buffered routine,
+		 * which also accounts for these bytes in the counter. */
+		unsigned gap = SPH_BLEN - buffered;
+
+		SPH_XCAT(HASH, _short)(cc, data, gap);
+		data = (const unsigned char *)data + gap;
+		len -= gap;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
+		SPH_XCAT(HASH, _short)(cc, data, len);
+		return;
+	}
+#endif
+	/* Remember how much is consumed here so the counter can be advanced
+	 * once, after the loop. */
+	for (bulk_len = len; len >= SPH_BLEN; len -= SPH_BLEN) {
+		RFUN(data, SPH_VAL);
+		data = (const unsigned char *)data + SPH_BLEN;
+	}
+	if (len != 0)
+		memcpy(sc->buf, data, len);
+#if SPH_64
+	sc->count += (sph_u64)bulk_len;
+#else
+	prev_low = sc->count_low;
+	next_low = SPH_T32(prev_low + bulk_len);
+	sc->count_low = next_low;
+	if (next_low < prev_low)
+		sc->count_high ++;
+	/*
+	 * This code handles the improbable situation where "size_t" is
+	 * greater than 32 bits, and yet we do not have a 64-bit type.
+	 * The 32-bit shift is done in three steps so that no single shift
+	 * reaches the width of a 32-bit "size_t".
+	 */
+	sc->count_high += ((bulk_len >> 12) >> 10) >> 10;
+#endif
+}
+#endif
+
+#endif
+
+/*
+ * Perform padding and produce result. The context is NOT reinitialized
+ * by this function.
+ *
+ *   cc     the hash context (sph_<HASH>_context)
+ *   ub     extra bits to append after the data already absorbed
+ *   n      number of extra bits taken from "ub"; it is used as a shift
+ *          count inside one byte, so values 0 to 7 are expected
+ *   dst    destination buffer for the digest (ignored if SPH_NO_OUTPUT)
+ *   rnum   number of state words copied from "val" to "dst"
+ *          (ignored if SPH_NO_OUTPUT)
+ *
+ * Steps: append the padding marker byte (merged with the extra bits),
+ * zero-fill up to SPH_MAXPAD (processing one additional block first when
+ * the marker ended up beyond SPH_MAXPAD), encode the message length in
+ * bits at SPH_MAXPAD using the layout selected by BE64/LE64/BE32 and
+ * PLW1/PLW4, run RFUN on the final block, then write out the state.
+ */
+static void
+SPH_XCAT(HASH, _addbits_and_close)(void *cc,
+	unsigned ub, unsigned n, void *dst, unsigned rnum)
+{
+	/* Must be called "sc": the default SPH_VAL expands to "sc->val". */
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current, u;
+#if !SPH_64
+	sph_u32 low, high;
+#endif
+
+	sc = cc;
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+#ifdef PW01
+	/* Marker bit is a 1 placed below the n extra bits (0x01 when n == 0). */
+	sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
+#else
+	{
+		unsigned z;
+
+		/* z is the marker bit (0x80 when n == 0); -z masks that bit
+		 * and everything above it, keeping the extra bits of ub. */
+		z = 0x80 >> n;
+		sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
+	}
+#endif
+	if (current > SPH_MAXPAD) {
+		/* No room left for the length field: flush this block and
+		 * start a fresh, zeroed one. */
+		memset(sc->buf + current, 0, SPH_BLEN - current);
+		RFUN(sc->buf, SPH_VAL);
+		memset(sc->buf, 0, SPH_MAXPAD);
+	} else {
+		memset(sc->buf + current, 0, SPH_MAXPAD - current);
+	}
+	/*
+	 * Length field. "count << 3" (+ n) is the low 64 bits of the bit
+	 * length, "count >> 61" the bits that overflow into the next word.
+	 */
+#if defined BE64
+#if defined PLW1
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#elif defined PLW4
+	memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
+		sc->count >> 61);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#else
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#endif
+#elif defined LE64
+#if defined PLW1
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#elif defined PLW4
+	/*
+	 * Four-word length, little-endian: low word, high word, then two
+	 * zero words (mirror image of the BE64/PLW4 layout above). This
+	 * branch previously re-tested PLW1 and could never be selected,
+	 * which left the upper two length words unwritten for LE64+PLW4.
+	 */
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
+	memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
+#else
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
+#endif
+#else
+#if SPH_64
+#ifdef BE32
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#else
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#endif
+#else
+	/* No 64-bit type: build the bit length from the two 32-bit halves. */
+	low = sc->count_low;
+	high = SPH_T32((sc->count_high << 3) | (low >> 29));
+	low = SPH_T32(low << 3) + (sph_u32)n;
+#ifdef BE32
+	sph_enc32be(sc->buf + SPH_MAXPAD, high);
+	sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
+#else
+	sph_enc32le(sc->buf + SPH_MAXPAD, low);
+	sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
+#endif
+#endif
+#endif
+	RFUN(sc->buf, SPH_VAL);
+#ifdef SPH_NO_OUTPUT
+	/* The includer writes the digest itself; silence unused warnings. */
+	(void)dst;
+	(void)rnum;
+	(void)u;
+#else
+	for (u = 0; u < rnum; u ++) {
+#if defined BE64
+		sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
+#elif defined LE64
+		sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
+#elif defined BE32
+		sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
+#else
+		sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
+#endif
+	}
+#endif
+}
+
+/*
+ * Finish the computation without extra bits: forwards to
+ * <HASH>_addbits_and_close() with ub = 0 and n = 0, passing "dst" and
+ * "rnum" through unchanged.
+ */
+static void
+SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
+{
+	SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
+}
--- a/src/crypto/ghostrider/sph_blake.c
+++ b/src/crypto/ghostrider/sph_blake.c
--- a/src/crypto/ghostrider/sph_blake.h
+++ b/src/crypto/ghostrider/sph_blake.h
@@ -0,0 +1,327 @@
+/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
+/**
+ * BLAKE interface. BLAKE is a family of functions which differ by their
+ * output size; this implementation defines BLAKE for output sizes 224,
+ * 256, 384 and 512 bits. This implementation conforms to the "third
+ * round" specification.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_blake.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_BLAKE_H__
+#define SPH_BLAKE_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for BLAKE-224.
+ */
+#define SPH_SIZE_blake224   224
+
+/**
+ * Output size (in bits) for BLAKE-256.
+ */
+#define SPH_SIZE_blake256   256
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for BLAKE-384.
+ */
+#define SPH_SIZE_blake384   384
+
+/**
+ * Output size (in bits) for BLAKE-512.
+ */
+#define SPH_SIZE_blake512   512
+
+#endif
+
+/**
+ * This structure is a context for BLAKE-224 and BLAKE-256 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a BLAKE computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running BLAKE
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	/* NOTE(review): the roles of the fields below are not visible in this
+	 * header; presumably "ptr" is the fill position inside buf, H the
+	 * chaining value, S the salt and T0/T1 the bit counter -- confirm
+	 * against sph_blake.c. */
+	size_t ptr;
+	sph_u32 H[8];
+	sph_u32 S[4];
+	sph_u32 T0, T1;
+#endif
+} sph_blake_small_context;
+
+/**
+ * This structure is a context for BLAKE-224 computations. It is
+ * identical to the common <code>sph_blake_small_context</code>.
+ */
+typedef sph_blake_small_context sph_blake224_context;
+
+/**
+ * This structure is a context for BLAKE-256 computations. It is
+ * identical to the common <code>sph_blake_small_context</code>.
+ */
+typedef sph_blake_small_context sph_blake256_context;
+
+#if SPH_64
+
+/**
+ * This structure is a context for BLAKE-384 and BLAKE-512 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a BLAKE computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running BLAKE
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[128];    /* first field, for alignment */
+	/* NOTE(review): same layout as sph_blake_small_context with 64-bit
+	 * words and a 128-byte buffer; the field roles (presumably fill
+	 * position, chaining value, salt, bit counter) must be confirmed
+	 * against sph_blake.c. */
+	size_t ptr;
+	sph_u64 H[8];
+	sph_u64 S[4];
+	sph_u64 T0, T1;
+#endif
+} sph_blake_big_context;
+
+/**
+ * This structure is a context for BLAKE-384 computations. It is
+ * identical to the common <code>sph_blake_big_context</code>.
+ */
+typedef sph_blake_big_context sph_blake384_context;
+
+/**
+ * This structure is a context for BLAKE-512 computations. It is
+ * identical to the common <code>sph_blake_big_context</code>.
+ */
+typedef sph_blake_big_context sph_blake512_context;
+
+#endif
+
+/**
+ * Initialize a BLAKE-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the BLAKE-224 context (pointer to a
+ *             <code>sph_blake224_context</code>)
+ */
+void sph_blake224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BLAKE-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_blake224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BLAKE-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the BLAKE-224 context
+ * @param dst   the destination buffer
+ */
+void sph_blake224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the BLAKE-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_blake224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a BLAKE-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the BLAKE-256 context (pointer to a
+ *             <code>sph_blake256_context</code>)
+ */
+void sph_blake256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BLAKE-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_blake256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BLAKE-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the BLAKE-256 context
+ * @param dst   the destination buffer
+ */
+void sph_blake256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the BLAKE-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_blake256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#if SPH_64
+
+/**
+ * Initialize a BLAKE-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the BLAKE-384 context (pointer to a
+ *             <code>sph_blake384_context</code>)
+ */
+void sph_blake384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BLAKE-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_blake384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BLAKE-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the BLAKE-384 context
+ * @param dst   the destination buffer
+ */
+void sph_blake384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the BLAKE-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_blake384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a BLAKE-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the BLAKE-512 context (pointer to a
+ *             <code>sph_blake512_context</code>)
+ */
+void sph_blake512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BLAKE-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_blake512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BLAKE-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the BLAKE-512 context
+ * @param dst   the destination buffer
+ */
+void sph_blake512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the BLAKE-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_blake512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_bmw.c
+++ b/src/crypto/ghostrider/sph_bmw.c
@@ -0,0 +1,986 @@
+/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * BMW implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include "sph_bmw.h"
+
+/* The global SPH_SMALL_FOOTPRINT switch enables the compact BMW code path
+ * unless SPH_SMALL_FOOTPRINT_BMW was already defined explicitly. */
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
+#define SPH_SMALL_FOOTPRINT_BMW   1
+#endif
+
+#ifdef _MSC_VER
+/* C4146 is MSVC's "unary minus applied to unsigned type" warning. */
+#pragma warning (disable: 4146)
+#endif
+
+/* 32-bit initial-value tables; compiled only when __AVX2__ is not defined. */
+#if !defined(__AVX2__)
+
+/* Sixteen 32-bit words spelling the byte sequence 0x00..0x3F in order. */
+static const sph_u32 IV224[] = {
+	SPH_C32(0x00010203), SPH_C32(0x04050607),
+	SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
+	SPH_C32(0x10111213), SPH_C32(0x14151617),
+	SPH_C32(0x18191A1B), SPH_C32(0x1C1D1E1F),
+	SPH_C32(0x20212223), SPH_C32(0x24252627),
+	SPH_C32(0x28292A2B), SPH_C32(0x2C2D2E2F),
+	SPH_C32(0x30313233), SPH_C32(0x34353637),
+	SPH_C32(0x38393A3B), SPH_C32(0x3C3D3E3F)
+};
+
+/* Sixteen 32-bit words spelling the byte sequence 0x40..0x7F in order. */
+static const sph_u32 IV256[] = {
+	SPH_C32(0x40414243), SPH_C32(0x44454647),
+	SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
+	SPH_C32(0x50515253), SPH_C32(0x54555657),
+	SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F),
+	SPH_C32(0x60616263), SPH_C32(0x64656667),
+	SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F),
+	SPH_C32(0x70717273), SPH_C32(0x74757677),
+	SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
+};
+
+#endif // !AVX2
+
+/* 64-bit initial-value tables; available only when a 64-bit type exists. */
+#if SPH_64
+
+/* Sixteen 64-bit words spelling the byte sequence 0x00..0x7F in order. */
+static const sph_u64 IV384[] = {
+	SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F),
+	SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F),
+	SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F),
+	SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F),
+	SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F),
+	SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F),
+	SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F),
+	SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F)
+};
+
+/* Sixteen 64-bit words spelling the byte sequence 0x80..0xFF in order. */
+static const sph_u64 IV512[] = {
+	SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
+	SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
+	SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
+	SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
+	SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
+	SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
+	SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
+	SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
+};
+
+#endif
+
+/* Two-level token pasting: arguments are macro-expanded before "##". */
+#define XCAT(x, y)    XCAT_(x, y)
+#define XCAT_(x, y)   x ## y
+
+/*
+ * Expands to an opening parenthesis. Writing "name LPAR args)" instead of
+ * "name(args)" delays the function-like macro call until after "args" has
+ * been expanded, so that one I16_n / M16_n argument can turn into its full
+ * comma-separated list before the *_inner macros count their parameters.
+ */
+#define LPAR   (
+
+/*
+ * I16_n: the sixteen consecutive indices n-16 .. n-1. They become the
+ * i0..i15 parameters of the expand*_inner macros.
+ */
+#define I16_16    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
+#define I16_17    1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16
+#define I16_18    2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17
+#define I16_19    3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18
+#define I16_20    4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+#define I16_21    5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
+#define I16_22    6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+#define I16_23    7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
+#define I16_24    8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
+#define I16_25    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+#define I16_26   10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
+#define I16_27   11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
+#define I16_28   12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
+#define I16_29   13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
+#define I16_30   14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
+#define I16_31   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
+
+/*
+ * M16_n: seven values consumed as (i0m, i1m, i3m, i4m, i7m, i10m, i11m).
+ * In add_elt_s / add_elt_b the 1st, 3rd and 6th are message-word indices,
+ * the 2nd, 4th and 7th are the matching rotation counts (index + 1, hence
+ * the value 16 in some rows), and the 5th is the index passed to hf().
+ */
+#define M16_16    0,  1,  3,  4,  7, 10, 11
+#define M16_17    1,  2,  4,  5,  8, 11, 12
+#define M16_18    2,  3,  5,  6,  9, 12, 13
+#define M16_19    3,  4,  6,  7, 10, 13, 14
+#define M16_20    4,  5,  7,  8, 11, 14, 15
+#define M16_21    5,  6,  8,  9, 12, 15, 16
+#define M16_22    6,  7,  9, 10, 13,  0,  1
+#define M16_23    7,  8, 10, 11, 14,  1,  2
+#define M16_24    8,  9, 11, 12, 15,  2,  3
+#define M16_25    9, 10, 12, 13,  0,  3,  4
+#define M16_26   10, 11, 13, 14,  1,  4,  5
+#define M16_27   11, 12, 14, 15,  2,  5,  6
+#define M16_28   12, 13, 15, 16,  3,  6,  7
+#define M16_29   13, 14,  0,  1,  4,  7,  8
+#define M16_30   14, 15,  1,  2,  5,  8,  9
+#define M16_31   15, 16,  2,  3,  6,  9, 10
+
+/* 32-bit-word helper macros; compiled only when __AVX2__ is not defined. */
+#if !defined(__AVX2__)
+
+/* ss0..ss3: XOR of a right shift, a left shift (truncated to 32 bits) and
+ * two left rotations of x. ss4/ss5: x XOR (x shifted right by 1 or 2). */
+#define ss0(x)    (((x) >> 1) ^ SPH_T32((x) << 3) \
+                  ^ SPH_ROTL32(x,  4) ^ SPH_ROTL32(x, 19))
+#define ss1(x)    (((x) >> 1) ^ SPH_T32((x) << 2) \
+                  ^ SPH_ROTL32(x,  8) ^ SPH_ROTL32(x, 23))
+#define ss2(x)    (((x) >> 2) ^ SPH_T32((x) << 1) \
+                  ^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
+#define ss3(x)    (((x) >> 2) ^ SPH_T32((x) << 2) \
+                  ^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
+#define ss4(x)    (((x) >> 1) ^ (x))
+#define ss5(x)    (((x) >> 2) ^ (x))
+/* rs1..rs7: plain 32-bit left rotations by fixed amounts. */
+#define rs1(x)    SPH_ROTL32(x,  3)
+#define rs2(x)    SPH_ROTL32(x,  7)
+#define rs3(x)    SPH_ROTL32(x, 13)
+#define rs4(x)    SPH_ROTL32(x, 16)
+#define rs5(x)    SPH_ROTL32(x, 19)
+#define rs6(x)    SPH_ROTL32(x, 23)
+#define rs7(x)    SPH_ROTL32(x, 27)
+
+/* Per-index constant: j * 0x05555555, truncated to 32 bits. */
+#define Ks(j)   SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
+
+/* Combines three rotated message words (two added, one subtracted) with
+ * Ks(j16), truncates to 32 bits and XORs with hf(j7m). The arguments come
+ * from an M16_n row. */
+#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
+	(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
+		- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
+
+/* Expansion, first form: sum of ss1/ss2/ss3/ss0 applied cyclically to the
+ * sixteen qf() values selected by i0..i15, plus add_elt_s. */
+#define expand1s_inner(qf, mf, hf, i16, \
+		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
+		i9, i10, i11, i12, i13, i14, i15, \
+		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
+	SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
+		+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
+		+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
+		+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
+		+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
+
+/* expand1s(qf, mf, hf, n): looks up I16_n and M16_n by pasting the literal
+ * n, then calls expand1s_inner through the LPAR indirection so both lists
+ * are spread into individual arguments. */
+#define expand1s(qf, mf, hf, i16) \
+	expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
+#define expand1s_(qf, mf, hf, i16, ix, iy) \
+	expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
+
+/* Expansion, second form: even positions are added as-is, odd positions
+ * go through rs1..rs7, the last two through ss4 and ss5, plus add_elt_s. */
+#define expand2s_inner(qf, mf, hf, i16, \
+		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
+		i9, i10, i11, i12, i13, i14, i15, \
+		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
+	SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
+		+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
+		+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
+		+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
+		+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
+
+/* expand2s(qf, mf, hf, n): same lookup/indirection scheme as expand1s. */
+#define expand2s(qf, mf, hf, i16) \
+	expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
+#define expand2s_(qf, mf, hf, i16, ix, iy) \
+	expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
+
+#endif // !AVX2
+
+#if SPH_64
+
+#define sb0(x)    (((x) >> 1) ^ SPH_T64((x) << 3) \
+                  ^ SPH_ROTL64(x,  4) ^ SPH_ROTL64(x, 37))
+#define sb1(x)    (((x) >> 1) ^ SPH_T64((x) << 2) \
+                  ^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
+#define sb2(x)    (((x) >> 2) ^ SPH_T64((x) << 1) \
+                  ^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
+#define sb3(x)    (((x) >> 2) ^ SPH_T64((x) << 2) \
+                  ^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
+#define sb4(x)    (((x) >> 1) ^ (x))
+#define sb5(x)    (((x) >> 2) ^ (x))
+#define rb1(x)    SPH_ROTL64(x,  5)
+#define rb2(x)    SPH_ROTL64(x, 11)
+#define rb3(x)    SPH_ROTL64(x, 27)
+#define rb4(x)    SPH_ROTL64(x, 32)
+#define rb5(x)    SPH_ROTL64(x, 37)
+#define rb6(x)    SPH_ROTL64(x, 43)
+#define rb7(x)    SPH_ROTL64(x, 53)
+
+#define Kb(j)   SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
+
+#if SPH_SMALL_FOOTPRINT_BMW
+
+static const sph_u64 Kb_tab[] = {
+	Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
+	Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
+};
+
+#define rol_off(mf, j, off) \
+	SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)
+
+#define add_elt_b(mf, hf, j) \
+	(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
+		- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))
+
+#define expand1b(qf, mf, hf, i) \
+	SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
+		+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
+		+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
+		+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
+		+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
+		+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
+		+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
+		+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
+		+ add_elt_b(mf, hf, (i) - 16))
+
+#define expand2b(qf, mf, hf, i) \
+	SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
+		+ qf((i) - 14) + rb2(qf((i) - 13)) \
+		+ qf((i) - 12) + rb3(qf((i) - 11)) \
+		+ qf((i) - 10) + rb4(qf((i) - 9)) \
+		+ qf((i) - 8) + rb5(qf((i) - 7)) \
+		+ qf((i) - 6) + rb6(qf((i) - 5)) \
+		+ qf((i) - 4) + rb7(qf((i) - 3)) \
+		+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
+		+ add_elt_b(mf, hf, (i) - 16))
+
+#else
+/* add_elt_b with every message index, rotation count and the Kb index supplied as separate macro arguments. */
+#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
+	(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
+		- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))
+/* expand1b_inner: i0..i15 select the sixteen qf inputs, i0m..i11m feed add_elt_b, i16 is the Kb index. */
+#define expand1b_inner(qf, mf, hf, i16, \
+		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
+		i9, i10, i11, i12, i13, i14, i15, \
+		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
+	SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
+		+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
+		+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
+		+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
+		+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
+/* I16_<n>, M16_<n> and LPAR are defined outside this excerpt; presumably index lists and '(' -- TODO confirm. */
+#define expand1b(qf, mf, hf, i16) \
+	expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
+#define expand1b_(qf, mf, hf, i16, ix, iy) \
+	expand1b_inner LPAR qf, mf, hf, i16, ix, iy)
+/* expand2b_inner: i0..i15 select the sixteen qf inputs, i0m..i11m feed add_elt_b, i16 is the Kb index. */
+#define expand2b_inner(qf, mf, hf, i16, \
+		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
+		i9, i10, i11, i12, i13, i14, i15, \
+		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
+	SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
+		+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
+		+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
+		+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
+		+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
+/* I16_<n>, M16_<n> and LPAR are defined outside this excerpt; presumably index lists and '(' -- TODO confirm. */
+#define expand2b(qf, mf, hf, i16) \
+	expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
+#define expand2b_(qf, mf, hf, i16, ix, iy) \
+	expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
+
+#endif
+
+#endif
+/* MAKE_W: joins (M(i) ^ H(i)) for five indices with the four given operators and wraps the result in tt. */
+#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
+	tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
+	op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
+
+#if !defined(__AVX2__)
+/* Ws0..Ws15: MAKE_W terms wrapped in SPH_T32; each lists five word indices and the +/- operators between them. */
+#define Ws0    MAKE_W(SPH_T32,  5, -,  7, +, 10, +, 13, +, 14)
+#define Ws1    MAKE_W(SPH_T32,  6, -,  8, +, 11, +, 14, -, 15)
+#define Ws2    MAKE_W(SPH_T32,  0, +,  7, +,  9, -, 12, +, 15)
+#define Ws3    MAKE_W(SPH_T32,  0, -,  1, +,  8, -, 10, +, 13)
+#define Ws4    MAKE_W(SPH_T32,  1, +,  2, +,  9, -, 11, -, 14)
+#define Ws5    MAKE_W(SPH_T32,  3, -,  2, +, 10, -, 12, +, 15)
+#define Ws6    MAKE_W(SPH_T32,  4, -,  0, -,  3, -, 11, +, 13)
+#define Ws7    MAKE_W(SPH_T32,  1, -,  4, -,  5, -, 12, -, 14)
+#define Ws8    MAKE_W(SPH_T32,  2, -,  5, -,  6, +, 13, -, 15)
+#define Ws9    MAKE_W(SPH_T32,  0, -,  3, +,  6, -,  7, +, 14)
+#define Ws10   MAKE_W(SPH_T32,  8, -,  1, -,  4, -,  7, +, 15)
+#define Ws11   MAKE_W(SPH_T32,  8, -,  0, -,  2, -,  5, +,  9)
+#define Ws12   MAKE_W(SPH_T32,  1, +,  3, -,  6, -,  9, +, 10)
+#define Ws13   MAKE_W(SPH_T32,  2, +,  4, +,  7, +, 10, +, 11)
+#define Ws14   MAKE_W(SPH_T32,  3, -,  5, +,  8, -, 11, -, 12)
+#define Ws15   MAKE_W(SPH_T32, 12, -,  4, -,  6, -,  9, +, 13)
+
+#if SPH_SMALL_FOOTPRINT_BMW
+/* MAKE_Qas (loop form): fills qt[0..15]; qt[k] = ss<k mod 5>(Ws<k>) + H((k + 1) mod 16). Needs qt, M and H in scope. */
+#define MAKE_Qas   do { \
+		unsigned u; \
+		sph_u32 Ws[16]; \
+		Ws[ 0] = Ws0; \
+		Ws[ 1] = Ws1; \
+		Ws[ 2] = Ws2; \
+		Ws[ 3] = Ws3; \
+		Ws[ 4] = Ws4; \
+		Ws[ 5] = Ws5; \
+		Ws[ 6] = Ws6; \
+		Ws[ 7] = Ws7; \
+		Ws[ 8] = Ws8; \
+		Ws[ 9] = Ws9; \
+		Ws[10] = Ws10; \
+		Ws[11] = Ws11; \
+		Ws[12] = Ws12; \
+		Ws[13] = Ws13; \
+		Ws[14] = Ws14; \
+		Ws[15] = Ws15; \
+		for (u = 0; u < 15; u += 5) { \
+			qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
+			qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
+			qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
+			qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
+			qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
+		} \
+		qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
+	} while (0)
+/* MAKE_Qbs: fills qt[16..31]; entries 16 and 17 use expand1s, entries 18..31 use expand2s (both defined outside this excerpt). */
+#define MAKE_Qbs   do { \
+		qt[16] = expand1s(Qs, M, H, 16); \
+		qt[17] = expand1s(Qs, M, H, 17); \
+		qt[18] = expand2s(Qs, M, H, 18); \
+		qt[19] = expand2s(Qs, M, H, 19); \
+		qt[20] = expand2s(Qs, M, H, 20); \
+		qt[21] = expand2s(Qs, M, H, 21); \
+		qt[22] = expand2s(Qs, M, H, 22); \
+		qt[23] = expand2s(Qs, M, H, 23); \
+		qt[24] = expand2s(Qs, M, H, 24); \
+		qt[25] = expand2s(Qs, M, H, 25); \
+		qt[26] = expand2s(Qs, M, H, 26); \
+		qt[27] = expand2s(Qs, M, H, 27); \
+		qt[28] = expand2s(Qs, M, H, 28); \
+		qt[29] = expand2s(Qs, M, H, 29); \
+		qt[30] = expand2s(Qs, M, H, 30); \
+		qt[31] = expand2s(Qs, M, H, 31); \
+	} while (0)
+
+#else
+/* MAKE_Qas (unrolled): fills qt[0..15]; qt[k] = ss<k mod 5>(Ws<k>) + H((k + 1) mod 16). Needs qt, M and H in scope. */
+#define MAKE_Qas   do { \
+		qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
+		qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
+		qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
+		qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
+		qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
+		qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
+		qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
+		qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
+		qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
+		qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
+		qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
+		qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
+		qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
+		qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
+		qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
+		qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
+	} while (0)
+/* MAKE_Qbs: fills qt[16..31]; entries 16 and 17 use expand1s, entries 18..31 use expand2s (both defined outside this excerpt). */
+#define MAKE_Qbs   do { \
+		qt[16] = expand1s(Qs, M, H, 16); \
+		qt[17] = expand1s(Qs, M, H, 17); \
+		qt[18] = expand2s(Qs, M, H, 18); \
+		qt[19] = expand2s(Qs, M, H, 19); \
+		qt[20] = expand2s(Qs, M, H, 20); \
+		qt[21] = expand2s(Qs, M, H, 21); \
+		qt[22] = expand2s(Qs, M, H, 22); \
+		qt[23] = expand2s(Qs, M, H, 23); \
+		qt[24] = expand2s(Qs, M, H, 24); \
+		qt[25] = expand2s(Qs, M, H, 25); \
+		qt[26] = expand2s(Qs, M, H, 26); \
+		qt[27] = expand2s(Qs, M, H, 27); \
+		qt[28] = expand2s(Qs, M, H, 28); \
+		qt[29] = expand2s(Qs, M, H, 29); \
+		qt[30] = expand2s(Qs, M, H, 30); \
+		qt[31] = expand2s(Qs, M, H, 31); \
+	} while (0)
+
+#endif
+/* MAKE_Qs: fills all of qt[0..31] by running MAKE_Qas, then MAKE_Qbs. */
+#define MAKE_Qs   do { \
+		MAKE_Qas; \
+		MAKE_Qbs; \
+	} while (0)
+/* Qs(j): accessor for entry j of the local qt array. */
+#define Qs(j)   (qt[j])
+
+#endif  // !AVX2
+
+#if SPH_64
+/* Wb0..Wb15: MAKE_W terms wrapped in SPH_T64; each lists five word indices and the +/- operators between them. */
+#define Wb0    MAKE_W(SPH_T64,  5, -,  7, +, 10, +, 13, +, 14)
+#define Wb1    MAKE_W(SPH_T64,  6, -,  8, +, 11, +, 14, -, 15)
+#define Wb2    MAKE_W(SPH_T64,  0, +,  7, +,  9, -, 12, +, 15)
+#define Wb3    MAKE_W(SPH_T64,  0, -,  1, +,  8, -, 10, +, 13)
+#define Wb4    MAKE_W(SPH_T64,  1, +,  2, +,  9, -, 11, -, 14)
+#define Wb5    MAKE_W(SPH_T64,  3, -,  2, +, 10, -, 12, +, 15)
+#define Wb6    MAKE_W(SPH_T64,  4, -,  0, -,  3, -, 11, +, 13)
+#define Wb7    MAKE_W(SPH_T64,  1, -,  4, -,  5, -, 12, -, 14)
+#define Wb8    MAKE_W(SPH_T64,  2, -,  5, -,  6, +, 13, -, 15)
+#define Wb9    MAKE_W(SPH_T64,  0, -,  3, +,  6, -,  7, +, 14)
+#define Wb10   MAKE_W(SPH_T64,  8, -,  1, -,  4, -,  7, +, 15)
+#define Wb11   MAKE_W(SPH_T64,  8, -,  0, -,  2, -,  5, +,  9)
+#define Wb12   MAKE_W(SPH_T64,  1, +,  3, -,  6, -,  9, +, 10)
+#define Wb13   MAKE_W(SPH_T64,  2, +,  4, +,  7, +, 10, +, 11)
+#define Wb14   MAKE_W(SPH_T64,  3, -,  5, +,  8, -, 11, -, 12)
+#define Wb15   MAKE_W(SPH_T64, 12, -,  4, -,  6, -,  9, +, 13)
+
+#if SPH_SMALL_FOOTPRINT_BMW
+/* MAKE_Qab (loop form): fills qt[0..15]; qt[k] = sb<k mod 5>(Wb<k>) + H((k + 1) mod 16). Needs qt, M and H in scope. */
+#define MAKE_Qab   do { \
+		unsigned u; \
+		sph_u64 Wb[16]; \
+		Wb[ 0] = Wb0; \
+		Wb[ 1] = Wb1; \
+		Wb[ 2] = Wb2; \
+		Wb[ 3] = Wb3; \
+		Wb[ 4] = Wb4; \
+		Wb[ 5] = Wb5; \
+		Wb[ 6] = Wb6; \
+		Wb[ 7] = Wb7; \
+		Wb[ 8] = Wb8; \
+		Wb[ 9] = Wb9; \
+		Wb[10] = Wb10; \
+		Wb[11] = Wb11; \
+		Wb[12] = Wb12; \
+		Wb[13] = Wb13; \
+		Wb[14] = Wb14; \
+		Wb[15] = Wb15; \
+		for (u = 0; u < 15; u += 5) { \
+			qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
+			qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
+			qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
+			qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
+			qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
+		} \
+		qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
+	} while (0)
+/* MAKE_Qbb (loop form): fills qt[16..31]; expand1b for entries 16 and 17, expand2b for entries 18..31. */
+#define MAKE_Qbb   do { \
+		unsigned u; \
+		for (u = 16; u < 18; u ++) \
+			qt[u] = expand1b(Qb, M, H, u); \
+		for (u = 18; u < 32; u ++) \
+			qt[u] = expand2b(Qb, M, H, u); \
+	} while (0)
+
+#else
+/* MAKE_Qab (unrolled): fills qt[0..15]; qt[k] = sb<k mod 5>(Wb<k>) + H((k + 1) mod 16). Needs qt, M and H in scope. */
+#define MAKE_Qab   do { \
+		qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
+		qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
+		qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
+		qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
+		qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
+		qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
+		qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
+		qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
+		qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
+		qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
+		qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
+		qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
+		qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
+		qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
+		qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
+		qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
+	} while (0)
+/* MAKE_Qbb (unrolled): fills qt[16..31]; expand1b for entries 16 and 17, expand2b for entries 18..31. */
+#define MAKE_Qbb   do { \
+		qt[16] = expand1b(Qb, M, H, 16); \
+		qt[17] = expand1b(Qb, M, H, 17); \
+		qt[18] = expand2b(Qb, M, H, 18); \
+		qt[19] = expand2b(Qb, M, H, 19); \
+		qt[20] = expand2b(Qb, M, H, 20); \
+		qt[21] = expand2b(Qb, M, H, 21); \
+		qt[22] = expand2b(Qb, M, H, 22); \
+		qt[23] = expand2b(Qb, M, H, 23); \
+		qt[24] = expand2b(Qb, M, H, 24); \
+		qt[25] = expand2b(Qb, M, H, 25); \
+		qt[26] = expand2b(Qb, M, H, 26); \
+		qt[27] = expand2b(Qb, M, H, 27); \
+		qt[28] = expand2b(Qb, M, H, 28); \
+		qt[29] = expand2b(Qb, M, H, 29); \
+		qt[30] = expand2b(Qb, M, H, 30); \
+		qt[31] = expand2b(Qb, M, H, 31); \
+	} while (0)
+
+#endif
+/* MAKE_Qb: fills all of qt[0..31] by running MAKE_Qab, then MAKE_Qbb. */
+#define MAKE_Qb   do { \
+		MAKE_Qab; \
+		MAKE_Qbb; \
+	} while (0)
+/* Qb(j): accessor for entry j of the local qt array. */
+#define Qb(j)   (qt[j])
+
+#endif
+/* FOLD: declares qt[32], xl, xh; runs mkQ to fill qt; xl/xh xor qf(16..23)/qf(16..31); writes dhf(0..15), where dhf(8..15) reuse rotated dhf(4..7, 0..3). */
+#define FOLD(type, mkQ, tt, rol, mf, qf, dhf)   do { \
+		type qt[32], xl, xh; \
+		mkQ; \
+		xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
+			^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
+		xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
+			^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
+		dhf( 0) = tt(((xh <<  5) ^ (qf(16) >>  5) ^ mf( 0)) \
+			+ (xl ^ qf(24) ^ qf( 0))); \
+		dhf( 1) = tt(((xh >>  7) ^ (qf(17) <<  8) ^ mf( 1)) \
+			+ (xl ^ qf(25) ^ qf( 1))); \
+		dhf( 2) = tt(((xh >>  5) ^ (qf(18) <<  5) ^ mf( 2)) \
+			+ (xl ^ qf(26) ^ qf( 2))); \
+		dhf( 3) = tt(((xh >>  1) ^ (qf(19) <<  5) ^ mf( 3)) \
+			+ (xl ^ qf(27) ^ qf( 3))); \
+		dhf( 4) = tt(((xh >>  3) ^ (qf(20) <<  0) ^ mf( 4)) \
+			+ (xl ^ qf(28) ^ qf( 4))); \
+		dhf( 5) = tt(((xh <<  6) ^ (qf(21) >>  6) ^ mf( 5)) \
+			+ (xl ^ qf(29) ^ qf( 5))); \
+		dhf( 6) = tt(((xh >>  4) ^ (qf(22) <<  6) ^ mf( 6)) \
+			+ (xl ^ qf(30) ^ qf( 6))); \
+		dhf( 7) = tt(((xh >> 11) ^ (qf(23) <<  2) ^ mf( 7)) \
+			+ (xl ^ qf(31) ^ qf( 7))); \
+		dhf( 8) = tt(rol(dhf(4),  9) + (xh ^ qf(24) ^ mf( 8)) \
+			+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
+		dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
+			+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
+		dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
+			+ ((xl << 6) ^ qf(17) ^ qf(10))); \
+		dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
+			+ ((xl << 4) ^ qf(18) ^ qf(11))); \
+		dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
+			+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
+		dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
+			+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
+		dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
+			+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
+		dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
+			+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
+	} while (0)
+
+
+#if SPH_64
+/* FOLDb: FOLD instantiated with sph_u64, MAKE_Qb, SPH_T64 and SPH_ROTL64; expects M, H and dH to be defined at the use site. */
+#define FOLDb   FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
+
+#endif
+
+#if !defined(__AVX2__)
+/* FOLDs: FOLD instantiated with sph_u32, MAKE_Qs, SPH_T32 and SPH_ROTL32; expects M, H and dH to be defined at the use site. */
+#define FOLDs   FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
+/* One 32-bit compression: the 64 bytes at data and the chaining value h produce dh. */
+static void
+compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
+{
+#if SPH_LITTLE_FAST
+#define M(x)    sph_dec32le_aligned(data + 4 * (x))	/* decode each message word on demand */
+#else
+	sph_u32 mv[16];	/* the message block, decoded once into 16 words */
+
+	mv[ 0] = sph_dec32le_aligned(data +  0);
+	mv[ 1] = sph_dec32le_aligned(data +  4);
+	mv[ 2] = sph_dec32le_aligned(data +  8);
+	mv[ 3] = sph_dec32le_aligned(data + 12);
+	mv[ 4] = sph_dec32le_aligned(data + 16);
+	mv[ 5] = sph_dec32le_aligned(data + 20);
+	mv[ 6] = sph_dec32le_aligned(data + 24);
+	mv[ 7] = sph_dec32le_aligned(data + 28);
+	mv[ 8] = sph_dec32le_aligned(data + 32);
+	mv[ 9] = sph_dec32le_aligned(data + 36);
+	mv[10] = sph_dec32le_aligned(data + 40);
+	mv[11] = sph_dec32le_aligned(data + 44);
+	mv[12] = sph_dec32le_aligned(data + 48);
+	mv[13] = sph_dec32le_aligned(data + 52);
+	mv[14] = sph_dec32le_aligned(data + 56);
+	mv[15] = sph_dec32le_aligned(data + 60);
+#define M(x)    (mv[x])
+#endif
+#define H(x)    (h[x])
+#define dH(x)   (dh[x])
+
+	FOLDs;	/* the whole compression; reads M and H, writes dH */
+
+#undef M
+#undef H
+#undef dH
+}
+/* Constant chaining value passed to the last compress_small() call in bmw32_close(). */
+static const sph_u32 final_s[16] = {
+	SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
+	SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
+	SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
+	SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
+	SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
+	SPH_C32(0xaaaaaaaf)
+};
+/* Reset a small context: copy iv into the chaining value, empty the buffer, zero the bit counter. */
+static void
+bmw32_init(sph_bmw_small_context *sc, const sph_u32 *iv)
+{
+	memcpy(sc->H, iv, sizeof sc->H);	/* iv must provide sizeof sc->H bytes */
+	sc->ptr = 0;
+#if SPH_64
+	sc->bit_count = 0;
+#else
+	sc->bit_count_high = 0;
+	sc->bit_count_low = 0;
+#endif
+}
+/* Absorb len bytes from data: buffer them in sc->buf and compress each time the buffer fills. */
+static void
+bmw32(sph_bmw_small_context *sc, const void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr;
+	sph_u32 htmp[16];	/* scratch chaining value, alternates with sc->H */
+	sph_u32 *h1, *h2;	/* h1: current chaining value; h2: output of the next compression */
+#if !SPH_64
+	sph_u32 tmp;
+#endif
+
+#if SPH_64
+	sc->bit_count += (sph_u64)len << 3;	/* the counter is kept in bits */
+#else
+	tmp = sc->bit_count_low;
+	sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
+	if (sc->bit_count_low < tmp)	/* low word wrapped: carry into the high word */
+		sc->bit_count_high ++;
+	sc->bit_count_high += len >> 29;	/* part of len * 8 above the low 32 bits */
+#endif
+	buf = sc->buf;
+	ptr = sc->ptr;
+	h1 = sc->H;
+	h2 = htmp;
+	while (len > 0) {
+		size_t clen;
+
+		clen = (sizeof sc->buf) - ptr;	/* room left in the buffer */
+		if (clen > len)
+			clen = len;
+		memcpy(buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr == sizeof sc->buf) {	/* buffer full: compress it */
+			sph_u32 *ht;
+
+			compress_small(buf, h1, h2);
+			ht = h1;	/* swap: the fresh output becomes the next input */
+			h1 = h2;
+			h2 = ht;
+			ptr = 0;
+		}
+	}
+	sc->ptr = ptr;
+	if (h1 != sc->H)	/* latest value ended up in htmp: copy it back */
+		memcpy(sc->H, h1, sizeof sc->H);
+}
+/* Finish: append n extra bits of ub plus padding and the bit length, compress, run one more compression with final_s, write out_size_w32 words to dst. */
+static void
+bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
+	void *dst, size_t out_size_w32)
+{
+	unsigned char *buf, *out;
+	size_t ptr, u, v;
+	unsigned z;
+	sph_u32 h1[16], h2[16], *h;
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+	z = 0x80 >> n;	/* position of the first padding bit inside the byte */
+	buf[ptr ++] = ((ub & -z) | z) & 0xFF;	/* keep the top n bits of ub, set the next bit, clear the rest */
+	h = sc->H;
+	if (ptr > (sizeof sc->buf) - 8) {	/* no room left for the 8-byte length: flush an extra block */
+		memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+		compress_small(buf, h, h1);
+		ptr = 0;
+		h = h1;
+	}
+	memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);	/* zero-fill up to the length field */
+#if SPH_64
+	sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
+		SPH_T64(sc->bit_count + n));
+#else
+	sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
+		sc->bit_count_low + n);
+	sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
+		SPH_T32(sc->bit_count_high));
+#endif
+	compress_small(buf, h, h2);
+	for (u = 0; u < 16; u ++)	/* re-encode h2 as the message block of the last compression */
+		sph_enc32le_aligned(buf + 4 * u, h2[u]);
+	compress_small(buf, final_s, h1);
+	out = dst;
+	for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)	/* output the last out_size_w32 words of h1 */
+		sph_enc32le(out + 4 * u, h1[v]);
+}
+
+#endif // !AVX2
+
+#if SPH_64
+/* One 64-bit compression: the 128 bytes at data and the chaining value h produce dh. */
+static void
+compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
+{
+#if SPH_LITTLE_FAST
+#define M(x)    sph_dec64le_aligned(data + 8 * (x))	/* decode each message word on demand */
+#else
+	sph_u64 mv[16];	/* the message block, decoded once into 16 words */
+
+	mv[ 0] = sph_dec64le_aligned(data +   0);
+	mv[ 1] = sph_dec64le_aligned(data +   8);
+	mv[ 2] = sph_dec64le_aligned(data +  16);
+	mv[ 3] = sph_dec64le_aligned(data +  24);
+	mv[ 4] = sph_dec64le_aligned(data +  32);
+	mv[ 5] = sph_dec64le_aligned(data +  40);
+	mv[ 6] = sph_dec64le_aligned(data +  48);
+	mv[ 7] = sph_dec64le_aligned(data +  56);
+	mv[ 8] = sph_dec64le_aligned(data +  64);
+	mv[ 9] = sph_dec64le_aligned(data +  72);
+	mv[10] = sph_dec64le_aligned(data +  80);
+	mv[11] = sph_dec64le_aligned(data +  88);
+	mv[12] = sph_dec64le_aligned(data +  96);
+	mv[13] = sph_dec64le_aligned(data + 104);
+	mv[14] = sph_dec64le_aligned(data + 112);
+	mv[15] = sph_dec64le_aligned(data + 120);
+#define M(x)    (mv[x])
+#endif
+#define H(x)    (h[x])
+#define dH(x)   (dh[x])
+
+	FOLDb;	/* the whole compression; reads M and H, writes dH */
+
+#undef M
+#undef H
+#undef dH
+}
+/* Constant chaining value passed to the last compress_big() call in bmw64_close(). */
+static const sph_u64 final_b[16] = {
+	SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
+	SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
+	SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
+	SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
+	SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
+	SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
+	SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
+	SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
+};
+/* Reset a big context: copy iv into the chaining value, empty the buffer, zero the bit counter. */
+static void
+bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv)
+{
+	memcpy(sc->H, iv, sizeof sc->H);	/* iv must provide sizeof sc->H bytes */
+	sc->ptr = 0;
+	sc->bit_count = 0;
+}
+/* Absorb len bytes from data: buffer them in sc->buf and compress each time the buffer fills. */
+static void
+bmw64(sph_bmw_big_context *sc, const void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr;
+	sph_u64 htmp[16];	/* scratch chaining value, alternates with sc->H */
+	sph_u64 *h1, *h2;	/* h1: current chaining value; h2: output of the next compression */
+
+	sc->bit_count += (sph_u64)len << 3;	/* the counter is kept in bits */
+	buf = sc->buf;
+	ptr = sc->ptr;
+	h1 = sc->H;
+	h2 = htmp;
+	while (len > 0) {
+		size_t clen;
+
+		clen = (sizeof sc->buf) - ptr;	/* room left in the buffer */
+		if (clen > len)
+			clen = len;
+		memcpy(buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr == sizeof sc->buf) {	/* buffer full: compress it */
+			sph_u64 *ht;
+
+			compress_big(buf, h1, h2);
+			ht = h1;	/* swap: the fresh output becomes the next input */
+			h1 = h2;
+			h2 = ht;
+			ptr = 0;
+		}
+	}
+	sc->ptr = ptr;
+	if (h1 != sc->H)	/* latest value ended up in htmp: copy it back */
+		memcpy(sc->H, h1, sizeof sc->H);
+}
+/* Finish: append n extra bits of ub plus padding and the bit length, compress, run one more compression with final_b, write out_size_w64 words to dst. */
+static void
+bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
+	void *dst, size_t out_size_w64)
+{
+	unsigned char *buf, *out;
+	size_t ptr, u, v;
+	unsigned z;
+	sph_u64 h1[16], h2[16], *h;
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+	z = 0x80 >> n;	/* position of the first padding bit inside the byte */
+	buf[ptr ++] = ((ub & -z) | z) & 0xFF;	/* keep the top n bits of ub, set the next bit, clear the rest */
+	h = sc->H;
+	if (ptr > (sizeof sc->buf) - 8) {	/* no room left for the 8-byte length: flush an extra block */
+		memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+		compress_big(buf, h, h1);
+		ptr = 0;
+		h = h1;
+	}
+	memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);	/* zero-fill up to the length field */
+	sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
+		SPH_T64(sc->bit_count + n));
+	compress_big(buf, h, h2);
+	for (u = 0; u < 16; u ++)	/* re-encode h2 as the message block of the last compression */
+		sph_enc64le_aligned(buf + 8 * u, h2[u]);
+	compress_big(buf, final_b, h1);
+	out = dst;
+	for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)	/* output the last out_size_w64 words of h1 */
+		sph_enc64le(out + 8 * u, h1[v]);
+}
+
+#endif
+
+#if !defined(__AVX2__)
+
+/* see sph_bmw.h -- resets cc with IV224 as the initial chaining value */
+void
+sph_bmw224_init(void *cc)
+{
+	bmw32_init(cc, IV224);
+}
+
+/* see sph_bmw.h -- forwards len bytes of data to the shared bmw32() update routine */
+void
+sph_bmw224(void *cc, const void *data, size_t len)
+{
+	bmw32(cc, data, len);
+}
+
+/* see sph_bmw.h -- same as sph_bmw224_addbits_and_close() with zero extra bits */
+void
+sph_bmw224_close(void *cc, void *dst)
+{
+	sph_bmw224_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_bmw.h -- finishes the hash and writes 7 32-bit words (28 bytes) to dst */
+void
+sph_bmw224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	bmw32_close(cc, ub, n, dst, 7);
+	/* sph_bmw224_init(cc) is not called here: re-init cc before reusing it */
+}
+
+/* see sph_bmw.h -- resets cc with IV256 as the initial chaining value */
+void
+sph_bmw256_init(void *cc)
+{
+	bmw32_init(cc, IV256);
+}
+
+/* see sph_bmw.h -- forwards len bytes of data to the shared bmw32() update routine */
+void
+sph_bmw256(void *cc, const void *data, size_t len)
+{
+	bmw32(cc, data, len);
+}
+
+/* see sph_bmw.h -- same as sph_bmw256_addbits_and_close() with zero extra bits */
+void
+sph_bmw256_close(void *cc, void *dst)
+{
+	sph_bmw256_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_bmw.h -- finishes the hash and writes 8 32-bit words (32 bytes) to dst */
+void
+sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	bmw32_close(cc, ub, n, dst, 8);
+	/* sph_bmw256_init(cc) is not called here: re-init cc before reusing it */
+}
+
+#endif // !AVX2
+
+#if SPH_64
+
+/* see sph_bmw.h -- resets cc with IV384 as the initial chaining value */
+void
+sph_bmw384_init(void *cc)
+{
+	bmw64_init(cc, IV384);
+}
+
+/* see sph_bmw.h -- forwards len bytes of data to the shared bmw64() update routine */
+void
+sph_bmw384(void *cc, const void *data, size_t len)
+{
+	bmw64(cc, data, len);
+}
+
+/* see sph_bmw.h -- same as sph_bmw384_addbits_and_close() with zero extra bits */
+void
+sph_bmw384_close(void *cc, void *dst)
+{
+	sph_bmw384_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_bmw.h -- finishes the hash and writes 6 64-bit words (48 bytes) to dst */
+void
+sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	bmw64_close(cc, ub, n, dst, 6);
+	/* sph_bmw384_init(cc) is not called here: re-init cc before reusing it */
+}
+
+/* see sph_bmw.h -- resets cc with IV512 as the initial chaining value */
+void
+sph_bmw512_init(void *cc)
+{
+	bmw64_init(cc, IV512);
+}
+
+/* see sph_bmw.h -- forwards len bytes of data to the shared bmw64() update routine */
+void
+sph_bmw512(void *cc, const void *data, size_t len)
+{
+	bmw64(cc, data, len);
+}
+
+/* see sph_bmw.h -- same as sph_bmw512_addbits_and_close() with zero extra bits */
+void
+sph_bmw512_close(void *cc, void *dst)
+{
+	sph_bmw512_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_bmw.h -- finishes the hash and writes 8 64-bit words (64 bytes) to dst */
+void
+sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	bmw64_close(cc, ub, n, dst, 8);
+	/* sph_bmw512_init(cc) is not called here: re-init cc before reusing it */
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/crypto/ghostrider/sph_bmw.h
+++ b/src/crypto/ghostrider/sph_bmw.h
@@ -0,0 +1,337 @@
+/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * BMW interface. BMW (aka "Blue Midnight Wish") is a family of
+ * functions which differ by their output size; this implementation
+ * defines BMW for output sizes 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_bmw.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_BMW_H__
+#define SPH_BMW_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for BMW-224.
+ */
+#define SPH_SIZE_bmw224   224
+
+/**
+ * Output size (in bits) for BMW-256.
+ */
+#define SPH_SIZE_bmw256   256
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for BMW-384.
+ */
+#define SPH_SIZE_bmw384   384
+
+/**
+ * Output size (in bits) for BMW-512.
+ */
+#define SPH_SIZE_bmw512   512
+
+#endif
+
+/**
+ * This structure is a context for BMW-224 and BMW-256 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a BMW computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running BMW
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+
+#if !defined(__AVX2__)
+
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	size_t ptr;	/* index of the next free byte in buf */
+	sph_u32 H[16];	/* current chaining value */
+#if SPH_64
+	sph_u64 bit_count;	/* number of message bits absorbed so far */
+#else
+	sph_u32 bit_count_high, bit_count_low;	/* the same bit counter, split into two 32-bit halves */
+#endif
+#endif
+} sph_bmw_small_context;
+
+/**
+ * This structure is a context for BMW-224 computations. It is
+ * identical to the common <code>sph_bmw_small_context</code>.
+ */
+typedef sph_bmw_small_context sph_bmw224_context;
+
+/**
+ * This structure is a context for BMW-256 computations. It is
+ * identical to the common <code>sph_bmw_small_context</code>.
+ */
+typedef sph_bmw_small_context sph_bmw256_context;
+
+#endif // !AVX2
+
+#if SPH_64
+
+/**
+ * This structure is a context for BMW-384 and BMW-512 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a BMW computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running BMW
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[128];    /* first field, for alignment */
+	size_t ptr;	/* index of the next free byte in buf */
+	sph_u64 H[16];	/* current chaining value */
+	sph_u64 bit_count;	/* number of message bits absorbed so far */
+#endif
+} sph_bmw_big_context;
+
+/**
+ * This structure is a context for BMW-384 computations. It is
+ * identical to the common <code>sph_bmw_big_context</code>.
+ */
+typedef sph_bmw_big_context sph_bmw384_context;
+
+/**
+ * This structure is a context for BMW-512 computations. It is
+ * identical to the common <code>sph_bmw_big_context</code>.
+ */
+typedef sph_bmw_big_context sph_bmw512_context;
+
+#endif
+
+#if !defined(__AVX2__)
+
+/**
+ * Initialize a BMW-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the BMW-224 context (pointer to a
+ *             <code>sph_bmw224_context</code>)
+ */
+void sph_bmw224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BMW-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_bmw224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BMW-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is not
+ * reinitialized: call <code>sph_bmw224_init()</code> before reusing it.
+ *
+ * @param cc    the BMW-224 context
+ * @param dst   the destination buffer
+ */
+void sph_bmw224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is not reinitialized; call init again to reuse it.
+ *
+ * @param cc    the BMW-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_bmw224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a BMW-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the BMW-256 context (pointer to a
+ *             <code>sph_bmw256_context</code>)
+ */
+void sph_bmw256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BMW-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_bmw256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BMW-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is not
+ * reinitialized: call <code>sph_bmw256_init()</code> before reusing it.
+ *
+ * @param cc    the BMW-256 context
+ * @param dst   the destination buffer
+ */
+void sph_bmw256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is not reinitialized; call init again to reuse it.
+ *
+ * @param cc    the BMW-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_bmw256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif // !AVX2
+
+#if SPH_64
+
+/**
+ * Initialize a BMW-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the BMW-384 context (pointer to a
+ *             <code>sph_bmw384_context</code>)
+ */
+void sph_bmw384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BMW-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_bmw384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BMW-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is not
+ * reinitialized: call <code>sph_bmw384_init()</code> before reusing it.
+ *
+ * @param cc    the BMW-384 context
+ * @param dst   the destination buffer
+ */
+void sph_bmw384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is not reinitialized; call init again to reuse it.
+ *
+ * @param cc    the BMW-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_bmw384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a BMW-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the BMW-512 context (pointer to a
+ *             <code>sph_bmw512_context</code>)
+ */
+void sph_bmw512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the BMW-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_bmw512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current BMW-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is not
+ * reinitialized: call <code>sph_bmw512_init()</code> before reusing it.
+ *
+ * @param cc    the BMW-512 context
+ * @param dst   the destination buffer
+ */
+void sph_bmw512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 down to 8-n (this is the big-endian convention at the byte
+ * level). The context is not reinitialized; call init again to reuse it.
+ *
+ * @param cc    the BMW-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_bmw512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_cubehash.c
+++ b/src/crypto/ghostrider/sph_cubehash.c
@@ -0,0 +1,723 @@
+/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * CubeHash implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#include "sph_cubehash.h"
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+/*
+ * Tuning knobs. Each of them may be predefined by the build; otherwise
+ * a default is selected below, depending on SPH_SMALL_FOOTPRINT_CUBEHASH
+ * (which itself defaults to 1 when SPH_SMALL_FOOTPRINT is set):
+ *  -- SPH_CUBEHASH_UNROLL: 2, 4 or 8 selects a loop whose body holds
+ *     that many rounds; any other value (including the default 0)
+ *     selects the fully unrolled sequence of 16 rounds.
+ *  -- SPH_CUBEHASH_NOCOPY: when non-zero, the round macros work
+ *     directly on the context state array instead of on local copies.
+ */
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
+#define SPH_SMALL_FOOTPRINT_CUBEHASH   1
+#endif
+
+/*
+ * Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
+ * mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
+ * It appears that the optimal settings are:
+ *  -- full unroll, no state copy on the "big" systems (x86, PowerPC)
+ *  -- unroll to 4 or 8, state copy on the "small" system (MIPS)
+ *
+ * NOTE(review): the defaults below select NOCOPY=1 (no local copy) for
+ * the small-footprint build and NOCOPY=0 (local copy) otherwise, which
+ * reads as the opposite of the "state copy" wording above -- confirm
+ * which one is intended before changing either.
+ */
+
+#if SPH_SMALL_FOOTPRINT_CUBEHASH
+
+#if !defined SPH_CUBEHASH_UNROLL
+#define SPH_CUBEHASH_UNROLL   4
+#endif
+#if !defined SPH_CUBEHASH_NOCOPY
+#define SPH_CUBEHASH_NOCOPY   1
+#endif
+
+#else
+
+#if !defined SPH_CUBEHASH_UNROLL
+#define SPH_CUBEHASH_UNROLL   0
+#endif
+#if !defined SPH_CUBEHASH_NOCOPY
+#define SPH_CUBEHASH_NOCOPY   0
+#endif
+
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * Initial state (32 words) for CubeHash-224. It is copied verbatim into
+ * the context state by sph_cubehash224_init(), through cubehash_init().
+ */
+static const sph_u32 IV224[] = {
+	SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
+	SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
+	SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
+	SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
+	SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
+	SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
+	SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
+	SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
+	SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
+	SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
+	SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
+};
+
+/*
+ * Initial state (32 words) for CubeHash-256. It is copied verbatim into
+ * the context state by sph_cubehash256_init(), through cubehash_init().
+ */
+static const sph_u32 IV256[] = {
+	SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
+	SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
+	SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
+	SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
+	SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
+	SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
+	SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
+	SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
+	SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
+	SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
+	SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
+};
+
+/*
+ * Initial state (32 words) for CubeHash-384. It is copied verbatim into
+ * the context state by sph_cubehash384_init(), through cubehash_init().
+ */
+static const sph_u32 IV384[] = {
+	SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
+	SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
+	SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
+	SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
+	SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
+	SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
+	SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
+	SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
+	SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
+	SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
+	SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
+};
+
+/*
+ * Initial state (32 words) for CubeHash-512. It is copied verbatim into
+ * the context state by sph_cubehash512_init(), through cubehash_init().
+ */
+static const sph_u32 IV512[] = {
+	SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
+	SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
+	SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
+	SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
+	SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
+	SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
+	SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
+	SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
+	SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
+	SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
+	SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
+};
+
+/* Short local aliases for the SPH_T32 / SPH_ROTL32 helpers used below. */
+#define T32      SPH_T32
+#define ROTL32   SPH_ROTL32
+
+/*
+ * State access layer. The round macros below are written in terms of
+ * the 32 names x0..x9, xa..xv; this section decides what those names
+ * are:
+ *  -- SPH_CUBEHASH_NOCOPY set: x0..xv are macros that expand to
+ *     (sc)->state[0..31], so DECL_STATE, READ_STATE() and WRITE_STATE()
+ *     expand to nothing. Note that these macros hard-code the name
+ *     "sc" and ignore the READ_STATE/WRITE_STATE argument, hence any
+ *     function using them must call its context pointer "sc".
+ *  -- otherwise: DECL_STATE declares 32 local sph_u32 variables,
+ *     READ_STATE(cc) loads them from (cc)->state[] and WRITE_STATE(cc)
+ *     stores them back.
+ */
+#if SPH_CUBEHASH_NOCOPY
+
+#define DECL_STATE
+#define READ_STATE(cc)
+#define WRITE_STATE(cc)
+
+#define x0   ((sc)->state[ 0])
+#define x1   ((sc)->state[ 1])
+#define x2   ((sc)->state[ 2])
+#define x3   ((sc)->state[ 3])
+#define x4   ((sc)->state[ 4])
+#define x5   ((sc)->state[ 5])
+#define x6   ((sc)->state[ 6])
+#define x7   ((sc)->state[ 7])
+#define x8   ((sc)->state[ 8])
+#define x9   ((sc)->state[ 9])
+#define xa   ((sc)->state[10])
+#define xb   ((sc)->state[11])
+#define xc   ((sc)->state[12])
+#define xd   ((sc)->state[13])
+#define xe   ((sc)->state[14])
+#define xf   ((sc)->state[15])
+#define xg   ((sc)->state[16])
+#define xh   ((sc)->state[17])
+#define xi   ((sc)->state[18])
+#define xj   ((sc)->state[19])
+#define xk   ((sc)->state[20])
+#define xl   ((sc)->state[21])
+#define xm   ((sc)->state[22])
+#define xn   ((sc)->state[23])
+#define xo   ((sc)->state[24])
+#define xp   ((sc)->state[25])
+#define xq   ((sc)->state[26])
+#define xr   ((sc)->state[27])
+#define xs   ((sc)->state[28])
+#define xt   ((sc)->state[29])
+#define xu   ((sc)->state[30])
+#define xv   ((sc)->state[31])
+
+#else
+
+/* Local working copy of the 32 state words. */
+#define DECL_STATE \
+	sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
+	sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
+	sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
+	sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
+
+/* Load the local working copy from the context state array. */
+#define READ_STATE(cc)   do { \
+		x0 = (cc)->state[ 0]; \
+		x1 = (cc)->state[ 1]; \
+		x2 = (cc)->state[ 2]; \
+		x3 = (cc)->state[ 3]; \
+		x4 = (cc)->state[ 4]; \
+		x5 = (cc)->state[ 5]; \
+		x6 = (cc)->state[ 6]; \
+		x7 = (cc)->state[ 7]; \
+		x8 = (cc)->state[ 8]; \
+		x9 = (cc)->state[ 9]; \
+		xa = (cc)->state[10]; \
+		xb = (cc)->state[11]; \
+		xc = (cc)->state[12]; \
+		xd = (cc)->state[13]; \
+		xe = (cc)->state[14]; \
+		xf = (cc)->state[15]; \
+		xg = (cc)->state[16]; \
+		xh = (cc)->state[17]; \
+		xi = (cc)->state[18]; \
+		xj = (cc)->state[19]; \
+		xk = (cc)->state[20]; \
+		xl = (cc)->state[21]; \
+		xm = (cc)->state[22]; \
+		xn = (cc)->state[23]; \
+		xo = (cc)->state[24]; \
+		xp = (cc)->state[25]; \
+		xq = (cc)->state[26]; \
+		xr = (cc)->state[27]; \
+		xs = (cc)->state[28]; \
+		xt = (cc)->state[29]; \
+		xu = (cc)->state[30]; \
+		xv = (cc)->state[31]; \
+	} while (0)
+
+/* Store the local working copy back into the context state array. */
+#define WRITE_STATE(cc)   do { \
+		(cc)->state[ 0] = x0; \
+		(cc)->state[ 1] = x1; \
+		(cc)->state[ 2] = x2; \
+		(cc)->state[ 3] = x3; \
+		(cc)->state[ 4] = x4; \
+		(cc)->state[ 5] = x5; \
+		(cc)->state[ 6] = x6; \
+		(cc)->state[ 7] = x7; \
+		(cc)->state[ 8] = x8; \
+		(cc)->state[ 9] = x9; \
+		(cc)->state[10] = xa; \
+		(cc)->state[11] = xb; \
+		(cc)->state[12] = xc; \
+		(cc)->state[13] = xd; \
+		(cc)->state[14] = xe; \
+		(cc)->state[15] = xf; \
+		(cc)->state[16] = xg; \
+		(cc)->state[17] = xh; \
+		(cc)->state[18] = xi; \
+		(cc)->state[19] = xj; \
+		(cc)->state[20] = xk; \
+		(cc)->state[21] = xl; \
+		(cc)->state[22] = xm; \
+		(cc)->state[23] = xn; \
+		(cc)->state[24] = xo; \
+		(cc)->state[25] = xp; \
+		(cc)->state[26] = xq; \
+		(cc)->state[27] = xr; \
+		(cc)->state[28] = xs; \
+		(cc)->state[29] = xt; \
+		(cc)->state[30] = xu; \
+		(cc)->state[31] = xv; \
+	} while (0)
+
+#endif
+
+/*
+ * Absorb one 32-byte input block: the eight 32-bit words read from
+ * buf[0..31] are XORed into x0..x7. This macro expects a local pointer
+ * named "buf" to be in scope.
+ * NOTE(review): sph_dec32le_aligned() is declared outside this file;
+ * its name suggests a little-endian load from an aligned address --
+ * confirm against sph_types.h.
+ */
+#define INPUT_BLOCK   do { \
+		x0 ^= sph_dec32le_aligned(buf +  0); \
+		x1 ^= sph_dec32le_aligned(buf +  4); \
+		x2 ^= sph_dec32le_aligned(buf +  8); \
+		x3 ^= sph_dec32le_aligned(buf + 12); \
+		x4 ^= sph_dec32le_aligned(buf + 16); \
+		x5 ^= sph_dec32le_aligned(buf + 20); \
+		x6 ^= sph_dec32le_aligned(buf + 24); \
+		x7 ^= sph_dec32le_aligned(buf + 28); \
+	} while (0)
+
+/*
+ * One round, to be used at even positions (each ROUND_EVEN is followed
+ * by a ROUND_ODD in every SIXTEEN_ROUNDS variant). It runs four phases
+ * over the 16 "low" words x0..xf and the 16 "high" words xg..xv:
+ *  1. each high word receives T32(low + high), pairing x0 with xg,
+ *     x1 with xh, ... xf with xv; each low word is rotated left by 7;
+ *  2. high words are XORed into low words with the two low halves
+ *     exchanged (x8..xf take xg..xn, x0..x7 take xo..xv);
+ *  3. a second addition into the high words, now with the high words
+ *     paired in swapped couples (x8 with xi, x9 with xj, xa with xg,
+ *     ...); each low word is rotated left by 11;
+ *  4. a second XOR of high words into low words.
+ * No word is ever physically moved: the exchanges are expressed purely
+ * by which variable names are paired on each line, so the statement
+ * order and pairing below must not be altered.
+ * NOTE(review): presumably these pairings implement the swap steps of
+ * the CubeHash round function -- verify against the specification.
+ */
+#define ROUND_EVEN   do { \
+		xg = T32(x0 + xg); \
+		x0 = ROTL32(x0, 7); \
+		xh = T32(x1 + xh); \
+		x1 = ROTL32(x1, 7); \
+		xi = T32(x2 + xi); \
+		x2 = ROTL32(x2, 7); \
+		xj = T32(x3 + xj); \
+		x3 = ROTL32(x3, 7); \
+		xk = T32(x4 + xk); \
+		x4 = ROTL32(x4, 7); \
+		xl = T32(x5 + xl); \
+		x5 = ROTL32(x5, 7); \
+		xm = T32(x6 + xm); \
+		x6 = ROTL32(x6, 7); \
+		xn = T32(x7 + xn); \
+		x7 = ROTL32(x7, 7); \
+		xo = T32(x8 + xo); \
+		x8 = ROTL32(x8, 7); \
+		xp = T32(x9 + xp); \
+		x9 = ROTL32(x9, 7); \
+		xq = T32(xa + xq); \
+		xa = ROTL32(xa, 7); \
+		xr = T32(xb + xr); \
+		xb = ROTL32(xb, 7); \
+		xs = T32(xc + xs); \
+		xc = ROTL32(xc, 7); \
+		xt = T32(xd + xt); \
+		xd = ROTL32(xd, 7); \
+		xu = T32(xe + xu); \
+		xe = ROTL32(xe, 7); \
+		xv = T32(xf + xv); \
+		xf = ROTL32(xf, 7); \
+		x8 ^= xg; \
+		x9 ^= xh; \
+		xa ^= xi; \
+		xb ^= xj; \
+		xc ^= xk; \
+		xd ^= xl; \
+		xe ^= xm; \
+		xf ^= xn; \
+		x0 ^= xo; \
+		x1 ^= xp; \
+		x2 ^= xq; \
+		x3 ^= xr; \
+		x4 ^= xs; \
+		x5 ^= xt; \
+		x6 ^= xu; \
+		x7 ^= xv; \
+		xi = T32(x8 + xi); \
+		x8 = ROTL32(x8, 11); \
+		xj = T32(x9 + xj); \
+		x9 = ROTL32(x9, 11); \
+		xg = T32(xa + xg); \
+		xa = ROTL32(xa, 11); \
+		xh = T32(xb + xh); \
+		xb = ROTL32(xb, 11); \
+		xm = T32(xc + xm); \
+		xc = ROTL32(xc, 11); \
+		xn = T32(xd + xn); \
+		xd = ROTL32(xd, 11); \
+		xk = T32(xe + xk); \
+		xe = ROTL32(xe, 11); \
+		xl = T32(xf + xl); \
+		xf = ROTL32(xf, 11); \
+		xq = T32(x0 + xq); \
+		x0 = ROTL32(x0, 11); \
+		xr = T32(x1 + xr); \
+		x1 = ROTL32(x1, 11); \
+		xo = T32(x2 + xo); \
+		x2 = ROTL32(x2, 11); \
+		xp = T32(x3 + xp); \
+		x3 = ROTL32(x3, 11); \
+		xu = T32(x4 + xu); \
+		x4 = ROTL32(x4, 11); \
+		xv = T32(x5 + xv); \
+		x5 = ROTL32(x5, 11); \
+		xs = T32(x6 + xs); \
+		x6 = ROTL32(x6, 11); \
+		xt = T32(x7 + xt); \
+		x7 = ROTL32(x7, 11); \
+		xc ^= xi; \
+		xd ^= xj; \
+		xe ^= xg; \
+		xf ^= xh; \
+		x8 ^= xm; \
+		x9 ^= xn; \
+		xa ^= xk; \
+		xb ^= xl; \
+		x4 ^= xq; \
+		x5 ^= xr; \
+		x6 ^= xo; \
+		x7 ^= xp; \
+		x0 ^= xu; \
+		x1 ^= xv; \
+		x2 ^= xs; \
+		x3 ^= xt; \
+	} while (0)
+
+/*
+ * One round, to be used at odd positions (always right after a
+ * ROUND_EVEN). It has the same four-phase shape as ROUND_EVEN -- add
+ * low words into high words and rotate low words by 7, XOR high into
+ * low, add again and rotate by 11, XOR again -- but with a different
+ * pairing of variable names: it starts from the layout ROUND_EVEN
+ * leaves behind (xc paired with xj, xd with xi, ...) and its last
+ * XOR phase pairs x0..xf with the high words in swapped couples
+ * (x0 with xh, x1 with xg, x2 with xj, ...).
+ * As in ROUND_EVEN, no word is physically moved; the statement order
+ * and pairing below must not be altered.
+ * NOTE(review): presumably an EVEN/ODD pair returns every word to its
+ * natural variable, as the comment on SIXTEEN_ROUNDS implies -- verify
+ * against the specification.
+ */
+#define ROUND_ODD   do { \
+		xj = T32(xc + xj); \
+		xc = ROTL32(xc, 7); \
+		xi = T32(xd + xi); \
+		xd = ROTL32(xd, 7); \
+		xh = T32(xe + xh); \
+		xe = ROTL32(xe, 7); \
+		xg = T32(xf + xg); \
+		xf = ROTL32(xf, 7); \
+		xn = T32(x8 + xn); \
+		x8 = ROTL32(x8, 7); \
+		xm = T32(x9 + xm); \
+		x9 = ROTL32(x9, 7); \
+		xl = T32(xa + xl); \
+		xa = ROTL32(xa, 7); \
+		xk = T32(xb + xk); \
+		xb = ROTL32(xb, 7); \
+		xr = T32(x4 + xr); \
+		x4 = ROTL32(x4, 7); \
+		xq = T32(x5 + xq); \
+		x5 = ROTL32(x5, 7); \
+		xp = T32(x6 + xp); \
+		x6 = ROTL32(x6, 7); \
+		xo = T32(x7 + xo); \
+		x7 = ROTL32(x7, 7); \
+		xv = T32(x0 + xv); \
+		x0 = ROTL32(x0, 7); \
+		xu = T32(x1 + xu); \
+		x1 = ROTL32(x1, 7); \
+		xt = T32(x2 + xt); \
+		x2 = ROTL32(x2, 7); \
+		xs = T32(x3 + xs); \
+		x3 = ROTL32(x3, 7); \
+		x4 ^= xj; \
+		x5 ^= xi; \
+		x6 ^= xh; \
+		x7 ^= xg; \
+		x0 ^= xn; \
+		x1 ^= xm; \
+		x2 ^= xl; \
+		x3 ^= xk; \
+		xc ^= xr; \
+		xd ^= xq; \
+		xe ^= xp; \
+		xf ^= xo; \
+		x8 ^= xv; \
+		x9 ^= xu; \
+		xa ^= xt; \
+		xb ^= xs; \
+		xh = T32(x4 + xh); \
+		x4 = ROTL32(x4, 11); \
+		xg = T32(x5 + xg); \
+		x5 = ROTL32(x5, 11); \
+		xj = T32(x6 + xj); \
+		x6 = ROTL32(x6, 11); \
+		xi = T32(x7 + xi); \
+		x7 = ROTL32(x7, 11); \
+		xl = T32(x0 + xl); \
+		x0 = ROTL32(x0, 11); \
+		xk = T32(x1 + xk); \
+		x1 = ROTL32(x1, 11); \
+		xn = T32(x2 + xn); \
+		x2 = ROTL32(x2, 11); \
+		xm = T32(x3 + xm); \
+		x3 = ROTL32(x3, 11); \
+		xp = T32(xc + xp); \
+		xc = ROTL32(xc, 11); \
+		xo = T32(xd + xo); \
+		xd = ROTL32(xd, 11); \
+		xr = T32(xe + xr); \
+		xe = ROTL32(xe, 11); \
+		xq = T32(xf + xq); \
+		xf = ROTL32(xf, 11); \
+		xt = T32(x8 + xt); \
+		x8 = ROTL32(x8, 11); \
+		xs = T32(x9 + xs); \
+		x9 = ROTL32(x9, 11); \
+		xv = T32(xa + xv); \
+		xa = ROTL32(xa, 11); \
+		xu = T32(xb + xu); \
+		xb = ROTL32(xb, 11); \
+		x0 ^= xh; \
+		x1 ^= xg; \
+		x2 ^= xj; \
+		x3 ^= xi; \
+		x4 ^= xl; \
+		x5 ^= xk; \
+		x6 ^= xn; \
+		x7 ^= xm; \
+		x8 ^= xp; \
+		x9 ^= xo; \
+		xa ^= xr; \
+		xb ^= xq; \
+		xc ^= xt; \
+		xd ^= xs; \
+		xe ^= xv; \
+		xf ^= xu; \
+	} while (0)
+
+/*
+ * There is no need to unroll all 16 rounds. The word-swapping permutation
+ * is an involution, so we need to unroll an even number of rounds. On
+ * "big" systems, unrolling 4 rounds yields about 97% of the speed
+ * achieved with full unrolling; and it keeps the code more compact
+ * for small architectures.
+ *
+ * SIXTEEN_ROUNDS always executes eight ROUND_EVEN/ROUND_ODD pairs.
+ * SPH_CUBEHASH_UNROLL selects how they are laid out:
+ *  -- 2: a loop of 8 iterations, one pair per iteration;
+ *  -- 4: a loop of 4 iterations, two pairs per iteration;
+ *  -- 8: a loop of 2 iterations, four pairs per iteration;
+ *  -- any other value: straight-line code, no loop.
+ * The looping variants declare their own counter "j" inside the
+ * do { } while (0) scope.
+ */
+
+#if SPH_CUBEHASH_UNROLL == 2
+
+#define SIXTEEN_ROUNDS   do { \
+		int j; \
+		for (j = 0; j < 8; j ++) { \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+		} \
+	} while (0)
+
+#elif SPH_CUBEHASH_UNROLL == 4
+
+#define SIXTEEN_ROUNDS   do { \
+		int j; \
+		for (j = 0; j < 4; j ++) { \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+		} \
+	} while (0)
+
+#elif SPH_CUBEHASH_UNROLL == 8
+
+#define SIXTEEN_ROUNDS   do { \
+		int j; \
+		for (j = 0; j < 2; j ++) { \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+		} \
+	} while (0)
+
+#else
+
+#define SIXTEEN_ROUNDS   do { \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+	} while (0)
+
+#endif
+
+/*
+ * Put a context at the start of a fresh computation: mark the input
+ * buffer as empty and load the whole state array from the initial
+ * value table iv (which must provide as many words as sc->state).
+ */
+static void
+cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv)
+{
+	sc->ptr = 0;
+	memcpy(sc->state, iv, sizeof(sc->state));
+}
+
+/*
+ * Absorb len bytes from data into the running computation.
+ *
+ * Input is staged in sc->buf. Whenever the buffer becomes full, the
+ * block is XORed into the state (INPUT_BLOCK) and sixteen rounds are
+ * applied; leftover bytes stay buffered, with their count in sc->ptr,
+ * until the next call or until the computation is closed.
+ *
+ * sc     the context (the name "sc" is required by the state macros)
+ * data   the input bytes; may be a null pointer only when len is zero
+ * len    the number of input bytes
+ */
+static void
+cubehash_core(sph_cubehash_context *sc, const void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr;
+	DECL_STATE
+
+	/*
+	 * The public API documents a zero length as "does nothing". Return
+	 * before data is handed to memcpy(): passing an invalid (e.g. null)
+	 * pointer to memcpy() is undefined behaviour even when the byte
+	 * count is zero.
+	 */
+	if (len == 0)
+		return;
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+
+	/* Fast path: the input does not complete a block, just buffer it. */
+	if (len < (sizeof sc->buf) - ptr) {
+		memcpy(buf + ptr, data, len);
+		ptr += len;
+		sc->ptr = ptr;
+		return;
+	}
+
+	READ_STATE(sc);
+	while (len > 0) {
+		size_t clen;
+
+		/* Copy as much as fits in the remaining buffer space. */
+		clen = (sizeof sc->buf) - ptr;
+		if (clen > len)
+			clen = len;
+		memcpy(buf + ptr, data, clen);
+		ptr += clen;
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		/* A full block is available: absorb it and run the rounds. */
+		if (ptr == sizeof sc->buf) {
+			INPUT_BLOCK;
+			SIXTEEN_ROUNDS;
+			ptr = 0;
+		}
+	}
+	WRITE_STATE(sc);
+	sc->ptr = ptr;
+}
+
+/*
+ * Terminate a computation and write the digest.
+ *
+ * The n extra bits taken from the top of ub, followed by a single 1
+ * bit, form the padding byte; the rest of the buffer is zero-filled
+ * and the block is absorbed. Eleven groups of sixteen rounds are then
+ * run, with the value 1 XORed into the last state word right after
+ * the first group. Finally the first out_size_w32 state words are
+ * written to dst with sph_enc32le().
+ *
+ * The context is NOT reinitialized here; callers do that.
+ *
+ * sc             the context (name required by the state macros)
+ * ub             the extra bits
+ * n              the number of extra bits (0 to 7)
+ * dst            the destination buffer (4 * out_size_w32 bytes)
+ * out_size_w32   the digest length, in 32-bit words
+ */
+static void
+cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n,
+	void *dst, size_t out_size_w32)
+{
+	unsigned char *buf, *out;
+	size_t ptr, k;
+	unsigned pad;
+	int pass;
+	DECL_STATE
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+
+	/* Keep the n top bits of ub, then append the terminating 1 bit. */
+	pad = 0x80 >> n;
+	buf[ptr] = ((ub & -pad) | pad) & 0xFF;
+	ptr ++;
+	memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+
+	READ_STATE(sc);
+	INPUT_BLOCK;
+	for (pass = 0; pass < 11; pass ++) {
+		SIXTEEN_ROUNDS;
+		/* Only once, after the padded block has been processed. */
+		if (pass == 0)
+			xv ^= SPH_C32(1);
+	}
+	WRITE_STATE(sc);
+
+	out = dst;
+	for (k = 0; k < out_size_w32; k ++)
+		sph_enc32le(out + (k << 2), sc->state[k]);
+}
+
+/*
+ * see sph_cubehash.h
+ * Starts a CubeHash-224 computation: loads IV224 into the context state
+ * and empties its input buffer.
+ */
+void
+sph_cubehash224_init(void *cc)
+{
+	cubehash_init(cc, IV224);
+}
+
+/*
+ * see sph_cubehash.h
+ * Feeds len bytes into a CubeHash-224 computation. All output sizes
+ * share the same absorption routine, cubehash_core().
+ */
+void
+sph_cubehash224(void *cc, const void *data, size_t len)
+{
+	cubehash_core(cc, data, len);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-224 computation with no extra bits; this is
+ * sph_cubehash224_addbits_and_close() called with ub = 0 and n = 0.
+ */
+void
+sph_cubehash224_close(void *cc, void *dst)
+{
+	sph_cubehash224_addbits_and_close(cc, 0, 0, dst);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-224 computation with n extra bits from ub, writes
+ * 7 state words (28 bytes) to dst, then reinitializes the context for
+ * a new CubeHash-224 computation.
+ */
+void
+sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	cubehash_close(cc, ub, n, dst, 7);
+	sph_cubehash224_init(cc);
+}
+
+/*
+ * see sph_cubehash.h
+ * Starts a CubeHash-256 computation: loads IV256 into the context state
+ * and empties its input buffer.
+ */
+void
+sph_cubehash256_init(void *cc)
+{
+	cubehash_init(cc, IV256);
+}
+
+/*
+ * see sph_cubehash.h
+ * Feeds len bytes into a CubeHash-256 computation. All output sizes
+ * share the same absorption routine, cubehash_core().
+ */
+void
+sph_cubehash256(void *cc, const void *data, size_t len)
+{
+	cubehash_core(cc, data, len);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-256 computation with no extra bits; this is
+ * sph_cubehash256_addbits_and_close() called with ub = 0 and n = 0.
+ */
+void
+sph_cubehash256_close(void *cc, void *dst)
+{
+	sph_cubehash256_addbits_and_close(cc, 0, 0, dst);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-256 computation with n extra bits from ub, writes
+ * 8 state words (32 bytes) to dst, then reinitializes the context for
+ * a new CubeHash-256 computation.
+ */
+void
+sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	cubehash_close(cc, ub, n, dst, 8);
+	sph_cubehash256_init(cc);
+}
+
+/*
+ * see sph_cubehash.h
+ * Starts a CubeHash-384 computation: loads IV384 into the context state
+ * and empties its input buffer.
+ */
+void
+sph_cubehash384_init(void *cc)
+{
+	cubehash_init(cc, IV384);
+}
+
+/*
+ * see sph_cubehash.h
+ * Feeds len bytes into a CubeHash-384 computation. All output sizes
+ * share the same absorption routine, cubehash_core().
+ */
+void
+sph_cubehash384(void *cc, const void *data, size_t len)
+{
+	cubehash_core(cc, data, len);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-384 computation with no extra bits; this is
+ * sph_cubehash384_addbits_and_close() called with ub = 0 and n = 0.
+ */
+void
+sph_cubehash384_close(void *cc, void *dst)
+{
+	sph_cubehash384_addbits_and_close(cc, 0, 0, dst);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-384 computation with n extra bits from ub, writes
+ * 12 state words (48 bytes) to dst, then reinitializes the context for
+ * a new CubeHash-384 computation.
+ */
+void
+sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	cubehash_close(cc, ub, n, dst, 12);
+	sph_cubehash384_init(cc);
+}
+
+/*
+ * see sph_cubehash.h
+ * Starts a CubeHash-512 computation: loads IV512 into the context state
+ * and empties its input buffer.
+ */
+void
+sph_cubehash512_init(void *cc)
+{
+	cubehash_init(cc, IV512);
+}
+
+/*
+ * see sph_cubehash.h
+ * Feeds len bytes into a CubeHash-512 computation. All output sizes
+ * share the same absorption routine, cubehash_core().
+ */
+void
+sph_cubehash512(void *cc, const void *data, size_t len)
+{
+	cubehash_core(cc, data, len);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-512 computation with no extra bits; this is
+ * sph_cubehash512_addbits_and_close() called with ub = 0 and n = 0.
+ */
+void
+sph_cubehash512_close(void *cc, void *dst)
+{
+	sph_cubehash512_addbits_and_close(cc, 0, 0, dst);
+}
+
+/*
+ * see sph_cubehash.h
+ * Finishes a CubeHash-512 computation with n extra bits from ub, writes
+ * 16 state words (64 bytes) to dst, then reinitializes the context for
+ * a new CubeHash-512 computation.
+ */
+void
+sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	cubehash_close(cc, ub, n, dst, 16);
+	sph_cubehash512_init(cc);
+}
+#ifdef __cplusplus
+}
+#endif
--- a/src/crypto/ghostrider/sph_cubehash.h
+++ b/src/crypto/ghostrider/sph_cubehash.h
@@ -0,0 +1,292 @@
+/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */
+/**
+ * CubeHash interface. CubeHash is a family of functions which differ by
+ * their output size; this implementation defines CubeHash for output
+ * sizes 224, 256, 384 and 512 bits, with the "standard parameters"
+ * (CubeHash16/32 with the CubeHash specification notations).
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_cubehash.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_CUBEHASH_H__
+#define SPH_CUBEHASH_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for CubeHash-224.
+ */
+#define SPH_SIZE_cubehash224   224
+
+/**
+ * Output size (in bits) for CubeHash-256.
+ */
+#define SPH_SIZE_cubehash256   256
+
+/**
+ * Output size (in bits) for CubeHash-384.
+ */
+#define SPH_SIZE_cubehash384   384
+
+/**
+ * Output size (in bits) for CubeHash-512.
+ */
+#define SPH_SIZE_cubehash512   512
+
+/**
+ * This structure is a context for CubeHash computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a CubeHash computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running CubeHash computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	/* input bytes received but not yet absorbed (one 32-byte block) */
+	unsigned char buf[32];    /* first field, for alignment */
+	/* number of bytes currently held in buf */
+	size_t ptr;
+	/* the 32 state words; the digest is read from the first ones */
+	sph_u32 state[32];
+#endif
+} sph_cubehash_context;
+
+/**
+ * Type for a CubeHash-224 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash224_context;
+
+/**
+ * Type for a CubeHash-256 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash256_context;
+
+/**
+ * Type for a CubeHash-384 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash384_context;
+
+/**
+ * Type for a CubeHash-512 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash512_context;
+
+/**
+ * Initialize a CubeHash-224 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-224 context (pointer to a
+ *             <code>sph_cubehash224_context</code>)
+ */
+void sph_cubehash224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-224 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a CubeHash-256 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-256 context (pointer to a
+ *             <code>sph_cubehash256_context</code>)
+ */
+void sph_cubehash256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-256 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a CubeHash-384 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-384 context (pointer to a
+ *             <code>sph_cubehash384_context</code>)
+ */
+void sph_cubehash384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-384 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a CubeHash-512 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-512 context (pointer to a
+ *             <code>sph_cubehash512_context</code>)
+ */
+void sph_cubehash512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-512 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_echo.c
+++ b/src/crypto/ghostrider/sph_echo.c
--- a/src/crypto/ghostrider/sph_echo.h
+++ b/src/crypto/ghostrider/sph_echo.h
@@ -0,0 +1,319 @@
+/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * ECHO interface. ECHO is a family of functions which differ by
+ * their output size; this implementation defines ECHO for output
+ * sizes 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_echo.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_ECHO_H__
+#define SPH_ECHO_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for ECHO-224.
+ */
+#define SPH_SIZE_echo224   224
+
+/**
+ * Output size (in bits) for ECHO-256.
+ */
+#define SPH_SIZE_echo256   256
+
+/**
+ * Output size (in bits) for ECHO-384.
+ */
+#define SPH_SIZE_echo384   384
+
+/**
+ * Output size (in bits) for ECHO-512.
+ */
+#define SPH_SIZE_echo512   512
+
+/**
+ * This structure is a context for ECHO computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * an ECHO computation has been performed, the context can be reused for
+ * another computation. This specific structure is used for ECHO-224
+ * and ECHO-256.
+ *
+ * The contents of this structure are private. A running ECHO computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[192];    /* first field, for alignment */
+	size_t ptr;
+	union {
+		sph_u32 Vs[4][4];
+#if SPH_64
+		sph_u64 Vb[4][2];
+#endif
+	} u;
+	sph_u32 C0, C1, C2, C3;
+#endif
+} sph_echo_small_context;
+
+/**
+ * This structure is a context for ECHO computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * an ECHO computation has been performed, the context can be reused for
+ * another computation. This specific structure is used for ECHO-384
+ * and ECHO-512.
+ *
+ * The contents of this structure are private. A running ECHO computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[128];    /* first field, for alignment */
+	size_t ptr;
+	union {
+		sph_u32 Vs[8][4];
+#if SPH_64
+		sph_u64 Vb[8][2];
+#endif
+	} u;
+	sph_u32 C0, C1, C2, C3;
+#endif
+} sph_echo_big_context;
+
+/**
+ * Type for an ECHO-224 context (identical to the common "small" context).
+ */
+typedef sph_echo_small_context sph_echo224_context;
+
+/**
+ * Type for an ECHO-256 context (identical to the common "small" context).
+ */
+typedef sph_echo_small_context sph_echo256_context;
+
+/**
+ * Type for an ECHO-384 context (identical to the common "big" context).
+ */
+typedef sph_echo_big_context sph_echo384_context;
+
+/**
+ * Type for an ECHO-512 context (identical to the common "big" context).
+ */
+typedef sph_echo_big_context sph_echo512_context;
+
+/**
+ * Initialize an ECHO-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-224 context (pointer to a
+ *             <code>sph_echo224_context</code>)
+ */
+void sph_echo224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-224 context
+ * @param dst   the destination buffer
+ */
+void sph_echo224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize an ECHO-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-256 context (pointer to a
+ *             <code>sph_echo256_context</code>)
+ */
+void sph_echo256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-256 context
+ * @param dst   the destination buffer
+ */
+void sph_echo256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize an ECHO-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-384 context (pointer to a
+ *             <code>sph_echo384_context</code>)
+ */
+void sph_echo384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-384 context
+ * @param dst   the destination buffer
+ */
+void sph_echo384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize an ECHO-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-512 context (pointer to a
+ *             <code>sph_echo512_context</code>)
+ */
+void sph_echo512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-512 context
+ * @param dst   the destination buffer
+ */
+void sph_echo512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+	
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/src/crypto/ghostrider/sph_fugue.c
+++ b/src/crypto/ghostrider/sph_fugue.c
--- a/src/crypto/ghostrider/sph_fugue.h
+++ b/src/crypto/ghostrider/sph_fugue.h
@@ -0,0 +1,89 @@
+#ifndef SPH_FUGUE_H__
+#define SPH_FUGUE_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#define SPH_SIZE_fugue224   224   /* Fugue-224 output size, in bits */
+
+#define SPH_SIZE_fugue256   256   /* Fugue-256 output size, in bits */
+
+#define SPH_SIZE_fugue384   384   /* Fugue-384 output size, in bits */
+
+#define SPH_SIZE_fugue512   512   /* Fugue-512 output size, in bits */
+
+typedef struct {   /* Fugue context; the 224/256/384/512 context types all alias this one layout */
+#ifndef DOXYGEN_IGNORE   /* the fields are compiled out when DOXYGEN_IGNORE is defined */
+	sph_u32 partial;        /* NOTE(review): presumably input bits not yet forming a full word -- confirm in sph_fugue.c */
+	unsigned partial_len;   /* NOTE(review): presumably the amount of data held in 'partial' -- confirm */
+	unsigned round_shift;   /* NOTE(review): usage not visible from this header -- see sph_fugue.c */
+	sph_u32 S[36];          /* 36 state words of 32 bits each */
+#if SPH_64
+	sph_u64 bit_count;      /* one 64-bit counter when SPH_64 is non-zero */
+#else
+	sph_u32 bit_count_high, bit_count_low;   /* otherwise two 32-bit words (presumably the same counter, split) */
+#endif
+#endif
+} sph_fugue_context;
+
+typedef sph_fugue_context sph_fugue224_context;   /* Fugue-224 context: alias of the shared sph_fugue_context */
+
+typedef sph_fugue_context sph_fugue256_context;   /* Fugue-256 context: alias of the shared sph_fugue_context */
+
+typedef sph_fugue_context sph_fugue384_context;   /* Fugue-384 context: alias of the shared sph_fugue_context */
+
+typedef sph_fugue_context sph_fugue512_context;   /* Fugue-512 context: alias of the shared sph_fugue_context */
+
+void sph_fugue224_init(void *cc);   /* NOTE(review): bodies live in sph_fugue.c; semantics below are assumed to mirror the documented sph_echo* / sph_groestl* API -- confirm */
+
+void sph_fugue224(void *cc, const void *data, size_t len);   /* presumably absorbs len bytes from data */
+
+void sph_fugue224_close(void *cc, void *dst);   /* presumably writes the 28-byte (224-bit) result to dst */
+
+void sph_fugue224_addbits_and_close(   /* presumably closes after n (0..7) extra bits taken from ub */
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+void sph_fugue256_init(void *cc);   /* Fugue-256 counterpart of sph_fugue224_init */
+
+void sph_fugue256(void *cc, const void *data, size_t len);   /* presumably absorbs len bytes from data */
+
+void sph_fugue256_close(void *cc, void *dst);   /* presumably writes the 32-byte (256-bit) result to dst */
+
+void sph_fugue256_addbits_and_close(   /* presumably closes after n (0..7) extra bits taken from ub */
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+void sph_fugue384_init(void *cc);   /* Fugue-384 counterpart of sph_fugue224_init */
+
+void sph_fugue384(void *cc, const void *data, size_t len);   /* presumably absorbs len bytes from data */
+
+void sph_fugue384_close(void *cc, void *dst);   /* presumably writes the 48-byte (384-bit) result to dst */
+
+void sph_fugue384_addbits_and_close(   /* presumably closes after n (0..7) extra bits taken from ub */
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+void sph_fugue512_init(void *cc);   /* Fugue-512 counterpart of sph_fugue224_init */
+
+void sph_fugue512(void *cc, const void *data, size_t len);   /* presumably absorbs len bytes from data */
+
+void sph_fugue512_close(void *cc, void *dst);   /* presumably writes the 64-byte (512-bit) result to dst */
+
+void sph_fugue512_addbits_and_close(   /* presumably closes after n (0..7) extra bits taken from ub */
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#define sph_fugue512_full( cc, dst, data, len ) /* one-shot helper: init, update, close in sequence */ \
+do{ \
+   sph_fugue512_init( cc );        /* start a computation in cc */ \
+   sph_fugue512( cc, data, len );  /* pass the whole (data, len) input in one call */ \
+   sph_fugue512_close( cc, dst );  /* finish, handing dst to the close routine */ \
+}while(0)  /* cc is expanded three times: pass a side-effect-free expression */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_groestl.c
+++ b/src/crypto/ghostrider/sph_groestl.c
--- a/src/crypto/ghostrider/sph_groestl.h
+++ b/src/crypto/ghostrider/sph_groestl.h
@@ -0,0 +1,329 @@
+/* $Id: sph_groestl.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * Groestl interface. This code implements Groestl with the recommended
+ * parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_groestl.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_GROESTL_H__
+#define SPH_GROESTL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "sph_types.h"
+#include <stddef.h>
+
+/**
+ * Output size (in bits) for Groestl-224.
+ */
+#define SPH_SIZE_groestl224 224
+
+/**
+ * Output size (in bits) for Groestl-256.
+ */
+#define SPH_SIZE_groestl256 256
+
+/**
+ * Output size (in bits) for Groestl-384.
+ */
+#define SPH_SIZE_groestl384 384
+
+/**
+ * Output size (in bits) for Groestl-512.
+ */
+#define SPH_SIZE_groestl512 512
+
+/**
+ * This structure is a context for Groestl-224 and Groestl-256 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a Groestl computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running Groestl
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+  unsigned char buf[64]; /* first field, for alignment */
+  size_t ptr;
+  union {
+#if SPH_64
+    sph_u64 wide[8];
+#endif
+    sph_u32 narrow[16];
+  } state;
+#if SPH_64
+  sph_u64 count;
+#else
+  sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_groestl_small_context;
+
+/**
+ * This structure is a context for Groestl-224 computations. It is
+ * identical to the common <code>sph_groestl_small_context</code>.
+ */
+typedef sph_groestl_small_context sph_groestl224_context;
+
+/**
+ * This structure is a context for Groestl-256 computations. It is
+ * identical to the common <code>sph_groestl_small_context</code>.
+ */
+typedef sph_groestl_small_context sph_groestl256_context;
+
+/**
+ * This structure is a context for Groestl-384 and Groestl-512 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a Groestl computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running Groestl
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+  unsigned char buf[128]; /* first field, for alignment */
+  size_t ptr;
+  union {
+#if SPH_64
+    sph_u64 wide[16];
+#endif
+    sph_u32 narrow[32];
+  } state;
+#if SPH_64
+  sph_u64 count;
+#else
+  sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_groestl_big_context;
+
+/**
+ * This structure is a context for Groestl-384 computations. It is
+ * identical to the common <code>sph_groestl_big_context</code>.
+ */
+typedef sph_groestl_big_context sph_groestl384_context;
+
+/**
+ * This structure is a context for Groestl-512 computations. It is
+ * identical to the common <code>sph_groestl_big_context</code>.
+ */
+typedef sph_groestl_big_context sph_groestl512_context;
+
+/**
+ * Initialize a Groestl-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Groestl-224 context (pointer to a
+ *             <code>sph_groestl224_context</code>)
+ */
+void sph_groestl224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Groestl-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_groestl224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Groestl-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Groestl-224 context
+ * @param dst   the destination buffer
+ */
+void sph_groestl224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Groestl-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_groestl224_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                      void *dst);
+
+/**
+ * Initialize a Groestl-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Groestl-256 context (pointer to a
+ *             <code>sph_groestl256_context</code>)
+ */
+void sph_groestl256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Groestl-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_groestl256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Groestl-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Groestl-256 context
+ * @param dst   the destination buffer
+ */
+void sph_groestl256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Groestl-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_groestl256_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                      void *dst);
+
+/**
+ * Initialize a Groestl-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Groestl-384 context (pointer to a
+ *             <code>sph_groestl384_context</code>)
+ */
+void sph_groestl384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Groestl-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_groestl384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Groestl-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Groestl-384 context
+ * @param dst   the destination buffer
+ */
+void sph_groestl384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Groestl-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_groestl384_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                      void *dst);
+
+/**
+ * Initialize a Groestl-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Groestl-512 context (pointer to a
+ *             <code>sph_groestl512_context</code>)
+ */
+void sph_groestl512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Groestl-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_groestl512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Groestl-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Groestl-512 context
+ * @param dst   the destination buffer
+ */
+void sph_groestl512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Groestl-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                      void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_hamsi.c
+++ b/src/crypto/ghostrider/sph_hamsi.c
@@ -0,0 +1,867 @@
+/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
+/*
+ * Hamsi implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_hamsi.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
+#define SPH_SMALL_FOOTPRINT_HAMSI   1
+#endif
+
+/*
+ * The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
+ * table lookup during message expansion (1 to 8, inclusive). If we note
+ * w the number of bits per message word (w=32 for Hamsi-224/256, w=64
+ * for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
+ * Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
+ * then we will get t tables (where t=ceil(w/n)) of individual size
+ * 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
+ * n=5, there are 7 tables, but the last one uses only two bits on
+ * input, not five).
+ *
+ * Also, we read t rows of r words from RAM. Words in a given row are
+ * concatenated in RAM in that order, so most of the cost is about
+ * reading the first row word; comparatively, cache misses are thus
+ * less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
+ *
+ * When n=1, tables are "special" in that we omit the first entry of
+ * each table (which always contains 0), so that total table size is
+ * halved.
+ *
+ * We thus have the following (size1 is the cumulative table size of
+ * Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
+ * are for Hamsi-224/256 and Hamsi-384/512, respectively).
+ *
+ *   n      size1      size2    t1    t2
+ * ---------------------------------------
+ *   1       1024       4096    32    64
+ *   2       2048       8192    16    32
+ *   3       2688      10880    11    22
+ *   4       4096      16384     8    16
+ *   5       6272      25600     7    13
+ *   6      10368      41984     6    11
+ *   7      16896      73856     5    10
+ *   8      32768     131072     4     8
+ *
+ * So there is a trade-off: a lower n makes the tables fit better in
+ * L1 cache, but increases the number of memory accesses. The optimal
+ * value depends on the amount of available L1 cache and the relative
+ * impact of a cache miss.
+ *
+ * Experimentally, in ideal benchmark conditions (which are not necessarily
+ * realistic with regard to L1 cache contention), it seems that n=8 is
+ * the best value on "big" architectures (those with 32 kB or more of L1
+ * cache), while n=4 is better on "small" architectures. This was tested
+ * on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
+ * (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
+ * (8 kB L1 cache).
+ *
+ * Note: with n=1, the 32 tables (actually implemented as one big table)
+ * are read entirely and sequentially, regardless of the input data,
+ * thus avoiding any data-dependent table access pattern.
+ */
+
+#if !defined SPH_HAMSI_EXPAND_SMALL
+#if SPH_SMALL_FOOTPRINT_HAMSI
+#define SPH_HAMSI_EXPAND_SMALL  4
+#else
+#define SPH_HAMSI_EXPAND_SMALL  8
+#endif
+#endif
+
+#if !defined SPH_HAMSI_EXPAND_BIG
+#define SPH_HAMSI_EXPAND_BIG    8
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+#include "sph_hamsi_helper.c"
+
+static const sph_u32 IV224[] = {	/* 8 words = 32 bytes; presumably the Hamsi-224 initial value -- confirm against the spec */
+	SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
+	SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
+	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
+};	/* bytes, top byte of each word first, are the UTF-8 text "Ozgul Kucuk, Katholieke Uni" (diacritics dropped in this note) */
+
+/*
+ * This version is the one used in the Hamsi submission package for
+ * round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
+ * shall soon be corrected in the official Hamsi specification.
+ *
+static const sph_u32 IV224[] = {
+	SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
+	SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
+	SPH_C32(0x69656b65), SPH_C32(0x20556e69)
+};
+ */
+
+static const sph_u32 IV256[] = {	/* 8 words = 32 bytes; presumably the Hamsi-256 initial value -- confirm against the spec */
+	SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
+	SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
+	SPH_C32(0x656d656e), SPH_C32(0x7420456c)
+};	/* bytes, top byte of each word first, are the ASCII text "versiteit Leuven, Departement El" */
+
+static const sph_u32 IV384[] = {	/* 16 words = 64 bytes; presumably the Hamsi-384 initial value -- confirm against the spec */
+	SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
+	SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
+	SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
+	SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
+	SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
+	SPH_C32(0x2c204b61)
+};	/* bytes are the ASCII text "ektrotechniek, Computer Security and Industrial Cryptography, Ka" */
+
+static const sph_u32 IV512[] = {	/* 16 words = 64 bytes; presumably the Hamsi-512 initial value -- confirm against the spec */
+	SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
+	SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
+	SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
+	SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
+	SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
+	SPH_C32(0x6769756d)
+};	/* bytes are the ASCII text "steelpark Arenberg 10, bus 2446, B-3001 Leuven-Heverlee, Belgium" */
+
+static const sph_u32 alpha_n[] = {	/* 32 constant words; NOTE(review): presumably the "normal round" alpha table -- confirm */
+	SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
+	SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
+	SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
+	SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
+	SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
+	SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
+};	/* ROUND_SMALL reads entries 0x00-0x03, 0x08-0x0B, 0x10-0x13 and 0x18-0x1B of the table passed as its alpha argument */
+
+static const sph_u32 alpha_f[] = {	/* 32 constant words; NOTE(review): presumably the "final round" alpha table -- confirm */
+	SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
+	SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
+	SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
+	SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
+	SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
+	SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
+	SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
+	SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
+};	/* same length as alpha_n (32 words), so either table can be passed where a round macro takes an alpha argument */
+
+#define DECL_STATE_SMALL /* declares the eight 32-bit locals c0..c7 used by READ/WRITE_STATE_SMALL */ \
+	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;	/* the expansion already ends with ';' */
+
+#define READ_STATE_SMALL(sc)   do { /* copy sc->h[0..7] into the locals c0..c7 */ \
+		c0 = sc->h[0x0]; /* sc is used unparenthesized: pass a plain pointer name */ \
+		c1 = sc->h[0x1]; \
+		c2 = sc->h[0x2]; \
+		c3 = sc->h[0x3]; \
+		c4 = sc->h[0x4]; \
+		c5 = sc->h[0x5]; \
+		c6 = sc->h[0x6]; \
+		c7 = sc->h[0x7]; \
+	} while (0)
+
+/*
+ * Stores the locals c0..c7 back into the chaining value sc->h[0..7];
+ * the counterpart of READ_STATE_SMALL.
+ */
+#define WRITE_STATE_SMALL(sc)   do { \
+		sc->h[0x0] = c0; \
+		sc->h[0x1] = c1; \
+		sc->h[0x2] = c2; \
+		sc->h[0x3] = c3; \
+		sc->h[0x4] = c4; \
+		sc->h[0x5] = c5; \
+		sc->h[0x6] = c6; \
+		sc->h[0x7] = c7; \
+	} while (0)
+
+/*
+ * Names for the 16 words of the working state used by ROUND_SMALL and
+ * T_SMALL. Each sN is only an alias for a message word (m0..m7) or a
+ * chaining word (c0..c7), so both sets of locals must be in scope
+ * wherever those macros are expanded, and writing to an sN writes to
+ * the aliased m or c variable.
+ */
+#define s0   m0
+#define s1   m1
+#define s2   c0
+#define s3   c1
+#define s4   c2
+#define s5   c3
+#define s6   m2
+#define s7   m3
+#define s8   m4
+#define s9   m5
+#define sA   c4
+#define sB   c5
+#define sC   c6
+#define sD   c7
+#define sE   m6
+#define sF   m7
+
+/*
+ * Substitution step, applied in place to four 32-bit words.
+ *
+ * Only bitwise AND/OR/XOR/NOT are used, so every bit position of
+ * (a, b, c, d) is transformed independently of the others. t is a
+ * scratch word; the final SPH_T32(~t) keeps the complemented value
+ * within 32 bits.
+ *
+ * All four arguments must be modifiable lvalues; each one is evaluated
+ * several times, so they must not have side effects.
+ */
+#define SBOX(a, b, c, d)   do { \
+		sph_u32 t; \
+		t = (a); \
+		(a) &= (c); \
+		(a) ^= (d); \
+		(c) ^= (b); \
+		(c) ^= (a); \
+		(d) |= t; \
+		(d) ^= (b); \
+		t ^= (c); \
+		(b) = (d); \
+		(d) |= t; \
+		(d) ^= (a); \
+		(a) &= (b); \
+		t ^= (a); \
+		(b) ^= (d); \
+		(b) ^= t; \
+		(a) = (c); \
+		(c) = (b); \
+		(b) = (d); \
+		(d) = SPH_T32(~t); \
+	} while (0)
+
+/*
+ * Diffusion step: mixes four 32-bit words in place using only
+ * rotations, left shifts and XORs. The statement order matters because
+ * each line consumes values updated by the previous ones.
+ *
+ * All four arguments must be modifiable lvalues; each one is evaluated
+ * several times, so they must not have side effects.
+ */
+#define L(a, b, c, d)   do { \
+		(a) = SPH_ROTL32(a, 13); \
+		(c) = SPH_ROTL32(c, 3); \
+		(b) ^= (a) ^ (c); \
+		(d) ^= (c) ^ SPH_T32((a) << 3); \
+		(b) = SPH_ROTL32(b, 1); \
+		(d) = SPH_ROTL32(d, 7); \
+		(a) ^= (b) ^ (d); \
+		(c) ^= (d) ^ SPH_T32((b) << 7); \
+		(a) = SPH_ROTL32(a, 5); \
+		(c) = SPH_ROTL32(c, 22); \
+	} while (0)
+
+/*
+ * One round on the 16-word working state s0..sF.
+ *
+ *   1. XOR a round constant into every word (16 of the 32 entries of
+ *      the alpha table are used) and XOR the round counter rc into s1.
+ *   2. Apply SBOX to the four word groups (sN, sN+4, sN+8, sN+12),
+ *      i.e. the columns when s0..sF is viewed as a 4x4 row-major matrix.
+ *   3. Apply L along the four diagonals of that matrix.
+ *
+ * rc      round counter, converted to sph_u32
+ * alpha   constant table to index (alpha_n or alpha_f)
+ */
+#define ROUND_SMALL(rc, alpha)   do { \
+		s0 ^= alpha[0x00]; \
+		s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
+		s2 ^= alpha[0x02]; \
+		s3 ^= alpha[0x03]; \
+		s4 ^= alpha[0x08]; \
+		s5 ^= alpha[0x09]; \
+		s6 ^= alpha[0x0A]; \
+		s7 ^= alpha[0x0B]; \
+		s8 ^= alpha[0x10]; \
+		s9 ^= alpha[0x11]; \
+		sA ^= alpha[0x12]; \
+		sB ^= alpha[0x13]; \
+		sC ^= alpha[0x18]; \
+		sD ^= alpha[0x19]; \
+		sE ^= alpha[0x1A]; \
+		sF ^= alpha[0x1B]; \
+		SBOX(s0, s4, s8, sC); \
+		SBOX(s1, s5, s9, sD); \
+		SBOX(s2, s6, sA, sE); \
+		SBOX(s3, s7, sB, sF); \
+		L(s0, s5, sA, sF); \
+		L(s1, s6, sB, sC); \
+		L(s2, s7, s8, sD); \
+		L(s3, s4, s9, sE); \
+	} while (0)
+
+/*
+ * Regular per-block permutation: three rounds (counters 0, 1, 2) with
+ * the alpha_n constants. Used by hamsi_small().
+ */
+#define P_SMALL   do { \
+		ROUND_SMALL(0, alpha_n); \
+		ROUND_SMALL(1, alpha_n); \
+		ROUND_SMALL(2, alpha_n); \
+	} while (0)
+
+/*
+ * Final permutation: six rounds (counters 0 to 5) with the alpha_f
+ * constants. Used by hamsi_small_final().
+ */
+#define PF_SMALL   do { \
+		ROUND_SMALL(0, alpha_f); \
+		ROUND_SMALL(1, alpha_f); \
+		ROUND_SMALL(2, alpha_f); \
+		ROUND_SMALL(3, alpha_f); \
+		ROUND_SMALL(4, alpha_f); \
+		ROUND_SMALL(5, alpha_f); \
+	} while (0)
+
+/*
+ * Feed-forward: XORs eight words of the permuted state into sc->h[] and
+ * reloads c0..c7 from the result. Requires sc to be in scope.
+ *
+ * The descending order is required because several sources are aliases
+ * of c variables (sB is c5, sA is c4, s3 is c1, s2 is c0): each of them
+ * must be read before the assignment that overwrites it.
+ */
+#define T_SMALL   do { \
+		/* order is important */ \
+		c7 = (sc->h[7] ^= sB); \
+		c6 = (sc->h[6] ^= sA); \
+		c5 = (sc->h[5] ^= s9); \
+		c4 = (sc->h[4] ^= s8); \
+		c3 = (sc->h[3] ^= s3); \
+		c2 = (sc->h[2] ^= s2); \
+		c1 = (sc->h[1] ^= s1); \
+		c0 = (sc->h[0] ^= s0); \
+	} while (0)
+
+/*
+ * Absorb num consecutive 4-byte message blocks, starting at buf, into a
+ * Hamsi-224/256 computation.
+ *
+ * sc    context; its chaining value (h) and bit counter are updated
+ * buf   input bytes, advanced by 4 per block
+ * num   number of blocks to process (0 leaves the state unchanged)
+ */
+static void
+hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
+{
+	DECL_STATE_SMALL
+#if !SPH_64
+	sph_u32 tmp;
+#endif
+
+	/* account for 32 message bits per block */
+#if SPH_64
+	sc->count += (sph_u64)num << 5;
+#else
+	tmp = SPH_T32((sph_u32)num << 5);
+	sc->count_low = SPH_T32(sc->count_low + tmp);
+	/* (num >> 13) >> 14 is num >> 27: the part of num << 5 above 32 bits */
+	sc->count_high += (sph_u32)((num >> 13) >> 14);
+	/* the low word wrapped around: carry into the high word */
+	if (sc->count_low < tmp)
+		sc->count_high ++;
+#endif
+	READ_STATE_SMALL(sc);
+	while (num -- > 0) {
+		sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+
+		/*
+		 * INPUT_SMALL is defined outside this excerpt; presumably it
+		 * fills m0..m7 from the 4 bytes at buf -- confirm against its
+		 * definition.
+		 */
+		INPUT_SMALL;
+		P_SMALL;
+		T_SMALL;
+		buf += 4;
+	}
+	WRITE_STATE_SMALL(sc);
+}
+
+/*
+ * Process the last 4-byte block of a Hamsi-224/256 computation.
+ *
+ * Same sequence as one iteration of hamsi_small(), except that the
+ * final permutation PF_SMALL replaces P_SMALL and the bit counter is
+ * not touched.
+ *
+ * sc    context whose chaining value is updated
+ * buf   the 4 bytes of the final block
+ */
+static void
+hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
+{
+	sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+	DECL_STATE_SMALL
+
+	READ_STATE_SMALL(sc);
+	/* INPUT_SMALL is defined outside this excerpt; it reads from buf */
+	INPUT_SMALL;
+	PF_SMALL;
+	T_SMALL;
+	WRITE_STATE_SMALL(sc);
+}
+
+/*
+ * Put a Hamsi-224/256 context into its start-of-message state.
+ *
+ * sc   context to reset
+ * iv   initial chaining value; sizeof(sc->h) bytes are copied from it
+ */
+static void
+hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
+{
+	/* no message bits absorbed yet */
+#if SPH_64
+	sc->count = 0;
+#else
+	sc->count_low = 0;
+	sc->count_high = 0;
+#endif
+	/* chaining value starts at the variant's IV */
+	memcpy(sc->h, iv, sizeof(sc->h));
+	/* no buffered input */
+	sc->partial_len = 0;
+}
+
+/*
+ * Feed len bytes at data into a Hamsi-224/256 computation.
+ *
+ * Input is first used to complete a block left unfinished by a previous
+ * call (sc->partial); then all whole 4-byte blocks are processed
+ * straight from the caller's buffer; finally the 0 to 3 trailing bytes
+ * are saved in sc->partial for the next call or for the close function.
+ *
+ * sc     context to update
+ * data   input bytes; never dereferenced when len is 0
+ * len    number of input bytes
+ */
+static void
+hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
+{
+	const unsigned char *in;
+	size_t tail;
+
+	/*
+	 * An empty update changes nothing. Returning here also keeps a
+	 * possibly-NULL data pointer away from memcpy() and from pointer
+	 * arithmetic, which are undefined on NULL even for a zero length.
+	 */
+	if (len == 0)
+		return;
+	in = data;
+
+	if (sc->partial_len != 0) {
+		size_t mlen;
+
+		/* bytes still missing to complete the pending block */
+		mlen = 4 - sc->partial_len;
+		if (len < mlen) {
+			memcpy(sc->partial + sc->partial_len, in, len);
+			sc->partial_len += len;
+			return;
+		}
+		memcpy(sc->partial + sc->partial_len, in, mlen);
+		in += mlen;
+		len -= mlen;
+		hamsi_small(sc, sc->partial, 1);
+		sc->partial_len = 0;
+	}
+
+	/* whole blocks straight from the input, then stash the remainder */
+	hamsi_small(sc, in, len >> 2);
+	tail = len & (size_t)3;
+	memcpy(sc->partial, in + (len - tail), tail);
+	sc->partial_len = tail;
+}
+
+/*
+ * Finish a Hamsi-224/256 computation and write the digest.
+ *
+ * The buffered bytes, the n extra bits of ub and a single terminating
+ * 1 bit are zero-padded to one 4-byte block; the 64-bit message length
+ * in bits follows as two more blocks, the second of which goes through
+ * hamsi_small_final(). The context is not reset afterwards.
+ *
+ * sc             context to finish
+ * ub             extra bits, taken from the top of the low byte
+ * n              number of extra bits (0 to 7)
+ * dst            receives 4 * out_size_w32 bytes
+ * out_size_w32   number of leading words of sc->h to emit, big-endian
+ */
+static void
+hamsi_small_close(sph_hamsi_small_context *sc,
+	unsigned ub, unsigned n, void *dst, size_t out_size_w32)
+{
+	unsigned char pad[12];
+	unsigned char *out = dst;
+	size_t used = sc->partial_len;
+	size_t w;
+	unsigned marker = 0x80 >> n;
+
+	/* pad[0..3]: last (partial) data block; pad[4..11]: bit length */
+	memcpy(pad, sc->partial, used);
+#if SPH_64
+	sph_enc64be(pad + 4, sc->count + (used << 3) + n);
+#else
+	sph_enc32be(pad + 4, sc->count_high);
+	sph_enc32be(pad + 8, sc->count_low + (used << 3) + n);
+#endif
+	/* keep the n extra bits, set the terminating bit right below them */
+	pad[used] = ((ub & -marker) | marker) & 0xFF;
+	for (used ++; used < 4; used ++)
+		pad[used] = 0;
+	hamsi_small(sc, pad, 2);
+	hamsi_small_final(sc, pad + 8);
+	for (w = 0; w < out_size_w32; w ++)
+		sph_enc32be(out + 4 * w, sc->h[w]);
+}
+
+/*
+ * Declares the sixteen local working copies (c0..cF) of the chaining
+ * value. The expansion already ends with ';', so the macro is used
+ * without a trailing semicolon.
+ */
+#define DECL_STATE_BIG \
+	sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
+	sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
+
+/*
+ * Loads the chaining value sc->h[0..15] into the locals c0..cF declared
+ * by DECL_STATE_BIG. sc is a pointer to the context.
+ */
+#define READ_STATE_BIG(sc)   do { \
+		c0 = sc->h[0x0]; \
+		c1 = sc->h[0x1]; \
+		c2 = sc->h[0x2]; \
+		c3 = sc->h[0x3]; \
+		c4 = sc->h[0x4]; \
+		c5 = sc->h[0x5]; \
+		c6 = sc->h[0x6]; \
+		c7 = sc->h[0x7]; \
+		c8 = sc->h[0x8]; \
+		c9 = sc->h[0x9]; \
+		cA = sc->h[0xA]; \
+		cB = sc->h[0xB]; \
+		cC = sc->h[0xC]; \
+		cD = sc->h[0xD]; \
+		cE = sc->h[0xE]; \
+		cF = sc->h[0xF]; \
+	} while (0)
+
+/*
+ * Stores the locals c0..cF back into the chaining value sc->h[0..15];
+ * the counterpart of READ_STATE_BIG.
+ */
+#define WRITE_STATE_BIG(sc)   do { \
+		sc->h[0x0] = c0; \
+		sc->h[0x1] = c1; \
+		sc->h[0x2] = c2; \
+		sc->h[0x3] = c3; \
+		sc->h[0x4] = c4; \
+		sc->h[0x5] = c5; \
+		sc->h[0x6] = c6; \
+		sc->h[0x7] = c7; \
+		sc->h[0x8] = c8; \
+		sc->h[0x9] = c9; \
+		sc->h[0xA] = cA; \
+		sc->h[0xB] = cB; \
+		sc->h[0xC] = cC; \
+		sc->h[0xD] = cD; \
+		sc->h[0xE] = cE; \
+		sc->h[0xF] = cF; \
+	} while (0)
+
+/*
+ * Names for the 32 words of the working state used by ROUND_BIG and
+ * T_BIG. Each sNN is only an alias for a message word (m0..mF) or a
+ * chaining word (c0..cF), so both sets of locals must be in scope
+ * wherever those macros are expanded, and writing to an sNN writes to
+ * the aliased m or c variable.
+ */
+#define s00   m0
+#define s01   m1
+#define s02   c0
+#define s03   c1
+#define s04   m2
+#define s05   m3
+#define s06   c2
+#define s07   c3
+#define s08   c4
+#define s09   c5
+#define s0A   m4
+#define s0B   m5
+#define s0C   c6
+#define s0D   c7
+#define s0E   m6
+#define s0F   m7
+#define s10   m8
+#define s11   m9
+#define s12   c8
+#define s13   c9
+#define s14   mA
+#define s15   mB
+#define s16   cA
+#define s17   cB
+#define s18   cC
+#define s19   cD
+#define s1A   mC
+#define s1B   mD
+#define s1C   cE
+#define s1D   cF
+#define s1E   mE
+#define s1F   mF
+
+/*
+ * One round on the 32-word working state s00..s1F.
+ *
+ *   1. XOR alpha[i] into word i for all 32 words, and XOR the round
+ *      counter rc into s01.
+ *   2. Apply SBOX to the eight word groups (s0i, s08+i, s10+i, s18+i),
+ *      i.e. the columns when the state is viewed as a 4x8 row-major
+ *      matrix.
+ *   3. Apply L along the eight wrapped diagonals of that matrix.
+ *   4. Apply L four more times, to (s00, s02, s05, s07),
+ *      (s10, s13, s15, s16), (s09, s0B, s0C, s0E) and
+ *      (s19, s1A, s1C, s1F).
+ *
+ * rc      round counter, converted to sph_u32
+ * alpha   constant table to index (alpha_n or alpha_f)
+ */
+#define ROUND_BIG(rc, alpha)   do { \
+		s00 ^= alpha[0x00]; \
+		s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
+		s02 ^= alpha[0x02]; \
+		s03 ^= alpha[0x03]; \
+		s04 ^= alpha[0x04]; \
+		s05 ^= alpha[0x05]; \
+		s06 ^= alpha[0x06]; \
+		s07 ^= alpha[0x07]; \
+		s08 ^= alpha[0x08]; \
+		s09 ^= alpha[0x09]; \
+		s0A ^= alpha[0x0A]; \
+		s0B ^= alpha[0x0B]; \
+		s0C ^= alpha[0x0C]; \
+		s0D ^= alpha[0x0D]; \
+		s0E ^= alpha[0x0E]; \
+		s0F ^= alpha[0x0F]; \
+		s10 ^= alpha[0x10]; \
+		s11 ^= alpha[0x11]; \
+		s12 ^= alpha[0x12]; \
+		s13 ^= alpha[0x13]; \
+		s14 ^= alpha[0x14]; \
+		s15 ^= alpha[0x15]; \
+		s16 ^= alpha[0x16]; \
+		s17 ^= alpha[0x17]; \
+		s18 ^= alpha[0x18]; \
+		s19 ^= alpha[0x19]; \
+		s1A ^= alpha[0x1A]; \
+		s1B ^= alpha[0x1B]; \
+		s1C ^= alpha[0x1C]; \
+		s1D ^= alpha[0x1D]; \
+		s1E ^= alpha[0x1E]; \
+		s1F ^= alpha[0x1F]; \
+		SBOX(s00, s08, s10, s18); \
+		SBOX(s01, s09, s11, s19); \
+		SBOX(s02, s0A, s12, s1A); \
+		SBOX(s03, s0B, s13, s1B); \
+		SBOX(s04, s0C, s14, s1C); \
+		SBOX(s05, s0D, s15, s1D); \
+		SBOX(s06, s0E, s16, s1E); \
+		SBOX(s07, s0F, s17, s1F); \
+		L(s00, s09, s12, s1B); \
+		L(s01, s0A, s13, s1C); \
+		L(s02, s0B, s14, s1D); \
+		L(s03, s0C, s15, s1E); \
+		L(s04, s0D, s16, s1F); \
+		L(s05, s0E, s17, s18); \
+		L(s06, s0F, s10, s19); \
+		L(s07, s08, s11, s1A); \
+		L(s00, s02, s05, s07); \
+		L(s10, s13, s15, s16); \
+		L(s09, s0B, s0C, s0E); \
+		L(s19, s1A, s1C, s1F); \
+	} while (0)
+
+/*
+ * P_BIG is the regular per-block permutation (6 rounds, alpha_n) and
+ * PF_BIG the final permutation (12 rounds, alpha_f) of the big
+ * variants. Both definitions below run the same rounds with the same
+ * counters (0..5 and 0..11): when SPH_SMALL_FOOTPRINT_HAMSI is nonzero
+ * ROUND_BIG is expanded once inside a loop, otherwise the rounds are
+ * fully unrolled.
+ */
+#if SPH_SMALL_FOOTPRINT_HAMSI
+
+#define P_BIG   do { \
+		unsigned r; \
+		for (r = 0; r < 6; r ++) \
+			ROUND_BIG(r, alpha_n); \
+	} while (0)
+
+#define PF_BIG   do { \
+		unsigned r; \
+		for (r = 0; r < 12; r ++) \
+			ROUND_BIG(r, alpha_f); \
+	} while (0)
+
+#else
+
+#define P_BIG   do { \
+		ROUND_BIG(0, alpha_n); \
+		ROUND_BIG(1, alpha_n); \
+		ROUND_BIG(2, alpha_n); \
+		ROUND_BIG(3, alpha_n); \
+		ROUND_BIG(4, alpha_n); \
+		ROUND_BIG(5, alpha_n); \
+	} while (0)
+
+#define PF_BIG   do { \
+		ROUND_BIG(0, alpha_f); \
+		ROUND_BIG(1, alpha_f); \
+		ROUND_BIG(2, alpha_f); \
+		ROUND_BIG(3, alpha_f); \
+		ROUND_BIG(4, alpha_f); \
+		ROUND_BIG(5, alpha_f); \
+		ROUND_BIG(6, alpha_f); \
+		ROUND_BIG(7, alpha_f); \
+		ROUND_BIG(8, alpha_f); \
+		ROUND_BIG(9, alpha_f); \
+		ROUND_BIG(10, alpha_f); \
+		ROUND_BIG(11, alpha_f); \
+	} while (0)
+
+#endif
+
+/*
+ * Feed-forward: XORs sixteen words of the permuted state (s00..s07 and
+ * s10..s17) into sc->h[] and reloads c0..cF from the result. Requires
+ * sc to be in scope.
+ *
+ * The descending order is required because several sources are aliases
+ * of c variables (s17 is cB, s16 is cA, s13 is c9, s12 is c8, s07 is
+ * c3, s06 is c2, s03 is c1, s02 is c0): each of them must be read
+ * before the assignment that overwrites it.
+ */
+#define T_BIG   do { \
+		/* order is important */ \
+		cF = (sc->h[0xF] ^= s17); \
+		cE = (sc->h[0xE] ^= s16); \
+		cD = (sc->h[0xD] ^= s15); \
+		cC = (sc->h[0xC] ^= s14); \
+		cB = (sc->h[0xB] ^= s13); \
+		cA = (sc->h[0xA] ^= s12); \
+		c9 = (sc->h[0x9] ^= s11); \
+		c8 = (sc->h[0x8] ^= s10); \
+		c7 = (sc->h[0x7] ^= s07); \
+		c6 = (sc->h[0x6] ^= s06); \
+		c5 = (sc->h[0x5] ^= s05); \
+		c4 = (sc->h[0x4] ^= s04); \
+		c3 = (sc->h[0x3] ^= s03); \
+		c2 = (sc->h[0x2] ^= s02); \
+		c1 = (sc->h[0x1] ^= s01); \
+		c0 = (sc->h[0x0] ^= s00); \
+	} while (0)
+
+/*
+ * Absorb num consecutive 8-byte message blocks, starting at buf, into a
+ * Hamsi-384/512 computation.
+ *
+ * sc    context; its chaining value (h) and bit counter are updated
+ * buf   input bytes, advanced by 8 per block
+ * num   number of blocks to process (0 leaves the state unchanged)
+ */
+static void
+hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
+{
+	DECL_STATE_BIG
+#if !SPH_64
+	sph_u32 tmp;
+#endif
+
+	/* account for 64 message bits per block */
+#if SPH_64
+	sc->count += (sph_u64)num << 6;
+#else
+	tmp = SPH_T32((sph_u32)num << 6);
+	sc->count_low = SPH_T32(sc->count_low + tmp);
+	/* (num >> 13) >> 13 is num >> 26: the part of num << 6 above 32 bits */
+	sc->count_high += (sph_u32)((num >> 13) >> 13);
+	/* the low word wrapped around: carry into the high word */
+	if (sc->count_low < tmp)
+		sc->count_high ++;
+#endif
+	READ_STATE_BIG(sc);
+	while (num -- > 0) {
+		sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+		sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
+
+		/*
+		 * INPUT_BIG is defined outside this excerpt; presumably it
+		 * fills m0..mF from the 8 bytes at buf -- confirm against its
+		 * definition.
+		 */
+		INPUT_BIG;
+		P_BIG;
+		T_BIG;
+		buf += 8;
+	}
+	WRITE_STATE_BIG(sc);
+}
+
+/*
+ * Process the last 8-byte block of a Hamsi-384/512 computation.
+ *
+ * Same sequence as one iteration of hamsi_big(), except that the final
+ * permutation PF_BIG replaces P_BIG and the bit counter is not touched.
+ *
+ * sc    context whose chaining value is updated
+ * buf   the 8 bytes of the final block
+ */
+static void
+hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
+{
+	sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
+	sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
+	DECL_STATE_BIG
+
+	READ_STATE_BIG(sc);
+	/* INPUT_BIG is defined outside this excerpt; it reads from buf */
+	INPUT_BIG;
+	PF_BIG;
+	T_BIG;
+	WRITE_STATE_BIG(sc);
+}
+
+/*
+ * Put a Hamsi-384/512 context into its start-of-message state.
+ *
+ * sc   context to reset
+ * iv   initial chaining value; sizeof(sc->h) bytes are copied from it
+ */
+static void
+hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
+{
+	/* no message bits absorbed yet */
+#if SPH_64
+	sc->count = 0;
+#else
+	sc->count_low = 0;
+	sc->count_high = 0;
+#endif
+	/* chaining value starts at the variant's IV */
+	memcpy(sc->h, iv, sizeof(sc->h));
+	/* no buffered input */
+	sc->partial_len = 0;
+}
+
+/*
+ * Feed len bytes at data into a Hamsi-384/512 computation.
+ *
+ * Input is first used to complete a block left unfinished by a previous
+ * call (sc->partial); then all whole 8-byte blocks are processed
+ * straight from the caller's buffer; finally the 0 to 7 trailing bytes
+ * are saved in sc->partial for the next call or for the close function.
+ *
+ * sc     context to update
+ * data   input bytes; never dereferenced when len is 0
+ * len    number of input bytes
+ */
+static void
+hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
+{
+	const unsigned char *in;
+	size_t tail;
+
+	/*
+	 * An empty update changes nothing. Returning here also keeps a
+	 * possibly-NULL data pointer away from memcpy() and from pointer
+	 * arithmetic, which are undefined on NULL even for a zero length.
+	 */
+	if (len == 0)
+		return;
+	in = data;
+
+	if (sc->partial_len != 0) {
+		size_t mlen;
+
+		/* bytes still missing to complete the pending block */
+		mlen = 8 - sc->partial_len;
+		if (len < mlen) {
+			memcpy(sc->partial + sc->partial_len, in, len);
+			sc->partial_len += len;
+			return;
+		}
+		memcpy(sc->partial + sc->partial_len, in, mlen);
+		in += mlen;
+		len -= mlen;
+		hamsi_big(sc, sc->partial, 1);
+		sc->partial_len = 0;
+	}
+
+	/* whole blocks straight from the input, then stash the remainder */
+	hamsi_big(sc, in, len >> 3);
+	tail = len & (size_t)7;
+	memcpy(sc->partial, in + (len - tail), tail);
+	sc->partial_len = tail;
+}
+
+/*
+ * Finish a Hamsi-384/512 computation and write the digest.
+ *
+ * The buffered bytes, the n extra bits of ub and a single terminating
+ * 1 bit are zero-padded (inside sc->partial) to one 8-byte block; the
+ * 64-bit message length in bits is then processed as the final block
+ * by hamsi_big_final(). The context is not reset afterwards.
+ *
+ * sc             context to finish
+ * ub             extra bits, taken from the top of the low byte
+ * n              number of extra bits (0 to 7)
+ * dst            receives 48 bytes when out_size_w32 is 12, else 64
+ * out_size_w32   12 selects the 12-word output; any other value emits
+ *                all 16 words of sc->h
+ */
+static void
+hamsi_big_close(sph_hamsi_big_context *sc,
+	unsigned ub, unsigned n, void *dst, size_t out_size_w32)
+{
+	/* words of sc->h that make up the 12-word digest, in output order */
+	static const unsigned char words384[12] = {
+		0, 1, 3, 4, 5, 6, 8, 9, 10, 12, 13, 15
+	};
+	unsigned char pad[8];
+	unsigned char *out = dst;
+	size_t used = sc->partial_len;
+	size_t w;
+	unsigned marker = 0x80 >> n;
+
+	/* pad holds the message length in bits, big-endian */
+#if SPH_64
+	sph_enc64be(pad, sc->count + (used << 3) + n);
+#else
+	sph_enc32be(pad, sc->count_high);
+	sph_enc32be(pad + 4, sc->count_low + (used << 3) + n);
+#endif
+	/* keep the n extra bits, set the terminating bit right below them */
+	sc->partial[used] = ((ub & -marker) | marker) & 0xFF;
+	for (used ++; used < 8; used ++)
+		sc->partial[used] = 0;
+	hamsi_big(sc, sc->partial, 1);
+	hamsi_big_final(sc, pad);
+	if (out_size_w32 == 12) {
+		for (w = 0; w < 12; w ++)
+			sph_enc32be(out + 4 * w, sc->h[words384[w]]);
+	} else {
+		for (w = 0; w < 16; w ++)
+			sph_enc32be(out + 4 * w, sc->h[w]);
+	}
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): start a Hamsi-224 computation
+ * by resetting the context with IV224. cc must point to a
+ * sph_hamsi224_context.
+ */
+void
+sph_hamsi224_init(void *cc)
+{
+	hamsi_small_init(cc, IV224);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): feed len bytes at data into the
+ * Hamsi-224 computation held in cc. All work is done by
+ * hamsi_small_core().
+ */
+void
+sph_hamsi224(void *cc, const void *data, size_t len)
+{
+	hamsi_small_core(cc, data, len);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): finish the Hamsi-224
+ * computation with no extra bits and write 7 words (28 bytes) to dst.
+ */
+void
+sph_hamsi224_close(void *cc, void *dst)
+{
+	hamsi_small_close(cc, 0, 0, dst, 7);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_small_init(cc, IV224) is disabled); call
+	 * sph_hamsi224_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): append the n extra bits of ub,
+ * finish the Hamsi-224 computation and write 7 words (28 bytes) to dst.
+ */
+void
+sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_small_close(cc, ub, n, dst, 7);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_small_init(cc, IV224) is disabled); call
+	 * sph_hamsi224_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): start a Hamsi-256 computation
+ * by resetting the context with IV256. cc must point to a
+ * sph_hamsi256_context.
+ */
+void
+sph_hamsi256_init(void *cc)
+{
+	hamsi_small_init(cc, IV256);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): feed len bytes at data into the
+ * Hamsi-256 computation held in cc. All work is done by
+ * hamsi_small_core().
+ */
+void
+sph_hamsi256(void *cc, const void *data, size_t len)
+{
+	hamsi_small_core(cc, data, len);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): finish the Hamsi-256
+ * computation with no extra bits and write 8 words (32 bytes) to dst.
+ */
+void
+sph_hamsi256_close(void *cc, void *dst)
+{
+	hamsi_small_close(cc, 0, 0, dst, 8);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_small_init(cc, IV256) is disabled); call
+	 * sph_hamsi256_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): append the n extra bits of ub,
+ * finish the Hamsi-256 computation and write 8 words (32 bytes) to dst.
+ */
+void
+sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_small_close(cc, ub, n, dst, 8);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_small_init(cc, IV256) is disabled); call
+	 * sph_hamsi256_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): start a Hamsi-384 computation
+ * by resetting the context with IV384. cc must point to a
+ * sph_hamsi384_context.
+ */
+void
+sph_hamsi384_init(void *cc)
+{
+	hamsi_big_init(cc, IV384);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): feed len bytes at data into the
+ * Hamsi-384 computation held in cc. All work is done by
+ * hamsi_big_core().
+ */
+void
+sph_hamsi384(void *cc, const void *data, size_t len)
+{
+	hamsi_big_core(cc, data, len);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): finish the Hamsi-384
+ * computation with no extra bits and write 12 words (48 bytes) to dst.
+ */
+void
+sph_hamsi384_close(void *cc, void *dst)
+{
+	hamsi_big_close(cc, 0, 0, dst, 12);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_big_init(cc, IV384) is disabled); call
+	 * sph_hamsi384_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): append the n extra bits of ub,
+ * finish the Hamsi-384 computation and write 12 words (48 bytes) to
+ * dst.
+ */
+void
+sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_big_close(cc, ub, n, dst, 12);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_big_init(cc, IV384) is disabled); call
+	 * sph_hamsi384_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): start a Hamsi-512 computation
+ * by resetting the context with IV512. cc must point to a
+ * sph_hamsi512_context.
+ */
+void
+sph_hamsi512_init(void *cc)
+{
+	hamsi_big_init(cc, IV512);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): feed len bytes at data into the
+ * Hamsi-512 computation held in cc. All work is done by
+ * hamsi_big_core().
+ */
+void
+sph_hamsi512(void *cc, const void *data, size_t len)
+{
+	hamsi_big_core(cc, data, len);
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): finish the Hamsi-512
+ * computation with no extra bits and write 16 words (64 bytes) to dst.
+ */
+void
+sph_hamsi512_close(void *cc, void *dst)
+{
+	hamsi_big_close(cc, 0, 0, dst, 16);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_big_init(cc, IV512) is disabled); call
+	 * sph_hamsi512_init() before reusing cc.
+	 */
+}
+
+/*
+ * Public entry point (see sph_hamsi.h): append the n extra bits of ub,
+ * finish the Hamsi-512 computation and write 16 words (64 bytes) to
+ * dst.
+ */
+void
+sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	hamsi_big_close(cc, ub, n, dst, 16);
+	/*
+	 * NOTE: the context is not re-initialized here (the call to
+	 * hamsi_big_init(cc, IV512) is disabled); call
+	 * sph_hamsi512_init() before reusing cc.
+	 */
+}
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/crypto/ghostrider/sph_hamsi.h
+++ b/src/crypto/ghostrider/sph_hamsi.h
@@ -0,0 +1,321 @@
+/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * Hamsi interface. This code implements Hamsi with the recommended
+ * parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_hamsi.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_HAMSI_H__
+#define SPH_HAMSI_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+/**
+ * Output size (in bits) for Hamsi-224.
+ */
+#define SPH_SIZE_hamsi224   224
+
+/**
+ * Output size (in bits) for Hamsi-256.
+ */
+#define SPH_SIZE_hamsi256   256
+
+/**
+ * Output size (in bits) for Hamsi-384.
+ */
+#define SPH_SIZE_hamsi384   384
+
+/**
+ * Output size (in bits) for Hamsi-512.
+ */
+#define SPH_SIZE_hamsi512   512
+
+/**
+ * This structure is a context for Hamsi-224 and Hamsi-256 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a Hamsi computation has been performed, the
+ * context can be reused after calling the matching init function again.
+ *
+ * The contents of this structure are private. A running Hamsi
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char partial[4];   /* input bytes not yet forming a block */
+	size_t partial_len;         /* number of valid bytes in partial */
+	sph_u32 h[8];               /* chaining value */
+#if SPH_64
+	sph_u64 count;              /* message bits absorbed so far */
+#else
+	sph_u32 count_high, count_low;   /* the same counter as two words */
+#endif
+#endif
+} sph_hamsi_small_context;
+
+/**
+ * This structure is a context for Hamsi-224 computations. It is
+ * identical to the common <code>sph_hamsi_small_context</code>.
+ */
+typedef sph_hamsi_small_context sph_hamsi224_context;
+
+/**
+ * This structure is a context for Hamsi-256 computations. It is
+ * identical to the common <code>sph_hamsi_small_context</code>.
+ */
+typedef sph_hamsi_small_context sph_hamsi256_context;
+
+/**
+ * This structure is a context for Hamsi-384 and Hamsi-512 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a Hamsi computation has been performed, the
+ * context can be reused after calling the matching init function again.
+ *
+ * The contents of this structure are private. A running Hamsi
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char partial[8];   /* input bytes not yet forming a block */
+	size_t partial_len;         /* number of valid bytes in partial */
+	sph_u32 h[16];              /* chaining value */
+#if SPH_64
+	sph_u64 count;              /* message bits absorbed so far */
+#else
+	sph_u32 count_high, count_low;   /* the same counter as two words */
+#endif
+#endif
+} sph_hamsi_big_context;
+
+/**
+ * This structure is a context for Hamsi-384 computations. It is
+ * identical to the common <code>sph_hamsi_big_context</code>.
+ */
+typedef sph_hamsi_big_context sph_hamsi384_context;
+
+/**
+ * This structure is a context for Hamsi-512 computations. It is
+ * identical to the common <code>sph_hamsi_big_context</code>.
+ */
+typedef sph_hamsi_big_context sph_hamsi512_context;
+
+/**
+ * Initialize a Hamsi-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-224 context (pointer to a
+ *             <code>sph_hamsi224_context</code>)
+ */
+void sph_hamsi224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is NOT reinitialized:
+ * call sph_hamsi224_init() again before reusing it.
+ *
+ * @param cc    the Hamsi-224 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is NOT reinitialized; call init before reuse.
+ *
+ * @param cc    the Hamsi-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Hamsi-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-256 context (pointer to a
+ *             <code>sph_hamsi256_context</code>)
+ */
+void sph_hamsi256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is NOT reinitialized:
+ * call sph_hamsi256_init() again before reusing it.
+ *
+ * @param cc    the Hamsi-256 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is NOT reinitialized; call init before reuse.
+ *
+ * @param cc    the Hamsi-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Hamsi-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-384 context (pointer to a
+ *             <code>sph_hamsi384_context</code>)
+ */
+void sph_hamsi384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is NOT reinitialized:
+ * call sph_hamsi384_init() again before reusing it.
+ *
+ * @param cc    the Hamsi-384 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is NOT reinitialized; call init before reuse.
+ *
+ * @param cc    the Hamsi-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Hamsi-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Hamsi-512 context (pointer to a
+ *             <code>sph_hamsi512_context</code>)
+ */
+void sph_hamsi512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Hamsi-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_hamsi512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Hamsi-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is NOT reinitialized:
+ * call sph_hamsi512_init() again before reusing it.
+ *
+ * @param cc    the Hamsi-512 context
+ * @param dst   the destination buffer
+ */
+void sph_hamsi512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is NOT reinitialized; call init before reuse.
+ *
+ * @param cc    the Hamsi-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_hamsi512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_hamsi_helper.c
+++ b/src/crypto/ghostrider/sph_hamsi_helper.c
--- a/src/crypto/ghostrider/sph_jh.c
+++ b/src/crypto/ghostrider/sph_jh.c
--- a/src/crypto/ghostrider/sph_jh.h
+++ b/src/crypto/ghostrider/sph_jh.h
@@ -0,0 +1,298 @@
+/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * JH interface. JH is a family of functions which differ by
+ * their output size; this implementation defines JH for output
+ * sizes 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_jh.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_JH_H__
+#define SPH_JH_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for JH-224.
+ */
+#define SPH_SIZE_jh224   224
+
+/**
+ * Output size (in bits) for JH-256.
+ */
+#define SPH_SIZE_jh256   256
+
+/**
+ * Output size (in bits) for JH-384.
+ */
+#define SPH_SIZE_jh384   384
+
+/**
+ * Output size (in bits) for JH-512.
+ */
+#define SPH_SIZE_jh512   512
+
+/**
+ * This structure is a context for JH computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a JH computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running JH computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	size_t ptr;               /* presumably the fill level of buf -- TODO confirm in sph_jh.c */
+	union {                   /* one storage area with two word-size views */
+#if SPH_64
+		sph_u64 wide[16];     /* 64-bit view, only when SPH_64 is set */
+#endif
+		sph_u32 narrow[32];   /* 32-bit view, always available */
+	} H;
+#if SPH_64
+	sph_u64 block_count;      /* presumably the number of blocks processed -- TODO confirm */
+#else
+	sph_u32 block_count_high, block_count_low;   /* same counter as two 32-bit words */
+#endif
+#endif
+} sph_jh_context;
+
+/**
+ * Type for a JH-224 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh224_context;
+
+/**
+ * Type for a JH-256 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh256_context;
+
+/**
+ * Type for a JH-384 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh384_context;
+
+/**
+ * Type for a JH-512 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh512_context;
+
+/**
+ * Initialize a JH-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-224 context (pointer to a
+ *             <code>sph_jh224_context</code>)
+ */
+void sph_jh224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-224 context
+ * @param dst   the destination buffer
+ */
+void sph_jh224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a JH-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-256 context (pointer to a
+ *             <code>sph_jh256_context</code>)
+ */
+void sph_jh256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-256 context
+ * @param dst   the destination buffer
+ */
+void sph_jh256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a JH-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-384 context (pointer to a
+ *             <code>sph_jh384_context</code>)
+ */
+void sph_jh384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-384 context
+ * @param dst   the destination buffer
+ */
+void sph_jh384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a JH-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-512 context (pointer to a
+ *             <code>sph_jh512_context</code>)
+ */
+void sph_jh512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-512 context
+ * @param dst   the destination buffer
+ */
+void sph_jh512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_keccak.c
+++ b/src/crypto/ghostrider/sph_keccak.c
--- a/src/crypto/ghostrider/sph_keccak.h
+++ b/src/crypto/ghostrider/sph_keccak.h
@@ -0,0 +1,296 @@
+/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * Keccak interface. This is the interface for Keccak with the
+ * recommended parameters for SHA-3, with output lengths 224, 256,
+ * 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_keccak.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_KECCAK_H__
+#define SPH_KECCAK_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Taken from keccak-gate.h
+extern int hard_coded_eb;
+
+#include "sph_types.h"
+#include <stddef.h>
+
+/**
+ * Output size (in bits) for Keccak-224.
+ */
+#define SPH_SIZE_keccak224 224
+
+/**
+ * Output size (in bits) for Keccak-256.
+ */
+#define SPH_SIZE_keccak256 256
+
+/**
+ * Output size (in bits) for Keccak-384.
+ */
+#define SPH_SIZE_keccak384 384
+
+/**
+ * Output size (in bits) for Keccak-512.
+ */
+#define SPH_SIZE_keccak512 512
+
+/**
+ * This structure is a context for Keccak computations: it contains the
+ * intermediate values and some data from the last entered block. Once a
+ * Keccak computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running Keccak computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+  unsigned char buf[144]; /* first field, for alignment; presumably holds the pending input block -- TODO confirm in sph_keccak.c */
+  size_t ptr, lim; /* NOTE(review): presumably the fill offset into buf and the per-variant block length -- confirm in sph_keccak.c */
+  union { /* two overlapping views of the same state storage */
+#if SPH_64
+    sph_u64 wide[25]; /* 25-word view, declared only when SPH_64 is set */
+#endif
+    sph_u32 narrow[50]; /* 50-word view, always declared */
+  } u;
+#endif
+} sph_keccak_context;
+
+/**
+ * Type for a Keccak-224 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak224_context;
+
+/**
+ * Type for a Keccak-256 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak256_context;
+
+/**
+ * Type for a Keccak-384 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak384_context;
+
+/**
+ * Type for a Keccak-512 context (identical to the common context).
+ */
+typedef sph_keccak_context sph_keccak512_context;
+
+/**
+ * Initialize a Keccak-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-224 context (pointer to a
+ *             <code>sph_keccak224_context</code>)
+ */
+void sph_keccak224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-224 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                     void *dst);
+
+/**
+ * Initialize a Keccak-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-256 context (pointer to a
+ *             <code>sph_keccak256_context</code>)
+ */
+void sph_keccak256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-256 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                     void *dst);
+
+/**
+ * Initialize a Keccak-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-384 context (pointer to a
+ *             <code>sph_keccak384_context</code>)
+ */
+void sph_keccak384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-384 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                     void *dst);
+
+/**
+ * Initialize a Keccak-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Keccak-512 context (pointer to a
+ *             <code>sph_keccak512_context</code>)
+ */
+void sph_keccak512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Keccak-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_keccak512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Keccak-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Keccak-512 context
+ * @param dst   the destination buffer
+ */
+void sph_keccak512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Keccak-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n,
+                                     void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/crypto/ghostrider/sph_luffa.c
+++ b/src/crypto/ghostrider/sph_luffa.c
--- a/src/crypto/ghostrider/sph_luffa.h
+++ b/src/crypto/ghostrider/sph_luffa.h
@@ -0,0 +1,296 @@
+/* $Id: sph_luffa.h 154 2010-04-26 17:00:24Z tp $ */
+/**
+ * Luffa interface. Luffa is a family of functions which differ by
+ * their output size; this implementation defines Luffa for output
+ * sizes 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_luffa.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_LUFFA_H__
+#define SPH_LUFFA_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for Luffa-224.
+ */
+#define SPH_SIZE_luffa224   224
+
+/**
+ * Output size (in bits) for Luffa-256.
+ */
+#define SPH_SIZE_luffa256   256
+
+/**
+ * Output size (in bits) for Luffa-384.
+ */
+#define SPH_SIZE_luffa384   384
+
+/**
+ * Output size (in bits) for Luffa-512.
+ */
+#define SPH_SIZE_luffa512   512
+
+/**
+ * This structure is a context for Luffa-224 computations: it contains
+ * the intermediate values and some data from the last entered block.
+ * Once a Luffa computation has been performed, the context can be
+ * reused for another computation.
+ *
+ * The contents of this structure are private. A running Luffa
+ * computation can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[32];    /* first field, for alignment; presumably buffers a partial input block -- TODO confirm in sph_luffa.c */
+	size_t ptr; /* NOTE(review): presumably the number of bytes currently held in buf -- confirm in sph_luffa.c */
+	sph_u32 V[3][8]; /* 3 rows of 8 words; the 384- and 512-bit contexts declare 4 and 5 rows */
+#endif
+} sph_luffa224_context;
+
+/**
+ * This structure is a context for Luffa-256 computations. It is
+ * identical to <code>sph_luffa224_context</code>.
+ */
+typedef sph_luffa224_context sph_luffa256_context;
+
+/**
+ * This structure is a context for Luffa-384 computations.
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[32];    /* first field, for alignment; presumably buffers a partial input block -- TODO confirm in sph_luffa.c */
+	size_t ptr; /* NOTE(review): presumably the number of bytes currently held in buf -- confirm in sph_luffa.c */
+	sph_u32 V[4][8]; /* 4 rows of 8 words; the 224/256-bit context declares 3 rows, the 512-bit one 5 */
+#endif
+} sph_luffa384_context;
+
+/**
+ * This structure is a context for Luffa-512 computations.
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[32];    /* first field, for alignment; presumably buffers a partial input block -- TODO confirm in sph_luffa.c */
+	size_t ptr; /* NOTE(review): presumably the number of bytes currently held in buf -- confirm in sph_luffa.c */
+	sph_u32 V[5][8]; /* 5 rows of 8 words; the 224/256-bit context declares 3 rows, the 384-bit one 4 */
+#endif
+} sph_luffa512_context;
+
+/**
+ * Initialize a Luffa-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Luffa-224 context (pointer to a
+ *             <code>sph_luffa224_context</code>)
+ */
+void sph_luffa224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Luffa-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_luffa224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Luffa-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Luffa-224 context
+ * @param dst   the destination buffer
+ */
+void sph_luffa224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Luffa-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_luffa224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Luffa-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Luffa-256 context (pointer to a
+ *             <code>sph_luffa256_context</code>)
+ */
+void sph_luffa256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Luffa-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_luffa256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Luffa-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Luffa-256 context
+ * @param dst   the destination buffer
+ */
+void sph_luffa256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Luffa-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_luffa256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Luffa-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Luffa-384 context (pointer to a
+ *             <code>sph_luffa384_context</code>)
+ */
+void sph_luffa384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Luffa-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_luffa384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Luffa-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Luffa-384 context
+ * @param dst   the destination buffer
+ */
+void sph_luffa384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Luffa-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_luffa384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Luffa-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Luffa-512 context (pointer to a
+ *             <code>sph_luffa512_context</code>)
+ */
+void sph_luffa512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Luffa-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_luffa512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Luffa-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Luffa-512 context
+ * @param dst   the destination buffer
+ */
+void sph_luffa512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Luffa-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_luffa512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+	
+#ifdef __cplusplus
+}
+#endif
+	
+#endif
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
XMRig	cd652e2644	v6.16.0	2021-11-26 18:57:07 +07:00
XMRig	6f5ef0fe0f	Merge branch 'dev'	2021-11-26 18:51:53 +07:00
xmrig	01fa968763	Update CHANGELOG.md	2021-11-26 18:50:34 +07:00
xmrig	8e6f3ad99e	Merge pull request #2719 from SChernykh/dev Added GhostRider release notes	2021-11-25 19:21:50 +07:00
SChernykh	b1f2479ec1	Added GhostRider release notes	2021-11-25 13:19:01 +01:00
XMRig	ecceba8ecd	Add GhostRider support for AutoClient.	2021-11-25 17:44:36 +07:00
xmrig	cb5f4a9c17	Merge pull request #2716 from Spudz76/dev-initGR Only initGhostRider() when job is in the family	2021-11-25 09:09:16 +07:00
Tony Butler	3a8ebfdcb6	Only initGhostRider() when job is in the family	2021-11-24 13:04:03 -07:00
xmrig	0dcafeb571	Merge pull request #2715 from SChernykh/dev Benchmark support for GhostRider (offline only)	2021-11-24 22:05:44 +07:00
SChernykh	a1d7ee4c6b	Benchmark support for GhostRider (offline only) Command line: ``` ./xmrig --bench=250K -a gr --rotation 15 ``` Where `rotation` is an integer between 0 and 19 (inclusive).	2021-11-24 15:54:09 +01:00
XMRig	03e70ba2ed	v6.16.0-dev	2021-11-24 19:49:21 +07:00
xmrig	19ef8c5d65	Merge pull request #2714 from SChernykh/gh3 GhostRider: fixed invalid hashes on ARMv8	2021-11-24 19:40:48 +07:00
SChernykh	63baa9e263	GhostRider: fixed invalid hashes on ARMv8	2021-11-24 13:39:55 +01:00
xmrig	1248bd5859	Merge pull request #2713 from SChernykh/gh3 Optimized quad hash for Ryzens	2021-11-24 18:21:01 +07:00
SChernykh	5c951ddb8a	Optimized quad hash for Ryzens	2021-11-24 08:16:41 +01:00
xmrig	4ab0ad928d	Merge pull request #2712 from SChernykh/gh3 GhostRider algorithm (Raptoreum) support	2021-11-24 09:49:34 +07:00
SChernykh	e67eb47796	Faster quad hash for GhostRider algos (Ryzen CPUs)	2021-11-23 22:14:46 +01:00
SChernykh	a6656a8c49	Fixed broken difficulty adjustment on some Raptoreum pools	2021-11-23 18:02:58 +01:00
SChernykh	a903d0a5bd	Fixed compilation error	2021-11-23 08:52:30 +01:00
SChernykh	ceaebfd877	GhostRider algorithm (Raptoreum) support	2021-11-23 08:14:01 +01:00
xmrig	5156ff11a8	Merge pull request #2684 from SChernykh/fix-183 MSR mod: fix for error 183	2021-11-11 17:50:08 +07:00
xmrig	e0143a92a8	Merge pull request #2682 from SChernykh/dev Fix: use cn-heavy optimization only for Vermeer CPUs	2021-11-11 17:49:51 +07:00
SChernykh	f682d9a2e9	MSR mod: fix for error 183 When WinRing0 driver starts, but some other version already created "\\.\WinRing0_1_2_0", it returns error 183 ERROR_ALREADY_EXISTS - "Cannot create a file when that file already exists."	2021-11-11 10:26:38 +01:00
SChernykh	3bece0ff40	Fix: use cn-heavy optimization only for Vermeer CPUs Fixes #2680	2021-11-11 07:57:05 +01:00
XMRig	e6c456a970	v6.15.4-dev	2021-11-02 18:26:44 +07:00
XMRig	923d1d712f	Merge branch 'master' into dev	2021-11-02 18:26:12 +07:00
XMRig	ae8459bd35	v6.15.3	2021-11-01 19:59:05 +07:00
XMRig	3a7be07c62	Merge branch 'dev'	2021-11-01 19:58:30 +07:00
xmrig	e1cc0000c6	Update CHANGELOG.md	2021-11-01 12:27:10 +07:00
xmrig	1210e8e95c	Merge pull request #2644 from Spudz76/dev-fixMemleaks Patch a couple minor leaks	2021-10-25 20:33:10 +07:00
xmrig	a45fbd9cae	Merge pull request #2646 from SChernykh/dev Fix MSVC compilation error	2021-10-25 20:31:12 +07:00
Tony Butler	f6d45f7990	Fix various memory leaks	2021-10-25 04:06:49 -06:00
SChernykh	b9464f993b	Fix MSVC compilation error	2021-10-25 10:26:44 +02:00
xmrig	f8f73b0cd7	Merge pull request #2641 from SChernykh/dev AstroBWT: fixed rare incorrect hashes	2021-10-20 07:24:15 +07:00
SChernykh	df6ab2edd8	AstroBWT: fixed rare incorrect hashes	2021-10-19 19:08:56 +02:00
xmrig	8bf7600154	Merge pull request #2639 from SChernykh/dev AstroBWT even bigger speedup (up to +35%)	2021-10-19 22:50:07 +07:00
SChernykh	a30501956f	AstroBWT even bigger speedup	2021-10-19 17:37:45 +02:00
xmrig	c287a40a20	Merge pull request #2636 from SChernykh/dev AstroBWT speedup (up to +7%)	2021-10-19 07:58:24 +07:00
SChernykh	04f50c24e2	AstroBWT speedup	2021-10-18 18:05:51 +02:00
xmrig	7627b23212	Merge pull request #2614 from Spudz76/dev-fixAppleOpenCL OpenCL fixes for non-AMD platforms	2021-10-13 06:20:53 +07:00
XMRig	e90e7febfb	Merge branch 'StriderDM-merge_mining_tag_fix' into dev	2021-10-13 05:43:27 +07:00
XMRig	733b85a132	Code cleanup.	2021-10-13 05:43:05 +07:00
XMRig	35ba786e63	Merge branch 'merge_mining_tag_fix' of https://github.com/StriderDM/xmrig into StriderDM-merge_mining_tag_fix	2021-10-13 05:33:34 +07:00
David Main	446810a837	fix: expand validation of tx_extra for merge mining tag	2021-10-12 11:17:37 +02:00
Tony Butler	c6a68c3e51	Cap max threads to 4096 with nVidia OpenCL	2021-10-11 04:17:01 -06:00
Tony Butler	ca8bef3ade	Adjust API version logic	2021-10-11 04:17:01 -06:00
Tony Butler	d735caa334	Adjust definitions and replace literal 0x4038	2021-10-11 04:17:01 -06:00
Tony Butler	eb54cc0e0f	Revert amd_bitalign/amd_bfe polyfills	2021-10-11 04:17:01 -06:00
Tony Butler	84c67c37cd	Apply "no-static-without-amd" fixes	2021-10-11 04:17:01 -06:00
Tony Butler	b44f38a362	Attempt repair of cn/r output-array access problem	2021-10-11 04:17:01 -06:00
Tony Butler	8ed4088d0a	Second try at fixing cn/r atomic_inc() call	2021-10-11 04:17:01 -06:00
Tony Butler	cdcea2a4f9	Attempt fix for cn/r on Apple-AMD	2021-10-11 04:17:01 -06:00
Tony Butler	f0d80326ec	Add Ellesmere correctly (still just a Polaris alias)	2021-10-11 04:17:01 -06:00
Tony Butler	cb8fc26cbe	Add every Apple AMD GPU type	2021-10-11 04:17:01 -06:00
Tony Butler	5ec5b5ed00	Possibly fix problem with clGetProgramInfo crash	2021-10-11 04:17:01 -06:00
Tony Butler	67e29c1af1	Readjust OclDevice logic and add OCL_VENDOR_APPLE	2021-10-11 04:17:01 -06:00
xmrig	4bd94a79a4	Merge pull request #2623 from Spudz76/dev-fixWithoutKawpow Fix #2583 compiling without kawpow (string ref is nonexistent then)	2021-10-11 16:41:44 +07:00
Tony Butler	80e597d951	Fix #2583 compiling without kawpow (string ref is nonexistent then)	2021-10-11 03:31:28 -06:00
XMRig	2e269f5b8c	v6.15.3-dev	2021-10-06 02:01:29 +07:00
XMRig	57b8e35903	Merge branch 'master' into dev	2021-10-06 02:00:49 +07:00
XMRig	53be5765e6	v6.15.2	2021-10-05 23:28:29 +07:00
XMRig	68741c925b	Merge branch 'dev'	2021-10-05 23:28:06 +07:00
xmrig	9ce207e667	Update CHANGELOG.md	2021-10-05 22:24:58 +07:00
XMRig	07e0966517	Added "--versions" alias.	2021-10-05 21:49:03 +07:00
XMRig	a9d4c2a923	Removed uv_os_gethostname call for all OS.	2021-09-28 23:56:33 +07:00
xmrig	dc02e1feaa	Merge pull request #2606 from SChernykh/dev Fix: AstroBWT auto-config ignored max-threads-hint	2021-09-26 18:51:47 +07:00
SChernykh	7daff331dc	Fix: AstroBWT auto-config ignored max-threads-hint	2021-09-26 12:22:58 +02:00
XMRig	058a2fb0f4	v6.15.2-dev	2021-09-22 19:13:07 +07:00
XMRig	4fff3b946e	Merge branch 'master' into dev	2021-09-22 19:12:38 +07:00
XMRig	f7aa5e781b	v6.15.1	2021-09-22 13:08:00 +07:00
XMRig	298c5cccfa	Merge branch 'dev'	2021-09-22 13:05:36 +07:00
xmrig	2985571620	Update CHANGELOG.md	2021-09-21 18:59:47 +07:00
xmrig	279d29cd7f	Merge pull request #2594 from SChernykh/dev Added Windows taskbar icon colors	2021-09-20 23:07:00 +07:00
SChernykh	387320ad6d	Added Windows taskbar icon colors - Red when there's no connection to any pool - Yellow when mining is paused - No color during normal mining	2021-09-20 18:03:22 +02:00
XMRig	76cd83edb2	Merge branch 'Spudz76-dev-fixAsteriskProfiling' into dev	2021-09-20 20:56:11 +07:00
XMRig	7f4d667351	Remove unnecessary string.	2021-09-20 20:53:36 +07:00
Tony Butler	8027716264	Fix --threads generates "*" profile without "kawpow":false to negate it.	2021-09-20 06:49:17 -06:00
xmrig	a459dd7741	Merge pull request #2591 from Spudz76/dev-fixCompileNoRX Fix compile warning/crash when WITH_RANDOMX=OFF	2021-09-20 10:50:00 +07:00
Tony Butler	ef6011ac12	Fix compile warning when WITH_RANDOMX=OFF	2021-09-19 18:12:46 -06:00
xmrig	6d66051d92	Merge pull request #2586 from SChernykh/dev Fixed Windows 7 compatibility	2021-09-17 17:11:09 +07:00
SChernykh	b2cc2ef0d7	Fixed Windows 7 compatibility Fixes #2585	2021-09-17 12:05:37 +02:00
xmrig	9805320517	Merge pull request #2582 from Spudz76/dev-fixupRXnaming Fixup RandomX naming consistency	2021-09-17 08:03:03 +07:00
Tony Butler	582d17bb84	Fixup RandomX naming consistency	2021-09-16 08:24:37 -06:00
XMRig	9e5f5b35a6	v6.15.1-dev	2021-08-31 18:57:08 +07:00
XMRig	9a9c69ff50	Merge branch 'master' into dev	2021-08-31 18:56:31 +07:00