mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-25 13:42:54 -05:00
Cleanup MoneroOcean patchset
This commit is contained in:
@@ -90,17 +90,17 @@ public:
|
||||
case Algorithm::CN_DOUBLE:
|
||||
return CN_ITER * 2;
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
case Algorithm::CN_GPU:
|
||||
return 0xC000;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
case Algorithm::CN_PICO_0:
|
||||
case Algorithm::CN_PICO_TLO:
|
||||
return CN_ITER / 8;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
case Algorithm::CN_GPU:
|
||||
return 0xC000;
|
||||
# endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -110,18 +110,18 @@ public:
|
||||
|
||||
/**
 * Returns the scratchpad address mask for the given algorithm.
 *
 * CN_PICO_0 and CN_GPU use fixed masks (each only when its algorithm family
 * is compiled in). Every other algorithm gets a mask derived from its
 * scratchpad size: (memory(algo) - 1) rounded down to a multiple of 16.
 *
 * @param algo  algorithm identifier.
 * @return      mask value for that algorithm.
 */
inline static uint32_t mask(Algorithm::Id algo)
{
#   ifdef XMRIG_ALGO_CN_PICO
    if (algo == Algorithm::CN_PICO_0) {
        return 0x1FFF0;
    }
#   endif

    // Checked exactly once; a second identical test further down could never be reached.
#   ifdef XMRIG_ALGO_CN_GPU
    if (algo == Algorithm::CN_GPU) {
        return 0x1FFFC0;
    }
#   endif

    // Generic case: (size - 1) with the low four bits cleared.
    return ((memory(algo) - 1) / 16) * 16;
}
|
||||
|
||||
@@ -205,10 +205,10 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_XAO>::iterations() con
|
||||
// Explicit specializations of CnAlgo<ALGO>::iterations() for algorithms whose
// iteration count is overridden here. Each algorithm is specialized exactly
// once: a second explicit specialization for the same algorithm would be a
// redefinition.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_DOUBLE>::iterations() const   { return CN_ITER * 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_RWZ>::iterations() const      { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() const      { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const   { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const      { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const      { return 0xC000; }
|
||||
|
||||
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }
|
||||
@@ -220,8 +220,8 @@ template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_0>::memory() const
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_TLO>::memory() const { return CN_MEMORY / 8; }
|
||||
|
||||
|
||||
// Explicit specializations of CnAlgo<ALGO>::mask() for the two algorithms that
// use a fixed mask. Each algorithm is specialized exactly once: a second
// explicit specialization for the same algorithm would be a redefinition.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const    { return 0x1FFFC0; }
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
@@ -252,11 +252,6 @@ xmrig::CnHash::CnHash()
|
||||
ADD_FN_ASM(Algorithm::CN_ZLS);
|
||||
ADD_FN_ASM(Algorithm::CN_DOUBLE);
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
m_map[Algorithm::CN_GPU][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
|
||||
m_map[Algorithm::CN_GPU][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_LITE
|
||||
ADD_FN(Algorithm::CN_LITE_0);
|
||||
ADD_FN(Algorithm::CN_LITE_1);
|
||||
@@ -289,6 +284,11 @@ xmrig::CnHash::CnHash()
|
||||
m_map[Algorithm::ASTROBWT_DERO][AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_GPU
|
||||
m_map[Algorithm::CN_GPU][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
|
||||
m_map[Algorithm::CN_GPU][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
patchAsmVariants();
|
||||
# endif
|
||||
|
||||
@@ -45,11 +45,11 @@ inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01)
|
||||
n01 = _mm256_cvtepi32_ps(v);
|
||||
}
|
||||
|
||||
/**
 * Rewrites two bits of every 32-bit lane of x and returns the result.
 *
 * In each lane, bit 24 is cleared (AND with 0xFEFFFFFF) and bit 23 is set
 * (OR with 0x00800000). In an IEEE-754 single these are the two lowest
 * exponent bits, so they end up as binary 01; the sign bit, the remaining
 * exponent bits and the mantissa are left untouched.
 *
 * @param x  eight packed single-precision values.
 * @return   x with the two bits above rewritten in every lane.
 */
inline __m256 fma_break(const __m256& x)
{
    // Break the dependency chain by setting the exp to ?????01
    __m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x);
    return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx);
}
|
||||
|
||||
// 14
|
||||
@@ -151,13 +151,13 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
|
||||
__m256 n01, n23;
|
||||
prep_dv_avx(idx0, v01, n01);
|
||||
prep_dv_avx(idx2, v23, n23);
|
||||
|
||||
|
||||
__m256i out, out2;
|
||||
__m256 n10, n22, n33;
|
||||
n10 = _mm256_permute2f128_ps(n01, n01, 0x01);
|
||||
n22 = _mm256_permute2f128_ps(n23, n23, 0x00);
|
||||
n33 = _mm256_permute2f128_ps(n23, n23, 0x11);
|
||||
|
||||
|
||||
out = _mm256_setzero_si256();
|
||||
double_compute_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out);
|
||||
double_compute_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out);
|
||||
@@ -166,7 +166,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
|
||||
_mm256_store_si256(idx0, _mm256_xor_si256(v01, out));
|
||||
sum0 = _mm256_add_ps(suma, sumb);
|
||||
out2 = out;
|
||||
|
||||
|
||||
__m256 n11, n02, n30;
|
||||
n11 = _mm256_permute2f128_ps(n01, n01, 0x11);
|
||||
n02 = _mm256_permute2f128_ps(n01, n23, 0x20);
|
||||
@@ -191,7 +191,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
|
||||
__m128 sum = _mm256_castps256_ps128(sum0);
|
||||
|
||||
sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit
|
||||
// vs range 0 - 64
|
||||
// vs range 0 - 64
|
||||
__m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f)));
|
||||
v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2));
|
||||
__m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3));
|
||||
|
||||
@@ -39,11 +39,11 @@ inline void prep_dv(__m128i* idx, __m128i& v, __m128& n)
|
||||
n = _mm_cvtepi32_ps(v);
|
||||
}
|
||||
|
||||
/**
 * Rewrites two bits of every 32-bit lane of x and returns the result.
 *
 * In each lane, bit 24 is cleared (AND with 0xFEFFFFFF) and bit 23 is set
 * (OR with 0x00800000). In an IEEE-754 single these are the two lowest
 * exponent bits, so they end up as binary 01; the sign bit, the remaining
 * exponent bits and the mantissa are left untouched.
 *
 * @param x  four packed single-precision values (taken by value and reused
 *           as the working register).
 * @return   x with the two bits above rewritten in every lane.
 */
inline __m128 fma_break(__m128 x)
{
    // Break the dependency chain by setting the exp to ?????01
    x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x);
    return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x);
}
|
||||
|
||||
// 14
|
||||
@@ -136,13 +136,13 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
|
||||
__m128i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
|
||||
__m128i* idx3 = scratchpad_ptr<MASK>(lpad, s, 3);
|
||||
__m128 sum0 = _mm_setzero_ps();
|
||||
|
||||
|
||||
for(size_t i = 0; i < ITER; i++)
|
||||
{
|
||||
__m128 n0, n1, n2, n3;
|
||||
__m128i v0, v1, v2, v3;
|
||||
__m128 suma, sumb, sum1, sum2, sum3;
|
||||
|
||||
|
||||
prep_dv(idx0, v0, n0);
|
||||
prep_dv(idx1, v1, n1);
|
||||
prep_dv(idx2, v2, n2);
|
||||
@@ -158,7 +158,7 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
|
||||
sum0 = _mm_add_ps(suma, sumb);
|
||||
_mm_store_si128(idx0, _mm_xor_si128(v0, out));
|
||||
out2 = out;
|
||||
|
||||
|
||||
out = _mm_setzero_si128();
|
||||
single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out);
|
||||
single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out);
|
||||
@@ -190,7 +190,7 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
|
||||
sum0 = _mm_add_ps(sum0, sum2);
|
||||
|
||||
sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit
|
||||
// vs range 0 - 64
|
||||
// vs range 0 - 64
|
||||
n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f));
|
||||
v0 = _mm_cvttps_epi32(n0);
|
||||
v0 = _mm_xor_si128(v0, out2);
|
||||
|
||||
Reference in New Issue
Block a user