1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-09 08:42:40 -05:00

Compare commits

...

5 Commits

Author SHA1 Message Date
Tony Butler
3ef47dbb18 Merge 753e63cd96 into fee51b20fa 2023-10-21 13:30:07 -07:00
xmrig
fee51b20fa Merge pull request #3346 from SChernykh/dev
ARM64 JIT: don't use `x18` register
2023-10-20 07:36:12 +07:00
SChernykh
5e66efabcf ARM64 JIT: don't use x18 register
From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms
> The platforms reserve register x18. Don’t use this register.

This PR fixes invalid hashes when running on Apple silicon with the latest macOS SDK.
2023-10-19 17:45:15 +02:00
Tony Butler
753e63cd96 Update/Improve API docs 2023-07-12 02:06:53 -06:00
Tony Butler
f42b3e83a7 Cleanup API code 2023-07-12 02:06:53 -06:00
11 changed files with 464 additions and 110 deletions

View File

@@ -4,7 +4,7 @@ If you want use HTTP API you need enable it (`"enabled": true,`) then choice `po
Official HTTP client for the API: http://workers.xmrig.info/
Example configuration:
Example configuration, used in Curl examples below:
```json
"api": {
@@ -12,11 +12,11 @@ Example configuration:
"worker-id": null,
},
"http": {
"enabled": false,
"enabled": true,
"host": "127.0.0.1",
"port": 0,
"access-token": null,
"restricted": true
"port": 44444,
"access-token": "SECRET",
"restricted": false
}
```
@@ -37,30 +37,78 @@ Versions before 2.15 was use another options for API https://github.com/xmrig/xm
## Endpoints
### GET /1/summary
### APIVersion 2
Get miner summary information. [Example](api/1/summary.json).
#### GET /2/summary
### GET /1/threads
Get miner summary information. [Example](api/2/summary.json).
Get detailed information about miner threads. [Example](api/1/threads.json).
#### GET /2/backends
Get detailed information about miner backends. [Example](api/2/backends.json).
### APIVersion 1 (deprecated)
#### GET /1/summary
Get miner summary information. Currently identical to `GET /2/summary`
#### GET /1/threads
**REMOVED** Get detailed information about miner threads. [Example](api/1/threads.json).
Functionally replaced by `GET /2/backends`, which contains a `threads` item for each backend.
### APIVersion 0 (deprecated)
#### GET /api.json
Get miner summary information. Currently identical to `GET /2/summary`
## Restricted endpoints
All API endpoints below allow access to sensitive information and remote configuration of the miner. To use them, you should set `access-token` and allow unrestricted access (`"restricted": false`).
### GET /1/config
### JSON-RPC Interface
Get current miner configuration. [Example](api/1/config.json).
#### POST /json_rpc
Control the miner with JSON-RPC. Supported methods: `pause`, `resume`, `stop`, `start`.
### PUT /1/config
Curl example:
```
curl -v --data "{\"method\":\"pause\",\"id\":1}" -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/json_rpc
```
### APIVersion 2
#### GET /2/config
Get current miner configuration. [Example](api/2/config.json).
#### PUT /2/config
Update the current miner configuration. Common use case: get the current configuration, make changes, and upload it to the miner.
Curl example:
```
curl -v --data-binary @config.json -X PUT -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/1/config
...GET current config...
curl -v -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/2/config > config.json
...make changes...
vim config.json
...PUT changed config...
curl -v --data-binary @config.json -X PUT -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/2/config
```
### APIVersion 1 (deprecated)
#### GET /1/config
Get current miner configuration. Currently identical to `GET /2/config`
#### PUT /1/config
Update current miner configuration. Currently identical to `PUT /2/config`

78
doc/api/2/backends.json Normal file
View File

@@ -0,0 +1,78 @@
[
{
"type": "cpu",
"enabled": true,
"algo": "rx/0",
"profile": "rx",
"hw-aes": true,
"priority": -1,
"msr": true,
"asm": "intel",
"argon2-impl": "AVX2",
"hugepages": [6, 6],
"memory": 6291456,
"hashrate": [1235.78, 1228.89, null],
"threads": [
{
"intensity": 1,
"affinity": 0,
"av": 1,
"hashrate": [409.75, 406.58, null]
},
{
"intensity": 1,
"affinity": 2,
"av": 1,
"hashrate": [412.9, 411.33, null]
},
{
"intensity": 1,
"affinity": 4,
"av": 1,
"hashrate": [413.11, 410.98, null]
}
]
},
{
"type": "cuda",
"enabled": true,
"algo": "cn-heavy/xhv",
"profile": "cn-heavy/xhv",
"versions": {
"cuda-runtime": "11.6",
"cuda-driver": "11.6",
"plugin": "6.17.1-dev",
"nvml": "11.512.15",
"driver": "512.15"
},
"hashrate": [247.02, 247.34, null],
"threads": [
{
"index": 0,
"threads": 32,
"blocks": 38,
"bfactor": 6,
"bsleep": 25,
"affinity": -1,
"cclock": 0,
"mclock": 0,
"dataset_host": false,
"hashrate": [246.77, 247.26, null],
"name": "NVIDIA GeForce GTX 970",
"bus_id": "01:00.0",
"smx": 13,
"arch": 52,
"global_mem": 4294836224,
"clock": 1177,
"memory_clock": 3666,
"health": {
"temperature": 69,
"power": 161,
"clock": 1328,
"mem_clock": 3662,
"fan_speed": [100]
}
}
]
}
]

136
doc/api/2/config.json Normal file
View File

@@ -0,0 +1,136 @@
{
"api": {
"id": null,
"worker-id": null
},
"http": {
"enabled": true,
"host": "127.0.0.1",
"port": 44444,
"access-token": "SECRET",
"restricted": false
},
"autosave": true,
"background": false,
"colors": true,
"title": true,
"randomx": {
"init": -1,
"init-avx2": -1,
"mode": "auto",
"1gb-pages": true,
"rdmsr": true,
"wrmsr": true,
"cache_qos": false,
"numa": true,
"scratchpad_prefetch_mode": 1
},
"cpu": {
"enabled": true,
"huge-pages": true,
"huge-pages-jit": true,
"hw-aes": null,
"priority": null,
"memory-pool": true,
"yield": true,
"asm": true,
"argon2-impl": null,
"argon2": [0, 2, 4, 6, 5, 7],
"astrobwt/v2": [1, 2, 3, 4, 5, 6, 7],
"cn": [
[1, 0],
[1, 2],
[1, 4]
],
"cn-heavy": [
[1, 0],
[1, 2]
],
"cn-lite": [
[1, 0],
[1, 2],
[1, 4],
[1, 6],
[1, 5],
[1, 7]
],
"cn-pico": [
[2, 1],
[2, 2],
[2, 3],
[2, 4],
[2, 5],
[2, 6],
[2, 7]
],
"cn/2": [
[1, 0],
[1, 2],
[1, 4]
],
"cn/upx2": [
[2, 1],
[2, 2],
[2, 3],
[2, 4],
[2, 5],
[2, 6],
[2, 7]
],
"ghostrider": [
[8, 0],
[8, 2],
[8, 4]
],
"rx": [0, 2, 4],
"rx/arq": [1, 2, 3, 4, 5, 6, 7],
"rx/keva": [0, 2, 4, 6, 5, 7],
"rx/wow": [0, 2, 4, 6, 5, 7],
"cn-lite/0": false,
"cn/0": "cn"
},
"log-file": null,
"donate-level": 0,
"donate-over-proxy": 0,
"pools": [
{
"algo": null,
"coin": null,
"url": "some.pool:10064",
"user": "4blahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahbl",
"pass": "x",
"rig-id": null,
"nicehash": false,
"keepalive": true,
"enabled": true,
"tls": false,
"wss": false,
"daemon": false,
"socks5": null,
"self-select": null,
"submit-to-origin": false
}
],
"retries": 5,
"retry-pause": 5,
"print-time": 64,
"syslog": false,
"tls": {
"enabled": false,
"protocols": null,
"cert": null,
"cert_key": null,
"ciphers": null,
"ciphersuites": null,
"dhparam": null
},
"dns": {
"ipv6": false,
"ttl": 30
},
"user-agent": null,
"verbose": 1,
"watch": true,
"pause-on-battery": false,
"pause-on-active": false
}

76
doc/api/2/summary.json Normal file
View File

@@ -0,0 +1,76 @@
{
"id": "51aca77da137cb62",
"worker_id": "tpad",
"uptime": 106,
"restricted": false,
"resources": {
"memory": {
"free": 4977348608,
"total": 16659546112,
"resident_set_memory": 16441344
},
"load_average": [3.4, 2.7, 2.44],
"hardware_concurrency": 8
},
"features": ["api", "asm", "http", "hwloc", "tls"],
"results": {
"diff_current": 37638,
"shares_good": 3,
"shares_total": 3,
"avg_time": 35,
"avg_time_ms": 35603,
"hashes_total": 165638,
"best": [358821, 344438, 73371, 0, 0, 0, 0, 0, 0, 0]
},
"algo": "rx/0",
"connection": {
"pool": "some.pool:20064",
"ip": "127.1.2.3",
"uptime": 106,
"uptime_ms": 106811,
"ping": 405,
"failures": 0,
"tls": "TLSv1.3",
"tls-fingerprint": null,
"algo": "rx/0",
"diff": 37638,
"accepted": 3,
"rejected": 0,
"avg_time": 35,
"avg_time_ms": 35603,
"hashes_total": 165638
},
"version": "6.17.1-dev",
"kind": "miner",
"ua": "XMRig/6.17.1-dev (Linux x86_64) libuv/1.43.0 gcc/9.4.0",
"cpu": {
"brand": "Intel(R) Core(TM) i7-4700MQ CPU @ 2.40GHz",
"family": 6,
"model": 60,
"stepping": 3,
"proc_info": 198339,
"aes": true,
"avx2": true,
"x64": true,
"64_bit": true,
"l2": 1048576,
"l3": 6291456,
"cores": 4,
"threads": 8,
"packages": 1,
"nodes": 1,
"backend": "hwloc/2.7.0",
"msr": "intel",
"assembly": "intel",
"arch": "x86_64",
"flags": ["aes", "avx", "avx2", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "popcnt"]
},
"donate_level": 0,
"paused": false,
"algorithms": ["cn/0", "cn/1", "cn/2", "cn/r", "cn/fast", "cn/half", "cn/xao", "cn/rto", "cn/rwz", "cn/zls", "cn/double", "cn/ccx", "cn-lite/1", "cn-heavy/0", "cn-heavy/tube", "cn-heavy/xhv", "cn-pico", "cn-pico/tlo", "cn/upx2", "rx/0", "rx/wow", "rx/arq", "rx/graft", "rx/sfx", "rx/keva", "argon2/chukwa", "argon2/chukwav2", "argon2/ninja", "astrobwt/v2", "ghostrider"],
"hashrate": {
"total": [1207.19, 1210.82, null],
"highest": 1316.85
},
"hugepages": [6, 6]
}

View File

@@ -52,6 +52,8 @@ public:
// Kind of API request, classified from the request URL so that handlers can
// dispatch on type() instead of comparing URL strings themselves.
enum RequestType {
// Initial value of a request's type; kept when the URL matches no known endpoint.
REQ_UNKNOWN,
// Miner summary: versioned ".../summary" URLs and the legacy "/api.json".
REQ_SUMMARY,
// Per-backend details: versioned ".../backends" URLs.
REQ_BACKENDS,
// Miner configuration: versioned ".../config" URLs (read and update).
REQ_CONFIG,
// JSON-RPC control call. NOTE(review): presumably assigned for POST "/json_rpc";
// the assignment itself is not visible in this view - confirm.
REQ_JSON_RPC
};

View File

@@ -60,7 +60,7 @@ protected:
inline Source source() const override { return m_source; }
inline void done(int) override { m_state = STATE_DONE; }
int m_version = 1;
int m_version = 0;
RequestType m_type = REQ_UNKNOWN;
State m_state = STATE_NEW;
String m_rpcMethod;

View File

@@ -67,10 +67,33 @@ xmrig::HttpApiRequest::HttpApiRequest(const HttpData &req, bool restricted) :
m_res(req.id()),
m_url(req.url.c_str())
{
if (method() == METHOD_GET) {
if (url() == "/1/summary" || url() == "/2/summary" || url() == "/api.json") {
m_type = REQ_SUMMARY;
if (url().size() > 4 && memcmp(url().data(), "/", 1) == 0 && memcmp(url().data()+2, "/", 1) == 0) {
if (memcmp(url().data(), "/2/", 3) == 0) {
m_version = 2;
} else if (memcmp(url().data(), "/1/", 3) == 0) {
m_version = 1;
}
switch (url().size()) {
case 9:
if (memcmp(url().data()+3, "config", 6) == 0) {
m_type = REQ_CONFIG;
}
break;
case 10:
if (memcmp(url().data()+3, "summary", 7) == 0) {
m_type = REQ_SUMMARY;
}
break;
case 11:
if (memcmp(url().data()+3, "backends", 8) == 0) {
m_type = REQ_BACKENDS;
}
break;
}
}
if (url() == "/api.json") {
m_type = REQ_SUMMARY;
}
if (method() == METHOD_POST && url() == "/json_rpc") {
@@ -94,12 +117,6 @@ xmrig::HttpApiRequest::HttpApiRequest(const HttpData &req, bool restricted) :
return;
}
if (url().size() > 4) {
if (memcmp(url().data(), "/2/", 3) == 0) {
m_version = 2;
}
}
}

View File

@@ -45,13 +45,6 @@
#ifdef XMRIG_FEATURE_API
# include "base/api/Api.h"
# include "base/api/interfaces/IApiRequest.h"
namespace xmrig {
static const char *kConfigPathV1 = "/1/config";
static const char *kConfigPathV2 = "/2/config";
} // namespace xmrig
#endif
@@ -317,7 +310,7 @@ void xmrig::Base::onFileChanged(const String &fileName)
void xmrig::Base::onRequest(IApiRequest &request)
{
if (request.method() == IApiRequest::METHOD_GET) {
if (request.url() == kConfigPathV1 || request.url() == kConfigPathV2) {
if (request.type() == IApiRequest::REQ_CONFIG) {
if (request.isRestricted()) {
return request.done(403);
}
@@ -327,7 +320,7 @@ void xmrig::Base::onRequest(IApiRequest &request)
}
}
else if (request.method() == IApiRequest::METHOD_PUT || request.method() == IApiRequest::METHOD_POST) {
if (request.url() == kConfigPathV1 || request.url() == kConfigPathV2) {
if (request.type() == IApiRequest::REQ_CONFIG) {
request.accept();
if (!reload(request.json())) {

View File

@@ -689,7 +689,7 @@ void xmrig::Miner::onRequest(IApiRequest &request)
d_ptr->getMiner(request.reply(), request.doc(), request.version());
d_ptr->getHashrate(request.reply(), request.doc(), request.version());
}
else if (request.url() == "/2/backends") {
else if (request.type() == IApiRequest::REQ_BACKENDS && request.version() == 2) {
request.accept();
d_ptr->getBackends(request.reply(), request.doc());
@@ -711,6 +711,12 @@ void xmrig::Miner::onRequest(IApiRequest &request)
stop();
}
else if (request.rpcMethod() == "start") {
request.accept();
const auto config = d_ptr->controller->config();
onConfigChanged(config, config);
}
}
for (IBackend *backend : d_ptr->backends) {

View File

@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w18, w18, CacheLineAlignMask
// and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
}
else
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos;
uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k);
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 13)
if (literal_id < 12)
{
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst)
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
// sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
uint32_t imm = instr.getImm32();

View File

@@ -72,9 +72,9 @@
# x15 -> "r7"
# x16 -> spAddr0
# x17 -> spAddr1
# x18 -> temporary
# x18 -> unused (platform register, don't touch it)
# x19 -> temporary
# x20 -> literal for IMUL_RCP
# x20 -> temporary
# x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers
sub sp, sp, 192
stp x16, x17, [sp]
stp x18, x19, [sp, 16]
str x19, [sp, 16]
stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64]
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
# Read literals
ldr x0, literal_x0
ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21
ldr x22, literal_x22
ldr x23, literal_x23
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32
lsr x20, x10, 32
# Actual mask will be inserted by JIT compiler
and w16, w10, 1
and w17, w18, 1
and w17, w20, 1
# x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0)
ldp x18, x19, [x16]
eor x4, x4, x18
ldp x20, x19, [x16]
eor x4, x4, x20
eor x5, x5, x19
ldp x18, x19, [x16, 16]
eor x6, x6, x18
ldp x20, x19, [x16, 16]
eor x6, x6, x20
eor x7, x7, x19
ldp x18, x19, [x16, 32]
eor x12, x12, x18
ldp x20, x19, [x16, 32]
eor x12, x12, x20
eor x13, x13, x19
ldp x18, x19, [x16, 48]
eor x14, x14, x18
ldp x20, x19, [x16, 48]
eor x14, x14, x20
eor x15, x15, x19
# Load group F registers (spAddr1)
ldpsw x18, x19, [x17]
ins v16.d[0], x18
ldpsw x20, x19, [x17]
ins v16.d[0], x20
ins v16.d[1], x19
ldpsw x18, x19, [x17, 8]
ins v17.d[0], x18
ldpsw x20, x19, [x17, 8]
ins v17.d[0], x20
ins v17.d[1], x19
ldpsw x18, x19, [x17, 16]
ins v18.d[0], x18
ldpsw x20, x19, [x17, 16]
ins v18.d[0], x20
ins v18.d[1], x19
ldpsw x18, x19, [x17, 24]
ins v19.d[0], x18
ldpsw x20, x19, [x17, 24]
ins v19.d[0], x20
ins v19.d[1], x19
scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d
# Load group E registers (spAddr1)
ldpsw x18, x19, [x17, 32]
ins v20.d[0], x18
ldpsw x20, x19, [x17, 32]
ins v20.d[0], x20
ins v20.d[1], x19
ldpsw x18, x19, [x17, 40]
ins v21.d[0], x18
ldpsw x20, x19, [x17, 40]
ins v21.d[0], x20
ins v21.d[1], x19
ldpsw x18, x19, [x17, 48]
ins v22.d[0], x18
ldpsw x20, x19, [x17, 48]
ins v22.d[0], x20
ins v22.d[1], x19
ldpsw x18, x19, [x17, 56]
ins v23.d[0], x18
ldpsw x20, x19, [x17, 56]
ins v23.d[0], x20
ins v23.d[1], x19
scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch
mov w18, w9
mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler
and x18, x18, 1
add x18, x18, x1
and x20, x20, 1
add x20, x20, x1
# Prefetch dataset data
prfm pldl2strm, [x18]
prfm pldl2strm, [x20]
# mx <-> ma
ror x9, x9, 32
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line):
# xor integer registers with dataset data
ldp x18, x19, [x10]
eor x4, x4, x18
ldp x20, x19, [x10]
eor x4, x4, x20
eor x5, x5, x19
ldp x18, x19, [x10, 16]
eor x6, x6, x18
ldp x20, x19, [x10, 16]
eor x6, x6, x20
eor x7, x7, x19
ldp x18, x19, [x10, 32]
eor x12, x12, x18
ldp x20, x19, [x10, 32]
eor x12, x12, x20
eor x13, x13, x19
ldp x18, x19, [x10, 48]
eor x14, x14, x18
ldp x20, x19, [x10, 48]
eor x14, x14, x20
eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1):
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers
ldp x16, x17, [sp]
ldp x18, x19, [sp, 16]
ldr x19, [sp, 16]
ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64]
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
eor x9, x9, x20
# mx <-> ma
ror x9, x9, 32
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item
DECL(randomx_init_dataset_aarch64):
# Save x30 (return address)
str x30, [sp, -16]!
# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
stp x20, x30, [sp, -16]!
# Load pointer to cache memory
ldr x0, [x0]
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x30 (return address)
ldr x30, [sp], 16
# Restore x20 and x30
ldp x20, x30, [sp], 16
ret