1
0
mirror of https://github.com/xmrig/xmrig.git synced 2026-07-01 06:36:35 -04:00

Compare commits

...

5 Commits

Author SHA1 Message Date
Tony Butler 3ef47dbb18 Merge 753e63cd96 into fee51b20fa 2023-10-21 13:30:07 -07:00
xmrig fee51b20fa Merge pull request #3346 from SChernykh/dev
ARM64 JIT: don't use `x18` register
2023-10-20 07:36:12 +07:00
SChernykh 5e66efabcf ARM64 JIT: don't use x18 register
From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms
> The platforms reserve register x18. Don’t use this register.

This PR fixes invalid hashes when running on Apple silicon with the latest macOS SDK.
2023-10-19 17:45:15 +02:00
Tony Butler 753e63cd96 Update/Improve API docs 2023-07-12 02:06:53 -06:00
Tony Butler f42b3e83a7 Cleanup API code 2023-07-12 02:06:53 -06:00
11 changed files with 464 additions and 110 deletions
+61 -13
View File
@@ -4,7 +4,7 @@ If you want use HTTP API you need enable it (`"enabled": true,`) then choice `po
Official HTTP client for API: http://workers.xmrig.info/ Official HTTP client for API: http://workers.xmrig.info/
Example configuration: Example configuration, used in Curl examples below:
```json ```json
"api": { "api": {
@@ -12,11 +12,11 @@ Example configuration:
"worker-id": null, "worker-id": null,
}, },
"http": { "http": {
"enabled": false, "enabled": true,
"host": "127.0.0.1", "host": "127.0.0.1",
"port": 0, "port": 44444,
"access-token": null, "access-token": "SECRET",
"restricted": true "restricted": false
} }
``` ```
@@ -37,30 +37,78 @@ Versions before 2.15 was use another options for API https://github.com/xmrig/xm
## Endpoints ## Endpoints
### GET /1/summary ### APIVersion 2
Get miner summary information. [Example](api/1/summary.json). #### GET /2/summary
### GET /1/threads Get miner summary information. [Example](api/2/summary.json).
Get detailed information about miner threads. [Example](api/1/threads.json). #### GET /2/backends
Get detailed information about miner backends. [Example](api/2/backends.json).
### APIVersion 1 (deprecated)
#### GET /1/summary
Get miner summary information. Currently identical to `GET /2/summary`
#### GET /1/threads
**REMOVED** Get detailed information about miner threads. [Example](api/1/threads.json).
Functionally replaced by `GET /2/backends` which contains a `threads` item per backend.
### APIVersion 0 (deprecated)
#### GET /api.json
Get miner summary information. Currently identical to `GET /2/summary`
## Restricted endpoints ## Restricted endpoints
All API endpoints below expose sensitive information and allow remote configuration of the miner. You should set `access-token` and allow unrestricted access (`"restricted": false`). All API endpoints below expose sensitive information and allow remote configuration of the miner. You should set `access-token` and allow unrestricted access (`"restricted": false`).
### GET /1/config ### JSON-RPC Interface
Get current miner configuration. [Example](api/1/config.json). #### POST /json_rpc
Control miner with JSON-RPC. Methods: `pause`, `resume`, `stop`, `start`
### PUT /1/config Curl example:
```
curl -v --data "{\"method\":\"pause\",\"id\":1}" -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/json_rpc
```
### APIVersion 2
#### GET /2/config
Get current miner configuration. [Example](api/2/config.json).
#### PUT /2/config
Update current miner configuration. Common use case: get the current configuration, make changes, and upload it to the miner. Update current miner configuration. Common use case: get the current configuration, make changes, and upload it to the miner.
Curl example: Curl example:
``` ```
curl -v --data-binary @config.json -X PUT -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/1/config ...GET current config...
curl -v -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/2/config > config.json
...make changes...
vim config.json
...PUT changed config...
curl -v --data-binary @config.json -X PUT -H "Content-Type: application/json" -H "Authorization: Bearer SECRET" http://127.0.0.1:44444/2/config
``` ```
### APIVersion 1 (deprecated)
#### GET /1/config
Get current miner configuration. Currently identical to `GET /2/config`
#### PUT /1/config
Update current miner configuration. Currently identical to `PUT /2/config`
+78
View File
@@ -0,0 +1,78 @@
[
{
"type": "cpu",
"enabled": true,
"algo": "rx/0",
"profile": "rx",
"hw-aes": true,
"priority": -1,
"msr": true,
"asm": "intel",
"argon2-impl": "AVX2",
"hugepages": [6, 6],
"memory": 6291456,
"hashrate": [1235.78, 1228.89, null],
"threads": [
{
"intensity": 1,
"affinity": 0,
"av": 1,
"hashrate": [409.75, 406.58, null]
},
{
"intensity": 1,
"affinity": 2,
"av": 1,
"hashrate": [412.9, 411.33, null]
},
{
"intensity": 1,
"affinity": 4,
"av": 1,
"hashrate": [413.11, 410.98, null]
}
]
},
{
"type": "cuda",
"enabled": true,
"algo": "cn-heavy/xhv",
"profile": "cn-heavy/xhv",
"versions": {
"cuda-runtime": "11.6",
"cuda-driver": "11.6",
"plugin": "6.17.1-dev",
"nvml": "11.512.15",
"driver": "512.15"
},
"hashrate": [247.02, 247.34, null],
"threads": [
{
"index": 0,
"threads": 32,
"blocks": 38,
"bfactor": 6,
"bsleep": 25,
"affinity": -1,
"cclock": 0,
"mclock": 0,
"dataset_host": false,
"hashrate": [246.77, 247.26, null],
"name": "NVIDIA GeForce GTX 970",
"bus_id": "01:00.0",
"smx": 13,
"arch": 52,
"global_mem": 4294836224,
"clock": 1177,
"memory_clock": 3666,
"health": {
"temperature": 69,
"power": 161,
"clock": 1328,
"mem_clock": 3662,
"fan_speed": [100]
}
}
]
}
]
+136
View File
@@ -0,0 +1,136 @@
{
"api": {
"id": null,
"worker-id": null
},
"http": {
"enabled": true,
"host": "127.0.0.1",
"port": 44444,
"access-token": "SECRET",
"restricted": false
},
"autosave": true,
"background": false,
"colors": true,
"title": true,
"randomx": {
"init": -1,
"init-avx2": -1,
"mode": "auto",
"1gb-pages": true,
"rdmsr": true,
"wrmsr": true,
"cache_qos": false,
"numa": true,
"scratchpad_prefetch_mode": 1
},
"cpu": {
"enabled": true,
"huge-pages": true,
"huge-pages-jit": true,
"hw-aes": null,
"priority": null,
"memory-pool": true,
"yield": true,
"asm": true,
"argon2-impl": null,
"argon2": [0, 2, 4, 6, 5, 7],
"astrobwt/v2": [1, 2, 3, 4, 5, 6, 7],
"cn": [
[1, 0],
[1, 2],
[1, 4]
],
"cn-heavy": [
[1, 0],
[1, 2]
],
"cn-lite": [
[1, 0],
[1, 2],
[1, 4],
[1, 6],
[1, 5],
[1, 7]
],
"cn-pico": [
[2, 1],
[2, 2],
[2, 3],
[2, 4],
[2, 5],
[2, 6],
[2, 7]
],
"cn/2": [
[1, 0],
[1, 2],
[1, 4]
],
"cn/upx2": [
[2, 1],
[2, 2],
[2, 3],
[2, 4],
[2, 5],
[2, 6],
[2, 7]
],
"ghostrider": [
[8, 0],
[8, 2],
[8, 4]
],
"rx": [0, 2, 4],
"rx/arq": [1, 2, 3, 4, 5, 6, 7],
"rx/keva": [0, 2, 4, 6, 5, 7],
"rx/wow": [0, 2, 4, 6, 5, 7],
"cn-lite/0": false,
"cn/0": "cn"
},
"log-file": null,
"donate-level": 0,
"donate-over-proxy": 0,
"pools": [
{
"algo": null,
"coin": null,
"url": "some.pool:10064",
"user": "4blahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahblahbl",
"pass": "x",
"rig-id": null,
"nicehash": false,
"keepalive": true,
"enabled": true,
"tls": false,
"wss": false,
"daemon": false,
"socks5": null,
"self-select": null,
"submit-to-origin": false
}
],
"retries": 5,
"retry-pause": 5,
"print-time": 64,
"syslog": false,
"tls": {
"enabled": false,
"protocols": null,
"cert": null,
"cert_key": null,
"ciphers": null,
"ciphersuites": null,
"dhparam": null
},
"dns": {
"ipv6": false,
"ttl": 30
},
"user-agent": null,
"verbose": 1,
"watch": true,
"pause-on-battery": false,
"pause-on-active": false
}
+76
View File
@@ -0,0 +1,76 @@
{
"id": "51aca77da137cb62",
"worker_id": "tpad",
"uptime": 106,
"restricted": false,
"resources": {
"memory": {
"free": 4977348608,
"total": 16659546112,
"resident_set_memory": 16441344
},
"load_average": [3.4, 2.7, 2.44],
"hardware_concurrency": 8
},
"features": ["api", "asm", "http", "hwloc", "tls"],
"results": {
"diff_current": 37638,
"shares_good": 3,
"shares_total": 3,
"avg_time": 35,
"avg_time_ms": 35603,
"hashes_total": 165638,
"best": [358821, 344438, 73371, 0, 0, 0, 0, 0, 0, 0]
},
"algo": "rx/0",
"connection": {
"pool": "some.pool:20064",
"ip": "127.1.2.3",
"uptime": 106,
"uptime_ms": 106811,
"ping": 405,
"failures": 0,
"tls": "TLSv1.3",
"tls-fingerprint": null,
"algo": "rx/0",
"diff": 37638,
"accepted": 3,
"rejected": 0,
"avg_time": 35,
"avg_time_ms": 35603,
"hashes_total": 165638
},
"version": "6.17.1-dev",
"kind": "miner",
"ua": "XMRig/6.17.1-dev (Linux x86_64) libuv/1.43.0 gcc/9.4.0",
"cpu": {
"brand": "Intel(R) Core(TM) i7-4700MQ CPU @ 2.40GHz",
"family": 6,
"model": 60,
"stepping": 3,
"proc_info": 198339,
"aes": true,
"avx2": true,
"x64": true,
"64_bit": true,
"l2": 1048576,
"l3": 6291456,
"cores": 4,
"threads": 8,
"packages": 1,
"nodes": 1,
"backend": "hwloc/2.7.0",
"msr": "intel",
"assembly": "intel",
"arch": "x86_64",
"flags": ["aes", "avx", "avx2", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "popcnt"]
},
"donate_level": 0,
"paused": false,
"algorithms": ["cn/0", "cn/1", "cn/2", "cn/r", "cn/fast", "cn/half", "cn/xao", "cn/rto", "cn/rwz", "cn/zls", "cn/double", "cn/ccx", "cn-lite/1", "cn-heavy/0", "cn-heavy/tube", "cn-heavy/xhv", "cn-pico", "cn-pico/tlo", "cn/upx2", "rx/0", "rx/wow", "rx/arq", "rx/graft", "rx/sfx", "rx/keva", "argon2/chukwa", "argon2/chukwav2", "argon2/ninja", "astrobwt/v2", "ghostrider"],
"hashrate": {
"total": [1207.19, 1210.82, null],
"highest": 1316.85
},
"hugepages": [6, 6]
}
+2
View File
@@ -52,6 +52,8 @@ public:
enum RequestType { enum RequestType {
REQ_UNKNOWN, REQ_UNKNOWN,
REQ_SUMMARY, REQ_SUMMARY,
REQ_BACKENDS,
REQ_CONFIG,
REQ_JSON_RPC REQ_JSON_RPC
}; };
+1 -1
View File
@@ -60,7 +60,7 @@ protected:
inline Source source() const override { return m_source; } inline Source source() const override { return m_source; }
inline void done(int) override { m_state = STATE_DONE; } inline void done(int) override { m_state = STATE_DONE; }
int m_version = 1; int m_version = 0;
RequestType m_type = REQ_UNKNOWN; RequestType m_type = REQ_UNKNOWN;
State m_state = STATE_NEW; State m_state = STATE_NEW;
String m_rpcMethod; String m_rpcMethod;
+26 -9
View File
@@ -67,10 +67,33 @@ xmrig::HttpApiRequest::HttpApiRequest(const HttpData &req, bool restricted) :
m_res(req.id()), m_res(req.id()),
m_url(req.url.c_str()) m_url(req.url.c_str())
{ {
if (method() == METHOD_GET) { if (url().size() > 4 && memcmp(url().data(), "/", 1) == 0 && memcmp(url().data()+2, "/", 1) == 0) {
if (url() == "/1/summary" || url() == "/2/summary" || url() == "/api.json") { if (memcmp(url().data(), "/2/", 3) == 0) {
m_type = REQ_SUMMARY; m_version = 2;
} else if (memcmp(url().data(), "/1/", 3) == 0) {
m_version = 1;
} }
switch (url().size()) {
case 9:
if (memcmp(url().data()+3, "config", 6) == 0) {
m_type = REQ_CONFIG;
}
break;
case 10:
if (memcmp(url().data()+3, "summary", 7) == 0) {
m_type = REQ_SUMMARY;
}
break;
case 11:
if (memcmp(url().data()+3, "backends", 8) == 0) {
m_type = REQ_BACKENDS;
}
break;
}
}
if (url() == "/api.json") {
m_type = REQ_SUMMARY;
} }
if (method() == METHOD_POST && url() == "/json_rpc") { if (method() == METHOD_POST && url() == "/json_rpc") {
@@ -94,12 +117,6 @@ xmrig::HttpApiRequest::HttpApiRequest(const HttpData &req, bool restricted) :
return; return;
} }
if (url().size() > 4) {
if (memcmp(url().data(), "/2/", 3) == 0) {
m_version = 2;
}
}
} }
+2 -9
View File
@@ -45,13 +45,6 @@
#ifdef XMRIG_FEATURE_API #ifdef XMRIG_FEATURE_API
# include "base/api/Api.h" # include "base/api/Api.h"
# include "base/api/interfaces/IApiRequest.h" # include "base/api/interfaces/IApiRequest.h"
namespace xmrig {
static const char *kConfigPathV1 = "/1/config";
static const char *kConfigPathV2 = "/2/config";
} // namespace xmrig
#endif #endif
@@ -317,7 +310,7 @@ void xmrig::Base::onFileChanged(const String &fileName)
void xmrig::Base::onRequest(IApiRequest &request) void xmrig::Base::onRequest(IApiRequest &request)
{ {
if (request.method() == IApiRequest::METHOD_GET) { if (request.method() == IApiRequest::METHOD_GET) {
if (request.url() == kConfigPathV1 || request.url() == kConfigPathV2) { if (request.type() == IApiRequest::REQ_CONFIG) {
if (request.isRestricted()) { if (request.isRestricted()) {
return request.done(403); return request.done(403);
} }
@@ -327,7 +320,7 @@ void xmrig::Base::onRequest(IApiRequest &request)
} }
} }
else if (request.method() == IApiRequest::METHOD_PUT || request.method() == IApiRequest::METHOD_POST) { else if (request.method() == IApiRequest::METHOD_PUT || request.method() == IApiRequest::METHOD_POST) {
if (request.url() == kConfigPathV1 || request.url() == kConfigPathV2) { if (request.type() == IApiRequest::REQ_CONFIG) {
request.accept(); request.accept();
if (!reload(request.json())) { if (!reload(request.json())) {
+7 -1
View File
@@ -689,7 +689,7 @@ void xmrig::Miner::onRequest(IApiRequest &request)
d_ptr->getMiner(request.reply(), request.doc(), request.version()); d_ptr->getMiner(request.reply(), request.doc(), request.version());
d_ptr->getHashrate(request.reply(), request.doc(), request.version()); d_ptr->getHashrate(request.reply(), request.doc(), request.version());
} }
else if (request.url() == "/2/backends") { else if (request.type() == IApiRequest::REQ_BACKENDS && request.version() == 2) {
request.accept(); request.accept();
d_ptr->getBackends(request.reply(), request.doc()); d_ptr->getBackends(request.reply(), request.doc());
@@ -711,6 +711,12 @@ void xmrig::Miner::onRequest(IApiRequest &request)
stop(); stop();
} }
else if (request.rpcMethod() == "start") {
request.accept();
const auto config = d_ptr->controller->config();
onConfigChanged(config, config);
}
} }
for (IBackend *backend : d_ptr->backends) { for (IBackend *backend : d_ptr->backends) {
+27 -27
View File
@@ -131,8 +131,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -148,16 +148,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
} }
// Update spMix2 // Update spMix2
// eor w18, config.readReg2, config.readReg3 // eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop // Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos); emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w18, w18, CacheLineAlignMask // and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos); emit32(0x121A0000 | 20 | (20 << 5) | ((RandomX_CurrentConfig.Log2_DatasetBaseSize - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask // and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@@ -189,8 +189,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64 // and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 16 | (10 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64 // and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos); emit32(0x121A0000 | 17 | (20 << 5) | ((RandomX_CurrentConfig.Log2_ScratchpadL3 - 7) << 10), code, codePos);
codePos = PrologueSize; codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd; literalPos = ImulRcpLiteralsEnd;
@@ -206,8 +206,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
} }
// Update spMix2 // Update spMix2
// eor w18, config.readReg2, config.readReg3 // eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop // Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos; const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@@ -477,7 +477,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
} }
else else
{ {
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k); emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg // add dst, src, tmp_reg
@@ -526,7 +526,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos; uint32_t k = codePos;
uint32_t imm = instr.getImm32(); uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1); imm &= instr.getModMem() ? (RandomX_CurrentConfig.ScratchpadL1_Size - 1) : (RandomX_CurrentConfig.ScratchpadL2_Size - 1);
emitAddImmediate(tmp_reg, src, imm, code, k); emitAddImmediate(tmp_reg, src, imm, code, k);
@@ -580,7 +580,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg // add dst, dst, tmp_reg
@@ -618,7 +618,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg // sub dst, dst, tmp_reg
@@ -637,7 +637,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst) if (src == dst)
{ {
src = 18; src = 20;
emitMovImmediate(src, instr.getImm32(), code, k); emitMovImmediate(src, instr.getImm32(), code, k);
} }
@@ -655,7 +655,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// mul dst, dst, tmp_reg // mul dst, dst, tmp_reg
@@ -686,7 +686,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg // umulh dst, dst, tmp_reg
@@ -717,7 +717,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg // smulh dst, dst, tmp_reg
@@ -735,7 +735,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos; uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63; constexpr uint64_t N = 1ULL << 63;
@@ -754,9 +754,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
literalPos -= sizeof(uint64_t); literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor); *(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 13) if (literal_id < 12)
{ {
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 }; static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg // mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k); emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@@ -794,7 +794,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst) if (src == dst)
{ {
src = 18; src = 20;
emitMovImmediate(src, instr.getImm32(), code, k); emitMovImmediate(src, instr.getImm32(), code, k);
} }
@@ -812,7 +812,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k); emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg // eor dst, dst, tmp_reg
@@ -850,7 +850,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst) if (src != dst)
{ {
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
// sub tmp_reg, xzr, src // sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k); emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@@ -878,7 +878,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos; uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@@ -1026,7 +1026,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8; constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm // ror tmp_reg, src, imm
@@ -1050,7 +1050,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src]; const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18; constexpr uint32_t tmp_reg = 20;
uint32_t imm = instr.getImm32(); uint32_t imm = instr.getImm32();
+48 -50
View File
@@ -72,9 +72,9 @@
# x15 -> "r7" # x15 -> "r7"
# x16 -> spAddr0 # x16 -> spAddr0
# x17 -> spAddr1 # x17 -> spAddr1
# x18 -> temporary # x18 -> unused (platform register, don't touch it)
# x19 -> temporary # x19 -> temporary
# x20 -> literal for IMUL_RCP # x20 -> temporary
# x21 -> literal for IMUL_RCP # x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP # x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP # x23 -> literal for IMUL_RCP
@@ -109,7 +109,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers # Save callee-saved registers
sub sp, sp, 192 sub sp, sp, 192
stp x16, x17, [sp] stp x16, x17, [sp]
stp x18, x19, [sp, 16] str x19, [sp, 16]
stp x20, x21, [sp, 32] stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48] stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64] stp x24, x25, [sp, 64]
@@ -164,7 +164,6 @@ DECL(randomx_program_aarch64):
# Read literals # Read literals
ldr x0, literal_x0 ldr x0, literal_x0
ldr x11, literal_x11 ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21 ldr x21, literal_x21
ldr x22, literal_x22 ldr x22, literal_x22
ldr x23, literal_x23 ldr x23, literal_x23
@@ -196,11 +195,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop): DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32 lsr x20, x10, 32
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and w16, w10, 1 and w16, w10, 1
and w17, w18, 1 and w17, w20, 1
# x16 = scratchpad + spAddr0 # x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1 # x17 = scratchpad + spAddr1
@@ -208,31 +207,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2 add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0) # xor integer registers with scratchpad data (spAddr0)
ldp x18, x19, [x16] ldp x20, x19, [x16]
eor x4, x4, x18 eor x4, x4, x20
eor x5, x5, x19 eor x5, x5, x19
ldp x18, x19, [x16, 16] ldp x20, x19, [x16, 16]
eor x6, x6, x18 eor x6, x6, x20
eor x7, x7, x19 eor x7, x7, x19
ldp x18, x19, [x16, 32] ldp x20, x19, [x16, 32]
eor x12, x12, x18 eor x12, x12, x20
eor x13, x13, x19 eor x13, x13, x19
ldp x18, x19, [x16, 48] ldp x20, x19, [x16, 48]
eor x14, x14, x18 eor x14, x14, x20
eor x15, x15, x19 eor x15, x15, x19
# Load group F registers (spAddr1) # Load group F registers (spAddr1)
ldpsw x18, x19, [x17] ldpsw x20, x19, [x17]
ins v16.d[0], x18 ins v16.d[0], x20
ins v16.d[1], x19 ins v16.d[1], x19
ldpsw x18, x19, [x17, 8] ldpsw x20, x19, [x17, 8]
ins v17.d[0], x18 ins v17.d[0], x20
ins v17.d[1], x19 ins v17.d[1], x19
ldpsw x18, x19, [x17, 16] ldpsw x20, x19, [x17, 16]
ins v18.d[0], x18 ins v18.d[0], x20
ins v18.d[1], x19 ins v18.d[1], x19
ldpsw x18, x19, [x17, 24] ldpsw x20, x19, [x17, 24]
ins v19.d[0], x18 ins v19.d[0], x20
ins v19.d[1], x19 ins v19.d[1], x19
scvtf v16.2d, v16.2d scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d scvtf v17.2d, v17.2d
@@ -240,17 +239,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d scvtf v19.2d, v19.2d
# Load group E registers (spAddr1) # Load group E registers (spAddr1)
ldpsw x18, x19, [x17, 32] ldpsw x20, x19, [x17, 32]
ins v20.d[0], x18 ins v20.d[0], x20
ins v20.d[1], x19 ins v20.d[1], x19
ldpsw x18, x19, [x17, 40] ldpsw x20, x19, [x17, 40]
ins v21.d[0], x18 ins v21.d[0], x20
ins v21.d[1], x19 ins v21.d[1], x19
ldpsw x18, x19, [x17, 48] ldpsw x20, x19, [x17, 48]
ins v22.d[0], x18 ins v22.d[0], x20
ins v22.d[1], x19 ins v22.d[1], x19
ldpsw x18, x19, [x17, 56] ldpsw x20, x19, [x17, 56]
ins v23.d[0], x18 ins v23.d[0], x20
ins v23.d[1], x19 ins v23.d[1], x19
scvtf v20.2d, v20.2d scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d scvtf v21.2d, v21.2d
@@ -273,7 +272,6 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0 literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0 literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0 literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0 literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0 literal_x23: .fill 1,8,0
@@ -309,17 +307,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32 lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3]; # mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18 eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch # Calculate dataset pointer for dataset prefetch
mov w18, w9 mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1): DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler # Actual mask will be inserted by JIT compiler
and x18, x18, 1 and x20, x20, 1
add x18, x18, x1 add x20, x20, x1
# Prefetch dataset data # Prefetch dataset data
prfm pldl2strm, [x18] prfm pldl2strm, [x20]
# mx <-> ma # mx <-> ma
ror x9, x9, 32 ror x9, x9, 32
@@ -331,17 +329,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line): DECL(randomx_program_aarch64_xor_with_dataset_line):
# xor integer registers with dataset data # xor integer registers with dataset data
ldp x18, x19, [x10] ldp x20, x19, [x10]
eor x4, x4, x18 eor x4, x4, x20
eor x5, x5, x19 eor x5, x5, x19
ldp x18, x19, [x10, 16] ldp x20, x19, [x10, 16]
eor x6, x6, x18 eor x6, x6, x20
eor x7, x7, x19 eor x7, x7, x19
ldp x18, x19, [x10, 32] ldp x20, x19, [x10, 32]
eor x12, x12, x18 eor x12, x12, x20
eor x13, x13, x19 eor x13, x13, x19
ldp x18, x19, [x10, 48] ldp x20, x19, [x10, 48]
eor x14, x14, x18 eor x14, x14, x20
eor x15, x15, x19 eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1): DECL(randomx_program_aarch64_update_spMix1):
@@ -384,7 +382,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers # Restore callee-saved registers
ldp x16, x17, [sp] ldp x16, x17, [sp]
ldp x18, x19, [sp, 16] ldr x19, [sp, 16]
ldp x20, x21, [sp, 32] ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48] ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64] ldp x24, x25, [sp, 64]
@@ -405,7 +403,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80] stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3]; # mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18 eor x9, x9, x20
# mx <-> ma # mx <-> ma
ror x9, x9, 32 ror x9, x9, 32
@@ -447,8 +445,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item # x3 -> end item
DECL(randomx_init_dataset_aarch64): DECL(randomx_init_dataset_aarch64):
# Save x30 (return address) # Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
str x30, [sp, -16]! stp x20, x30, [sp, -16]!
# Load pointer to cache memory # Load pointer to cache memory
ldr x0, [x0] ldr x0, [x0]
@@ -460,8 +458,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3 cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop) bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x30 (return address) # Restore x20 and x30
ldr x30, [sp], 16 ldp x20, x30, [sp], 16
ret ret