C++ trashing my Inline assembly registers before I read them
Posted: Tue Jan 19, 2021 4:40 pm
I have the following system call to return up to a 88 byte long string:
I'm using just about all of the registers (rcx and r11 get lost on syscalls, so rsp and rbp are the only ones that survive.) Here's my C++ code to call my syscall:
This returns junk (my terminal renders it as "??") even though if I print the registers from my kernel they contain the string.
Here's the disassembly from `objdump -drwCS -Mintel ...`:
I see that at 9e3, after the syscall, we attempt to override rdx, before reading it at 9f3. Same with rsi at 9e8. I was assuming the compiler would put rdx on the stack before overwriting it so I can read the return value from my syscall later.
Is there a way I can perform this syscall in inlined assembly, or do I need to write the syscall calling code completely in assembly?
Code: Select all
## Get name of process
### Input
* `rdi` - 29
* `rax` - The ID of the process.
### Output
* `rdi` - Was the process found?
* `rax` - Char 0-7.
* `rbx` - Char 8-15.
* `rdx` - Char 16-23.
* `rsi` - Char 24-31.
* `r8` - Char 32-39.
* `r9` - Char 40-47.
* `r10` - Char 48-55.
* `r12` - Char 56-63.
* `r13` - Char 64-71.
* `r14` - Char 72-79.
* `r15` - Char 80-87.
Code: Select all
// Returns the name of a process, or an empty string if the process doesn't exist.
std::string GetProcessName(ProcessId pid) {
volatile register size_t syscall asm ("rdi") = 29;
volatile register size_t pid_r asm ("rax") = pid;
volatile register size_t was_process_found asm ("rdi");
volatile register size_t name_1 asm ("rax");
volatile register size_t name_2 asm ("rbx");
volatile register size_t name_3 asm ("rdx");
volatile register size_t name_4 asm ("rsi");
volatile register size_t name_5 asm ("r8");
volatile register size_t name_6 asm ("r9");
volatile register size_t name_7 asm ("r10");
volatile register size_t name_8 asm ("r12");
volatile register size_t name_9 asm ("r13");
volatile register size_t name_10 asm ("r14");
volatile register size_t name_11 asm ("r15");
__asm__ __volatile__ ("syscall\n":"=r"(was_process_found),"=r"(name_1),
"=r"(name_2),"=r"(name_3),"=r"(name_4),"=r"(name_5),"=r"(name_6),
"=r"(name_7),"=r"(name_8),"=r"(name_9),"=r"(name_10),"=r"(name_11):
"r"(syscall), "r"(pid_r):
"rcx", "r11");
if (!was_process_found) return "";
// Copy the string out of the registers into a char array. Add an extra
// byte for the null terminator.
char process_name[kMaximumProcessNameLength + 1];
memset(process_name, 0, kMaximumProcessNameLength + 1);
((size_t*)process_name)[0] = name_1;
((size_t*)process_name)[1] = name_2;
((size_t*)process_name)[2] = name_3;
((size_t*)process_name)[3] = name_4;
((size_t*)process_name)[4] = name_5;
((size_t*)process_name)[5] = name_6;
((size_t*)process_name)[6] = name_7;
((size_t*)process_name)[7] = name_8;
((size_t*)process_name)[8] = name_9;
((size_t*)process_name)[9] = name_10;
((size_t*)process_name)[10] = name_11;
return std::string(process_name);
}
Here's the disassembly from `objdump -drwCS -Mintel ...`:
Code: Select all
00000000000009a0 <perception::GetProcessName(unsigned long)>:
std::string GetProcessName(ProcessId pid) {
9a0: 41 57 push r15
9a2: 48 89 f0 mov rax,rsi
9a5: 41 56 push r14
9a7: 41 55 push r13
9a9: 41 54 push r12
9ab: 55 push rbp
9ac: 48 89 fd mov rbp,rdi
volatile register size_t syscall asm ("rdi") = 29;
9af: bf 1d 00 00 00 mov edi,0x1d
std::string GetProcessName(ProcessId pid) {
9b4: 53 push rbx
9b5: 48 83 ec 68 sub rsp,0x68
"rcx", "r11");
9b9: 0f 05 syscall
if (!was_process_found) return "";
9bb: 48 85 ff test rdi,rdi
9be: 75 20 jne 9e0 <perception::GetProcessName(unsigned long)+0x40>
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
9c0: 31 c0 xor eax,eax
9c2: 66 89 45 00 mov WORD PTR [rbp+0x0],ax
return std::string(process_name);
}
9c6: 48 83 c4 68 add rsp,0x68
9ca: 48 89 e8 mov rax,rbp
9cd: 5b pop rbx
9ce: 5d pop rbp
9cf: 41 5c pop r12
9d1: 41 5d pop r13
9d3: 41 5e pop r14
9d5: 41 5f pop r15
9d7: c3 ret
9d8: 0f 1f 84 00 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
memset(process_name, 0, kMaximumProcessNameLength + 1);
9e0: 49 89 e3 mov r11,rsp
9e3: ba 59 00 00 00 mov edx,0x59
9e8: 31 f6 xor esi,esi
9ea: 4c 89 df mov rdi,r11
9ed: e8 39 3c 03 00 call 3462b <memset>
((size_t*)process_name)[7] = name_8;
9f2: 4c 89 64 24 38 mov QWORD PTR [rsp+0x38],r12
length(const char_type* __s) _NOEXCEPT {return __builtin_strlen(__s);}
9f7: 49 89 e3 mov r11,rsp
((size_t*)process_name)[0] = name_1;
9fa: 48 89 04 24 mov QWORD PTR [rsp],rax
((size_t*)process_name)[2] = name_3;
9fe: 48 89 54 24 10 mov QWORD PTR [rsp+0x10],rdx
((size_t*)process_name)[3] = name_4;
a03: 48 89 74 24 18 mov QWORD PTR [rsp+0x18],rsi
((size_t*)process_name)[4] = name_5;
a08: 4c 89 44 24 20 mov QWORD PTR [rsp+0x20],r8
((size_t*)process_name)[5] = name_6;
a0d: 4c 89 4c 24 28 mov QWORD PTR [rsp+0x28],r9
((size_t*)process_name)[6] = name_7;
a12: 4c 89 54 24 30 mov QWORD PTR [rsp+0x30],r10
((size_t*)process_name)[8] = name_9;
a17: 4c 89 6c 24 40 mov QWORD PTR [rsp+0x40],r13
((size_t*)process_name)[9] = name_10;
a1c: 4c 89 74 24 48 mov QWORD PTR [rsp+0x48],r14
((size_t*)process_name)[10] = name_11;
a21: 4c 89 7c 24 50 mov QWORD PTR [rsp+0x50],r15
((size_t*)process_name)[1] = name_2;
a26: 48 89 5c 24 08 mov QWORD PTR [rsp+0x8],rbx
a2b: 4c 89 db mov rbx,r11
a2e: 8b 13 mov edx,DWORD PTR [rbx]
a30: 48 83 c3 04 add rbx,0x4
a34: 8d 82 ff fe fe fe lea eax,[rdx-0x1010101]
a3a: f7 d2 not edx
a3c: 21 d0 and eax,edx
a3e: 25 80 80 80 80 and eax,0x80808080
a43: 74 e9 je a2e <perception::GetProcessName(unsigned long)+0x8e>
a45: 89 c2 mov edx,eax
a47: c1 ea 10 shr edx,0x10
a4a: a9 80 80 00 00 test eax,0x8080
a4f: 0f 44 c2 cmove eax,edx
a52: 48 8d 53 02 lea rdx,[rbx+0x2]
a56: 48 0f 44 da cmove rbx,rdx
a5a: 89 c1 mov ecx,eax
a5c: 00 c1 add cl,al
a5e: 48 83 db 03 sbb rbx,0x3
a62: 4c 29 db sub rbx,r11
if (__sz < __min_cap)
a65: 48 83 fb 16 cmp rbx,0x16
a69: 77 1d ja a88 <perception::GetProcessName(unsigned long)+0xe8>
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
a6b: 8d 04 1b lea eax,[rbx+rbx*1]
{return pointer_traits<pointer>::pointer_to(__r_.first().__s.__data_[0]);}
a6e: 4c 8d 65 01 lea r12,[rbp+0x1]
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
a72: 88 45 00 mov BYTE PTR [rbp+0x0],al
? __copy_constexpr(__s1, __s2, __n)
a75: 48 85 db test rbx,rbx
a78: 75 34 jne aae <perception::GetProcessName(unsigned long)+0x10e>
void assign(char_type& __c1, const char_type& __c2) _NOEXCEPT {__c1 = __c2;}
a7a: 41 c6 04 1c 00 mov BYTE PTR [r12+rbx*1],0x0
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz);
traits_type::assign(__p[__sz], value_type());
}
a7f: e9 42 ff ff ff jmp 9c6 <perception::GetProcessName(unsigned long)+0x26>
a84: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
{return (__s + (__a-1)) & ~(__a-1);}
a88: 4c 8d 6b 10 lea r13,[rbx+0x10]
a8c: 49 83 e5 f0 and r13,0xfffffffffffffff0
a90: 4c 89 ef mov rdi,r13
{__r_.first().__l.__cap_ = __long_mask | __s;}
a93: 49 83 cd 01 or r13,0x1
a97: e8 84 f9 ff ff call 420 <operator new(unsigned long)>
a9c: 4c 89 6d 00 mov QWORD PTR [rbp+0x0],r13
{__r_.first().__l.__size_ = __s;}
aa0: 49 89 e3 mov r11,rsp
{__r_.first().__l.__data_ = __p;}
aa3: 48 89 45 10 mov QWORD PTR [rbp+0x10],rax
aa7: 49 89 c4 mov r12,rax
{__r_.first().__l.__size_ = __s;}
aaa: 48 89 5d 08 mov QWORD PTR [rbp+0x8],rbx
: __n == 0 ? __s1 : (char_type*)memcpy(__s1, __s2, __n);
aae: 48 89 da mov rdx,rbx
ab1: 4c 89 de mov rsi,r11
ab4: 4c 89 e7 mov rdi,r12
ab7: e8 18 3b 03 00 call 345d4 <memcpy>
void assign(char_type& __c1, const char_type& __c2) _NOEXCEPT {__c1 = __c2;}
abc: 41 c6 04 1c 00 mov BYTE PTR [r12+rbx*1],0x0
ac1: e9 00 ff ff ff jmp 9c6 <perception::GetProcessName(unsigned long)+0x26>
ac6: 66 2e 0f 1f 84 00 00 00 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
Is there a way I can perform this syscall in inlined assembly, or do I need to write the syscall calling code completely in assembly?