I have found the solution: I needed to add the FRAME attribute, plus the .allocstack and .endprolog directives.
So in the end it was either some sort of alignment issue, or the compiler puts some additional data somewhere else that was relevant.
Code:
;-----------------------------------------------------------------------
; NTSTATUS Sbie_InvokeSyscall_asm(void* func, ULONG count, void* args);
;
; Calls `func` with up to 19 integer/pointer arguments taken from the
; array `args` (one 8-byte slot per argument) and returns whatever
; `func` leaves in rax.
;
; Syntax: MASM (ml64)        ABI: Microsoft x64
; In:     rcx = func         address to call
;         edx = count        number of valid slots in args (0..19)
;         r8  = args         pointer to the argument slots
; Out:    eax = return value of func, or STATUS_INVALID_SYSTEM_SERVICE
;               (0C000001Ch) when count > 19 (func is not called then)
; Clobb:  rax, rcx, rdx, r8-r11, flags, plus whatever func clobbers
; Stack:  2 pushes + 98h bytes; rsp is 16-byte aligned at the call
;
; NOTE: slots 0..3 of args are always loaded into rcx/rdx/r8/r9, even
;       when count < 4, so args must provide at least 4 readable slots.
;-----------------------------------------------------------------------

; Local constants (prefixed so they do not clash with other definitions)
SBIE_ISC_MAX_ARGS       EQU 13h         ; 19 = 4 register + 15 stack args
SBIE_ISC_REG_ARGS       EQU 4           ; arguments passed in registers
SBIE_ISC_HOME_SIZE      EQU 20h         ; 4 * 8 bytes of register home space
SBIE_ISC_FRAME_SIZE     EQU 98h         ; 8 * 19 - home space + 15 stack args
SBIE_ISC_BAD_SERVICE    EQU 0C000001Ch  ; STATUS_INVALID_SYSTEM_SERVICE

Sbie_InvokeSyscall_asm PROC FRAME
        ; prolog - every push of a non-volatile register is described
        ; with .pushreg so the unwinder can restore the caller's value
        push    rsi
        .pushreg rsi
        push    rdi
        .pushreg rdi
        sub     rsp, SBIE_ISC_FRAME_SIZE
        .allocstack SBIE_ISC_FRAME_SIZE
        .endprolog

        ; quick sanity check; count is a 32-bit unsigned value, so only
        ; edx is examined and the comparison is unsigned
        cmp     edx, SBIE_ISC_MAX_ARGS          ; if count > 19
        jbe     arg_count_ok
        mov     eax, SBIE_ISC_BAD_SERVICE       ; return STATUS_INVALID_SYSTEM_SERVICE
        jmp     func_return

arg_count_ok:
        ; rcx/rdx/r8 are about to be reloaded with the callee's own
        ; arguments, so move our 3 inputs to spare volatile registers
        mov     r11, r8                         ; r11  = args
        mov     r10d, edx                       ; r10  = count (zero-extended)
        mov     rax, rcx                        ; rax  = func

        ; check if we have stack-passed arguments and if not skip the copy
        cmp     r10d, SBIE_ISC_REG_ARGS
        jbe     copy_reg_args

        ; copy arguments 5..count into the outgoing stack-argument area,
        ; which starts right above the callee's register home space
        lea     rsi, [r11 + SBIE_ISC_HOME_SIZE] ; source      = &args[4]
        lea     rdi, [rsp + SBIE_ISC_HOME_SIZE] ; destination = rsp + 20h
        mov     ecx, r10d
        sub     ecx, SBIE_ISC_REG_ARGS          ; rcx = count - 4 qwords
        rep movsq                               ; forward copy (DF is clear per ABI)

copy_reg_args:
        ; load arguments 1-4 into the argument registers
        mov     r9,  qword ptr [r11 + 18h]
        mov     r8,  qword ptr [r11 + 10h]
        mov     rdx, qword ptr [r11 + 08h]
        mov     rcx, qword ptr [r11 + 00h]

        ; call the function; its result stays in rax for our caller
        call    rax

func_return:
        ; epilog
        add     rsp, SBIE_ISC_FRAME_SIZE
        pop     rdi
        pop     rsi
        ret
Sbie_InvokeSyscall_asm ENDP