; Syntax: NASM (Intel operand order), x86-64.

section .text

;-----------------------------------------------------------------------
; mov_load_x1 / mov_load_x2 / mov_load_x3 / mov_load_x4
;
; Each routine spins in a loop that issues N back-to-back 64-bit loads
; from the same address per iteration (N = 1..4) and discards the value.
;
; In:    rdi = buffer; every load reads the qword at [rdi], the pointer
;              is never advanced
;        rsi = count (the original notes call it byte_count); the
;              counter drops by N per iteration, i.e. by one per load
; Out:   nothing is documented as a result; rax simply ends up holding
;        the last qword loaded
; Clobb: rax, rcx, flags
;
; Loop shape:
;   - Bottom-tested: the body always runs at least once, so N loads are
;     issued even when count is zero or negative.
;   - The exit test is signed (jg, same opcode as jnle), so a count that
;     is not a multiple of N still terminates once the counter goes
;     to zero or below.
;   - The loop head is aligned to 64 bytes. The padding sits after the
;     counter setup, so it is executed once by fall-through on entry.
;
; NOTE(review): the register roles match the first two integer arguments
; of the System V AMD64 convention, presumably f(buffer, count) --
; confirm against the caller.
;-----------------------------------------------------------------------

; MOV_LOAD_FN n -- emit the exported routine mov_load_x<n>.
%macro MOV_LOAD_FN 1
global mov_load_x%1
mov_load_x%1:
        mov     rcx, rsi                ; rcx = loads still to issue
        align   64
.loop:
    %rep %1
        mov     rax, [rdi]              ; result unused; only the load matters
    %endrep
        sub     rcx, %1                 ; one unit per load issued above
        jg      .loop                   ; signed: keep going while rcx > 0
        ret
%endmacro

        MOV_LOAD_FN 1
        MOV_LOAD_FN 2
        MOV_LOAD_FN 3
        MOV_LOAD_FN 4