#if defined(__linux__) && defined(__ELF__)
/* Empty .note.GNU-stack section: tells the GNU linker this object does not
   need an executable stack. */
.section .note.GNU-stack,"",%progbits
#endif

#if defined(__x86_64__)

/*
 * s16_mix_in_avx512 -- add one buffer of signed 16-bit samples into another,
 * with signed saturation (vpaddsw), 32 samples per 512-bit vector.
 *
 * Presumed C prototype (NOTE(review): confirm against the caller):
 *     void s16_mix_in_avx512(int16_t *dst, const int16_t *src, long n);
 *
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 * ABI:    System V AMD64 (arguments are taken from rdi, rsi, rdx).
 * In:     rdi = buffer that is read and then overwritten with the sums
 *         rsi = buffer that is only read
 *         rdx = number of 16-bit samples, compared as a signed 64-bit value;
 *               a zero or negative count leaves memory untouched.
 *               NOTE(review): if the C prototype declares a 32-bit count,
 *               the upper half of rdx is unspecified on entry -- verify.
 * Out:    nothing meaningful (rax is left holding n rounded down to a
 *         multiple of 32).
 * Clobb:  rax, rcx, r8, r9, zmm0, zmm1, flags.
 * Needs:  AVX-512BW for the vector loop; the scalar tail is VEX-encoded, so
 *         AVX is required for every call, including short buffers.
 * Align:  none assumed; all vector accesses are unaligned forms.
 *
 * The tail uses VEX encodings and the function ends with vzeroupper so that
 * no legacy-SSE instruction ever runs while the upper zmm bits are dirty.
 */
        .text
        .global s16_mix_in_avx512
#if defined(__ELF__)
        .type   s16_mix_in_avx512, %function
#endif
s16_mix_in_avx512:
        mov     %rdx, %rax
        and     $-32, %rax                      # rax = n rounded down to whole vectors
        xor     %ecx, %ecx                      # rcx = sample index, starts at 0

        cmp     %rax, %rcx                      # fewer than 32 samples (or n <= 0)?
        jge     .Ltail

.Lvec_loop:                                     # invariant: rcx < rax, both multiples of 32
        vmovdqu16 (%rdi,%rcx,2), %zmm0          # 32 samples from the in/out buffer
        vpaddsw (%rsi,%rcx,2), %zmm0, %zmm0     # saturating add of 32 source samples
        vmovdqu16 %zmm0, (%rdi,%rcx,2)          # write the mixed samples back
        add     $32, %rcx                       # advance by one vector of samples
        cmp     %rax, %rcx
        jl      .Lvec_loop

.Ltail:
        cmp     %rdx, %rcx                      # anything left after the whole vectors?
        jge     .Ldone

.Ltail_loop:                                    # invariant: rcx < rdx, one sample per pass
        movzwl  (%rsi,%rcx,2), %r8d             # zero-extended loads avoid partial-register writes
        movzwl  (%rdi,%rcx,2), %r9d
        vmovd   %r8d, %xmm0
        vmovd   %r9d, %xmm1
        vpaddsw %xmm0, %xmm1, %xmm1             # only word 0 carries data; same saturation as the loop
        vmovd   %xmm1, %r8d
        mov     %r8w, (%rdi,%rcx,2)             # store the low 16 bits of the sum
        inc     %rcx
        cmp     %rdx, %rcx
        jl      .Ltail_loop

.Ldone:
        vzeroupper                              # leave clean upper vector state for the caller
        ret
#if defined(__ELF__)
        .size   s16_mix_in_avx512, . - s16_mix_in_avx512
#endif

#endif