You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
rtpengine/lib/mix_in_x64_avx512bw.S

42 lines
801 B

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(__x86_64__)
.global s16_mix_in_avx512
.text
# s16_mix_in_avx512 - saturating add of one signed 16-bit sample buffer
# into another, 32 samples (512 bits) per vector step.
#
# Presumably called from C as
#   void s16_mix_in_avx512(int16_t *dst, const int16_t *src, count)
# (confirm against the C declaration; it is not visible in this file).
#
# ABI:      System V AMD64 register arguments
# In:       rdi = destination samples, read and overwritten with the sums
#           rsi = source samples, read only
#           rdx = number of samples; all 64 bits are used
#           NOTE(review): a 32-bit count parameter would leave the upper
#           half of rdx unspecified - confirm the prototype is 64-bit.
# Out:      nothing meaningful in rax
# Clobbers: rax, rcx, r8, r9, xmm0, xmm1, upper bits of zmm0-zmm15, flags
# Requires: AVX512BW (vpaddsw on zmm); no alignment needed (vmovdqu16)
s16_mix_in_avx512:
        movq    %rdx, %rax
        andq    $-32, %rax                      # rax = count rounded down to a multiple of 32
        xorl    %ecx, %ecx                      # rcx = sample index, starts at 0

        # Vector part: rcx is always a multiple of 32 here.
        # Signed compare on purpose: a negative count processes nothing.
.Lmix512_vec:
        cmpq    %rax, %rcx
        jge     .Lmix512_tail
        vmovdqu16 (%rdi,%rcx,2), %zmm0          # 32 destination samples
        vpaddsw (%rsi,%rcx,2), %zmm0, %zmm0     # dst + src with signed saturation
        vmovdqu16 %zmm0, (%rdi,%rcx,2)
        addq    $32, %rcx
        jmp     .Lmix512_vec

        # Scalar tail: at most 31 samples left, one per iteration.
        # VEX-encoded on purpose so that no legacy SSE instruction runs
        # while the upper ZMM state is still dirty from the loop above.
.Lmix512_tail:
        cmpq    %rdx, %rcx
        jge     .Lmix512_done
        movzwl  (%rsi,%rcx,2), %r8d             # zero-extending loads: no partial-register writes
        movzwl  (%rdi,%rcx,2), %r9d
        vmovd   %r8d, %xmm0
        vmovd   %r9d, %xmm1
        vpaddsw %xmm0, %xmm1, %xmm1             # lane 0 holds the saturated sum
        vmovd   %xmm1, %r8d
        movw    %r8w, (%rdi,%rcx,2)
        incq    %rcx
        jmp     .Lmix512_tail

.Lmix512_done:
        vzeroupper                              # clean upper vector state before returning to the caller
        ret
#endif