You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
rtpengine/lib/mix_in_x64_avx2.S

42 lines
793 B

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(__x86_64__)
.global s16_mix_in_avx2
.text
/*
 * s16_mix_in_avx2 -- mix (add with signed saturation) 16-bit samples from
 * one buffer into another, in place.
 *
 * Register usage matches the System V AMD64 argument order; the C
 * prototype is not visible in this file (TODO confirm the width of the
 * count argument against the caller, all 64 bits of rdx are used here).
 *
 * In:    rdi = destination samples (read and written, unaligned access)
 *        rsi = source samples      (read only, unaligned access)
 *        rdx = number of 16-bit samples; compared as a signed value, so a
 *              zero or negative count processes nothing
 * Out:   none (rax holds scratch data on return)
 * Clobb: rax, rcx, r8, r9, ymm0, ymm1, flags
 *
 * 16 bits in 256 bits = 16 samples at a time.  The leftover (count % 16)
 * samples are handled one by one with VEX-encoded 128-bit instructions so
 * that no legacy-SSE instruction executes while the upper YMM halves are
 * dirty, and vzeroupper is issued before returning to the caller.
 */
s16_mix_in_avx2:
        mov     %rdx, %rax
        and     $-16, %rax                      # rax = count rounded down to a multiple of 16
        xor     %ecx, %ecx                      # rcx = index of the next sample to process

.Ls16_avx2_vec:
        cmp     %rax, %rcx
        jge     .Ls16_avx2_tail                 # no full group of 16 samples left
        vmovdqu (%rdi,%rcx,2), %ymm0            # 16 destination samples (index scaled by 2 bytes)
        vpaddsw (%rsi,%rcx,2), %ymm0, %ymm0     # dst += src with signed saturation
        vmovdqu %ymm0, (%rdi,%rcx,2)
        add     $16, %rcx                       # 16 samples at a time
        jmp     .Ls16_avx2_vec

.Ls16_avx2_tail:
        cmp     %rdx, %rcx
        jge     .Ls16_avx2_done                 # all samples processed
        movzwl  (%rsi,%rcx,2), %r8d             # zero-extending loads, no partial-register merge
        movzwl  (%rdi,%rcx,2), %r9d
        vmovd   %r8d, %xmm0
        vmovd   %r9d, %xmm1
        vpaddsw %xmm0, %xmm1, %xmm1             # lane 0 = saturated sum of the two samples
        vmovd   %xmm1, %r8d
        mov     %r8w, (%rdi,%rcx,2)             # store only the low 16 bits
        inc     %rcx
        jmp     .Ls16_avx2_tail

.Ls16_avx2_done:
        vzeroupper                              # avoid AVX to SSE transition penalties in the caller
        ret
#endif