mirror of https://github.com/sipwise/rtpengine.git
Change-Id: Ia7b47f7b32ca1042f9f32828da476ff5360a1c72pull/1692/head
parent
7247ef4027
commit
9fc1b547e3
@ -0,0 +1,41 @@
|
||||
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

#if defined(__x86_64__)

.global s16_mix_in_avx2
#if defined(__ELF__)
.type s16_mix_in_avx2, @function
#endif

.text

# 16 bits in 256 bits = 16 samples at a time
#
# Adds the signed 16-bit samples at (%rsi) into the samples at (%rdi)
# using signed saturation and stores the sums back to (%rdi).
#   %rdi = buffer that is read, summed and overwritten
#   %rsi = buffer that is only read
#   %rdx = number of 16-bit samples, compared as a signed 64-bit value;
#          a count <= 0 touches no memory
# Scratch: %rax, %rcx, %r8, %r9, %ymm0, %ymm1, flags.
# NOTE(review): the registers look like a System V AMD64 call of the
# form fn(dst, src, count) - confirm against the C prototype. Should
# count be declared as a 32-bit type, the upper half of %rdx is not
# guaranteed on entry and needs extending before use.
s16_mix_in_avx2:
	mov	%rdx, %rax
	and	$-16, %rax			# rax = count rounded down to a multiple of 16
	xor	%ecx, %ecx			# rcx = index of the next sample

.Lavx2_vector_loop:
	cmp	%rax, %rcx
	jge	.Lavx2_tail_loop
	vmovdqu	(%rdi,%rcx,2), %ymm0		# 16-bit size
	vpaddsw	(%rsi,%rcx,2), %ymm0, %ymm1	# saturating add of 16 samples
	vmovdqu	%ymm1, (%rdi,%rcx,2)		# 16-bit size
	add	$16, %rcx			# 16 samples at a time
	jmp	.Lavx2_vector_loop

	# Fewer than 16 samples are left: handle them one by one. The
	# VEX-encoded forms are used so that no legacy SSE instruction
	# runs while the upper halves of the ymm registers are dirty.
.Lavx2_tail_loop:
	cmp	%rdx, %rcx
	jge	.Lavx2_done
	movzwl	(%rsi,%rcx,2), %r8d		# zero-extending load, no partial register write
	movzwl	(%rdi,%rcx,2), %r9d
	vmovd	%r8d, %xmm0
	vmovd	%r9d, %xmm1
	vpaddsw	%xmm0, %xmm1, %xmm1		# only the low word carries data
	vmovd	%xmm1, %r8d
	mov	%r8w, (%rdi,%rcx,2)		# 16-bit size
	inc	%rcx
	jmp	.Lavx2_tail_loop

.Lavx2_done:
	vzeroupper				# leave clean upper state for SSE code in the caller
	ret

#if defined(__ELF__)
.size s16_mix_in_avx2, .-s16_mix_in_avx2
#endif

#endif
|
@ -0,0 +1,41 @@
|
||||
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

#if defined(__x86_64__)

.global s16_mix_in_avx512
#if defined(__ELF__)
.type s16_mix_in_avx512, @function
#endif

.text

# 16 bits in 512 bits = 32 samples at a time
#
# Adds the signed 16-bit samples at (%rsi) into the samples at (%rdi)
# using signed saturation and stores the sums back to (%rdi).
#   %rdi = buffer that is read, summed and overwritten
#   %rsi = buffer that is only read
#   %rdx = number of 16-bit samples, compared as a signed 64-bit value;
#          a count <= 0 touches no memory
# Scratch: %rax, %rcx, %r8, %r9, %zmm0, %zmm1, flags.
# The 512-bit word instructions used here need AVX-512BW; the caller is
# expected to have checked for it.
# NOTE(review): the registers look like a System V AMD64 call of the
# form fn(dst, src, count) - confirm against the C prototype. Should
# count be declared as a 32-bit type, the upper half of %rdx is not
# guaranteed on entry and needs extending before use.
s16_mix_in_avx512:
	mov	%rdx, %rax
	and	$-32, %rax			# rax = count rounded down to a multiple of 32
	xor	%ecx, %ecx			# rcx = index of the next sample

.Lavx512_vector_loop:
	cmp	%rax, %rcx
	jge	.Lavx512_tail_loop
	vmovdqu16 (%rdi,%rcx,2), %zmm0		# 16-bit size
	vpaddsw	(%rsi,%rcx,2), %zmm0, %zmm1	# saturating add of 32 samples
	vmovdqu16 %zmm1, (%rdi,%rcx,2)		# 16-bit size
	add	$32, %rcx			# 32 samples at a time
	jmp	.Lavx512_vector_loop

	# Fewer than 32 samples are left: handle them one by one. The
	# VEX-encoded forms are used so that no legacy SSE instruction
	# runs while the upper parts of the zmm registers are dirty.
.Lavx512_tail_loop:
	cmp	%rdx, %rcx
	jge	.Lavx512_done
	movzwl	(%rsi,%rcx,2), %r8d		# zero-extending load, no partial register write
	movzwl	(%rdi,%rcx,2), %r9d
	vmovd	%r8d, %xmm0
	vmovd	%r9d, %xmm1
	vpaddsw	%xmm0, %xmm1, %xmm1		# only the low word carries data
	vmovd	%xmm1, %r8d
	mov	%r8w, (%rdi,%rcx,2)		# 16-bit size
	inc	%rcx
	jmp	.Lavx512_tail_loop

.Lavx512_done:
	vzeroupper				# leave clean upper state for SSE code in the caller
	ret

#if defined(__ELF__)
.size s16_mix_in_avx512, .-s16_mix_in_avx512
#endif

#endif
|
@ -0,0 +1,42 @@
|
||||
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

#if defined(__x86_64__)

.global s16_mix_in_sse2
#if defined(__ELF__)
.type s16_mix_in_sse2, @function
#endif

.text

# 16 bits in 128 bits = 8 samples at a time
#
# Adds the signed 16-bit samples at (%rsi) into the samples at (%rdi)
# using signed saturation and stores the sums back to (%rdi).
#   %rdi = buffer that is read, summed and overwritten
#   %rsi = buffer that is only read
#   %rdx = number of 16-bit samples, compared as a signed 64-bit value;
#          a count <= 0 touches no memory
# Scratch: %rax, %rcx, %r8, %r9, %xmm0, %xmm1, flags.
# NOTE(review): the registers look like a System V AMD64 call of the
# form fn(dst, src, count) - confirm against the C prototype. Should
# count be declared as a 32-bit type, the upper half of %rdx is not
# guaranteed on entry and needs extending before use.
s16_mix_in_sse2:
	mov	%rdx, %rax
	and	$-8, %rax			# rax = count rounded down to a multiple of 8
	xor	%ecx, %ecx			# rcx = index of the next sample

.Lsse2_vector_loop:
	cmp	%rax, %rcx
	jge	.Lsse2_tail_loop
	movdqu	(%rdi,%rcx,2), %xmm0		# 16-bit size
	movdqu	(%rsi,%rcx,2), %xmm1		# 16-bit size
	paddsw	%xmm0, %xmm1			# saturating add of 8 samples
	movdqu	%xmm1, (%rdi,%rcx,2)		# 16-bit size
	add	$8, %rcx			# 8 samples at a time
	jmp	.Lsse2_vector_loop

	# Fewer than 8 samples are left: handle them one by one, still
	# through paddsw so that the tail saturates like the vector loop.
.Lsse2_tail_loop:
	cmp	%rdx, %rcx
	jge	.Lsse2_done
	movzwl	(%rsi,%rcx,2), %r8d		# zero-extending load, no partial register write
	movzwl	(%rdi,%rcx,2), %r9d
	movd	%r8d, %xmm0
	movd	%r9d, %xmm1
	paddsw	%xmm0, %xmm1			# only the low word carries data
	movd	%xmm1, %r8d
	mov	%r8w, (%rdi,%rcx,2)		# 16-bit size
	inc	%rcx
	jmp	.Lsse2_tail_loop

.Lsse2_done:
	ret

#if defined(__ELF__)
.size s16_mix_in_sse2, .-s16_mix_in_sse2
#endif

#endif
|
Loading…
Reference in new issue