Commit 0d3aeda3 authored by Tom Finegan

Remove unused assembly sources and associated tests.

Change-Id: Ic8386743b1852ca1074528d04e2adc1d191b091b
parent dbfec2a8
......@@ -374,8 +374,6 @@ set(AOM_UNIT_TEST_SOURCES
#"${AOM_ROOT}/test/accounting_test.cc"
"${AOM_ROOT}/test/acm_random.h"
"${AOM_ROOT}/test/active_map_test.cc"
# not in test.mk
#"${AOM_ROOT}/test/add_noise_test.cc"
"${AOM_ROOT}/test/altref_test.cc"
"${AOM_ROOT}/test/android"
# requires CONFIG_ANS
......
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*
*/
#include <stdlib.h>
#include "aom/aom_integer.h"
/* Table of small non-negative offsets used as the rounding term of the
 * column ("down") macroblock post-filter: aom_mbpost_proc_down_c adds
 * rv2[r & 127] to the running sum before shifting right by 4, where rv2
 * points into this table.
 *
 * Indexing by the C filter is
 *   aom_rv[(63 & rand()) + ((c * 17) & 127) + (r & 127)]
 * so the largest index it can touch is 63 + 127 + 127 = 317; the table must
 * therefore keep at least 318 entries.
 */
const int16_t aom_rv[] = {
8, 5, 2, 2, 8, 12, 4, 9, 8, 3, 0, 3, 9, 0, 0, 0, 8, 3, 14,
4, 10, 1, 11, 14, 1, 14, 9, 6, 12, 11, 8, 6, 10, 0, 0, 8, 9, 0,
3, 14, 8, 11, 13, 4, 2, 9, 0, 3, 9, 6, 1, 2, 3, 14, 13, 1, 8,
2, 9, 7, 3, 3, 1, 13, 13, 6, 6, 5, 2, 7, 11, 9, 11, 8, 7, 3,
2, 0, 13, 13, 14, 4, 12, 5, 12, 10, 8, 10, 13, 10, 4, 14, 4, 10, 0,
8, 11, 1, 13, 7, 7, 14, 6, 14, 13, 2, 13, 5, 4, 4, 0, 10, 0, 5,
13, 2, 12, 7, 11, 13, 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, 3, 4, 7,
3, 3, 14, 14, 5, 9, 13, 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, 13, 1,
12, 0, 10, 9, 7, 6, 2, 8, 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, 9,
6, 10, 11, 7, 8, 7, 5, 14, 8, 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
3, 12, 5, 7, 14, 3, 14, 5, 2, 6, 11, 12, 12, 8, 0, 11, 13, 1, 2,
0, 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, 0, 3, 10, 5, 8, 0, 11, 6,
7, 8, 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, 4, 3, 5, 6, 10, 8, 9,
4, 11, 14, 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, 8, 0, 4, 10, 7, 2,
7, 2, 2, 5, 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, 3, 14, 3, 6, 3,
0, 11, 8, 13, 1, 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, 5, 2, 13, 7,
1, 13, 14, 7, 6, 7, 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, 4, 4, 0,
8, 7, 10, 0, 8, 14, 11, 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, 11, 12,
12, 8, 0, 11, 13, 1, 2, 0, 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, 0,
3, 10, 5, 8, 0, 11, 6, 7, 8, 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
4, 3, 5, 6, 10, 8, 9, 4, 11, 14, 3, 8, 3, 7, 8, 5, 11, 4, 12,
3, 11, 9, 14, 8, 14, 13, 4, 3, 1, 2, 14, 6, 5, 4, 4, 11, 4, 6,
2, 1, 5, 8, 8, 12, 13, 5, 14, 10, 12, 13, 0, 9, 5, 5, 11, 10, 13,
9, 10, 13,
};
/* 5-tap smoothing kernel shared by the vertical and horizontal passes.
 *
 * The centre sample is replaced by a rounded average of itself and its four
 * neighbours only when every neighbour differs from it by less than `limit`;
 * otherwise the centre sample is returned untouched.
 */
static unsigned char deblock_tap5(int centre, int side_a2, int side_a1,
                                  int side_b1, int side_b2, int limit) {
  if (abs(centre - side_a2) >= limit) return (unsigned char)centre;
  if (abs(centre - side_a1) >= limit) return (unsigned char)centre;
  if (abs(centre - side_b1) >= limit) return (unsigned char)centre;
  if (abs(centre - side_b2) >= limit) return (unsigned char)centre;
  {
    /* Each stage is a rounding average, so every value stays in 0..255. */
    const int avg_a = (side_a2 + side_a1 + 1) >> 1;
    const int avg_b = (side_b2 + side_b1 + 1) >> 1;
    const int avg_ab = (avg_a + avg_b + 1) >> 1;
    return (unsigned char)((avg_ab + centre + 1) >> 1);
  }
}

/* Deblocking post-filter for `size` consecutive rows of `cols` pixels.
 *
 * For every row the source is first filtered vertically (using the two rows
 * above and the two rows below in the source) into the destination row, and
 * the destination row is then filtered horizontally in place.
 *
 *   src_ptr / src_pixels_per_line  source row and its stride; rows -2..+2
 *                                  around each processed row are read.
 *   dst_ptr / dst_pixels_per_line  destination row and its stride; two bytes
 *                                  on either side of each row are overwritten
 *                                  with replicated edge pixels.
 *   f                              per-column thresholds, f[0..cols-1].
 */
void aom_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr,
                                            unsigned char *dst_ptr,
                                            int src_pixels_per_line,
                                            int dst_pixels_per_line, int cols,
                                            unsigned char *f, int size) {
  /* Ring buffer holding the most recent horizontally filtered pixels. */
  unsigned char pending[4];
  int y;

  for (y = 0; y < size; ++y) {
    const unsigned char *row_in = src_ptr;
    unsigned char *row_out = dst_ptr;
    int x;

    /* Vertical pass: source column neighbours -> destination row. */
    for (x = 0; x < cols; ++x) {
      const int up2 = row_in[x - 2 * src_pixels_per_line];
      const int up1 = row_in[x - src_pixels_per_line];
      const int dn1 = row_in[x + src_pixels_per_line];
      const int dn2 = row_in[x + 2 * src_pixels_per_line];
      row_out[x] = deblock_tap5(row_in[x], up2, up1, dn1, dn2, f[x]);
    }

    /* Replicate the edge pixels into the two-byte left and right borders. */
    row_out[-1] = row_out[0];
    row_out[-2] = row_out[-1];
    row_out[cols + 1] = row_out[cols - 1];
    row_out[cols] = row_out[cols + 1];

    /* Horizontal pass, in place. A result is written back two columns late
     * so that the taps of the following columns still see unfiltered data. */
    for (x = 0; x < cols; ++x) {
      pending[x & 3] =
          deblock_tap5(row_out[x], row_out[x - 2], row_out[x - 1],
                       row_out[x + 1], row_out[x + 2], f[x]);
      if (x >= 2) row_out[x - 2] = pending[(x - 2) & 3];
    }

    /* Flush the two results that are still waiting in the ring buffer. */
    row_out[x - 2] = pending[(x - 2) & 3];
    row_out[x - 1] = pending[(x - 1) & 3];

    src_ptr += src_pixels_per_line;
    dst_ptr += dst_pixels_per_line;
  }
}
/* Horizontal in-place smoothing of `rows` rows of `cols` pixels.
 *
 * For each pixel a 15-sample window (7 pixels on either side) is tracked with
 * a running sum and sum of squares.  When 15 * sumsq - sum * sum is below
 * `flimit` the pixel is replaced by (8 + sum + pixel) >> 4, otherwise it is
 * kept.
 *
 *   src    first pixel of the first row.  Each row must have writable border
 *          space: 8 bytes before the row and 17 bytes after its last pixel are
 *          overwritten.
 *   pitch  distance in bytes between consecutive rows.
 *
 * Results are written back 8 columns late (buffered in d[]) so that the
 * window always reads unfiltered pixels.
 */
void aom_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows,
                                 int cols, int flimit) {
  int r, c, i;
  unsigned char *s = src;
  unsigned char d[16];

  for (r = 0; r < rows; r++) {
    int sumsq = 0;
    int sum = 0;

    /* Replicate the first pixel into the 8-byte left border. */
    for (i = -8; i < 0; i++) s[i] = s[0];

    /* 17 avoids valgrind warning - we buffer values in c in d
     * and only write them when we've read 8 ahead...
     */
    for (i = 0; i < 17; i++) s[i + cols] = s[cols - 1];

    /* Prime the window with s[-8..6]; the first loop iteration slides it to
     * s[-7..7].  Clear the delay buffer at the same time. */
    for (i = -8; i <= 6; i++) {
      sumsq += s[i] * s[i];
      sum += s[i];
      d[i + 8] = 0;
    }
    /* The priming loop only clears d[0..14].  d[15] is flushed to s[-1] at
     * c == 7, before it is first written at c == 15, so clear it too instead
     * of copying an uninitialized (first row) or stale (later rows) byte. */
    d[15] = 0;

    for (c = 0; c < cols + 8; c++) {
      /* Slide the window: s[c + 7] enters, s[c - 8] leaves.
       * x * y == s[c + 7]^2 - s[c - 8]^2. */
      int x = s[c + 7] - s[c - 8];
      int y = s[c + 7] + s[c - 8];

      sum += x;
      sumsq += x * y;

      d[c & 15] = s[c];

      if (sumsq * 15 - sum * sum < flimit) {
        d[c & 15] = (8 + sum + s[c]) >> 4;
      }

      /* Delayed write-back: s[c - 8] has just left the window. */
      s[c - 8] = d[(c - 8) & 15];
    }

    s += pitch;
  }
}
/* Vertical in-place smoothing of a `rows` x `cols` area, one column at a time.
 *
 * A 15-sample vertical window (7 rows above and below) is tracked with a
 * running sum and sum of squares.  When 15 * sumsq - sum * sum is below
 * `flimit` the pixel becomes (offset + sum + pixel) >> 4, where the offset is
 * taken from aom_rv at a position chosen by rand(), the column and the row.
 *
 *   dst    first pixel of the area.  8 rows above and 17 rows below the area
 *          are overwritten in every processed column.
 *   pitch  distance in bytes between consecutive rows.
 */
void aom_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,
                            int flimit) {
  /* One random starting position in the offset table per call. */
  const int16_t *const offsets_base = &aom_rv[63 & rand()];
  int col;

  for (col = 0; col < cols; ++col) {
    unsigned char *px = dst + col;
    const int16_t *const offsets = offsets_base + ((col * 17) & 127);
    unsigned char delayed[16];
    int total = 0;
    int total_sq = 0;
    int k;
    int row;

    /* Replicate the first row into the 8 border rows above it. */
    for (k = -8; k < 0; ++k) px[k * pitch] = px[0];

    /* Replicate the last row into 17 border rows below it; results are held
     * in delayed[] and only written once the window has read 8 rows ahead. */
    for (k = 0; k < 17; ++k) px[(k + rows) * pitch] = px[(rows - 1) * pitch];

    /* Prime the window with rows -8..6. */
    for (k = -8; k <= 6; ++k) {
      const int sample = px[k * pitch];
      total_sq += sample * sample;
      total += sample;
    }

    for (row = 0; row < rows + 8; ++row) {
      /* Slide the window: row +7 enters, row -8 leaves. */
      const int entering = px[7 * pitch];
      const int leaving = px[-8 * pitch];

      total_sq += entering * entering - leaving * leaving;
      total += entering - leaving;

      if (total_sq * 15 - total * total < flimit) {
        delayed[row & 15] =
            (unsigned char)((offsets[row & 127] + total + px[0]) >> 4);
      } else {
        delayed[row & 15] = px[0];
      }

      /* Delayed write-back of the row that just left the window. */
      if (row >= 8) px[-8 * pitch] = delayed[(row - 8) & 15];

      px += pitch;
    }
  }
}
This diff is collapsed.
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
%include "aom_ports/x86_abi_support.asm"
;void aom_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
;                              unsigned char blackclamp[16],
;                              unsigned char whiteclamp[16],
;                              unsigned char bothclamp[16],
;                              unsigned int width, unsigned int height,
;                              int pitch)
;
; For each of `height` rows: clamps the pixels with the three 16-byte clamp
; vectors and adds a run of bytes from `noise`, starting at a per-row offset
; of (rand() & 0xff).
;
; Pixels are processed 16 at a time, so each row is read and written up to the
; next multiple of 16 bytes, and noise + 255 + that length is read.
;
; Register roles inside the row loop:
;   rsi = current row, rdi = noise + random offset, rcx = width,
;   rax = byte offset within the row, rdx = scratch,
;   xmm3 / xmm4 / xmm5 = blackclamp / whiteclamp / bothclamp.
global sym(aom_plane_add_noise_sse2) PRIVATE
sym(aom_plane_add_noise_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.addnoise_loop:
    call sym(LIBAOM_RAND) WRT_PLT
    mov         rcx, arg(1)             ;noise
    and         rax, 0xff               ; random start offset 0..255
    add         rcx, rax
    mov         rdi, rcx

    ; (Re)load the clamps for this row.  The xmm registers used here are
    ; caller-saved, so the call above is allowed to clobber them; they must
    ; not be kept live across it.
    mov         rdx, arg(2)             ; blackclamp
    movdqu      xmm3, [rdx]
    mov         rdx, arg(3)             ; whiteclamp
    movdqu      xmm4, [rdx]
    mov         rdx, arg(4)             ; bothclamp
    movdqu      xmm5, [rdx]

    movsxd      rcx, dword arg(5)       ;[Width]
    mov         rsi, arg(0)             ;Pos
    xor         rax, rax

.addnoise_nextset:
    movdqu      xmm1, [rsi+rax]         ; get the source

    psubusb     xmm1, xmm3              ; subtract black clamp
    paddusb     xmm1, xmm5              ; add both clamp
    psubusb     xmm1, xmm4              ; subtract whiteclamp

    movdqu      xmm2, [rdi+rax]         ; get the noise for this line
    paddb       xmm1, xmm2              ; add it in (wrapping byte add)
    movdqu      [rsi+rax], xmm1         ; store the result

    add         rax, 16                 ; move to the next 16 pixels

    cmp         rax, rcx
    jl          .addnoise_nextset

    movsxd      rax, dword arg(7)       ; Pitch
    add         arg(0), rax             ; Start += Pitch
    sub         dword arg(6), 1         ; Height -= 1
    jg          .addnoise_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
SECTION_RODATA
align 16
; NOTE(review): neither rd42 nor four8s is referenced by the code visible in
; this file section (aom_plane_add_noise_sse2) - confirm before relying on them.
; rd42: eight 16-bit words, each 0x04.
rd42:
times 8 dw 0x04
; four8s: four 32-bit dwords, each 8.
four8s:
times 4 dd 8
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
%include "aom_ports/x86_abi_support.asm"
;macro in deblock functions
;
; FIRST_2_ROWS - first half of the 5-tap deblock filter for 16 pixels.
;   In:   xmm0 = centre pixels
;         xmm1, xmm3 = first pair of neighbours
;         flimit = 16 per-pixel byte thresholds (a %define supplied by the
;                  code that invokes the macro)
;   Out:  xmm5 = pavgb(xmm1, xmm3), the rounded average of the pair
;         xmm7 = byte mask, 0xFF where flimit <= |centre - neighbour| for
;                either neighbour (i.e. "do not filter this pixel")
;   xmm0 is preserved; xmm1, xmm2, xmm3, xmm4 and xmm6 are overwritten.
%macro FIRST_2_ROWS 0
movdqa xmm4, xmm0
movdqa xmm6, xmm0
movdqa xmm5, xmm1
pavgb xmm5, xmm3
;calculate absolute value
; |a - b| is built from the two saturating differences, one of which is 0
psubusb xmm4, xmm1
psubusb xmm1, xmm0
psubusb xmm6, xmm3
psubusb xmm3, xmm0
paddusb xmm4, xmm1
paddusb xmm6, xmm3
;get threshold
movdqa xmm2, flimit
pxor xmm1, xmm1
movdqa xmm7, xmm2
;get mask
; saturating (flimit - absdiff) is 0 exactly when flimit <= absdiff
psubusb xmm2, xmm4
psubusb xmm7, xmm6
pcmpeqb xmm2, xmm1
pcmpeqb xmm7, xmm1
por xmm7, xmm2
%endmacro
; SECOND_2_ROWS - second half of the 5-tap deblock filter for 16 pixels.
; Must follow FIRST_2_ROWS, whose xmm5 / xmm7 outputs it consumes.
;   In:   xmm0 = centre pixels
;         xmm1, xmm3 = second pair of neighbours
;         xmm5 = rounded average of the first pair
;         xmm7 = "do not filter" mask accumulated so far
;         flimit = 16 per-pixel byte thresholds
;   Out:  xmm0 = result: the original pixel where the mask is set, otherwise
;                pavgb(pavgb(avg of first pair, avg of second pair), centre)
;   xmm1 - xmm7 are overwritten.
%macro SECOND_2_ROWS 0
movdqa xmm6, xmm0
movdqa xmm4, xmm0
movdqa xmm2, xmm1
pavgb xmm1, xmm3
;calculate absolute value
psubusb xmm6, xmm2
psubusb xmm2, xmm0
psubusb xmm4, xmm3
psubusb xmm3, xmm0
paddusb xmm6, xmm2
paddusb xmm4, xmm3
; xmm5 = average of the two pair averages
pavgb xmm5, xmm1
;get threshold
movdqa xmm2, flimit
pxor xmm1, xmm1
movdqa xmm3, xmm2
;get mask
psubusb xmm2, xmm6
psubusb xmm3, xmm4
pcmpeqb xmm2, xmm1
pcmpeqb xmm3, xmm1
por xmm7, xmm2
por xmm7, xmm3
; xmm5 = filtered value: average of the neighbourhood average and the centre
pavgb xmm5, xmm0
;decide if or not to use filtered value
; the two masked halves are disjoint, so the saturating add acts as an OR
pand xmm0, xmm7
pandn xmm7, xmm5
paddusb xmm0, xmm7
%endmacro
; UPDATE_FLIMIT - copy the next 16 threshold bytes from [rbx] to the stack
; slot at [rsp] and advance rbx by 16.
; Both accesses use movdqa, so rbx and rsp must be 16-byte aligned here.
; Overwrites xmm2.
%macro UPDATE_FLIMIT 0
movdqa xmm2, XMMWORD PTR [rbx]
movdqa [rsp], xmm2
add rbx, 16
%endmacro
;void aom_post_proc_down_and_across_mb_row_sse2
;(
; unsigned char *src_ptr,
; unsigned char *dst_ptr,
; int src_pixels_per_line,
; int dst_pixels_per_line,
; int cols,
; int *flimits,
; int size
;)
;
; Deblocking post-filter for `size` rows: each row is filtered vertically from
; src into dst (FIRST_2_ROWS / SECOND_2_ROWS on the rows below and above), and
; the dst row is then filtered horizontally in place.
;
; NOTE(review): the prototype above declares flimits as int *, but the code
; consumes it 16 bytes per 16 columns (UPDATE_FLIMIT) and compares it with
; byte-wise SSE2 instructions, i.e. as one byte threshold per column - confirm
; against the callers.
;
; Register roles:
;   rsi = src row, rdi = dst row, rax = src stride (negated temporarily),
;   rcx = rows remaining, rdx = column offset, rbx = flimits cursor,
;   [rsp] = current 16 thresholds ("flimit"),
;   mm0 / mm1 = horizontally filtered pixels waiting to be stored.
;
; Columns are processed 16 at a time: the vertical pass writes each dst row up
; to the next multiple of 16 bytes.  The horizontal pass overwrites 8 bytes
; before and 8 bytes after the dst row with replicated edge pixels and also
; accesses [rdi-16 .. rdi-9].
;
; NOTE(review): MMX registers are used and no emms is executed in this
; routine - presumably the caller clears the x87/MMX state; verify.
global sym(aom_post_proc_down_and_across_mb_row_sse2) PRIVATE
sym(aom_post_proc_down_and_across_mb_row_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM 7
push rbx
push rsi
push rdi
; end prolog
; realign the stack and reserve one 16-byte slot for the current thresholds
ALIGN_STACK 16, rax
sub rsp, 16
; put flimit on stack
mov rbx, arg(5) ;flimits ptr
UPDATE_FLIMIT
%define flimit [rsp]
mov rsi, arg(0) ;src_ptr
mov rdi, arg(1) ;dst_ptr
movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line
movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock
.nextrow:
xor rdx, rdx ;col
.nextcol:
;load current and next 2 rows
movdqu xmm0, XMMWORD PTR [rsi]
movdqu xmm1, XMMWORD PTR [rsi + rax]
movdqu xmm3, XMMWORD PTR [rsi + 2*rax]
FIRST_2_ROWS
;load above 2 rows
neg rax
movdqu xmm1, XMMWORD PTR [rsi + 2*rax]
movdqu xmm3, XMMWORD PTR [rsi + rax]
SECOND_2_ROWS
movdqu XMMWORD PTR [rdi], xmm0
neg rax ; positive stride
add rsi, 16
add rdi, 16
add rdx, 16
cmp edx, dword arg(4) ;cols
jge .downdone
UPDATE_FLIMIT
jmp .nextcol
.downdone:
; done with the all cols, start the across filtering in place
; rewind both row pointers to column 0 and restart the threshold cursor
sub rsi, rdx
sub rdi, rdx
mov rbx, arg(5) ; flimits
UPDATE_FLIMIT
; dup the first byte into the left border 8 times
movq mm1, [rdi]
punpcklbw mm1, mm1
punpcklwd mm1, mm1
punpckldq mm1, mm1
mov rdx, -8
movq [rdi+rdx], mm1
; dup the last byte into the right border
movsxd rdx, dword arg(4)
movq mm1, [rdi + rdx + -1]
punpcklbw mm1, mm1
punpcklwd mm1, mm1
punpckldq mm1, mm1
movq [rdi+rdx], mm1
xor rdx, rdx
; preload the 16 bytes left of the row so that the first delayed store in the
; loop below writes back exactly what was already there
movq mm0, QWORD PTR [rdi-16];
movq mm1, QWORD PTR [rdi-8];
.acrossnextcol:
; centre pixels plus the two left neighbours (offsets -2 and -1)
movdqu xmm0, XMMWORD PTR [rdi + rdx]
movdqu xmm1, XMMWORD PTR [rdi + rdx -2]
movdqu xmm3, XMMWORD PTR [rdi + rdx -1]
FIRST_2_ROWS
; the two right neighbours (offsets +1 and +2)
movdqu xmm1, XMMWORD PTR [rdi + rdx +1]
movdqu xmm3, XMMWORD PTR [rdi + rdx +2]
SECOND_2_ROWS
; the previous group is stored only now, after this group's neighbours have
; been loaded, so the filter always reads unfiltered pixels
movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes
movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes
; park this group's 16 results in mm0 (low half) and mm1 (high half)
movdq2q mm0, xmm0
psrldq xmm0, 8
movdq2q mm1, xmm0
add rdx, 16
cmp edx, dword arg(4) ;cols
jge .acrossdone
UPDATE_FLIMIT
jmp .acrossnextcol
.acrossdone:
; last 16 pixels
; the low 8 results are always stored; the high 8 only when the column
; counter landed exactly on cols, otherwise they lie past the row and are
; discarded
movq QWORD PTR [rdi+rdx-16], mm0
cmp edx, dword arg(4)
jne .throw_last_8
movq QWORD PTR [rdi+rdx-8], mm1
.throw_last_8:
; done with this row
; rsi was rewound to column 0 above and rax still holds the src stride
add rsi,rax ;next src line
; NOTE(review): the strides are reloaded with a 32-bit mov (zero-extended),
; whereas the initial load of src_pixels_per_line used movsxd; a negative
; stride would therefore behave differently from the second row on.
mov eax, dword arg(3) ;dst_pixels_per_line
add rdi,rax ;next destination
mov eax, dword arg(2) ;src_pixels_per_line
mov rbx, arg(5) ;flimits
UPDATE_FLIMIT
dec rcx ;decrement count
jnz .nextrow ;next row
; release the threshold slot, then restore the pre-alignment stack pointer
; (presumably pushed by ALIGN_STACK - defined in the included ABI header)
add rsp, 16
pop rsp
; begin epilog
pop rdi
pop rsi
pop rbx
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
%undef flimit
;void aom_mbpost_proc_down_xmm(unsigned char *dst,
; int pitch, int rows, int cols,int flimit)
extern sym(aom_rv)
global sym(aom_mbpost_proc_down_xmm) PRIVATE
sym(aom_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
; end prolog
ALIGN_STACK 16, rax
sub rsp, 128+16
; unsigned char d[16][8] at [rsp]
; create flimit2 at [rsp+128]
mov eax, dword ptr arg(4) ;flimit
mov [rsp+128], eax
mov [rsp+128+4], eax
mov [rsp+128+8], eax
mov [rsp+128+12], eax
%define flimit4 [rsp+128]
%if ABI_IS_32BIT=0
lea r8, [GLOBAL(sym(aom_rv))]
%endif
;rows +=8;
add dword arg(2), 8
;for(c=0; c<cols; c+=8)
.loop_col:
mov rsi, arg(0) ; s
pxor xmm0, xmm0 ;
movsxd rax, dword ptr arg(1) ;pitch ;
; this copies the last row down into the border 8 rows
mov rdi, rsi
mov rdx, arg(2)
sub rdx, 9
imul rdx, rax
lea rdi, [rdi+rdx]
movq xmm1, QWORD ptr[rdi] ; first row
mov rcx, 8
.init_borderd: ; initialize borders
lea rdi, [rdi + rax]
movq [rdi], xmm1
dec rcx
jne .init_borderd
neg rax ; rax = -pitch
; this copies the first row up into the border 8 rows
mov rdi, rsi
movq xmm1, QWORD ptr[rdi] ; first row
mov rcx, 8
.init_border: ; initialize borders
lea rdi, [rdi + rax]
movq [rdi], xmm1
dec rcx
jne .init_border
lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8]
neg rax
pxor xmm5, xmm5
pxor xmm6, xmm6 ;
pxor xmm7, xmm7 ;
mov rdi, rsi
mov rcx, 15 ;