Commit be111b38, authored and committed by Ralph Giles

Remove armv6 media-extension assembly.

Libvpx dropped armv6 support sometime after the aom fork.

We don't intend to support this platform, which is likely
too slow in any case. Remove the assembly and intrinsics
optimized routines, their tests, cpu feature detection,
and rtcd specialization for this instruction set extension.

Change-Id: If44ec28e5ddafc6af179c5d1982ac7e81fe54d5e
parent 70d9acc1
......@@ -324,7 +324,6 @@ ifeq ($(CONFIG_ENCODERS),yes)
DSP_SRCS-yes += sad.c
DSP_SRCS-yes += subtract.c
DSP_SRCS-$(HAVE_MEDIA) += arm/sad_media$(ASM)
DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/subtract_neon.c
......@@ -373,12 +372,6 @@ ifneq ($(filter yes,$(CONFIG_ENCODERS)),)
DSP_SRCS-yes += variance.c
DSP_SRCS-yes += variance.h
DSP_SRCS-$(HAVE_MEDIA) += arm/bilinear_filter_media$(ASM)
DSP_SRCS-$(HAVE_MEDIA) += arm/subpel_variance_media.c
DSP_SRCS-$(HAVE_MEDIA) += arm/variance_halfpixvar16x16_h_media$(ASM)
DSP_SRCS-$(HAVE_MEDIA) += arm/variance_halfpixvar16x16_hv_media$(ASM)
DSP_SRCS-$(HAVE_MEDIA) += arm/variance_halfpixvar16x16_v_media$(ASM)
DSP_SRCS-$(HAVE_MEDIA) += arm/variance_media$(ASM)
DSP_SRCS-$(HAVE_NEON) += arm/subpel_variance_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c
......
......@@ -657,22 +657,22 @@ foreach (@block_sizes) {
add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
}
specialize qw/aom_sad128x128 avx2 sse2/;
specialize qw/aom_sad128x64 avx2 sse2/;
specialize qw/aom_sad64x128 avx2 sse2/;
specialize qw/aom_sad64x64 avx2 neon msa sse2/;
specialize qw/aom_sad64x32 avx2 msa sse2/;
specialize qw/aom_sad32x64 avx2 msa sse2/;
specialize qw/aom_sad32x32 avx2 neon msa sse2/;
specialize qw/aom_sad32x16 avx2 msa sse2/;
specialize qw/aom_sad16x32 msa sse2/;
specialize qw/aom_sad16x16 media neon msa sse2/;
specialize qw/aom_sad16x8 neon msa sse2/;
specialize qw/aom_sad8x16 neon msa sse2/;
specialize qw/aom_sad8x8 neon msa sse2/;
specialize qw/aom_sad8x4 msa sse2/;
specialize qw/aom_sad4x8 msa sse2/;
specialize qw/aom_sad4x4 neon msa sse2/;
specialize qw/aom_sad128x128 avx2 sse2/;
specialize qw/aom_sad128x64 avx2 sse2/;
specialize qw/aom_sad64x128 avx2 sse2/;
specialize qw/aom_sad64x64 avx2 neon msa sse2/;
specialize qw/aom_sad64x32 avx2 msa sse2/;
specialize qw/aom_sad32x64 avx2 msa sse2/;
specialize qw/aom_sad32x32 avx2 neon msa sse2/;
specialize qw/aom_sad32x16 avx2 msa sse2/;
specialize qw/aom_sad16x32 msa sse2/;
specialize qw/aom_sad16x16 neon msa sse2/;
specialize qw/aom_sad16x8 neon msa sse2/;
specialize qw/aom_sad8x16 neon msa sse2/;
specialize qw/aom_sad8x8 neon msa sse2/;
specialize qw/aom_sad8x4 msa sse2/;
specialize qw/aom_sad4x8 msa sse2/;
specialize qw/aom_sad4x4 neon msa sse2/;
specialize qw/aom_sad128x128_avg avx2 sse2/;
specialize qw/aom_sad128x64_avg avx2 sse2/;
......@@ -901,10 +901,10 @@ add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_str
add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/aom_mse16x16 sse2 avx2 media neon msa/;
specialize qw/aom_mse16x8 sse2 msa/;
specialize qw/aom_mse8x16 sse2 msa/;
specialize qw/aom_mse8x8 sse2 msa/;
specialize qw/aom_mse16x16 sse2 avx2 neon msa/;
specialize qw/aom_mse16x8 sse2 msa/;
specialize qw/aom_mse8x16 sse2 msa/;
specialize qw/aom_mse8x8 sse2 msa/;
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
foreach $bd (8, 10, 12) {
......@@ -961,33 +961,33 @@ foreach (@block_sizes) {
add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
}
specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
specialize qw/aom_variance64x32 sse2 avx2 neon msa/;
specialize qw/aom_variance32x64 sse2 neon msa/;
specialize qw/aom_variance32x32 sse2 avx2 neon msa/;
specialize qw/aom_variance32x16 sse2 avx2 msa/;
specialize qw/aom_variance16x32 sse2 msa/;
specialize qw/aom_variance16x16 sse2 avx2 media neon msa/;
specialize qw/aom_variance16x8 sse2 neon msa/;
specialize qw/aom_variance8x16 sse2 neon msa/;
specialize qw/aom_variance8x8 sse2 media neon msa/;
specialize qw/aom_variance8x4 sse2 msa/;
specialize qw/aom_variance4x8 sse2 msa/;
specialize qw/aom_variance4x4 sse2 msa/;
specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance64x32 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x64 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x16 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x32 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x16 media neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x8 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x16 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x8 media neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x4 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance4x8 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance4x4 msa sse2 ssse3/;
specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
specialize qw/aom_variance64x32 sse2 avx2 neon msa/;
specialize qw/aom_variance32x64 sse2 neon msa/;
specialize qw/aom_variance32x32 sse2 avx2 neon msa/;
specialize qw/aom_variance32x16 sse2 avx2 msa/;
specialize qw/aom_variance16x32 sse2 msa/;
specialize qw/aom_variance16x16 sse2 avx2 neon msa/;
specialize qw/aom_variance16x8 sse2 neon msa/;
specialize qw/aom_variance8x16 sse2 neon msa/;
specialize qw/aom_variance8x8 sse2 neon msa/;
specialize qw/aom_variance8x4 sse2 msa/;
specialize qw/aom_variance4x8 sse2 msa/;
specialize qw/aom_variance4x4 sse2 msa/;
specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance64x32 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x64 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x16 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x32 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x16 neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x8 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x16 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x8 neon msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x4 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance4x8 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_variance4x4 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
......@@ -1124,13 +1124,13 @@ add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, i
# Specialty Subpixel
#
add_proto qw/uint32_t aom_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/aom_variance_halfpixvar16x16_h sse2 media/;
specialize qw/aom_variance_halfpixvar16x16_h sse2/;
add_proto qw/uint32_t aom_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/aom_variance_halfpixvar16x16_v sse2 media/;
specialize qw/aom_variance_halfpixvar16x16_v sse2/;
add_proto qw/uint32_t aom_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/aom_variance_halfpixvar16x16_hv sse2 media/;
specialize qw/aom_variance_halfpixvar16x16_hv sse2/;
#
# Comp Avg
......
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
;
EXPORT |aom_filter_block2d_bil_first_pass_media|
EXPORT |aom_filter_block2d_bil_second_pass_media|
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; aom_filter_block2d_bil_first_pass_media
;
; First pass of a 2-tap bilinear filter, applied along each source row.
; Every output value is
;   (src[x] * filter[0] + src[x + 1] * filter[1] + 0x40) >> 7
; saturated to an unsigned 16-bit value and stored as a halfword.
; When the 32-bit word loaded from aom_filter equals 128 the multiply is
; skipped and the source bytes are copied into the halfword buffer unchanged.
;
; r0 unsigned char *src_ptr,
; r1 unsigned short *dst_ptr,
; r2 unsigned int src_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *aom_filter
;
; Output layout: successive pixels of one source row are written
; (height * 2 + 2) bytes apart, and each new source row starts 2 bytes
; further into dst_ptr, i.e. the result is stored transposed.
;
; NOTE(review): the inner loops run (width >> 2) times and handle 4 pixels
; per iteration, so width looks like it must be a non-zero multiple of 4 --
; confirm against callers.
;-------------------------------------
; The output is stored transposed in the output array to make the second pass filtering easier.
|aom_filter_block2d_bil_first_pass_media| PROC
stmdb sp!, {r4 - r11, lr}
; 9 registers (36 bytes) were pushed, so the stack arguments start at sp + 36.
ldr r11, [sp, #40] ; aom_filter address
ldr r4, [sp, #36] ; width
mov r12, r3 ; outer-loop counter (rows left to process)
add r7, r2, r4 ; src_pitch + width: offset used to preload the next row
pld [r0, r7]
sub r2, r2, r4 ; src increment for height loop (src_pitch - width)
ldr r5, [r11] ; load both 16-bit filter coefficients as one word
mov r3, r3, lsl #1 ; height*2
add r3, r3, #2 ; plus 2 to keep the output buffer 4-byte aligned, since height is actually (height+1)
mov r11, r1 ; save dst_ptr for each row
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_1st_filter
|bil_height_loop_1st_v6|
ldrb r6, [r0] ; load source data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
mov lr, r4, lsr #2 ; 4-in-parallel loop counter (width / 4)
|bil_width_loop_1st_v6|
ldrb r9, [r0, #3]
ldrb r10, [r0, #4] ; fifth pixel: second tap input for the 4th output
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
smuad r6, r6, r5 ; apply the filter
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
smuad r7, r7, r5
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
smuad r8, r8, r5
smuad r9, r9, r5
add r0, r0, #4
subs lr, lr, #1 ; sets the flags consumed by the ldrneb/bne below
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #16, r6, asr #7
usat r7, #16, r7, asr #7
strh r6, [r1], r3 ; result is transposed and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strh r7, [r1], r3
add r9, r9, #0x40
usat r8, #16, r8, asr #7
usat r9, #16, r9, asr #7
strh r8, [r1], r3 ; result is transposed and stored
ldrneb r6, [r0] ; load source data for the next iteration (skipped on the last one)
strh r9, [r1], r3
ldrneb r7, [r0, #1]
ldrneb r8, [r0, #2]
bne bil_width_loop_1st_v6
add r0, r0, r2 ; move to next input row
subs r12, r12, #1
add r9, r2, r4, lsl #1 ; adding back block width
pld [r0, r9] ; preload next row
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_1st_v6
ldmia sp!, {r4 - r11, pc}
; Pass-through path: no filtering, each source byte is widened to a halfword.
|bil_null_1st_filter|
|bil_height_loop_null_1st|
mov lr, r4, lsr #2 ; loop counter (width / 4)
|bil_width_loop_null_1st|
ldrb r6, [r0] ; load data
ldrb r7, [r0, #1]
ldrb r8, [r0, #2]
ldrb r9, [r0, #3]
strh r6, [r1], r3 ; store it to intermediate buffer
add r0, r0, #4
strh r7, [r1], r3
subs lr, lr, #1
strh r8, [r1], r3
strh r9, [r1], r3
bne bil_width_loop_null_1st
subs r12, r12, #1
add r0, r0, r2 ; move to next input line
add r11, r11, #2 ; move over to next column
mov r1, r11
bne bil_height_loop_null_1st
ldmia sp!, {r4 - r11, pc}
ENDP ; |aom_filter_block2d_bil_first_pass_media|
;---------------------------------
; aom_filter_block2d_bil_second_pass_media
;
; Second pass of the 2-tap bilinear filter. It reads the transposed halfword
; buffer, computes
;   (src[i] * filter[0] + src[i + 1] * filter[1] + 0x40) >> 7
; saturated to an unsigned 8-bit value, and writes bytes dst_pitch apart so
; the result is transposed back.
; When the 32-bit word loaded from aom_filter equals 128 the multiply is
; skipped and the low byte of each halfword is stored directly.
;
; r0 unsigned short *src_ptr,
; r1 unsigned char *dst_ptr,
; r2 int dst_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *aom_filter
;
; Each outer iteration consumes (height * 2 + 4) bytes of src_ptr and
; advances dst_ptr by one byte.
;
; NOTE(review): the inner loops run (height >> 2) times and handle 4 values
; per iteration, so height looks like it must be a non-zero multiple of 4 --
; confirm against callers.
;---------------------------------
|aom_filter_block2d_bil_second_pass_media| PROC
stmdb sp!, {r4 - r11, lr}
; 9 registers (36 bytes) were pushed, so the stack arguments start at sp + 36.
ldr r11, [sp, #40] ; aom_filter address
ldr r4, [sp, #36] ; width
ldr r5, [r11] ; load both 16-bit filter coefficients as one word
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
mov r11, r1 ; save dst_ptr for each output column
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_2nd_filter
|bil_height_loop_2nd|
ldr r6, [r0] ; load the data (two halfwords per word)
ldr r8, [r0, #4]
ldrh r10, [r0, #8] ; fifth halfword: second tap input for the 4th output
mov lr, r3, lsr #2 ; loop counter (height / 4)
|bil_width_loop_2nd|
pkhtb r7, r6, r8 ; src[1] | src[2]
pkhtb r9, r8, r10 ; src[3] | src[4]
smuad r6, r6, r5 ; apply filter
smuad r8, r8, r5 ; apply filter
subs lr, lr, #1 ; sets the flags consumed by the ldrne/ldrneh/bne below
smuadx r7, r7, r5 ; apply filter (halfwords are packed in swapped order, hence the x form)
smuadx r9, r9, r5 ; apply filter
add r0, r0, #8
add r6, r6, #0x40 ; round_shift_and_clamp
add r7, r7, #0x40
usat r6, #8, r6, asr #7
usat r7, #8, r7, asr #7
strb r6, [r1], r2 ; the result is transposed back and stored
add r8, r8, #0x40 ; round_shift_and_clamp
strb r7, [r1], r2
add r9, r9, #0x40
usat r8, #8, r8, asr #7
usat r9, #8, r9, asr #7
strb r8, [r1], r2 ; the result is transposed back and stored
ldrne r6, [r0] ; load data for the next iteration (skipped on the last one)
strb r9, [r1], r2
ldrne r8, [r0, #4]
ldrneh r10, [r0, #8]
bne bil_width_loop_2nd
subs r12, r12, #1
add r0, r0, #4 ; update src for next row
add r11, r11, #1 ; next output column
mov r1, r11
bne bil_height_loop_2nd
ldmia sp!, {r4 - r11, pc}
; Pass-through path: no filtering, the low byte of each halfword is stored.
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
mov lr, r3, lsr #2 ; loop counter (height / 4)
|bil_width_loop_null_2nd|
ldr r6, [r0], #4 ; load data
subs lr, lr, #1
ldr r8, [r0], #4
strb r6, [r1], r2 ; store data
mov r7, r6, lsr #16 ; upper halfword of the first word
strb r7, [r1], r2
mov r9, r8, lsr #16 ; upper halfword of the second word
strb r8, [r1], r2
strb r9, [r1], r2
bne bil_width_loop_null_2nd
subs r12, r12, #1
add r0, r0, #4 ; update src for next row
add r11, r11, #1 ; next output column
mov r1, r11
bne bil_height_loop_null_2nd
ldmia sp!, {r4 - r11, pc}
ENDP ; |aom_filter_block2d_bil_second_pass_media|
END
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
;
EXPORT |aom_sad16x16_media|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; aom_sad16x16_media
;
; Accumulates the sum of absolute differences between two blocks that are
; 16 bytes wide and 16 rows tall, and returns the total in r0.
; The loop runs 8 times and handles two rows per iteration.
;
; r0 const unsigned char *src_ptr
; r1 int src_stride
; r2 const unsigned char *ref_ptr
; r3 int ref_stride
|aom_sad16x16_media| PROC
stmfd sp!, {r4-r12, lr}
; preload the rows one and two strides ahead of both blocks
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
pld [r0, r1, lsl #1]
pld [r2, r3, lsl #1]
mov r4, #0 ; sad = 0;
mov r5, #8 ; loop count (2 rows per iteration)
loop
; 1st row
ldr r6, [r0, #0x0] ; load 4 src pixels (1A)
ldr r8, [r2, #0x0] ; load 4 ref pixels (1A)
ldr r7, [r0, #0x4] ; load 4 src pixels (1A)
ldr r9, [r2, #0x4] ; load 4 ref pixels (1A)
ldr r10, [r0, #0x8] ; load 4 src pixels (1B)
ldr r11, [r0, #0xC] ; load 4 src pixels (1B)
usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels (accumulate into r4)
usad8 r8, r7, r9 ; calculate sad for 4 pixels (second partial sum in r8)
ldr r12, [r2, #0x8] ; load 4 ref pixels (1B)
ldr lr, [r2, #0xC] ; load 4 ref pixels (1B)
add r0, r0, r1 ; set src pointer to next row
add r2, r2, r3 ; set ref pointer to next row
pld [r0, r1, lsl #1]
pld [r2, r3, lsl #1]
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
ldr r6, [r0, #0x0] ; load 4 src pixels (2A)
ldr r7, [r0, #0x4] ; load 4 src pixels (2A)
add r4, r4, r8 ; add partial sad values
; 2nd row
ldr r8, [r2, #0x0] ; load 4 ref pixels (2A)
ldr r9, [r2, #0x4] ; load 4 ref pixels (2A)
ldr r10, [r0, #0x8] ; load 4 src pixels (2B)
ldr r11, [r0, #0xC] ; load 4 src pixels (2B)
usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels
usad8 r8, r7, r9 ; calculate sad for 4 pixels
ldr r12, [r2, #0x8] ; load 4 ref pixels (2B)
ldr lr, [r2, #0xC] ; load 4 ref pixels (2B)
add r0, r0, r1 ; set src pointer to next row
add r2, r2, r3 ; set ref pointer to next row
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
pld [r0, r1, lsl #1]
pld [r2, r3, lsl #1]
subs r5, r5, #1 ; decrement loop counter
add r4, r4, r8 ; add partial sad values
bne loop
mov r0, r4 ; return sad
ldmfd sp!, {r4-r12, pc}
ENDP
END
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
#if HAVE_MEDIA
// Two-tap bilinear filter coefficient pairs. The sub-pixel variance functions
// in this file index the table with xoffset / yoffset; the two taps of every
// entry sum to 128.
static const int16_t bilinear_filters_media[8][2] = { { 128, 0 }, { 112, 16 },
{ 96, 32 }, { 80, 48 },
{ 64, 64 }, { 48, 80 },
{ 32, 96 }, { 16, 112 } };
// First bilinear pass, implemented in assembly. Writes 16-bit intermediate
// values to dst_ptr; `filter` points at one two-tap coefficient pair.
extern void aom_filter_block2d_bil_first_pass_media(
const uint8_t *src_ptr, uint16_t *dst_ptr, uint32_t src_pitch,
uint32_t height, uint32_t width, const int16_t *filter);
// Second bilinear pass, implemented in assembly. Reads the 16-bit
// intermediate buffer and writes 8-bit pixels to dst_ptr.
// NOTE(review): the third parameter is named src_pitch here, but the assembly
// header documents it as dst_pitch and uses it as the output stride.
extern void aom_filter_block2d_bil_second_pass_media(
const uint16_t *src_ptr, uint8_t *dst_ptr, int32_t src_pitch,
uint32_t height, uint32_t width, const int16_t *filter);
// Sub-pixel variance of an 8x8 block.
//
// The source block is interpolated with the bilinear taps selected by
// xoffset (first pass) and yoffset (second pass), and the variance of the
// interpolated block against dst_ptr is returned; *sse is filled in by
// aom_variance8x8_media.
unsigned int aom_sub_pixel_variance8x8_media(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  const int16_t *const x_taps = bilinear_filters_media[xoffset];
  const int16_t *const y_taps = bilinear_filters_media[yoffset];
  uint16_t intermediate[10 * 8];  // 16-bit output of the first pass
  uint8_t interpolated[8 * 8];    // final 8x8 prediction, stride 8

  // First pass runs over 9 rows: the second pass needs one extra row of input.
  aom_filter_block2d_bil_first_pass_media(src_ptr, intermediate,
                                          src_pixels_per_line, 9, 8, x_taps);
  aom_filter_block2d_bil_second_pass_media(intermediate, interpolated, 8, 8, 8,
                                           y_taps);

  return aom_variance8x8_media(interpolated, 8, dst_ptr, dst_pixels_per_line,
                               sse);
}
// Sub-pixel variance of a 16x16 block.
//
// The three offset combinations (4,0), (0,4) and (4,4) are dispatched to the
// dedicated half-pixel variance routines. Every other offset goes through the
// generic two-pass bilinear interpolation followed by aom_variance16x16_media.
// *sse is filled in by whichever variance routine is called.
unsigned int aom_sub_pixel_variance16x16_media(
    const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset,
    const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) {
  uint16_t intermediate[36 * 16];  // 16-bit output of the first pass
  uint8_t interpolated[20 * 16];   // final prediction, written with stride 16
  const int16_t *x_taps;
  const int16_t *y_taps;

  // Half-pixel fast paths.
  if (xoffset == 4 && yoffset == 0) {
    return aom_variance_halfpixvar16x16_h_media(
        src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  }
  if (xoffset == 0 && yoffset == 4) {
    return aom_variance_halfpixvar16x16_v_media(
        src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  }
  if (xoffset == 4 && yoffset == 4) {
    return aom_variance_halfpixvar16x16_hv_media(
        src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
  }

  // Generic path: first pass over 17 rows (one extra row feeds the second
  // pass), then the second pass down to 16x16.
  x_taps = bilinear_filters_media[xoffset];
  y_taps = bilinear_filters_media[yoffset];
  aom_filter_block2d_bil_first_pass_media(src_ptr, intermediate,
                                          src_pixels_per_line, 17, 16, x_taps);
  aom_filter_block2d_bil_second_pass_media(intermediate, interpolated, 16, 16,
                                           16, y_taps);

  return aom_variance16x16_media(interpolated, 16, dst_ptr,
                                 dst_pixels_per_line, sse);
}
#endif // HAVE_MEDIA
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
;
EXPORT |aom_variance_halfpixvar16x16_h_media|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr
; r1 int source_stride
; r2 unsigned char *ref_ptr
; r3 int recon_stride
; stack unsigned int *sse
|aom_variance_halfpixvar16x16_h_media| PROC
stmfd sp!, {r4-r12, lr}
pld [r0, r1, lsl #0]
pld [r2, r3, lsl #0]
mov r8, #0 ; initialize sum = 0
ldr r10, c80808080
mov r11, #0 ; initialize sse = 0
mov r12, #16 ; set loop counter to 16 (=block height)
mov lr, #0 ; constant zero
loop
; 1st 4 pixels
ldr r4, [r0, #0] ; load 4 src pixels
ldr r6, [r0, #1] ; load 4 src pixels with 1 byte offset
ldr r5, [r2, #0] ; load 4 ref pixels
; bilinear interpolation
mvn r6, r6
uhsub8 r4, r4, r6
eor r4, r4, r10
usub8 r6, r4, r5 ; calculate difference
pld [r0, r1,