Commit 73ad5236 authored by Steinar Midtskogen
Browse files

Add support for disabling CLPF on tile boundaries

Change-Id: Icb578f9b54c4020effa4b9245e343c1519bd7acb
parent 42d9610a
...@@ -6,6 +6,7 @@ print <<EOF ...@@ -6,6 +6,7 @@ print <<EOF
#include "aom/aom_integer.h" #include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h" #include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"
EOF EOF
} }
...@@ -852,7 +853,7 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/; ...@@ -852,7 +853,7 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CLPF") eq "yes") { if (aom_config("CONFIG_CLPF") eq "yes") {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength"; add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt";
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift, int size"; add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int shift, int size";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift, int size"; add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int shift, int size";
# VS compiling for 32 bit targets does not support vector types in # VS compiling for 32 bit targets does not support vector types in
...@@ -864,7 +865,7 @@ if (aom_config("CONFIG_CLPF") eq "yes") { ...@@ -864,7 +865,7 @@ if (aom_config("CONFIG_CLPF") eq "yes") {
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/; specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
} }
} }
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength"; add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, BOUNDARY_TYPE bt";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size"; add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size"; add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size";
# VS compiling for 32 bit targets does not support vector types in # VS compiling for 32 bit targets does not support vector types in
......
...@@ -23,17 +23,21 @@ int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) { ...@@ -23,17 +23,21 @@ int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride, void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey, int dstride, int x0, int y0, int sizex, int sizey,
int width, int height, unsigned int strength) { unsigned int strength, BOUNDARY_TYPE bt) {
int x, y; int x, y;
int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2;
int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY);
int xmax = x0 + sizex + !(bt & TILE_RIGHT_BOUNDARY) * 2 - 1;
int ymax = y0 + sizey + !(bt & TILE_BOTTOM_BOUNDARY) - 1;
for (y = y0; y < y0 + sizey; y++) { for (y = y0; y < y0 + sizey; y++) {
for (x = x0; x < x0 + sizex; x++) { for (x = x0; x < x0 + sizex; x++) {
int X = src[y * sstride + x]; int X = src[y * sstride + x];
int A = src[AOMMAX(0, y - 1) * sstride + x]; int A = src[AOMMAX(ymin, y - 1) * sstride + x];
int B = src[y * sstride + AOMMAX(0, x - 2)]; int B = src[y * sstride + AOMMAX(xmin, x - 2)];
int C = src[y * sstride + AOMMAX(0, x - 1)]; int C = src[y * sstride + AOMMAX(xmin, x - 1)];
int D = src[y * sstride + AOMMIN(width - 1, x + 1)]; int D = src[y * sstride + AOMMIN(xmax, x + 1)];
int E = src[y * sstride + AOMMIN(width - 1, x + 2)]; int E = src[y * sstride + AOMMIN(xmax, x + 2)];
int F = src[AOMMIN(height - 1, y + 1) * sstride + x]; int F = src[AOMMIN(ymax, y + 1) * sstride + x];
int delta; int delta;
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength); delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
dst[y * dstride + x] = X + delta; dst[y * dstride + x] = X + delta;
...@@ -45,17 +49,22 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -45,17 +49,22 @@ void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
// Identical to aom_clpf_block_c() apart from "src" and "dst". // Identical to aom_clpf_block_c() apart from "src" and "dst".
void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride, void aom_clpf_block_hbd_c(const uint16_t *src, uint16_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey, int dstride, int x0, int y0, int sizex, int sizey,
int width, int height, unsigned int strength) { unsigned int strength, BOUNDARY_TYPE bt) {
int x, y; int x, y;
int xmin = x0 - !(bt & TILE_LEFT_BOUNDARY) * 2;
int ymin = y0 - !(bt & TILE_ABOVE_BOUNDARY);
int xmax = x0 + sizex + !(bt & TILE_RIGHT_BOUNDARY) * 2 - 1;
int ymax = y0 + sizey + !(bt & TILE_BOTTOM_BOUNDARY) - 1;
for (y = y0; y < y0 + sizey; y++) { for (y = y0; y < y0 + sizey; y++) {
for (x = x0; x < x0 + sizex; x++) { for (x = x0; x < x0 + sizex; x++) {
int X = src[y * sstride + x]; int X = src[y * sstride + x];
int A = src[AOMMAX(0, y - 1) * sstride + x]; int A = src[AOMMAX(ymin, y - 1) * sstride + x];
int B = src[y * sstride + AOMMAX(0, x - 2)]; int B = src[y * sstride + AOMMAX(xmin, x - 2)];
int C = src[y * sstride + AOMMAX(0, x - 1)]; int C = src[y * sstride + AOMMAX(xmin, x - 1)];
int D = src[y * sstride + AOMMIN(width - 1, x + 1)]; int D = src[y * sstride + AOMMIN(xmax, x + 1)];
int E = src[y * sstride + AOMMIN(width - 1, x + 2)]; int E = src[y * sstride + AOMMIN(xmax, x + 2)];
int F = src[AOMMIN(height - 1, y + 1) * sstride + x]; int F = src[AOMMIN(ymax, y + 1) * sstride + x];
int delta; int delta;
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength); delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
dst[y * dstride + x] = X + delta; dst[y * dstride + x] = X + delta;
...@@ -156,6 +165,11 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, ...@@ -156,6 +165,11 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
(xpos << subx) / MI_SIZE] (xpos << subx) / MI_SIZE]
->mbmi.skip || ->mbmi.skip ||
(enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2)) { (enable_fb_flag && fb_size_log2 == MAX_FB_SIZE_LOG2)) {
BOUNDARY_TYPE boundary_type =
cm->mi[(ypos << suby) / MI_SIZE * cm->mi_stride +
(xpos << subx) / MI_SIZE]
.mbmi.boundary_info;
// Temporary buffering needed for in-place filtering // Temporary buffering needed for in-place filtering
if (cache_ptr[cache_idx]) { if (cache_ptr[cache_idx]) {
// Copy filtered block back into the frame // Copy filtered block back into the frame
...@@ -228,15 +242,15 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, ...@@ -228,15 +242,15 @@ void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame,
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer), aom_clpf_block_hbd(CONVERT_TO_SHORTPTR(src_buffer),
CONVERT_TO_SHORTPTR(dst_buffer), sstride, CONVERT_TO_SHORTPTR(dst_buffer), sstride,
dstride, xpos, ypos, sizex, sizey, width, dstride, xpos, ypos, sizex, sizey, strength,
height, strength); boundary_type);
} else { } else {
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos, aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, width, height, strength); ypos, sizex, sizey, strength, boundary_type);
} }
#else #else
aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos, aom_clpf_block(src_buffer, dst_buffer, sstride, dstride, xpos,
ypos, sizex, sizey, width, height, strength); ypos, sizex, sizey, strength, boundary_type);
#endif #endif
} }
} }
......
...@@ -51,10 +51,12 @@ SIMD_INLINE v128 calc_delta(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e, ...@@ -51,10 +51,12 @@ SIMD_INLINE v128 calc_delta(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
// Process blocks of width 8, two lines at a time, 8 bit. // Process blocks of width 8, two lines at a time, 8 bit.
static void clpf_block8(const uint8_t *src, uint8_t *dst, int sstride, static void clpf_block8(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizey, int width, int dstride, int x0, int y0, int sizey,
int height, unsigned int strength) { unsigned int strength, BOUNDARY_TYPE bt) {
const int bottom = height - 2 - y0; const int bottom = bt & TILE_BOTTOM_BOUNDARY ? sizey - 2 : -1;
const int right = width - 8 - x0; const int right = !(bt & TILE_RIGHT_BOUNDARY);
const int left = !(bt & TILE_LEFT_BOUNDARY);
const int top = bt & TILE_ABOVE_BOUNDARY ? y0 : -1;
const v128 sp = v128_dup_8(strength); const v128 sp = v128_dup_8(strength);
const v128 sm = v128_dup_8(-(int)strength); const v128 sm = v128_dup_8(-(int)strength);
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
...@@ -75,12 +77,12 @@ static void clpf_block8(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -75,12 +77,12 @@ static void clpf_block8(const uint8_t *src, uint8_t *dst, int sstride,
const v64 l2 = v64_load_aligned(src + sstride); const v64 l2 = v64_load_aligned(src + sstride);
v128 o = v128_from_v64(l1, l2); v128 o = v128_from_v64(l1, l2);
const v128 a = const v128 a =
v128_from_v64(v64_load_aligned(src - (y != -y0) * sstride), l1); v128_from_v64(v64_load_aligned(src - (y != top) * sstride), l1);
const v128 f = v128_from_v64( const v128 f = v128_from_v64(
l2, v64_load_aligned(src + ((y != bottom) + 1) * sstride)); l2, v64_load_aligned(src + ((y != bottom) + 1) * sstride));
v128 b, c, d, e; v128 b, c, d, e;
if (x0) { if (left) {
b = v128_from_v64(v64_load_unaligned(src - 2), b = v128_from_v64(v64_load_unaligned(src - 2),
v64_load_unaligned(src - 2 + sstride)); v64_load_unaligned(src - 2 + sstride));
c = v128_from_v64(v64_load_unaligned(src - 1), c = v128_from_v64(v64_load_unaligned(src - 1),
...@@ -109,12 +111,15 @@ static void clpf_block8(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -109,12 +111,15 @@ static void clpf_block8(const uint8_t *src, uint8_t *dst, int sstride,
// Process blocks of width 4, four lines at a time, 8 bit. // Process blocks of width 4, four lines at a time, 8 bit.
static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride, static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizey, int width, int dstride, int x0, int y0, int sizey,
int height, unsigned int strength) { unsigned int strength, BOUNDARY_TYPE bt) {
const v128 sp = v128_dup_8(strength); const v128 sp = v128_dup_8(strength);
const v128 sm = v128_dup_8(-(int)strength); const v128 sm = v128_dup_8(-(int)strength);
const int right = width - 4 - x0; const int right = !(bt & TILE_RIGHT_BOUNDARY);
const int bottom = height - 4 - y0; const int bottom = bt & TILE_BOTTOM_BOUNDARY ? sizey - 4 : -1;
const int left = !(bt & TILE_LEFT_BOUNDARY);
const int top = bt & TILE_ABOVE_BOUNDARY ? y0 : -1;
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
b_shuff[]) = { 0x0504040401000000LL, 0x0d0c0c0c09080808LL }; b_shuff[]) = { 0x0504040401000000LL, 0x0d0c0c0c09080808LL };
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
...@@ -129,7 +134,7 @@ static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -129,7 +134,7 @@ static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride,
src += x0 + y0 * sstride; src += x0 + y0 * sstride;
for (y = 0; y < sizey; y += 4) { for (y = 0; y < sizey; y += 4) {
const uint32_t l0 = u32_load_aligned(src - (y != -y0) * sstride); const uint32_t l0 = u32_load_aligned(src - (y != top) * sstride);
const uint32_t l1 = u32_load_aligned(src); const uint32_t l1 = u32_load_aligned(src);
const uint32_t l2 = u32_load_aligned(src + sstride); const uint32_t l2 = u32_load_aligned(src + sstride);
const uint32_t l3 = u32_load_aligned(src + 2 * sstride); const uint32_t l3 = u32_load_aligned(src + 2 * sstride);
...@@ -140,7 +145,7 @@ static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -140,7 +145,7 @@ static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride,
const v128 f = v128_from_32(l2, l3, l4, l5); const v128 f = v128_from_32(l2, l3, l4, l5);
v128 b, c, d, e; v128 b, c, d, e;
if (x0) { if (left) {
b = v128_from_32(u32_load_unaligned(src - 2), b = v128_from_32(u32_load_unaligned(src - 2),
u32_load_unaligned(src + sstride - 2), u32_load_unaligned(src + sstride - 2),
u32_load_unaligned(src + 2 * sstride - 2), u32_load_unaligned(src + 2 * sstride - 2),
...@@ -180,17 +185,17 @@ static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride, ...@@ -180,17 +185,17 @@ static void clpf_block4(const uint8_t *src, uint8_t *dst, int sstride,
void SIMD_FUNC(aom_clpf_block)(const uint8_t *src, uint8_t *dst, int sstride, void SIMD_FUNC(aom_clpf_block)(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int dstride, int x0, int y0, int sizex,
int sizey, int width, int height, int sizey, unsigned int strength,
unsigned int strength) { BOUNDARY_TYPE bt) {
if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) { if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
// Fallback to C for odd sizes: // Fallback to C for odd sizes:
// * block widths not 4 or 8 // * block widths not 4 or 8
// * block heights not a multiple of 4 if the block width is 4 // * block heights not a multiple of 4 if the block width is 4
aom_clpf_block_c(src, dst, sstride, dstride, x0, y0, sizex, sizey, width, aom_clpf_block_c(src, dst, sstride, dstride, x0, y0, sizex, sizey, strength,
height, strength); bt);
} else { } else {
(sizex == 4 ? clpf_block4 : clpf_block8)(src, dst, sstride, dstride, x0, y0, (sizex == 4 ? clpf_block4 : clpf_block8)(src, dst, sstride, dstride, x0, y0,
sizey, width, height, strength); sizey, strength, bt);
} }
} }
...@@ -237,12 +242,15 @@ static void calc_delta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e, ...@@ -237,12 +242,15 @@ static void calc_delta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
// Process blocks of width 4, two lines at time. // Process blocks of width 4, two lines at time.
SIMD_INLINE void clpf_block_hbd4(const uint16_t *src, uint16_t *dst, SIMD_INLINE void clpf_block_hbd4(const uint16_t *src, uint16_t *dst,
int sstride, int dstride, int x0, int y0, int sstride, int dstride, int x0, int y0,
int sizey, int width, int height, int sizey, unsigned int strength,
unsigned int strength) { BOUNDARY_TYPE bt) {
const v128 sp = v128_dup_16(strength); const v128 sp = v128_dup_16(strength);
const v128 sm = v128_dup_16(-(int)strength); const v128 sm = v128_dup_16(-(int)strength);
const int right = width - 4 - x0; const int right = !(bt & TILE_RIGHT_BOUNDARY);
const int bottom = height - 2 - y0; const int bottom = bt & TILE_BOTTOM_BOUNDARY ? sizey - 2 : -1;
const int left = !(bt & TILE_LEFT_BOUNDARY);
const int top = bt & TILE_ABOVE_BOUNDARY ? y0 : -1;
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
b_shuff[]) = { 0x0302010001000100LL, 0x0b0a090809080908LL }; b_shuff[]) = { 0x0302010001000100LL, 0x0b0a090809080908LL };
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
...@@ -261,12 +269,12 @@ SIMD_INLINE void clpf_block_hbd4(const uint16_t *src, uint16_t *dst, ...@@ -261,12 +269,12 @@ SIMD_INLINE void clpf_block_hbd4(const uint16_t *src, uint16_t *dst,
const v64 l2 = v64_load_aligned(src + sstride); const v64 l2 = v64_load_aligned(src + sstride);
v128 o = v128_from_v64(l1, l2); v128 o = v128_from_v64(l1, l2);
const v128 a = const v128 a =
v128_from_v64(v64_load_aligned(src - (y != -y0) * sstride), l1); v128_from_v64(v64_load_aligned(src - (y != top) * sstride), l1);
const v128 f = v128_from_v64( const v128 f = v128_from_v64(
l2, v64_load_aligned(src + ((y != bottom) + 1) * sstride)); l2, v64_load_aligned(src + ((y != bottom) + 1) * sstride));
v128 b, c, d, e; v128 b, c, d, e;
if (x0) { if (left) {
b = v128_from_v64(v64_load_unaligned(src - 2), b = v128_from_v64(v64_load_unaligned(src - 2),
v64_load_unaligned(src - 2 + sstride)); v64_load_unaligned(src - 2 + sstride));
c = v128_from_v64(v64_load_unaligned(src - 1), c = v128_from_v64(v64_load_unaligned(src - 1),
...@@ -293,11 +301,14 @@ SIMD_INLINE void clpf_block_hbd4(const uint16_t *src, uint16_t *dst, ...@@ -293,11 +301,14 @@ SIMD_INLINE void clpf_block_hbd4(const uint16_t *src, uint16_t *dst,
// The most simple case. Start here if you need to understand the functions. // The most simple case. Start here if you need to understand the functions.
SIMD_INLINE void clpf_block_hbd(const uint16_t *src, uint16_t *dst, int sstride, SIMD_INLINE void clpf_block_hbd(const uint16_t *src, uint16_t *dst, int sstride,
int dstride, int x0, int y0, int sizey, int dstride, int x0, int y0, int sizey,
int width, int height, unsigned int strength) { unsigned int strength, BOUNDARY_TYPE bt) {
const v128 sp = v128_dup_16(strength); const v128 sp = v128_dup_16(strength);
const v128 sm = v128_dup_16(-(int)strength); const v128 sm = v128_dup_16(-(int)strength);
const int right = width - 8 - x0; const int right = !(bt & TILE_RIGHT_BOUNDARY);
const int bottom = height - 2 - y0; const int bottom = bt & TILE_BOTTOM_BOUNDARY ? sizey - 2 : -2;
const int left = !(bt & TILE_LEFT_BOUNDARY);
const int top = bt & TILE_ABOVE_BOUNDARY ? y0 : -1;
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
b_shuff[]) = { 0x0302010001000100LL, 0x0b0a090807060504LL }; b_shuff[]) = { 0x0302010001000100LL, 0x0b0a090807060504LL };
DECLARE_ALIGNED(16, static const uint64_t, DECLARE_ALIGNED(16, static const uint64_t,
...@@ -317,11 +328,11 @@ SIMD_INLINE void clpf_block_hbd(const uint16_t *src, uint16_t *dst, int sstride, ...@@ -317,11 +328,11 @@ SIMD_INLINE void clpf_block_hbd(const uint16_t *src, uint16_t *dst, int sstride,
// instructions doing shift and pad. // instructions doing shift and pad.
for (y = 0; y < sizey; y++) { for (y = 0; y < sizey; y++) {
const v128 o = v128_load_aligned(src); const v128 o = v128_load_aligned(src);
const v128 a = v128_load_aligned(src - (y != -y0) * sstride); const v128 a = v128_load_aligned(src - (y != top) * sstride);
const v128 f = v128_load_aligned(src + (y - 1 != bottom) * sstride); const v128 f = v128_load_aligned(src + (y - 1 != bottom) * sstride);
v128 b, c, d, e; v128 b, c, d, e;
if (x0) { if (left) {
b = v128_load_unaligned(src - 2); b = v128_load_unaligned(src - 2);
c = v128_load_unaligned(src - 1); c = v128_load_unaligned(src - 1);
} else { // Left clipping } else { // Left clipping
...@@ -343,17 +354,17 @@ SIMD_INLINE void clpf_block_hbd(const uint16_t *src, uint16_t *dst, int sstride, ...@@ -343,17 +354,17 @@ SIMD_INLINE void clpf_block_hbd(const uint16_t *src, uint16_t *dst, int sstride,
void SIMD_FUNC(aom_clpf_block_hbd)(const uint16_t *src, uint16_t *dst, void SIMD_FUNC(aom_clpf_block_hbd)(const uint16_t *src, uint16_t *dst,
int sstride, int dstride, int x0, int y0, int sstride, int dstride, int x0, int y0,
int sizex, int sizey, int width, int height, int sizex, int sizey, unsigned int strength,
unsigned int strength) { BOUNDARY_TYPE bt) {
if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) { if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
// Fallback to C for odd sizes: // Fallback to C for odd sizes:
// * block width not 4 or 8 // * block width not 4 or 8
// * block heights not a multiple of 2 if the block width is 4 // * block heights not a multiple of 2 if the block width is 4
aom_clpf_block_hbd_c(src, dst, sstride, dstride, x0, y0, sizex, sizey, aom_clpf_block_hbd_c(src, dst, sstride, dstride, x0, y0, sizex, sizey,
width, height, strength); strength, bt);
} else { } else {
(sizex == 4 ? clpf_block_hbd4 : clpf_block_hbd)( (sizex == 4 ? clpf_block_hbd4 : clpf_block_hbd)(
src, dst, sstride, dstride, x0, y0, sizey, width, height, strength); src, dst, sstride, dstride, x0, y0, sizey, strength, bt);
} }
} }
#endif #endif
...@@ -28,7 +28,7 @@ namespace { ...@@ -28,7 +28,7 @@ namespace {
typedef void (*clpf_block_t)(const uint8_t *src, uint8_t *dst, int sstride, typedef void (*clpf_block_t)(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey, int dstride, int x0, int y0, int sizex, int sizey,
int width, int height, unsigned int strength); unsigned int strength, BOUNDARY_TYPE bt);
typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int> typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
clpf_block_param_t; clpf_block_param_t;
...@@ -57,8 +57,8 @@ typedef ClpfBlockTest ClpfSpeedTest; ...@@ -57,8 +57,8 @@ typedef ClpfBlockTest ClpfSpeedTest;
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
typedef void (*clpf_block_hbd_t)(const uint16_t *src, uint16_t *dst, typedef void (*clpf_block_hbd_t)(const uint16_t *src, uint16_t *dst,
int sstride, int dstride, int x0, int y0, int sstride, int dstride, int x0, int y0,
int sizex, int sizey, int width, int height, int sizex, int sizey, unsigned int strength,
unsigned int strength); BOUNDARY_TYPE bt);
typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int> typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
clpf_block_hbd_param_t; clpf_block_hbd_param_t;
...@@ -90,11 +90,11 @@ template <typename pixel> ...@@ -90,11 +90,11 @@ template <typename pixel>
void test_clpf(int w, int h, int depth, int iterations, void test_clpf(int w, int h, int depth, int iterations,
void (*clpf)(const pixel *src, pixel *dst, int sstride, void (*clpf)(const pixel *src, pixel *dst, int sstride,
int dstride, int x0, int y0, int sizex, int sizey, int dstride, int x0, int y0, int sizex, int sizey,
int width, int height, unsigned int strength), unsigned int strength, BOUNDARY_TYPE bt),
void (*ref_clpf)(const pixel *src, pixel *dst, int sstride, void (*ref_clpf)(const pixel *src, pixel *dst, int sstride,
int dstride, int x0, int y0, int sizex, int dstride, int x0, int y0, int sizex,
int sizey, int width, int height, int sizey, unsigned int strength,
unsigned int strength)) { BOUNDARY_TYPE bt)) {
const int size = 24; const int size = 24;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, pixel, s[size * size]); DECLARE_ALIGNED(16, pixel, s[size * size]);
...@@ -123,11 +123,16 @@ void test_clpf(int w, int h, int depth, int iterations, ...@@ -123,11 +123,16 @@ void test_clpf(int w, int h, int depth, int iterations,
for (xpos = 0; xpos < size && !error; xpos += w * !error) { for (xpos = 0; xpos < size && !error; xpos += w * !error) {
for (strength = depth - 8; strength < depth - 5 && !error; for (strength = depth - 8; strength < depth - 5 && !error;
strength += !error) { strength += !error) {
ref_clpf(s, ref_d, size, size, xpos, ypos, w, h, size, size, BOUNDARY_TYPE bt =
1 << strength); BOUNDARY_TYPE((TILE_LEFT_BOUNDARY & -(!xpos)) |
(TILE_ABOVE_BOUNDARY & -(!ypos)) |
(TILE_RIGHT_BOUNDARY & -(xpos + w == size)) |
(TILE_BOTTOM_BOUNDARY & -(ypos + h == size)));
ref_clpf(s, ref_d, size, size, xpos, ypos, w, h, 1 << strength,
bt);
if (clpf != ref_clpf) if (clpf != ref_clpf)
ASM_REGISTER_STATE_CHECK(clpf(s, d, size, size, xpos, ypos, w, ASM_REGISTER_STATE_CHECK(clpf(s, d, size, size, xpos, ypos, w,
h, size, size, 1 << strength)); h, 1 << strength, bt));
if (ref_clpf != clpf) if (ref_clpf != clpf)
for (pos = 0; pos < size * size && !error; pos++) { for (pos = 0; pos < size * size && !error; pos++) {
error = ref_d[pos] != d[pos]; error = ref_d[pos] != d[pos];
...@@ -163,12 +168,12 @@ template <typename pixel> ...@@ -163,12 +168,12 @@ template <typename pixel>
void test_clpf_speed(int w, int h, int depth, int iterations, void test_clpf_speed(int w, int h, int depth, int iterations,
void (*clpf)(const pixel *src, pixel *dst, int sstride, void (*clpf)(const pixel *src, pixel *dst, int sstride,
int dstride, int x0, int y0, int sizex, int dstride, int x0, int y0, int sizex,
int sizey, int width, int height, int sizey, unsigned int strength,
unsigned int strength), BOUNDARY_TYPE bt),
void (*ref_clpf)(const pixel *src, pixel *dst, int sstride, void (*ref_clpf)(const pixel *src, pixel *dst, int sstride,
int dstride, int x0, int y0, int sizex, int dstride, int x0, int y0, int sizex,
int sizey, int width, int height, int sizey, unsigned int strength,
unsigned int strength)) { BOUNDARY_TYPE bt)) {
aom_usec_timer ref_timer; aom_usec_timer ref_timer;
aom_usec_timer timer; aom_usec_timer timer;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment