diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c index afb80133812b861a1c5f4c3880bbf6802be221a1..be2e5386708c031a272211017ca53530aedcd6f7 100644 --- a/vp10/common/alloccommon.c +++ b/vp10/common/alloccommon.c @@ -35,8 +35,7 @@ static int alloc_seg_map(VP10_COMMON *cm, int seg_map_size) { for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); - if (cm->seg_map_array[i] == NULL) - return 1; + if (cm->seg_map_array[i] == NULL) return 1; } cm->seg_map_alloc_size = seg_map_size; @@ -91,7 +90,7 @@ void vp10_free_context_buffers(VP10_COMMON *cm) { int i; cm->free_mi(cm); free_seg_map(cm); - for (i = 0 ; i < MAX_MB_PLANE ; i++) { + for (i = 0; i < MAX_MB_PLANE; i++) { vpx_free(cm->above_context[i]); cm->above_context[i] = NULL; } @@ -110,15 +109,13 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) { new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); if (cm->mi_alloc_size < new_mi_size) { cm->free_mi(cm); - if (cm->alloc_mi(cm, new_mi_size)) - goto fail; + if (cm->alloc_mi(cm, new_mi_size)) goto fail; } if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { // Create the segmentation map structure and set to 0. free_seg_map(cm); - if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) - goto fail; + if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail; } if (cm->above_context_alloc_cols < cm->mi_cols) { @@ -129,7 +126,7 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) { ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2); int i; - for (i = 0 ; i < MAX_MB_PLANE ; i++) { + for (i = 0; i < MAX_MB_PLANE; i++) { vpx_free(cm->above_context[i]); cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc( 2 * aligned_mi_cols, sizeof(*cm->above_context[0])); @@ -153,7 +150,7 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) { return 0; - fail: +fail: vp10_free_context_buffers(cm); return 1; } diff --git a/vp10/common/alloccommon.h b/vp10/common/alloccommon.h index 69849931cbf1e9aeb7794de5829e8de5c8ca99b2..d2d2643be181120af40fe3ead0ad59afdea00a89 100644 --- a/vp10/common/alloccommon.h +++ b/vp10/common/alloccommon.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP10_COMMON_ALLOCCOMMON_H_ #define VP10_COMMON_ALLOCCOMMON_H_ diff --git a/vp10/common/ans.h b/vp10/common/ans.h index 24d7c09828b15bd7127ad2433f1859fc8e4a54f7..a7a74fc3692d70703aa61f48b2ddd89aa09318d3 100644 --- a/vp10/common/ans.h +++ b/vp10/common/ans.h @@ -23,20 +23,18 @@ #if ANS_DIVIDE_BY_MULTIPLY #include "vp10/common/divide.h" #define ANS_DIVREM(quotient, remainder, dividend, divisor) \ - do { \ - quotient = fastdiv(dividend, divisor); \ - remainder = dividend - quotient * divisor; \ + do { \ + quotient = fastdiv(dividend, divisor); \ + remainder = dividend - quotient * divisor; \ } while (0) -#define ANS_DIV(dividend, divisor) \ - fastdiv(dividend, divisor) +#define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor) #else #define ANS_DIVREM(quotient, remainder, dividend, divisor) \ - do { \ - quotient = dividend / divisor; \ - remainder = dividend % divisor; \ + do { \ + quotient = dividend / divisor; \ + remainder = dividend % divisor; \ } while (0) -#define ANS_DIV(dividend, divisor) \ - ((dividend) / (divisor)) +#define ANS_DIV(dividend, divisor) ((dividend) / (divisor)) #endif #ifdef __cplusplus @@ -245,8 +243,7 @@ static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) { // TODO(aconverse): Investigate ways to read/write literals faster, // e.g. 8-bit chunks. - for (bit = bits - 1; bit >= 0; bit--) - literal |= uabs_read_bit(ans) << bit; + for (bit = bits - 1; bit >= 0; bit--) literal |= uabs_read_bit(ans) << bit; return literal; } @@ -257,8 +254,7 @@ static INLINE int uabs_read_tree(struct AnsDecoder *ans, const AnsP8 *probs) { vpx_tree_index i = 0; - while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) - continue; + while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) continue; return -i; } @@ -288,8 +284,7 @@ static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[], assert(cdf_tab[i - 1] == rans_precision); } -static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, - int num_syms) { +static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, int num_syms) { int largest_idx = -1; int largest_p = -1; int i; @@ -365,8 +360,7 @@ static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf, out->cum_prob = (AnsP10)cdf[i - 1]; } -static INLINE int rans_read(struct AnsDecoder *ans, - const rans_dec_lut tab) { +static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) { unsigned rem; unsigned quo; struct rans_dec_sym sym; @@ -381,8 +375,7 @@ static INLINE int rans_read(struct AnsDecoder *ans, } static INLINE int ans_read_init(struct AnsDecoder *const ans, - const uint8_t *const buf, - int offset) { + const uint8_t *const buf, int offset) { unsigned x; if (offset < 1) return 1; ans->buf = buf; @@ -403,8 +396,7 @@ static INLINE int ans_read_init(struct AnsDecoder *const ans, return 1; } ans->state += l_base; - if (ans->state >= l_base * io_base) - return 1; + if (ans->state >= l_base * io_base) return 1; return 0; } diff --git a/vp10/common/arm/neon/iht4x4_add_neon.c b/vp10/common/arm/neon/iht4x4_add_neon.c index bd3e8b30f4098cea0485f1f16bd8b80c90aadc7e..d074bc91f9fc187fa9f0d3444dad1f9d740802dc 100644 --- a/vp10/common/arm/neon/iht4x4_add_neon.c +++ b/vp10/common/arm/neon/iht4x4_add_neon.c @@ -23,226 +23,211 @@ static int16_t cospi_8_64 = 0x3b21; static int16_t cospi_16_64 = 0x2d41; static int16_t cospi_24_64 = 0x187e; -static INLINE void TRANSPOSE4X4( - int16x8_t *q8s16, - int16x8_t *q9s16) { - int32x4_t q8s32, q9s32; - int16x4x2_t d0x2s16, d1x2s16; - int32x4x2_t q0x2s32; - - d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); - d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); - - q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); - q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); - q0x2s32 = vtrnq_s32(q8s32, q9s32); - - *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); - *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); - return; +static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) { + int32x4_t q8s32, q9s32; + int16x4x2_t d0x2s16, d1x2s16; + int32x4x2_t q0x2s32; + + d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); + d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); + + q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); + q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); + q0x2s32 = vtrnq_s32(q8s32, q9s32); + + *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); + *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); + return; } -static INLINE void GENERATE_COSINE_CONSTANTS( - int16x4_t *d0s16, - int16x4_t *d1s16, - int16x4_t *d2s16) { - *d0s16 = vdup_n_s16(cospi_8_64); - *d1s16 = vdup_n_s16(cospi_16_64); - *d2s16 = vdup_n_s16(cospi_24_64); - return; +static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16, + int16x4_t *d2s16) { + *d0s16 = vdup_n_s16(cospi_8_64); + *d1s16 = vdup_n_s16(cospi_16_64); + *d2s16 = vdup_n_s16(cospi_24_64); + return; } -static INLINE void GENERATE_SINE_CONSTANTS( - int16x4_t *d3s16, - int16x4_t *d4s16, - int16x4_t *d5s16, - int16x8_t *q3s16) { - *d3s16 = vdup_n_s16(sinpi_1_9); - *d4s16 = vdup_n_s16(sinpi_2_9); - *q3s16 = vdupq_n_s16(sinpi_3_9); - *d5s16 = vdup_n_s16(sinpi_4_9); - return; +static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16, + int16x4_t *d5s16, int16x8_t *q3s16) { + *d3s16 = vdup_n_s16(sinpi_1_9); + *d4s16 = vdup_n_s16(sinpi_2_9); + *q3s16 = vdupq_n_s16(sinpi_3_9); + *d5s16 = vdup_n_s16(sinpi_4_9); + return; } -static INLINE void IDCT4x4_1D( - int16x4_t *d0s16, - int16x4_t *d1s16, - int16x4_t *d2s16, - int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; - int16x4_t d26s16, d27s16, d28s16, d29s16; - int32x4_t q10s32, q13s32, q14s32, q15s32; - int16x8_t q13s16, q14s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, *d2s16); - q10s32 = vmull_s16(d17s16, *d0s16); - q13s32 = vmull_s16(d23s16, *d1s16); - q14s32 = vmull_s16(d24s16, *d1s16); - q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); - q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q10s32, 14); - - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - *q8s16 = vaddq_s16(q13s16, q14s16); - *q9s16 = vsubq_s16(q13s16, q14s16); - *q9s16 = vcombine_s16(vget_high_s16(*q9s16), - vget_low_s16(*q9s16)); // vswp - return; +static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16, + int16x4_t *d2s16, int16x8_t *q8s16, + int16x8_t *q9s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; + int16x4_t d26s16, d27s16, d28s16, d29s16; + int32x4_t q10s32, q13s32, q14s32, q15s32; + int16x8_t q13s16, q14s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + + d23s16 = vadd_s16(d16s16, d18s16); + d24s16 = vsub_s16(d16s16, d18s16); + + q15s32 = vmull_s16(d17s16, *d2s16); + q10s32 = vmull_s16(d17s16, *d0s16); + q13s32 = vmull_s16(d23s16, *d1s16); + q14s32 = vmull_s16(d24s16, *d1s16); + q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); + q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); + + d26s16 = vqrshrn_n_s32(q13s32, 14); + d27s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d28s16 = vqrshrn_n_s32(q10s32, 14); + + q13s16 = vcombine_s16(d26s16, d27s16); + q14s16 = vcombine_s16(d28s16, d29s16); + *q8s16 = vaddq_s16(q13s16, q14s16); + *q9s16 = vsubq_s16(q13s16, q14s16); + *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp + return; } -static INLINE void IADST4x4_1D( - int16x4_t *d3s16, - int16x4_t *d4s16, - int16x4_t *d5s16, - int16x8_t *q3s16, - int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16; - int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d6s16 = vget_low_s16(*q3s16); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - q10s32 = vmull_s16(*d3s16, d16s16); - q11s32 = vmull_s16(*d4s16, d16s16); - q12s32 = vmull_s16(d6s16, d17s16); - q13s32 = vmull_s16(*d5s16, d18s16); - q14s32 = vmull_s16(*d3s16, d18s16); - q15s32 = vmovl_s16(d16s16); - q15s32 = vaddw_s16(q15s32, d19s16); - q8s32 = vmull_s16(*d4s16, d19s16); - q15s32 = vsubw_s16(q15s32, d18s16); - q9s32 = vmull_s16(*d5s16, d19s16); - - q10s32 = vaddq_s32(q10s32, q13s32); - q10s32 = vaddq_s32(q10s32, q8s32); - q11s32 = vsubq_s32(q11s32, q14s32); - q8s32 = vdupq_n_s32(sinpi_3_9); - q11s32 = vsubq_s32(q11s32, q9s32); - q15s32 = vmulq_s32(q15s32, q8s32); - - q13s32 = vaddq_s32(q10s32, q12s32); - q10s32 = vaddq_s32(q10s32, q11s32); - q14s32 = vaddq_s32(q11s32, q12s32); - q10s32 = vsubq_s32(q10s32, q12s32); - - d16s16 = vqrshrn_n_s32(q13s32, 14); - d17s16 = vqrshrn_n_s32(q14s32, 14); - d18s16 = vqrshrn_n_s32(q15s32, 14); - d19s16 = vqrshrn_n_s32(q10s32, 14); - - *q8s16 = vcombine_s16(d16s16, d17s16); - *q9s16 = vcombine_s16(d18s16, d19s16); - return; +static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16, + int16x4_t *d5s16, int16x8_t *q3s16, + int16x8_t *q8s16, int16x8_t *q9s16) { + int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16; + int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; + + d6s16 = vget_low_s16(*q3s16); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + + q10s32 = vmull_s16(*d3s16, d16s16); + q11s32 = vmull_s16(*d4s16, d16s16); + q12s32 = vmull_s16(d6s16, d17s16); + q13s32 = vmull_s16(*d5s16, d18s16); + q14s32 = vmull_s16(*d3s16, d18s16); + q15s32 = vmovl_s16(d16s16); + q15s32 = vaddw_s16(q15s32, d19s16); + q8s32 = vmull_s16(*d4s16, d19s16); + q15s32 = vsubw_s16(q15s32, d18s16); + q9s32 = vmull_s16(*d5s16, d19s16); + + q10s32 = vaddq_s32(q10s32, q13s32); + q10s32 = vaddq_s32(q10s32, q8s32); + q11s32 = vsubq_s32(q11s32, q14s32); + q8s32 = vdupq_n_s32(sinpi_3_9); + q11s32 = vsubq_s32(q11s32, q9s32); + q15s32 = vmulq_s32(q15s32, q8s32); + + q13s32 = vaddq_s32(q10s32, q12s32); + q10s32 = vaddq_s32(q10s32, q11s32); + q14s32 = vaddq_s32(q11s32, q12s32); + q10s32 = vsubq_s32(q10s32, q12s32); + + d16s16 = vqrshrn_n_s32(q13s32, 14); + d17s16 = vqrshrn_n_s32(q14s32, 14); + d18s16 = vqrshrn_n_s32(q15s32, 14); + d19s16 = vqrshrn_n_s32(q10s32, 14); + + *q8s16 = vcombine_s16(d16s16, d17s16); + *q9s16 = vcombine_s16(d18s16, d19s16); + return; } void vp10_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { - uint8x8_t d26u8, d27u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; - uint32x2_t d26u32, d27u32; - int16x8_t q3s16, q8s16, q9s16; - uint16x8_t q8u16, q9u16; - - d26u32 = d27u32 = vdup_n_u32(0); - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - - TRANSPOSE4X4(&q8s16, &q9s16); - - switch (tx_type) { - case 0: // idct_idct is not supported. Fall back to C - vp10_iht4x4_16_add_c(input, dest, dest_stride, tx_type); - return; - break; - case 1: // iadst_idct - // generate constants - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - break; - case 2: // idct_iadst - // generate constantsyy - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - break; - case 3: // iadst_iadst - // generate constants - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - break; - default: // iadst_idct - assert(0); - break; - } - - q8s16 = vrshrq_n_s16(q8s16, 4); - q9s16 = vrshrq_n_s16(q9s16, 4); - - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); - dest += dest_stride; - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); - dest += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); - dest += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); - - d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); - return; + int dest_stride, int tx_type) { + uint8x8_t d26u8, d27u8; + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; + uint32x2_t d26u32, d27u32; + int16x8_t q3s16, q8s16, q9s16; + uint16x8_t q8u16, q9u16; + + d26u32 = d27u32 = vdup_n_u32(0); + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + + TRANSPOSE4X4(&q8s16, &q9s16); + + switch (tx_type) { + case 0: // idct_idct is not supported. Fall back to C + vp10_iht4x4_16_add_c(input, dest, dest_stride, tx_type); + return; + break; + case 1: // iadst_idct + // generate constants + GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + break; + case 2: // idct_iadst + // generate constantsyy + GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + break; + case 3: // iadst_iadst + // generate constants + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + break; + default: // iadst_idct + assert(0); + break; + } + + q8s16 = vrshrq_n_s16(q8s16, 4); + q9s16 = vrshrq_n_s16(q9s16, 4); + + d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); + dest += dest_stride; + d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); + dest += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); + dest += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); + + d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); + return; } diff --git a/vp10/common/arm/neon/iht8x8_add_neon.c b/vp10/common/arm/neon/iht8x8_add_neon.c index 82d7ccc612ea9cf1c02ac35c94df9e78b2c3e2f9..7e1c83f41517b65731f4125b6c57bbe869bdf87e 100644 --- a/vp10/common/arm/neon/iht8x8_add_neon.c +++ b/vp10/common/arm/neon/iht8x8_add_neon.c @@ -31,594 +31,577 @@ static int16_t cospi_26_64 = 4756; static int16_t cospi_28_64 = 3196; static int16_t cospi_30_64 = 1606; -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; +static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16, + int16x8_t *q10s16, int16x8_t *q11s16, + int16x8_t *q12s16, int16x8_t *q13s16, + int16x8_t *q14s16, int16x8_t *q15s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; + int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 + *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 + *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 + *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 + *q12s16 = vcombine_s16(d17s16, d25s16); + *q13s16 = vcombine_s16(d19s16, d27s16); + *q14s16 = vcombine_s16(d21s16, d29s16); + *q15s16 = vcombine_s16(d23s16, d31s16); + + q0x2s32 = + vtrnq_s32(vreinterpretq_s32_s16(*q8s16), vreinterpretq_s32_s16(*q10s16)); + q1x2s32 = + vtrnq_s32(vreinterpretq_s32_s16(*q9s16), vreinterpretq_s32_s16(*q11s16)); + q2x2s32 = + vtrnq_s32(vreinterpretq_s32_s16(*q12s16), vreinterpretq_s32_s16(*q14s16)); + q3x2s32 = + vtrnq_s32(vreinterpretq_s32_s16(*q13s16), vreinterpretq_s32_s16(*q15s16)); + + q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 + vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 + q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 + vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 + q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 + vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 + q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 + vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 + + *q8s16 = q0x2s16.val[0]; + *q9s16 = q0x2s16.val[1]; + *q10s16 = q1x2s16.val[0]; + *q11s16 = q1x2s16.val[1]; + *q12s16 = q2x2s16.val[0]; + *q13s16 = q2x2s16.val[1]; + *q14s16 = q3x2s16.val[0]; + *q15s16 = q3x2s16.val[1]; + return; } -static INLINE void IDCT8x8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d26s16, d2s16); - q6s32 = vmull_s16(d27s16, d2s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); - q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q2s32 = vmull_s16(d18s16, d1s16); - q3s32 = vmull_s16(d19s16, d1s16); - q9s32 = vmull_s16(d26s16, d3s16); - q13s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlal_s16(q2s32, d30s16, d0s16); - q3s32 = vmlal_s16(q3s32, d31s16, d0s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q13s32 = vmlal_s16(q13s32, d23s16, d2s16); - - d14s16 = vqrshrn_n_s32(q2s32, 14); - d15s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q13s32, 14); - q6s16 = vcombine_s16(d12s16, d13s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d0s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d0s16); - q3s32 = vmull_s16(d17s16, d0s16); - q13s32 = vmull_s16(d16s16, d0s16); - q15s32 = vmull_s16(d17s16, d0s16); - - q2s32 = vmlal_s16(q2s32, d24s16, d0s16); - q3s32 = vmlal_s16(q3s32, d25s16, d0s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); - q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); - - d0s16 = vdup_n_s16(cospi_24_64); - d1s16 = vdup_n_s16(cospi_8_64); - - d18s16 = vqrshrn_n_s32(q2s32, 14); - d19s16 = vqrshrn_n_s32(q3s32, 14); - d22s16 = vqrshrn_n_s32(q13s32, 14); - d23s16 = vqrshrn_n_s32(q15s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q2s32 = vmull_s16(d20s16, d0s16); - q3s32 = vmull_s16(d21s16, d0s16); - q8s32 = vmull_s16(d20s16, d1s16); - q12s32 = vmull_s16(d21s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); - q8s32 = vmlal_s16(q8s32, d28s16, d0s16); - q12s32 = vmlal_s16(q12s32, d29s16, d0s16); - - d26s16 = vqrshrn_n_s32(q2s32, 14); - d27s16 = vqrshrn_n_s32(q3s32, 14); - d30s16 = vqrshrn_n_s32(q8s32, 14); - d31s16 = vqrshrn_n_s32(q12s32, 14); - *q13s16 = vcombine_s16(d26s16, d27s16); - *q15s16 = vcombine_s16(d30s16, d31s16); - - q0s16 = vaddq_s16(*q9s16, *q15s16); - q1s16 = vaddq_s16(*q11s16, *q13s16); - q2s16 = vsubq_s16(*q11s16, *q13s16); - q3s16 = vsubq_s16(*q9s16, *q15s16); - - *q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - *q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - *q8s16 = vaddq_s16(q0s16, q7s16); - *q9s16 = vaddq_s16(q1s16, q6s16); - *q10s16 = vaddq_s16(q2s16, q5s16); - *q11s16 = vaddq_s16(q3s16, q4s16); - *q12s16 = vsubq_s16(q3s16, q4s16); - *q13s16 = vsubq_s16(q2s16, q5s16); - *q14s16 = vsubq_s16(q1s16, q6s16); - *q15s16 = vsubq_s16(q0s16, q7s16); - return; +static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16, + int16x8_t *q10s16, int16x8_t *q11s16, + int16x8_t *q12s16, int16x8_t *q13s16, + int16x8_t *q14s16, int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; + + d0s16 = vdup_n_s16(cospi_28_64); + d1s16 = vdup_n_s16(cospi_4_64); + d2s16 = vdup_n_s16(cospi_12_64); + d3s16 = vdup_n_s16(cospi_20_64); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + q2s32 = vmull_s16(d18s16, d0s16); + q3s32 = vmull_s16(d19s16, d0s16); + q5s32 = vmull_s16(d26s16, d2s16); + q6s32 = vmull_s16(d27s16, d2s16); + + q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); + q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); + q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); + + d8s16 = vqrshrn_n_s32(q2s32, 14); + d9s16 = vqrshrn_n_s32(q3s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + q4s16 = vcombine_s16(d8s16, d9s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + q2s32 = vmull_s16(d18s16, d1s16); + q3s32 = vmull_s16(d19s16, d1s16); + q9s32 = vmull_s16(d26s16, d3s16); + q13s32 = vmull_s16(d27s16, d3s16); + + q2s32 = vmlal_s16(q2s32, d30s16, d0s16); + q3s32 = vmlal_s16(q3s32, d31s16, d0s16); + q9s32 = vmlal_s16(q9s32, d22s16, d2s16); + q13s32 = vmlal_s16(q13s32, d23s16, d2s16); + + d14s16 = vqrshrn_n_s32(q2s32, 14); + d15s16 = vqrshrn_n_s32(q3s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q13s32, 14); + q6s16 = vcombine_s16(d12s16, d13s16); + q7s16 = vcombine_s16(d14s16, d15s16); + + d0s16 = vdup_n_s16(cospi_16_64); + + q2s32 = vmull_s16(d16s16, d0s16); + q3s32 = vmull_s16(d17s16, d0s16); + q13s32 = vmull_s16(d16s16, d0s16); + q15s32 = vmull_s16(d17s16, d0s16); + + q2s32 = vmlal_s16(q2s32, d24s16, d0s16); + q3s32 = vmlal_s16(q3s32, d25s16, d0s16); + q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); + q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); + + d0s16 = vdup_n_s16(cospi_24_64); + d1s16 = vdup_n_s16(cospi_8_64); + + d18s16 = vqrshrn_n_s32(q2s32, 14); + d19s16 = vqrshrn_n_s32(q3s32, 14); + d22s16 = vqrshrn_n_s32(q13s32, 14); + d23s16 = vqrshrn_n_s32(q15s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q2s32 = vmull_s16(d20s16, d0s16); + q3s32 = vmull_s16(d21s16, d0s16); + q8s32 = vmull_s16(d20s16, d1s16); + q12s32 = vmull_s16(d21s16, d1s16); + + q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); + q8s32 = vmlal_s16(q8s32, d28s16, d0s16); + q12s32 = vmlal_s16(q12s32, d29s16, d0s16); + + d26s16 = vqrshrn_n_s32(q2s32, 14); + d27s16 = vqrshrn_n_s32(q3s32, 14); + d30s16 = vqrshrn_n_s32(q8s32, 14); + d31s16 = vqrshrn_n_s32(q12s32, 14); + *q13s16 = vcombine_s16(d26s16, d27s16); + *q15s16 = vcombine_s16(d30s16, d31s16); + + q0s16 = vaddq_s16(*q9s16, *q15s16); + q1s16 = vaddq_s16(*q11s16, *q13s16); + q2s16 = vsubq_s16(*q11s16, *q13s16); + q3s16 = vsubq_s16(*q9s16, *q15s16); + + *q13s16 = vsubq_s16(q4s16, q5s16); + q4s16 = vaddq_s16(q4s16, q5s16); + *q14s16 = vsubq_s16(q7s16, q6s16); + q7s16 = vaddq_s16(q7s16, q6s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + + d16s16 = vdup_n_s16(cospi_16_64); + + q9s32 = vmull_s16(d28s16, d16s16); + q10s32 = vmull_s16(d29s16, d16s16); + q11s32 = vmull_s16(d28s16, d16s16); + q12s32 = vmull_s16(d29s16, d16s16); + + q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); + q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); + q11s32 = vmlal_s16(q11s32, d26s16, d16s16); + q12s32 = vmlal_s16(q12s32, d27s16, d16s16); + + d10s16 = vqrshrn_n_s32(q9s32, 14); + d11s16 = vqrshrn_n_s32(q10s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q12s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + *q8s16 = vaddq_s16(q0s16, q7s16); + *q9s16 = vaddq_s16(q1s16, q6s16); + *q10s16 = vaddq_s16(q2s16, q5s16); + *q11s16 = vaddq_s16(q3s16, q4s16); + *q12s16 = vsubq_s16(q3s16, q4s16); + *q13s16 = vsubq_s16(q2s16, q5s16); + *q14s16 = vsubq_s16(q1s16, q6s16); + *q15s16 = vsubq_s16(q0s16, q7s16); + return; } -static INLINE void IADST8X8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q2s16, q4s16, q5s16, q6s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; - int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - d14s16 = vdup_n_s16(cospi_2_64); - d15s16 = vdup_n_s16(cospi_30_64); - - q1s32 = vmull_s16(d30s16, d14s16); - q2s32 = vmull_s16(d31s16, d14s16); - q3s32 = vmull_s16(d30s16, d15s16); - q4s32 = vmull_s16(d31s16, d15s16); - - d30s16 = vdup_n_s16(cospi_18_64); - d31s16 = vdup_n_s16(cospi_14_64); - - q1s32 = vmlal_s16(q1s32, d16s16, d15s16); - q2s32 = vmlal_s16(q2s32, d17s16, d15s16); - q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); - q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); - - q5s32 = vmull_s16(d22s16, d30s16); - q6s32 = vmull_s16(d23s16, d30s16); - q7s32 = vmull_s16(d22s16, d31s16); - q8s32 = vmull_s16(d23s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d24s16, d31s16); - q6s32 = vmlal_s16(q6s32, d25s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); - q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); - - q11s32 = vaddq_s32(q1s32, q5s32); - q12s32 = vaddq_s32(q2s32, q6s32); - q1s32 = vsubq_s32(q1s32, q5s32); - q2s32 = vsubq_s32(q2s32, q6s32); - - d22s16 = vqrshrn_n_s32(q11s32, 14); - d23s16 = vqrshrn_n_s32(q12s32, 14); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q12s32 = vaddq_s32(q3s32, q7s32); - q15s32 = vaddq_s32(q4s32, q8s32); - q3s32 = vsubq_s32(q3s32, q7s32); - q4s32 = vsubq_s32(q4s32, q8s32); - - d2s16 = vqrshrn_n_s32(q1s32, 14); - d3s16 = vqrshrn_n_s32(q2s32, 14); - d24s16 = vqrshrn_n_s32(q12s32, 14); - d25s16 = vqrshrn_n_s32(q15s32, 14); - d6s16 = vqrshrn_n_s32(q3s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - *q12s16 = vcombine_s16(d24s16, d25s16); - - d0s16 = vdup_n_s16(cospi_10_64); - d1s16 = vdup_n_s16(cospi_22_64); - q4s32 = vmull_s16(d26s16, d0s16); - q5s32 = vmull_s16(d27s16, d0s16); - q2s32 = vmull_s16(d26s16, d1s16); - q6s32 = vmull_s16(d27s16, d1s16); - - d30s16 = vdup_n_s16(cospi_26_64); - d31s16 = vdup_n_s16(cospi_6_64); - - q4s32 = vmlal_s16(q4s32, d20s16, d1s16); - q5s32 = vmlal_s16(q5s32, d21s16, d1s16); - q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); - q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); - - q0s32 = vmull_s16(d18s16, d30s16); - q13s32 = vmull_s16(d19s16, d30s16); - - q0s32 = vmlal_s16(q0s32, d28s16, d31s16); - q13s32 = vmlal_s16(q13s32, d29s16, d31s16); - - q10s32 = vmull_s16(d18s16, d31s16); - q9s32 = vmull_s16(d19s16, d31s16); - - q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); - q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); - - q14s32 = vaddq_s32(q2s32, q10s32); - q15s32 = vaddq_s32(q6s32, q9s32); - q2s32 = vsubq_s32(q2s32, q10s32); - q6s32 = vsubq_s32(q6s32, q9s32); - - d28s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d4s16 = vqrshrn_n_s32(q2s32, 14); - d5s16 = vqrshrn_n_s32(q6s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - q9s32 = vaddq_s32(q4s32, q0s32); - q10s32 = vaddq_s32(q5s32, q13s32); - q4s32 = vsubq_s32(q4s32, q0s32); - q5s32 = vsubq_s32(q5s32, q13s32); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - d18s16 = vqrshrn_n_s32(q9s32, 14); - d19s16 = vqrshrn_n_s32(q10s32, 14); - d8s16 = vqrshrn_n_s32(q4s32, 14); - d9s16 = vqrshrn_n_s32(q5s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q5s32 = vmull_s16(d2s16, d30s16); - q6s32 = vmull_s16(d3s16, d30s16); - q7s32 = vmull_s16(d2s16, d31s16); - q0s32 = vmull_s16(d3s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d6s16, d31s16); - q6s32 = vmlal_s16(q6s32, d7s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); - - q1s32 = vmull_s16(d4s16, d30s16); - q3s32 = vmull_s16(d5s16, d30s16); - q10s32 = vmull_s16(d4s16, d31s16); - q2s32 = vmull_s16(d5s16, d31s16); - - q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); - q3s32 = vmlsl_s16(q3s32, d9s16, d31s16); - q10s32 = vmlal_s16(q10s32, d8s16, d30s16); - q2s32 = vmlal_s16(q2s32, d9s16, d30s16); - - *q8s16 = vaddq_s16(*q11s16, *q9s16); - *q11s16 = vsubq_s16(*q11s16, *q9s16); - q4s16 = vaddq_s16(*q12s16, *q14s16); - *q12s16 = vsubq_s16(*q12s16, *q14s16); - - q14s32 = vaddq_s32(q5s32, q1s32); - q15s32 = vaddq_s32(q6s32, q3s32); - q5s32 = vsubq_s32(q5s32, q1s32); - q6s32 = vsubq_s32(q6s32, q3s32); - - d18s16 = vqrshrn_n_s32(q14s32, 14); - d19s16 = vqrshrn_n_s32(q15s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q1s32 = vaddq_s32(q7s32, q10s32); - q3s32 = vaddq_s32(q0s32, q2s32); - q7s32 = vsubq_s32(q7s32, q10s32); - q0s32 = vsubq_s32(q0s32, q2s32); - - d28s16 = vqrshrn_n_s32(q1s32, 14); - d29s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q7s32, 14); - d15s16 = vqrshrn_n_s32(q0s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - d30s16 = vdup_n_s16(cospi_16_64); - - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - q2s32 = vmull_s16(d22s16, d30s16); - q3s32 = vmull_s16(d23s16, d30s16); - q13s32 = vmull_s16(d22s16, d30s16); - q1s32 = vmull_s16(d23s16, d30s16); - - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - q2s32 = vmlal_s16(q2s32, d24s16, d30s16); - q3s32 = vmlal_s16(q3s32, d25s16, d30s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); - q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); - - d4s16 = vqrshrn_n_s32(q2s32, 14); - d5s16 = vqrshrn_n_s32(q3s32, 14); - d24s16 = vqrshrn_n_s32(q13s32, 14); - d25s16 = vqrshrn_n_s32(q1s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - *q12s16 = vcombine_s16(d24s16, d25s16); - - q13s32 = vmull_s16(d10s16, d30s16); - q1s32 = vmull_s16(d11s16, d30s16); - q11s32 = vmull_s16(d10s16, d30s16); - q0s32 = vmull_s16(d11s16, d30s16); - - q13s32 = vmlal_s16(q13s32, d14s16, d30s16); - q1s32 = vmlal_s16(q1s32, d15s16, d30s16); - q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); - - d20s16 = vqrshrn_n_s32(q13s32, 14); - d21s16 = vqrshrn_n_s32(q1s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q0s32, 14); - *q10s16 = vcombine_s16(d20s16, d21s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - q5s16 = vdupq_n_s16(0); - - *q9s16 = vsubq_s16(q5s16, *q9s16); - *q11s16 = vsubq_s16(q5s16, q2s16); - *q13s16 = vsubq_s16(q5s16, q6s16); - *q15s16 = vsubq_s16(q5s16, q4s16); - return; +static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16, + int16x8_t *q10s16, int16x8_t *q11s16, + int16x8_t *q12s16, int16x8_t *q13s16, + int16x8_t *q14s16, int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q2s16, q4s16, q5s16, q6s16; + int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; + int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + d14s16 = vdup_n_s16(cospi_2_64); + d15s16 = vdup_n_s16(cospi_30_64); + + q1s32 = vmull_s16(d30s16, d14s16); + q2s32 = vmull_s16(d31s16, d14s16); + q3s32 = vmull_s16(d30s16, d15s16); + q4s32 = vmull_s16(d31s16, d15s16); + + d30s16 = vdup_n_s16(cospi_18_64); + d31s16 = vdup_n_s16(cospi_14_64); + + q1s32 = vmlal_s16(q1s32, d16s16, d15s16); + q2s32 = vmlal_s16(q2s32, d17s16, d15s16); + q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); + q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); + + q5s32 = vmull_s16(d22s16, d30s16); + q6s32 = vmull_s16(d23s16, d30s16); + q7s32 = vmull_s16(d22s16, d31s16); + q8s32 = vmull_s16(d23s16, d31s16); + + q5s32 = vmlal_s16(q5s32, d24s16, d31s16); + q6s32 = vmlal_s16(q6s32, d25s16, d31s16); + q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); + q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); + + q11s32 = vaddq_s32(q1s32, q5s32); + q12s32 = vaddq_s32(q2s32, q6s32); + q1s32 = vsubq_s32(q1s32, q5s32); + q2s32 = vsubq_s32(q2s32, q6s32); + + d22s16 = vqrshrn_n_s32(q11s32, 14); + d23s16 = vqrshrn_n_s32(q12s32, 14); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q12s32 = vaddq_s32(q3s32, q7s32); + q15s32 = vaddq_s32(q4s32, q8s32); + q3s32 = vsubq_s32(q3s32, q7s32); + q4s32 = vsubq_s32(q4s32, q8s32); + + d2s16 = vqrshrn_n_s32(q1s32, 14); + d3s16 = vqrshrn_n_s32(q2s32, 14); + d24s16 = vqrshrn_n_s32(q12s32, 14); + d25s16 = vqrshrn_n_s32(q15s32, 14); + d6s16 = vqrshrn_n_s32(q3s32, 14); + d7s16 = vqrshrn_n_s32(q4s32, 14); + *q12s16 = vcombine_s16(d24s16, d25s16); + + d0s16 = vdup_n_s16(cospi_10_64); + d1s16 = vdup_n_s16(cospi_22_64); + q4s32 = vmull_s16(d26s16, d0s16); + q5s32 = vmull_s16(d27s16, d0s16); + q2s32 = vmull_s16(d26s16, d1s16); + q6s32 = vmull_s16(d27s16, d1s16); + + d30s16 = vdup_n_s16(cospi_26_64); + d31s16 = vdup_n_s16(cospi_6_64); + + q4s32 = vmlal_s16(q4s32, d20s16, d1s16); + q5s32 = vmlal_s16(q5s32, d21s16, d1s16); + q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); + q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); + + q0s32 = vmull_s16(d18s16, d30s16); + q13s32 = vmull_s16(d19s16, d30s16); + + q0s32 = vmlal_s16(q0s32, d28s16, d31s16); + q13s32 = vmlal_s16(q13s32, d29s16, d31s16); + + q10s32 = vmull_s16(d18s16, d31s16); + q9s32 = vmull_s16(d19s16, d31s16); + + q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); + q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); + + q14s32 = vaddq_s32(q2s32, q10s32); + q15s32 = vaddq_s32(q6s32, q9s32); + q2s32 = vsubq_s32(q2s32, q10s32); + q6s32 = vsubq_s32(q6s32, q9s32); + + d28s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d4s16 = vqrshrn_n_s32(q2s32, 14); + d5s16 = vqrshrn_n_s32(q6s32, 14); + *q14s16 = vcombine_s16(d28s16, d29s16); + + q9s32 = vaddq_s32(q4s32, q0s32); + q10s32 = vaddq_s32(q5s32, q13s32); + q4s32 = vsubq_s32(q4s32, q0s32); + q5s32 = vsubq_s32(q5s32, q13s32); + + d30s16 = vdup_n_s16(cospi_8_64); + d31s16 = vdup_n_s16(cospi_24_64); + + d18s16 = vqrshrn_n_s32(q9s32, 14); + d19s16 = vqrshrn_n_s32(q10s32, 14); + d8s16 = vqrshrn_n_s32(q4s32, 14); + d9s16 = vqrshrn_n_s32(q5s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + + q5s32 = vmull_s16(d2s16, d30s16); + q6s32 = vmull_s16(d3s16, d30s16); + q7s32 = vmull_s16(d2s16, d31s16); + q0s32 = vmull_s16(d3s16, d31s16); + + q5s32 = vmlal_s16(q5s32, d6s16, d31s16); + q6s32 = vmlal_s16(q6s32, d7s16, d31s16); + q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); + q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); + + q1s32 = vmull_s16(d4s16, d30s16); + q3s32 = vmull_s16(d5s16, d30s16); + q10s32 = vmull_s16(d4s16, d31s16); + q2s32 = vmull_s16(d5s16, d31s16); + + q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); + q3s32 = vmlsl_s16(q3s32, d9s16, d31s16); + q10s32 = vmlal_s16(q10s32, d8s16, d30s16); + q2s32 = vmlal_s16(q2s32, d9s16, d30s16); + + *q8s16 = vaddq_s16(*q11s16, *q9s16); + *q11s16 = vsubq_s16(*q11s16, *q9s16); + q4s16 = vaddq_s16(*q12s16, *q14s16); + *q12s16 = vsubq_s16(*q12s16, *q14s16); + + q14s32 = vaddq_s32(q5s32, q1s32); + q15s32 = vaddq_s32(q6s32, q3s32); + q5s32 = vsubq_s32(q5s32, q1s32); + q6s32 = vsubq_s32(q6s32, q3s32); + + d18s16 = vqrshrn_n_s32(q14s32, 14); + d19s16 = vqrshrn_n_s32(q15s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + + q1s32 = vaddq_s32(q7s32, q10s32); + q3s32 = vaddq_s32(q0s32, q2s32); + q7s32 = vsubq_s32(q7s32, q10s32); + q0s32 = vsubq_s32(q0s32, q2s32); + + d28s16 = vqrshrn_n_s32(q1s32, 14); + d29s16 = vqrshrn_n_s32(q3s32, 14); + d14s16 = vqrshrn_n_s32(q7s32, 14); + d15s16 = vqrshrn_n_s32(q0s32, 14); + *q14s16 = vcombine_s16(d28s16, d29s16); + + d30s16 = vdup_n_s16(cospi_16_64); + + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + q2s32 = vmull_s16(d22s16, d30s16); + q3s32 = vmull_s16(d23s16, d30s16); + q13s32 = vmull_s16(d22s16, d30s16); + q1s32 = vmull_s16(d23s16, d30s16); + + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + q2s32 = vmlal_s16(q2s32, d24s16, d30s16); + q3s32 = vmlal_s16(q3s32, d25s16, d30s16); + q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); + q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); + + d4s16 = vqrshrn_n_s32(q2s32, 14); + d5s16 = vqrshrn_n_s32(q3s32, 14); + d24s16 = vqrshrn_n_s32(q13s32, 14); + d25s16 = vqrshrn_n_s32(q1s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + *q12s16 = vcombine_s16(d24s16, d25s16); + + q13s32 = vmull_s16(d10s16, d30s16); + q1s32 = vmull_s16(d11s16, d30s16); + q11s32 = vmull_s16(d10s16, d30s16); + q0s32 = vmull_s16(d11s16, d30s16); + + q13s32 = vmlal_s16(q13s32, d14s16, d30s16); + q1s32 = vmlal_s16(q1s32, d15s16, d30s16); + q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); + q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); + + d20s16 = vqrshrn_n_s32(q13s32, 14); + d21s16 = vqrshrn_n_s32(q1s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q0s32, 14); + *q10s16 = vcombine_s16(d20s16, d21s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + q5s16 = vdupq_n_s16(0); + + *q9s16 = vsubq_s16(q5s16, *q9s16); + *q11s16 = vsubq_s16(q5s16, q2s16); + *q13s16 = vsubq_s16(q5s16, q6s16); + *q15s16 = vsubq_s16(q5s16, q4s16); + return; } void vp10_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { - int i; - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 8 * 2); - q11s16 = vld1q_s16(input + 8 * 3); - q12s16 = vld1q_s16(input + 8 * 4); - q13s16 = vld1q_s16(input + 8 * 5); - q14s16 = vld1q_s16(input + 8 * 6); - q15s16 = vld1q_s16(input + 8 * 7); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - switch (tx_type) { - case 0: // idct_idct is not supported. Fall back to C - vp10_iht8x8_64_add_c(input, dest, dest_stride, tx_type); - return; - break; - case 1: // iadst_idct - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // first transform rows - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - case 2: // idct_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // then transform columns - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - case 3: // iadst_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - default: // iadst_idct - assert(0); - break; + int dest_stride, int tx_type) { + int i; + uint8_t *d1, *d2; + uint8x8_t d0u8, d1u8, d2u8, d3u8; + uint64x1_t d0u64, d1u64, d2u64, d3u64; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + q10s16 = vld1q_s16(input + 8 * 2); + q11s16 = vld1q_s16(input + 8 * 3); + q12s16 = vld1q_s16(input + 8 * 4); + q13s16 = vld1q_s16(input + 8 * 5); + q14s16 = vld1q_s16(input + 8 * 6); + q15s16 = vld1q_s16(input + 8 * 7); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + switch (tx_type) { + case 0: // idct_idct is not supported. Fall back to C + vp10_iht8x8_64_add_c(input, dest, dest_stride, tx_type); + return; + break; + case 1: // iadst_idct + // generate IDCT constants + // GENERATE_IDCT_CONSTANTS + + // first transform rows + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // then transform columns + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + break; + case 2: // idct_iadst + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // first transform rows + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + // generate IDCT constants + // GENERATE_IDCT_CONSTANTS + + // then transform columns + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + break; + case 3: // iadst_iadst + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // first transform rows + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + + // then transform columns + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, + &q15s16); + break; + default: // iadst_idct + assert(0); + break; + } + + q8s16 = vrshrq_n_s16(q8s16, 5); + q9s16 = vrshrq_n_s16(q9s16, 5); + q10s16 = vrshrq_n_s16(q10s16, 5); + q11s16 = vrshrq_n_s16(q11s16, 5); + q12s16 = vrshrq_n_s16(q12s16, 5); + q13s16 = vrshrq_n_s16(q13s16, 5); + q14s16 = vrshrq_n_s16(q14s16, 5); + q15s16 = vrshrq_n_s16(q15s16, 5); + + for (d1 = d2 = dest, i = 0; i < 2; i++) { + if (i != 0) { + q8s16 = q12s16; + q9s16 = q13s16; + q10s16 = q14s16; + q11s16 = q15s16; } - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - for (d1 = d2 = dest, i = 0; i < 2; i++) { - if (i != 0) { - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - } - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - } - return; + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64)); + q10u16 = + vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64)); + q11u16 = + vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + } + return; } diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c index c09fa8834252c19b111d30d776c2c8645f5ef51e..91cc2ed34ea662cf2588070be85a29b572fc3a54 100644 --- a/vp10/common/blockd.c +++ b/vp10/common/blockd.c @@ -15,10 +15,9 @@ #include "vp10/common/blockd.h" PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b) { + const MODE_INFO *left_mi, int b) { if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(&left_mi->mbmi)) - return DC_PRED; + if (!left_mi || is_inter_block(&left_mi->mbmi)) return DC_PRED; return get_y_mode(left_mi, b + 1); } else { @@ -28,10 +27,9 @@ PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi, } PREDICTION_MODE vp10_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b) { + const MODE_INFO *above_mi, int b) { if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(&above_mi->mbmi)) - return DC_PRED; + if (!above_mi || is_inter_block(&above_mi->mbmi)) return DC_PRED; return get_y_mode(above_mi, b + 2); } else { @@ -44,12 +42,11 @@ void vp10_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; + const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) - : mbmi->tx_size; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; @@ -61,13 +58,16 @@ void vp10_foreach_transformed_block_in_plane( // If mb_to_right_edge is < 0 we are in a situation in which // the current block size extends into the UMV and we won't // visit the sub blocks that are wholly within the UMV. - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : - xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : - xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - const int extra_step = - ((num_4x4_w - max_blocks_wide) >> - num_4x4_blocks_wide_txsize_log2_lookup[tx_size]) * step; + const int max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> + (5 + pd->subsampling_x)); + const int max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + const int extra_step = ((num_4x4_w - max_blocks_wide) >> + num_4x4_blocks_wide_txsize_log2_lookup[tx_size]) * + step; // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. @@ -81,10 +81,10 @@ void vp10_foreach_transformed_block_in_plane( } } -void vp10_foreach_transformed_block(const MACROBLOCKD* const xd, - BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, - void *arg) { +void vp10_foreach_transformed_block(const MACROBLOCKD *const xd, + BLOCK_SIZE bsize, + foreach_transformed_block_visitor visit, + void *arg) { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) vp10_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); @@ -107,10 +107,8 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, if (above_contexts + aoff > blocks_wide) above_contexts = blocks_wide - aoff; - for (i = 0; i < above_contexts; ++i) - a[i] = has_eob; - for (i = above_contexts; i < tx_w_in_blocks; ++i) - a[i] = 0; + for (i = 0; i < above_contexts; ++i) a[i] = has_eob; + for (i = above_contexts; i < tx_w_in_blocks; ++i) a[i] = 0; } else { memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks); } @@ -121,13 +119,10 @@ void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); int left_contexts = tx_h_in_blocks; - if (left_contexts + loff > blocks_high) - left_contexts = blocks_high - loff; + if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff; - for (i = 0; i < left_contexts; ++i) - l[i] = has_eob; - for (i = left_contexts; i < tx_h_in_blocks; ++i) - l[i] = 0; + for (i = 0; i < left_contexts; ++i) l[i] = has_eob; + for (i = left_contexts; i < tx_h_in_blocks; ++i) l[i] = 0; } else { memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks); } @@ -145,37 +140,26 @@ void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { #if CONFIG_EXT_INTRA const int16_t dr_intra_derivative[90] = { - 1, 14666, 7330, 4884, 3660, - 2926, 2435, 2084, 1821, 1616, - 1451, 1317, 1204, 1108, 1026, - 955, 892, 837, 787, 743, - 703, 666, 633, 603, 574, - 548, 524, 502, 481, 461, - 443, 426, 409, 394, 379, - 365, 352, 339, 327, 316, - 305, 294, 284, 274, 265, - 256, 247, 238, 230, 222, - 214, 207, 200, 192, 185, - 179, 172, 166, 159, 153, - 147, 141, 136, 130, 124, - 119, 113, 108, 103, 98, - 93, 88, 83, 78, 73, - 68, 63, 59, 54, 49, - 45, 40, 35, 31, 26, - 22, 17, 13, 8, 4, + 1, 14666, 7330, 4884, 3660, 2926, 2435, 2084, 1821, 1616, 1451, 1317, 1204, + 1108, 1026, 955, 892, 837, 787, 743, 703, 666, 633, 603, 574, 548, + 524, 502, 481, 461, 443, 426, 409, 394, 379, 365, 352, 339, 327, + 316, 305, 294, 284, 274, 265, 256, 247, 238, 230, 222, 214, 207, + 200, 192, 185, 179, 172, 166, 159, 153, 147, 141, 136, 130, 124, + 119, 113, 108, 103, 98, 93, 88, 83, 78, 73, 68, 63, 59, + 54, 49, 45, 40, 35, 31, 26, 22, 17, 13, 8, 4, }; // Returns whether filter selection is needed for a given // intra prediction angle. int vp10_is_intra_filter_switchable(int angle) { assert(angle > 0 && angle < 270); - if (angle % 45 == 0) - return 0; + if (angle % 45 == 0) return 0; if (angle > 90 && angle < 180) { return 1; } else { - return ((angle < 90 ? dr_intra_derivative[angle] : - dr_intra_derivative[270 - angle]) & 0xFF) > 0; + return ((angle < 90 ? dr_intra_derivative[angle] + : dr_intra_derivative[270 - angle]) & + 0xFF) > 0; } } #endif // CONFIG_EXT_INTRA diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 01e51648a6016cca8181727d9cdd990e934d9230..ec1ad88e0b811dbcca22603432cb7880fcc3ff08 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP10_COMMON_BLOCKD_H_ #define VP10_COMMON_BLOCKD_H_ @@ -40,9 +39,9 @@ typedef enum { } FRAME_TYPE; #if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS -#define IsInterpolatingFilter(filter) (vp10_is_interpolating_filter(filter)) +#define IsInterpolatingFilter(filter) (vp10_is_interpolating_filter(filter)) #else -#define IsInterpolatingFilter(filter) (1) +#define IsInterpolatingFilter(filter) (1) #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS static INLINE int is_inter_mode(PREDICTION_MODE mode) { @@ -127,8 +126,7 @@ static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) { } static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) { - return (mode == NEWMV || mode == NEWFROMNEARMV || - mode == NEW_NEWMV || + return (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV || mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV); } @@ -159,7 +157,7 @@ typedef int8_t MV_REFERENCE_FRAME; typedef struct { // Number of base colors for Y (0) and UV (1) uint8_t palette_size[2]; - // Value of base colors for Y, U, and V +// Value of base colors for Y, U, and V #if CONFIG_VP9_HIGHBITDEPTH uint16_t palette_colors[3 * PALETTE_MAX_SIZE]; #else @@ -195,14 +193,14 @@ typedef struct { #if CONFIG_SUPERTX // Minimum of all segment IDs under the current supertx block. int8_t segment_id_supertx; -#endif // CONFIG_SUPERTX +#endif // CONFIG_SUPERTX int8_t seg_id_predicted; // valid only when temporal_update is enabled // Only for INTRA blocks PREDICTION_MODE uv_mode; PALETTE_MODE_INFO palette_mode_info; - // Only for INTER blocks +// Only for INTER blocks #if CONFIG_DUAL_FILTER INTERP_FILTER interp_filter[4]; #else @@ -249,8 +247,7 @@ typedef struct MODE_INFO { } MODE_INFO; static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { - return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode - : mi->mbmi.mode; + return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode : mi->mbmi.mode; } static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) { @@ -262,15 +259,12 @@ static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { } PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b); + const MODE_INFO *left_mi, int b); PREDICTION_MODE vp10_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b); + const MODE_INFO *above_mi, int b); -enum mv_precision { - MV_PRECISION_Q3, - MV_PRECISION_Q4 -}; +enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 }; struct buf_2d { uint8_t *buf; @@ -292,7 +286,7 @@ typedef struct macroblockd_plane { int16_t seg_dequant[MAX_SEGMENTS][2]; #if CONFIG_NEW_QUANT dequant_val_type_nuq - seg_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS]; + seg_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS]; #endif uint8_t *color_index_map; @@ -304,11 +298,11 @@ typedef struct macroblockd_plane { // encoder const int16_t *dequant; #if CONFIG_NEW_QUANT - const dequant_val_type_nuq* dequant_val_nuq[QUANT_PROFILES]; + const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES]; #endif // CONFIG_NEW_QUANT } MACROBLOCKD_PLANE; -#define BLOCK_OFFSET(x, i) ((x) + (i) * 16) +#define BLOCK_OFFSET(x, i) ((x) + (i)*16) typedef struct RefBuffer { // TODO(dkovalev): idx is not really required and should be removed, now it @@ -416,9 +410,8 @@ static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = { #if CONFIG_SUPERTX static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) { - return (int)mbmi->tx_size > - VPXMIN(b_width_log2_lookup[mbmi->sb_type], - b_height_log2_lookup[mbmi->sb_type]); + return (int)mbmi->tx_size > VPXMIN(b_width_log2_lookup[mbmi->sb_type], + b_height_log2_lookup[mbmi->sb_type]); } #endif // CONFIG_SUPERTX @@ -435,52 +428,44 @@ static INLINE int get_tx2d_size(TX_SIZE tx_size) { } #if CONFIG_EXT_TX -#define ALLOW_INTRA_EXT_TX 1 +#define ALLOW_INTRA_EXT_TX 1 // whether masked transforms are used for 32X32 -#define USE_MSKTX_FOR_32X32 0 +#define USE_MSKTX_FOR_32X32 0 #define USE_REDUCED_TXSET_FOR_16X16 1 -static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { - 1, 16, 12, 2 -}; -static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { - 1, 7, 5 -}; +static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { 1, 16, 12, 2 }; +static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { 1, 7, 5 }; #if EXT_TX_SIZES == 4 -static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, - int is_inter) { +static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter) { tx_size = txsize_sqr_map[tx_size]; if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0; #if USE_REDUCED_TXSET_FOR_16X16 - if (tx_size == TX_32X32) - return is_inter ? 3 - USE_MSKTX_FOR_32X32 : 0; + if (tx_size == TX_32X32) return is_inter ? 3 - USE_MSKTX_FOR_32X32 : 0; return (tx_size == TX_16X16 ? 2 : 1); #else - if (tx_size == TX_32X32) - return is_inter ? 3 - 2 * USE_MSKTX_FOR_32X32 : 0; + if (tx_size == TX_32X32) return is_inter ? 3 - 2 * USE_MSKTX_FOR_32X32 : 0; return (tx_size == TX_16X16 && !is_inter ? 2 : 1); #endif // USE_REDUCED_TXSET_FOR_16X16 } static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = { - { 0, 0, 0, 0, }, // unused - { 1, 1, 0, 0, }, - { 0, 0, 1, 0, }, + { 0, 0, 0, 0 }, // unused + { 1, 1, 0, 0 }, + { 0, 0, 1, 0 }, }; static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = { - { 0, 0, 0, 0, }, // unused - { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), USE_MSKTX_FOR_32X32, }, - { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0, }, - { 0, 0, 0, (!USE_MSKTX_FOR_32X32), }, + { 0, 0, 0, 0 }, // unused + { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), USE_MSKTX_FOR_32X32 }, + { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0 }, + { 0, 0, 0, (!USE_MSKTX_FOR_32X32) }, }; #else // EXT_TX_SIZES == 4 -static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, - int is_inter) { - (void) is_inter; +static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter) { + (void)is_inter; tx_size = txsize_sqr_map[tx_size]; if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0; if (tx_size == TX_32X32) return 0; @@ -492,41 +477,38 @@ static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, } static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = { - { 0, 0, 0, 0, }, // unused - { 1, 1, 0, 0, }, - { 0, 0, 1, 0, }, + { 0, 0, 0, 0 }, // unused + { 1, 1, 0, 0 }, + { 0, 0, 1, 0 }, }; static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = { - { 0, 0, 0, 0, }, // unused - { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), 0, }, - { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0, }, - { 0, 0, 0, 1, }, + { 0, 0, 0, 0 }, // unused + { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), 0 }, + { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0 }, + { 0, 0, 0, 1 }, }; #endif // EXT_TX_SIZES == 4 // Transform types used in each intra set static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 }, + { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, }; // Transform types used in each inter set static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, }; // 1D Transforms used in inter set, this needs to be changed if // ext_tx_used_inter is changed static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = { - {1, 0, 0, 0}, - {1, 1, 1, 1}, - {1, 1, 1, 1}, - {1, 0, 0, 1}, + { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 }, }; static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs, @@ -544,7 +526,7 @@ static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs, extern const int16_t dr_intra_derivative[90]; static const uint8_t mode_to_angle_map[INTRA_MODES] = { - 0, 90, 180, 45, 135, 111, 157, 203, 67, 0, + 0, 90, 180, 45, 135, 111, 157, 203, 67, 0, }; static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = { @@ -570,20 +552,20 @@ int vp10_is_intra_filter_switchable(int angle); #endif static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type, - const MACROBLOCKD *xd, - int block_idx, TX_SIZE tx_size) { + const MACROBLOCKD *xd, int block_idx, + TX_SIZE tx_size) { const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; if (is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) return DCT_DCT; - return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ? - get_y_mode(xd->mi[0], block_idx) : mbmi->uv_mode]; + return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y + ? get_y_mode(xd->mi[0], block_idx) + : mbmi->uv_mode]; } -static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, - const MACROBLOCKD *xd, +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int block_idx, TX_SIZE tx_size) { const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -597,11 +579,11 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type]; const EXT_INTRA_MODE ext_intra_mode = mbmi->ext_intra_mode_info.ext_intra_mode[plane_type]; - const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) ? - get_y_mode(mi, block_idx) : mbmi->uv_mode; + const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) + ? get_y_mode(mi, block_idx) + : mbmi->uv_mode; - if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) - return DCT_DCT; + if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) return DCT_DCT; #if CONFIG_EXT_TX #if ALLOW_INTRA_EXT_TX @@ -636,8 +618,7 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, #if CONFIG_EXT_TX #if EXT_TX_SIZES == 4 - if (xd->lossless[mbmi->segment_id] || - txsize_sqr_map[tx_size] > TX_32X32 || + if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] > TX_32X32 || (txsize_sqr_map[tx_size] >= TX_32X32 && !is_inter_block(mbmi))) #else if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32) @@ -652,18 +633,20 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, } if (is_inter_block(mbmi)) // UV Inter only - return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] == TX_32X32) ? - DCT_DCT : mbmi->tx_type; + return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] == TX_32X32) + ? DCT_DCT + : mbmi->tx_type; } // Sub8x8-Inter/Intra OR UV-Intra if (is_inter_block(mbmi)) // Sub8x8-Inter return DCT_DCT; else // Sub8x8 Intra OR UV-Intra - return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ? - get_y_mode(mi, block_idx) : mbmi->uv_mode]; + return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y + ? get_y_mode(mi, block_idx) + : mbmi->uv_mode]; #else // CONFIG_EXT_TX - (void) block_idx; + (void)block_idx; if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32) return DCT_DCT; @@ -688,14 +671,14 @@ static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi, #if CONFIG_SUPERTX if (supertx_enabled(mbmi)) return uvsupertx_size_lookup[mbmi->tx_size][pd->subsampling_x] - [pd->subsampling_y]; + [pd->subsampling_y]; #endif // CONFIG_SUPERTX return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, pd->subsampling_y); } -static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd) { +static INLINE BLOCK_SIZE +get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) { return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; } @@ -714,20 +697,20 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { typedef void (*foreach_transformed_block_visitor)(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, - void *arg); + TX_SIZE tx_size, void *arg); void vp10_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg); -void vp10_foreach_transformed_block( - const MACROBLOCKD* const xd, BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, void *arg); +void vp10_foreach_transformed_block(const MACROBLOCKD *const xd, + BLOCK_SIZE bsize, + foreach_transformed_block_visitor visit, + void *arg); void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, - int aoff, int loff); + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, + int aoff, int loff); #if CONFIG_EXT_INTER static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) { @@ -744,16 +727,15 @@ static INLINE int is_interintra_allowed_ref(const MV_REFERENCE_FRAME rf[2]) { } static INLINE int is_interintra_allowed(const MB_MODE_INFO *mbmi) { - return is_interintra_allowed_bsize(mbmi->sb_type) - && is_interintra_allowed_mode(mbmi->mode) - && is_interintra_allowed_ref(mbmi->ref_frame); + return is_interintra_allowed_bsize(mbmi->sb_type) && + is_interintra_allowed_mode(mbmi->mode) && + is_interintra_allowed_ref(mbmi->ref_frame); } static INLINE int is_interintra_allowed_bsize_group(const int group) { int i; for (i = 0; i < BLOCK_SIZES; i++) { - if (size_group_lookup[i] == group && - is_interintra_allowed_bsize(i)) + if (size_group_lookup[i] == group && is_interintra_allowed_bsize(i)) return 1; } return 0; diff --git a/vp10/common/common.h b/vp10/common/common.h index 88f0d59ca42f43a48eebb99b0da0b9fd879f07f8..41dc1af0d2587f5b58a6a512a6351501c4099ae7 100644 --- a/vp10/common/common.h +++ b/vp10/common/common.h @@ -28,15 +28,17 @@ extern "C" { #define PI 3.141592653589793238462643383279502884 // Only need this for fixed-size arrays, for structs just assign. -#define vp10_copy(dest, src) { \ +#define vp10_copy(dest, src) \ + { \ assert(sizeof(dest) == sizeof(src)); \ - memcpy(dest, src, sizeof(src)); \ + memcpy(dest, src, sizeof(src)); \ } // Use this for variably-sized arrays. -#define vp10_copy_array(dest, src, n) { \ - assert(sizeof(*(dest)) == sizeof(*(src))); \ - memcpy(dest, src, n * sizeof(*(src))); \ +#define vp10_copy_array(dest, src, n) \ + { \ + assert(sizeof(*(dest)) == sizeof(*(src))); \ + memcpy(dest, src, n * sizeof(*(src))); \ } #define vp10_zero(dest) memset(&(dest), 0, sizeof(dest)) @@ -47,19 +49,21 @@ static INLINE int get_unsigned_bits(unsigned int num_values) { } #if CONFIG_DEBUG -#define CHECK_MEM_ERROR(cm, lval, expr) do { \ - lval = (expr); \ - if (!lval) \ - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ - "Failed to allocate "#lval" at %s:%d", \ - __FILE__, __LINE__); \ +#define CHECK_MEM_ERROR(cm, lval, expr) \ + do { \ + lval = (expr); \ + if (!lval) \ + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ + "Failed to allocate " #lval " at %s:%d", __FILE__, \ + __LINE__); \ } while (0) #else -#define CHECK_MEM_ERROR(cm, lval, expr) do { \ - lval = (expr); \ - if (!lval) \ - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ - "Failed to allocate "#lval); \ +#define CHECK_MEM_ERROR(cm, lval, expr) \ + do { \ + lval = (expr); \ + if (!lval) \ + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ + "Failed to allocate " #lval); \ } while (0) #endif // TODO(yaowu: validate the usage of these codes or develop new ones.) diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h index 2506986701c14ffe758e3c3c7ad60bcbf175f7a8..cb11822d1c7ef7114587be92a86a95883cf0aeb0 100644 --- a/vp10/common/common_data.h +++ b/vp10/common/common_data.h @@ -20,70 +20,78 @@ extern "C" { #endif #if CONFIG_EXT_PARTITION -# define IF_EXT_PARTITION(...) __VA_ARGS__ +#define IF_EXT_PARTITION(...) __VA_ARGS__ #else -# define IF_EXT_PARTITION(...) +#define IF_EXT_PARTITION(...) #endif // Log 2 conversion lookup tables for block width and height -static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)}; -static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = - {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)}; +static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { + 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5) +}; +static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { + 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5) +}; // Log 2 conversion lookup tables for modeinfo width and height -static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)}; -static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)}; +static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = { + 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4) +}; +static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = { + 0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4) +}; // Width/height lookup tables in units of varios block sizes -static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)}; -static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = - {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)}; -static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)}; -static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)}; -static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)}; -static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)}; +static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = { + 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32) +}; +static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = { + 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32) +}; +static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { + 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) +}; +static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = { + 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) +}; +static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8) +}; +static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = { + 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8) +}; -static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = { - 1, 4, 16, 64, +static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = { 1, 4, + 16, 64, #if CONFIG_EXT_TX - 2, 2 + 2, 2 #endif // CONFIG_EXT_TX }; -static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = { - 1, 2, 4, 8, +static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = { 1, 2, + 4, 8, #if CONFIG_EXT_TX - 1, 2 + 1, 2 #endif // CONFIG_EXT_TX }; -static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = { - 1, 2, 4, 8, +static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = { 1, 2, + 4, 8, #if CONFIG_EXT_TX - 2, 1 + 2, 1 #endif // CONFIG_EXT_TX }; -static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = { - 0, 2, 4, 6, +static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = { 0, 2, + 4, 6, #if CONFIG_EXT_TX - 1, 1 + 1, 1 #endif // CONFIG_EXT_TX }; -static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup - [TX_SIZES_ALL] = { +static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = { 0, 1, 2, 3, #if CONFIG_EXT_TX 0, 1 #endif // CONFIG_EXT_TX }; -static const uint8_t num_4x4_blocks_high_txsize_log2_lookup - [TX_SIZES_ALL] = { +static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = { 0, 1, 2, 3, #if CONFIG_EXT_TX 1, 0 @@ -91,12 +99,15 @@ static const uint8_t num_4x4_blocks_high_txsize_log2_lookup }; // VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) -static const uint8_t size_group_lookup[BLOCK_SIZES] = - {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)}; +static const uint8_t size_group_lookup[BLOCK_SIZES] = { + 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3) +}; -static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = - {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)}; +static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = { + 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14) +}; +/* clang-format off */ static const PARTITION_TYPE partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = { { // 4X4 -> @@ -355,6 +366,7 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = { #endif // CONFIG_EXT_PARTITION }; #endif // CONFIG_EXT_TX +/* clang-format on */ static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = { BLOCK_4X4, // TX_4X4 @@ -362,9 +374,9 @@ static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = { BLOCK_16X16, // TX_16X16 BLOCK_32X32, // TX_32X32 #if CONFIG_EXT_TX - BLOCK_4X8, // TX_4X8 - BLOCK_8X4, // TX_8X4 -#endif // CONFIG_EXT_TX + BLOCK_4X8, // TX_4X8 + BLOCK_8X4, // TX_8X4 +#endif // CONFIG_EXT_TX }; static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = { @@ -373,9 +385,9 @@ static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = { TX_16X16, // TX_16X16 TX_32X32, // TX_32X32 #if CONFIG_EXT_TX - TX_4X4, // TX_4X8 - TX_4X4, // TX_8X4 -#endif // CONFIG_EXT_TX + TX_4X4, // TX_4X8 + TX_4X4, // TX_8X4 +#endif // CONFIG_EXT_TX }; static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = { @@ -384,40 +396,39 @@ static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = { TX_16X16, // TX_16X16 TX_32X32, // TX_32X32 #if CONFIG_EXT_TX - TX_8X8, // TX_4X8 - TX_8X8, // TX_8X4 -#endif // CONFIG_EXT_TX + TX_8X8, // TX_4X8 + TX_8X8, // TX_8X4 +#endif // CONFIG_EXT_TX }; - static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { - TX_4X4, // ONLY_4X4 - TX_8X8, // ALLOW_8X8 + TX_4X4, // ONLY_4X4 + TX_8X8, // ALLOW_8X8 TX_16X16, // ALLOW_16X16 TX_32X32, // ALLOW_32X32 TX_32X32, // TX_MODE_SELECT }; static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { -// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 -// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 - {{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}}, - {{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}}, - {{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}}, - {{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}}, - {{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}}, - {{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}}, - {{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}}, - {{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}}, - {{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}}, - {{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}}, - {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, - {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, - {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + { { BLOCK_4X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } }, + { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_INVALID } }, + { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_INVALID } }, + { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } }, + { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } }, + { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } }, + { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } }, + { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } }, + { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } }, + { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } }, + { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } }, + { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } }, + { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } }, #if CONFIG_EXT_PARTITION - {{BLOCK_64X128, BLOCK_64X64}, {BLOCK_INVALID, BLOCK_32X64}}, - {{BLOCK_128X64, BLOCK_INVALID}, {BLOCK_64X64, BLOCK_64X32}}, - {{BLOCK_128X128, BLOCK_128X64}, {BLOCK_64X128, BLOCK_64X64}}, + { { BLOCK_64X128, BLOCK_64X64 }, { BLOCK_INVALID, BLOCK_32X64 } }, + { { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } }, + { { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } }, #endif // CONFIG_EXT_PARTITION }; @@ -427,38 +438,38 @@ static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { static const struct { PARTITION_CONTEXT above; PARTITION_CONTEXT left; -} partition_context_lookup[BLOCK_SIZES]= { +} partition_context_lookup[BLOCK_SIZES] = { #if CONFIG_EXT_PARTITION - {31, 31}, // 4X4 - {0b11111, 0b11111} - {31, 30}, // 4X8 - {0b11111, 0b11110} - {30, 31}, // 8X4 - {0b11110, 0b11111} - {30, 30}, // 8X8 - {0b11110, 0b11110} - {30, 28}, // 8X16 - {0b11110, 0b11100} - {28, 30}, // 16X8 - {0b11100, 0b11110} - {28, 28}, // 16X16 - {0b11100, 0b11100} - {28, 24}, // 16X32 - {0b11100, 0b11000} - {24, 28}, // 32X16 - {0b11000, 0b11100} - {24, 24}, // 32X32 - {0b11000, 0b11000} - {24, 16}, // 32X64 - {0b11000, 0b10000} - {16, 24}, // 64X32 - {0b10000, 0b11000} - {16, 16}, // 64X64 - {0b10000, 0b10000} - {16, 0 }, // 64X128- {0b10000, 0b00000} - {0, 16}, // 128X64- {0b00000, 0b10000} - {0, 0 }, // 128X128-{0b00000, 0b00000} + { 31, 31 }, // 4X4 - {0b11111, 0b11111} + { 31, 30 }, // 4X8 - {0b11111, 0b11110} + { 30, 31 }, // 8X4 - {0b11110, 0b11111} + { 30, 30 }, // 8X8 - {0b11110, 0b11110} + { 30, 28 }, // 8X16 - {0b11110, 0b11100} + { 28, 30 }, // 16X8 - {0b11100, 0b11110} + { 28, 28 }, // 16X16 - {0b11100, 0b11100} + { 28, 24 }, // 16X32 - {0b11100, 0b11000} + { 24, 28 }, // 32X16 - {0b11000, 0b11100} + { 24, 24 }, // 32X32 - {0b11000, 0b11000} + { 24, 16 }, // 32X64 - {0b11000, 0b10000} + { 16, 24 }, // 64X32 - {0b10000, 0b11000} + { 16, 16 }, // 64X64 - {0b10000, 0b10000} + { 16, 0 }, // 64X128- {0b10000, 0b00000} + { 0, 16 }, // 128X64- {0b00000, 0b10000} + { 0, 0 }, // 128X128-{0b00000, 0b00000} #else - {15, 15}, // 4X4 - {0b1111, 0b1111} - {15, 14}, // 4X8 - {0b1111, 0b1110} - {14, 15}, // 8X4 - {0b1110, 0b1111} - {14, 14}, // 8X8 - {0b1110, 0b1110} - {14, 12}, // 8X16 - {0b1110, 0b1100} - {12, 14}, // 16X8 - {0b1100, 0b1110} - {12, 12}, // 16X16 - {0b1100, 0b1100} - {12, 8 }, // 16X32 - {0b1100, 0b1000} - {8, 12}, // 32X16 - {0b1000, 0b1100} - {8, 8 }, // 32X32 - {0b1000, 0b1000} - {8, 0 }, // 32X64 - {0b1000, 0b0000} - {0, 8 }, // 64X32 - {0b0000, 0b1000} - {0, 0 }, // 64X64 - {0b0000, 0b0000} + { 15, 15 }, // 4X4 - {0b1111, 0b1111} + { 15, 14 }, // 4X8 - {0b1111, 0b1110} + { 14, 15 }, // 8X4 - {0b1110, 0b1111} + { 14, 14 }, // 8X8 - {0b1110, 0b1110} + { 14, 12 }, // 8X16 - {0b1110, 0b1100} + { 12, 14 }, // 16X8 - {0b1100, 0b1110} + { 12, 12 }, // 16X16 - {0b1100, 0b1100} + { 12, 8 }, // 16X32 - {0b1100, 0b1000} + { 8, 12 }, // 32X16 - {0b1000, 0b1100} + { 8, 8 }, // 32X32 - {0b1000, 0b1000} + { 8, 0 }, // 32X64 - {0b1000, 0b0000} + { 0, 8 }, // 64X32 - {0b0000, 0b1000} + { 0, 0 }, // 64X64 - {0b0000, 0b0000} #endif // CONFIG_EXT_PARTITION }; @@ -466,10 +477,10 @@ static const struct { static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = { // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 - {{TX_4X4, TX_4X4}, {TX_4X4, TX_4X4}}, - {{TX_8X8, TX_4X4}, {TX_4X4, TX_4X4}}, - {{TX_16X16, TX_8X8}, {TX_8X8, TX_8X8}}, - {{TX_32X32, TX_16X16}, {TX_16X16, TX_16X16}}, + { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, + { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } }, + { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } }, }; #if CONFIG_EXT_PARTITION_TYPES @@ -478,9 +489,8 @@ static const int partition_supertx_context_lookup[EXT_PARTITION_TYPES] = { }; #else -static const int partition_supertx_context_lookup[PARTITION_TYPES] = { - -1, 0, 0, 1 -}; +static const int partition_supertx_context_lookup[PARTITION_TYPES] = { -1, 0, 0, + 1 }; #endif // CONFIG_EXT_PARTITION_TYPES #endif // CONFIG_SUPERTX diff --git a/vp10/common/debugmodes.c b/vp10/common/debugmodes.c index 10fc4d633d3b1b64436193ec32aa296ce8fa8ab0..18bbbec1ccb95a8a77a60ba37c3b2708410be7b4 100644 --- a/vp10/common/debugmodes.c +++ b/vp10/common/debugmodes.c @@ -34,9 +34,7 @@ static void print_mi_data(VP10_COMMON *cm, FILE *file, const char *descriptor, for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(file, "%2d ", - *((int*) ((char *) (&mi[0]->mbmi) + - member_offset))); + fprintf(file, "%2d ", *((int *)((char *)(&mi[0]->mbmi) + member_offset))); mi++; } fprintf(file, "\n"); @@ -79,7 +77,7 @@ void vp10_print_modes_and_motion_vectors(VP10_COMMON *cm, const char *file) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row, - mi[0]->mbmi.mv[0].as_mv.col); + mi[0]->mbmi.mv[0].as_mv.col); mi++; } fprintf(mvs, "\n"); diff --git a/vp10/common/divide.c b/vp10/common/divide.c index 3f144d71a5de7ab18d4d31ae1a60bb9d726b1d9d..2fa95a114a193b360576732012fba14cb316f94a 100644 --- a/vp10/common/divide.c +++ b/vp10/common/divide.c @@ -26,68 +26,90 @@ void init_fastdiv() { } */ const struct fastdiv_elem vp10_fastdiv_tab[256] = { - {0, 0}, {0, 0}, {0, 1}, {1431655766, 2}, - {0, 2}, {2576980378u, 3}, {1431655766, 3}, {613566757, 3}, - {0, 3}, {3340530120u, 4}, {2576980378u, 4}, {1952257862, 4}, - {1431655766, 4}, {991146300, 4}, {613566757, 4}, {286331154u, 4}, - {0, 4}, {3789677026u, 5}, {3340530120u, 5}, {2938661835u, 5}, - {2576980378u, 5}, {2249744775u, 5}, {1952257862, 5}, {1680639377, 5}, - {1431655766, 5}, {1202590843, 5}, {991146300, 5}, {795364315, 5}, - {613566757, 5}, {444306962, 5}, {286331154, 5}, {138547333, 5}, - {0, 5}, {4034666248u, 6}, {3789677026u, 6}, {3558687189u, 6}, - {3340530120u, 6}, {3134165325u, 6}, {2938661835u, 6}, {2753184165u, 6}, - {2576980378u, 6}, {2409371898u, 6}, {2249744775u, 6}, {2097542168u, 6}, - {1952257862, 6}, {1813430637, 6}, {1680639377, 6}, {1553498810, 6}, - {1431655766, 6}, {1314785907, 6}, {1202590843, 6}, {1094795586, 6}, - {991146300, 6}, {891408307, 6}, {795364315, 6}, {702812831, 6}, - {613566757, 6}, {527452125, 6}, {444306962, 6}, {363980280, 6}, - {286331154, 6}, {211227900, 6}, {138547333, 6}, {68174085, 6}, - {0, 6}, {4162814457u, 7}, {4034666248u, 7}, {3910343360u, 7}, - {3789677026u, 7}, {3672508268u, 7}, {3558687189u, 7}, {3448072337u, 7}, - {3340530120u, 7}, {3235934265u, 7}, {3134165325u, 7}, {3035110223u, 7}, - {2938661835u, 7}, {2844718599u, 7}, {2753184165u, 7}, {2663967058u, 7}, - {2576980378u, 7}, {2492141518u, 7}, {2409371898u, 7}, {2328596727u, 7}, - {2249744775u, 7}, {2172748162u, 7}, {2097542168, 7}, {2024065048, 7}, - {1952257862, 7}, {1882064321, 7}, {1813430637, 7}, {1746305385, 7}, - {1680639377, 7}, {1616385542, 7}, {1553498810, 7}, {1491936009, 7}, - {1431655766, 7}, {1372618415, 7}, {1314785907, 7}, {1258121734, 7}, - {1202590843, 7}, {1148159575, 7}, {1094795586, 7}, {1042467791, 7}, - {991146300, 7}, {940802361, 7}, {891408307, 7}, {842937507, 7}, - {795364315, 7}, {748664025, 7}, {702812831, 7}, {657787785, 7}, - {613566757, 7}, {570128403, 7}, {527452125, 7}, {485518043, 7}, - {444306962, 7}, {403800345, 7}, {363980280, 7}, {324829460, 7}, - {286331154, 7}, {248469183, 7}, {211227900, 7}, {174592167, 7}, - {138547333, 7}, {103079216, 7}, {68174085, 7}, {33818641, 7}, - {0, 7}, {4228378656u, 8}, {4162814457u, 8}, {4098251237u, 8}, - {4034666248u, 8}, {3972037425u, 8}, {3910343360u, 8}, {3849563281u, 8}, - {3789677026u, 8}, {3730665024u, 8}, {3672508268u, 8}, {3615188300u, 8}, - {3558687189u, 8}, {3502987511u, 8}, {3448072337u, 8}, {3393925206u, 8}, - {3340530120u, 8}, {3287871517u, 8}, {3235934265u, 8}, {3184703642u, 8}, - {3134165325u, 8}, {3084305374u, 8}, {3035110223u, 8}, {2986566663u, 8}, - {2938661835u, 8}, {2891383213u, 8}, {2844718599u, 8}, {2798656110u, 8}, - {2753184165u, 8}, {2708291480u, 8}, {2663967058u, 8}, {2620200175u, 8}, - {2576980378u, 8}, {2534297473u, 8}, {2492141518u, 8}, {2450502814u, 8}, - {2409371898u, 8}, {2368739540u, 8}, {2328596727u, 8}, {2288934667u, 8}, - {2249744775u, 8}, {2211018668u, 8}, {2172748162u, 8}, {2134925265u, 8}, - {2097542168, 8}, {2060591247, 8}, {2024065048, 8}, {1987956292, 8}, - {1952257862, 8}, {1916962805, 8}, {1882064321, 8}, {1847555765, 8}, - {1813430637, 8}, {1779682582, 8}, {1746305385, 8}, {1713292966, 8}, - {1680639377, 8}, {1648338801, 8}, {1616385542, 8}, {1584774030, 8}, - {1553498810, 8}, {1522554545, 8}, {1491936009, 8}, {1461638086, 8}, - {1431655766, 8}, {1401984144, 8}, {1372618415, 8}, {1343553873, 8}, - {1314785907, 8}, {1286310003, 8}, {1258121734, 8}, {1230216764, 8}, - {1202590843, 8}, {1175239808, 8}, {1148159575, 8}, {1121346142, 8}, - {1094795586, 8}, {1068504060, 8}, {1042467791, 8}, {1016683080, 8}, - {991146300, 8}, {965853890, 8}, {940802361, 8}, {915988286, 8}, - {891408307, 8}, {867059126, 8}, {842937507, 8}, {819040276, 8}, - {795364315, 8}, {771906565, 8}, {748664025, 8}, {725633745, 8}, - {702812831, 8}, {680198441, 8}, {657787785, 8}, {635578121, 8}, - {613566757, 8}, {591751050, 8}, {570128403, 8}, {548696263, 8}, - {527452125, 8}, {506393524, 8}, {485518043, 8}, {464823301, 8}, - {444306962, 8}, {423966729, 8}, {403800345, 8}, {383805589, 8}, - {363980280, 8}, {344322273, 8}, {324829460, 8}, {305499766, 8}, - {286331154, 8}, {267321616, 8}, {248469183, 8}, {229771913, 8}, - {211227900, 8}, {192835267, 8}, {174592167, 8}, {156496785, 8}, - {138547333, 8}, {120742053, 8}, {103079216, 8}, {85557118, 8}, - {68174085, 8}, {50928466, 8}, {33818641, 8}, {16843010, 8}, + { 0, 0 }, { 0, 0 }, { 0, 1 }, + { 1431655766, 2 }, { 0, 2 }, { 2576980378u, 3 }, + { 1431655766, 3 }, { 613566757, 3 }, { 0, 3 }, + { 3340530120u, 4 }, { 2576980378u, 4 }, { 1952257862, 4 }, + { 1431655766, 4 }, { 991146300, 4 }, { 613566757, 4 }, + { 286331154u, 4 }, { 0, 4 }, { 3789677026u, 5 }, + { 3340530120u, 5 }, { 2938661835u, 5 }, { 2576980378u, 5 }, + { 2249744775u, 5 }, { 1952257862, 5 }, { 1680639377, 5 }, + { 1431655766, 5 }, { 1202590843, 5 }, { 991146300, 5 }, + { 795364315, 5 }, { 613566757, 5 }, { 444306962, 5 }, + { 286331154, 5 }, { 138547333, 5 }, { 0, 5 }, + { 4034666248u, 6 }, { 3789677026u, 6 }, { 3558687189u, 6 }, + { 3340530120u, 6 }, { 3134165325u, 6 }, { 2938661835u, 6 }, + { 2753184165u, 6 }, { 2576980378u, 6 }, { 2409371898u, 6 }, + { 2249744775u, 6 }, { 2097542168u, 6 }, { 1952257862, 6 }, + { 1813430637, 6 }, { 1680639377, 6 }, { 1553498810, 6 }, + { 1431655766, 6 }, { 1314785907, 6 }, { 1202590843, 6 }, + { 1094795586, 6 }, { 991146300, 6 }, { 891408307, 6 }, + { 795364315, 6 }, { 702812831, 6 }, { 613566757, 6 }, + { 527452125, 6 }, { 444306962, 6 }, { 363980280, 6 }, + { 286331154, 6 }, { 211227900, 6 }, { 138547333, 6 }, + { 68174085, 6 }, { 0, 6 }, { 4162814457u, 7 }, + { 4034666248u, 7 }, { 3910343360u, 7 }, { 3789677026u, 7 }, + { 3672508268u, 7 }, { 3558687189u, 7 }, { 3448072337u, 7 }, + { 3340530120u, 7 }, { 3235934265u, 7 }, { 3134165325u, 7 }, + { 3035110223u, 7 }, { 2938661835u, 7 }, { 2844718599u, 7 }, + { 2753184165u, 7 }, { 2663967058u, 7 }, { 2576980378u, 7 }, + { 2492141518u, 7 }, { 2409371898u, 7 }, { 2328596727u, 7 }, + { 2249744775u, 7 }, { 2172748162u, 7 }, { 2097542168, 7 }, + { 2024065048, 7 }, { 1952257862, 7 }, { 1882064321, 7 }, + { 1813430637, 7 }, { 1746305385, 7 }, { 1680639377, 7 }, + { 1616385542, 7 }, { 1553498810, 7 }, { 1491936009, 7 }, + { 1431655766, 7 }, { 1372618415, 7 }, { 1314785907, 7 }, + { 1258121734, 7 }, { 1202590843, 7 }, { 1148159575, 7 }, + { 1094795586, 7 }, { 1042467791, 7 }, { 991146300, 7 }, + { 940802361, 7 }, { 891408307, 7 }, { 842937507, 7 }, + { 795364315, 7 }, { 748664025, 7 }, { 702812831, 7 }, + { 657787785, 7 }, { 613566757, 7 }, { 570128403, 7 }, + { 527452125, 7 }, { 485518043, 7 }, { 444306962, 7 }, + { 403800345, 7 }, { 363980280, 7 }, { 324829460, 7 }, + { 286331154, 7 }, { 248469183, 7 }, { 211227900, 7 }, + { 174592167, 7 }, { 138547333, 7 }, { 103079216, 7 }, + { 68174085, 7 }, { 33818641, 7 }, { 0, 7 }, + { 4228378656u, 8 }, { 4162814457u, 8 }, { 4098251237u, 8 }, + { 4034666248u, 8 }, { 3972037425u, 8 }, { 3910343360u, 8 }, + { 3849563281u, 8 }, { 3789677026u, 8 }, { 3730665024u, 8 }, + { 3672508268u, 8 }, { 3615188300u, 8 }, { 3558687189u, 8 }, + { 3502987511u, 8 }, { 3448072337u, 8 }, { 3393925206u, 8 }, + { 3340530120u, 8 }, { 3287871517u, 8 }, { 3235934265u, 8 }, + { 3184703642u, 8 }, { 3134165325u, 8 }, { 3084305374u, 8 }, + { 3035110223u, 8 }, { 2986566663u, 8 }, { 2938661835u, 8 }, + { 2891383213u, 8 }, { 2844718599u, 8 }, { 2798656110u, 8 }, + { 2753184165u, 8 }, { 2708291480u, 8 }, { 2663967058u, 8 }, + { 2620200175u, 8 }, { 2576980378u, 8 }, { 2534297473u, 8 }, + { 2492141518u, 8 }, { 2450502814u, 8 }, { 2409371898u, 8 }, + { 2368739540u, 8 }, { 2328596727u, 8 }, { 2288934667u, 8 }, + { 2249744775u, 8 }, { 2211018668u, 8 }, { 2172748162u, 8 }, + { 2134925265u, 8 }, { 2097542168, 8 }, { 2060591247, 8 }, + { 2024065048, 8 }, { 1987956292, 8 }, { 1952257862, 8 }, + { 1916962805, 8 }, { 1882064321, 8 }, { 1847555765, 8 }, + { 1813430637, 8 }, { 1779682582, 8 }, { 1746305385, 8 }, + { 1713292966, 8 }, { 1680639377, 8 }, { 1648338801, 8 }, + { 1616385542, 8 }, { 1584774030, 8 }, { 1553498810, 8 }, + { 1522554545, 8 }, { 1491936009, 8 }, { 1461638086, 8 }, + { 1431655766, 8 }, { 1401984144, 8 }, { 1372618415, 8 }, + { 1343553873, 8 }, { 1314785907, 8 }, { 1286310003, 8 }, + { 1258121734, 8 }, { 1230216764, 8 }, { 1202590843, 8 }, + { 1175239808, 8 }, { 1148159575, 8 }, { 1121346142, 8 }, + { 1094795586, 8 }, { 1068504060, 8 }, { 1042467791, 8 }, + { 1016683080, 8 }, { 991146300, 8 }, { 965853890, 8 }, + { 940802361, 8 }, { 915988286, 8 }, { 891408307, 8 }, + { 867059126, 8 }, { 842937507, 8 }, { 819040276, 8 }, + { 795364315, 8 }, { 771906565, 8 }, { 748664025, 8 }, + { 725633745, 8 }, { 702812831, 8 }, { 680198441, 8 }, + { 657787785, 8 }, { 635578121, 8 }, { 613566757, 8 }, + { 591751050, 8 }, { 570128403, 8 }, { 548696263, 8 }, + { 527452125, 8 }, { 506393524, 8 }, { 485518043, 8 }, + { 464823301, 8 }, { 444306962, 8 }, { 423966729, 8 }, + { 403800345, 8 }, { 383805589, 8 }, { 363980280, 8 }, + { 344322273, 8 }, { 324829460, 8 }, { 305499766, 8 }, + { 286331154, 8 }, { 267321616, 8 }, { 248469183, 8 }, + { 229771913, 8 }, { 211227900, 8 }, { 192835267, 8 }, + { 174592167, 8 }, { 156496785, 8 }, { 138547333, 8 }, + { 120742053, 8 }, { 103079216, 8 }, { 85557118, 8 }, + { 68174085, 8 }, { 50928466, 8 }, { 33818641, 8 }, + { 16843010, 8 }, }; diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c index 1ce801a65caa3dc70876f53effd6cbb3090b92d9..4f51c53da3af659cf97a24437518e8d231e84fc6 100644 --- a/vp10/common/entropy.c +++ b/vp10/common/entropy.c @@ -16,6 +16,7 @@ #include "vpx/vpx_integer.h" // Unconstrained Node Tree +/* clang-format off */ const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { 2, 6, // 0 = LOW_VAL -TWO_TOKEN, 4, // 1 = TWO @@ -26,15 +27,15 @@ const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE }; +/* clang-format on */ const vpx_prob vp10_cat1_prob[] = { 159 }; const vpx_prob vp10_cat2_prob[] = { 165, 145 }; const vpx_prob vp10_cat3_prob[] = { 173, 148, 140 }; const vpx_prob vp10_cat4_prob[] = { 176, 155, 140, 135 }; const vpx_prob vp10_cat5_prob[] = { 180, 157, 141, 134, 130 }; -const vpx_prob vp10_cat6_prob[] = { - 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 -}; +const vpx_prob vp10_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230, + 196, 177, 153, 140, 133, 130, 129 }; #if CONFIG_VP9_HIGHBITDEPTH const vpx_prob vp10_cat1_prob_high10[] = { 159 }; const vpx_prob vp10_cat2_prob_high10[] = { 165, 145 }; @@ -42,109 +43,76 @@ const vpx_prob vp10_cat3_prob_high10[] = { 173, 148, 140 }; const vpx_prob vp10_cat4_prob_high10[] = { 176, 155, 140, 135 }; const vpx_prob vp10_cat5_prob_high10[] = { 180, 157, 141, 134, 130 }; const vpx_prob vp10_cat6_prob_high10[] = { - 255, 255, 254, 254, 254, 252, 249, 243, - 230, 196, 177, 153, 140, 133, 130, 129 + 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; const vpx_prob vp10_cat1_prob_high12[] = { 159 }; const vpx_prob vp10_cat2_prob_high12[] = { 165, 145 }; const vpx_prob vp10_cat3_prob_high12[] = { 173, 148, 140 }; const vpx_prob vp10_cat4_prob_high12[] = { 176, 155, 140, 135 }; const vpx_prob vp10_cat5_prob_high12[] = { 180, 157, 141, 134, 130 }; -const vpx_prob vp10_cat6_prob_high12[] = { - 255, 255, 255, 255, 254, 254, 254, 252, 249, - 243, 230, 196, 177, 153, 140, 133, 130, 129 -}; +const vpx_prob vp10_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254, + 254, 252, 249, 243, 230, 196, + 177, 153, 140, 133, 130, 129 }; #endif const uint16_t band_count_table[TX_SIZES_ALL][8] = { - { 1, 2, 3, 4, 3, 16 - 13, 0 }, - { 1, 2, 3, 4, 11, 64 - 21, 0 }, - { 1, 2, 3, 4, 11, 256 - 21, 0 }, - { 1, 2, 3, 4, 11, 1024 - 21, 0 }, + { 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 }, + { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 }, #if CONFIG_EXT_TX - { 1, 2, 3, 4, 8, 32 - 18, 0 }, - { 1, 2, 3, 4, 8, 32 - 18, 0 }, + { 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 }, #endif // CONFIG_EXT_TX }; const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = { - { 0, 1, 3, 6, 10, 13, 16, 0 }, - { 0, 1, 3, 6, 10, 21, 64, 0 }, - { 0, 1, 3, 6, 10, 21, 256, 0 }, - { 0, 1, 3, 6, 10, 21, 1024, 0 }, + { 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 }, + { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 }, #if CONFIG_EXT_TX - { 0, 1, 3, 6, 10, 18, 32, 0 }, - { 0, 1, 3, 6, 10, 18, 32, 0 }, + { 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 }, #endif // CONFIG_EXT_TX }; const uint8_t vp10_coefband_trans_8x8plus[1024] = { - 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, // beyond MAXBAND_INDEX+1 all values are filled as 5 - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, }; #if CONFIG_EXT_TX @@ -158,9 +126,8 @@ const uint8_t vp10_coefband_trans_4x4[16] = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, }; -const uint8_t vp10_pt_energy_class[ENTROPY_TOKENS] = { - 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 -}; +const uint8_t vp10_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4, + 4, 5, 5, 5, 5, 5 }; // Model obtained from a 2-sided zero-centered distribution derived // from a Pareto distribution. The cdf of the distribution is: @@ -177,261 +144,261 @@ const uint8_t vp10_pt_energy_class[ENTROPY_TOKENS] = { // vp10_pareto8_full[l][node] = (vp10_pareto8_full[l-1][node] + // vp10_pareto8_full[l+1][node] ) >> 1; const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { - { 3, 86, 128, 6, 86, 23, 88, 29}, - { 6, 86, 128, 11, 87, 42, 91, 52}, - { 9, 86, 129, 17, 88, 61, 94, 76}, - { 12, 86, 129, 22, 88, 77, 97, 93}, - { 15, 87, 129, 28, 89, 93, 100, 110}, - { 17, 87, 129, 33, 90, 105, 103, 123}, - { 20, 88, 130, 38, 91, 118, 106, 136}, - { 23, 88, 130, 43, 91, 128, 108, 146}, - { 26, 89, 131, 48, 92, 139, 111, 156}, - { 28, 89, 131, 53, 93, 147, 114, 163}, - { 31, 90, 131, 58, 94, 156, 117, 171}, - { 34, 90, 131, 62, 94, 163, 119, 177}, - { 37, 90, 132, 66, 95, 171, 122, 184}, - { 39, 90, 132, 70, 96, 177, 124, 189}, - { 42, 91, 132, 75, 97, 183, 127, 194}, - { 44, 91, 132, 79, 97, 188, 129, 198}, - { 47, 92, 133, 83, 98, 193, 132, 202}, - { 49, 92, 133, 86, 99, 197, 134, 205}, - { 52, 93, 133, 90, 100, 201, 137, 208}, - { 54, 93, 133, 94, 100, 204, 139, 211}, - { 57, 94, 134, 98, 101, 208, 142, 214}, - { 59, 94, 134, 101, 102, 211, 144, 216}, - { 62, 94, 135, 105, 103, 214, 146, 218}, - { 64, 94, 135, 108, 103, 216, 148, 220}, - { 66, 95, 135, 111, 104, 219, 151, 222}, - { 68, 95, 135, 114, 105, 221, 153, 223}, - { 71, 96, 136, 117, 106, 224, 155, 225}, - { 73, 96, 136, 120, 106, 225, 157, 226}, - { 76, 97, 136, 123, 107, 227, 159, 228}, - { 78, 97, 136, 126, 108, 229, 160, 229}, - { 80, 98, 137, 129, 109, 231, 162, 231}, - { 82, 98, 137, 131, 109, 232, 164, 232}, - { 84, 98, 138, 134, 110, 234, 166, 233}, - { 86, 98, 138, 137, 111, 235, 168, 234}, - { 89, 99, 138, 140, 112, 236, 170, 235}, - { 91, 99, 138, 142, 112, 237, 171, 235}, - { 93, 100, 139, 145, 113, 238, 173, 236}, - { 95, 100, 139, 147, 114, 239, 174, 237}, - { 97, 101, 140, 149, 115, 240, 176, 238}, - { 99, 101, 140, 151, 115, 241, 177, 238}, - {101, 102, 140, 154, 116, 242, 179, 239}, - {103, 102, 140, 156, 117, 242, 180, 239}, - {105, 103, 141, 158, 118, 243, 182, 240}, - {107, 103, 141, 160, 118, 243, 183, 240}, - {109, 104, 141, 162, 119, 244, 185, 241}, - {111, 104, 141, 164, 119, 244, 186, 241}, - {113, 104, 142, 166, 120, 245, 187, 242}, - {114, 104, 142, 168, 121, 245, 188, 242}, - {116, 105, 143, 170, 122, 246, 190, 243}, - {118, 105, 143, 171, 122, 246, 191, 243}, - {120, 106, 143, 173, 123, 247, 192, 244}, - {121, 106, 143, 175, 124, 247, 193, 244}, - {123, 107, 144, 177, 125, 248, 195, 244}, - {125, 107, 144, 178, 125, 248, 196, 244}, - {127, 108, 145, 180, 126, 249, 197, 245}, - {128, 108, 145, 181, 127, 249, 198, 245}, - {130, 109, 145, 183, 128, 249, 199, 245}, - {132, 109, 145, 184, 128, 249, 200, 245}, - {134, 110, 146, 186, 129, 250, 201, 246}, - {135, 110, 146, 187, 130, 250, 202, 246}, - {137, 111, 147, 189, 131, 251, 203, 246}, - {138, 111, 147, 190, 131, 251, 204, 246}, - {140, 112, 147, 192, 132, 251, 205, 247}, - {141, 112, 147, 193, 132, 251, 206, 247}, - {143, 113, 148, 194, 133, 251, 207, 247}, - {144, 113, 148, 195, 134, 251, 207, 247}, - {146, 114, 149, 197, 135, 252, 208, 248}, - {147, 114, 149, 198, 135, 252, 209, 248}, - {149, 115, 149, 199, 136, 252, 210, 248}, - {150, 115, 149, 200, 137, 252, 210, 248}, - {152, 115, 150, 201, 138, 252, 211, 248}, - {153, 115, 150, 202, 138, 252, 212, 248}, - {155, 116, 151, 204, 139, 253, 213, 249}, - {156, 116, 151, 205, 139, 253, 213, 249}, - {158, 117, 151, 206, 140, 253, 214, 249}, - {159, 117, 151, 207, 141, 253, 215, 249}, - {161, 118, 152, 208, 142, 253, 216, 249}, - {162, 118, 152, 209, 142, 253, 216, 249}, - {163, 119, 153, 210, 143, 253, 217, 249}, - {164, 119, 153, 211, 143, 253, 217, 249}, - {166, 120, 153, 212, 144, 254, 218, 250}, - {167, 120, 153, 212, 145, 254, 219, 250}, - {168, 121, 154, 213, 146, 254, 220, 250}, - {169, 121, 154, 214, 146, 254, 220, 250}, - {171, 122, 155, 215, 147, 254, 221, 250}, - {172, 122, 155, 216, 147, 254, 221, 250}, - {173, 123, 155, 217, 148, 254, 222, 250}, - {174, 123, 155, 217, 149, 254, 222, 250}, - {176, 124, 156, 218, 150, 254, 223, 250}, - {177, 124, 156, 219, 150, 254, 223, 250}, - {178, 125, 157, 220, 151, 254, 224, 251}, - {179, 125, 157, 220, 151, 254, 224, 251}, - {180, 126, 157, 221, 152, 254, 225, 251}, - {181, 126, 157, 221, 152, 254, 225, 251}, - {183, 127, 158, 222, 153, 254, 226, 251}, - {184, 127, 158, 223, 154, 254, 226, 251}, - {185, 128, 159, 224, 155, 255, 227, 251}, - {186, 128, 159, 224, 155, 255, 227, 251}, - {187, 129, 160, 225, 156, 255, 228, 251}, - {188, 130, 160, 225, 156, 255, 228, 251}, - {189, 131, 160, 226, 157, 255, 228, 251}, - {190, 131, 160, 226, 158, 255, 228, 251}, - {191, 132, 161, 227, 159, 255, 229, 251}, - {192, 132, 161, 227, 159, 255, 229, 251}, - {193, 133, 162, 228, 160, 255, 230, 252}, - {194, 133, 162, 229, 160, 255, 230, 252}, - {195, 134, 163, 230, 161, 255, 231, 252}, - {196, 134, 163, 230, 161, 255, 231, 252}, - {197, 135, 163, 231, 162, 255, 231, 252}, - {198, 135, 163, 231, 162, 255, 231, 252}, - {199, 136, 164, 232, 163, 255, 232, 252}, - {200, 136, 164, 232, 164, 255, 232, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {202, 138, 166, 233, 166, 255, 233, 252}, - {203, 138, 166, 233, 166, 255, 233, 252}, - {204, 139, 166, 234, 167, 255, 234, 252}, - {205, 139, 166, 234, 167, 255, 234, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {207, 141, 168, 236, 169, 255, 235, 252}, - {208, 141, 168, 236, 170, 255, 235, 252}, - {209, 142, 169, 237, 171, 255, 236, 252}, - {209, 143, 169, 237, 171, 255, 236, 252}, - {210, 144, 169, 237, 172, 255, 236, 252}, - {211, 144, 169, 237, 172, 255, 236, 252}, - {212, 145, 170, 238, 173, 255, 237, 252}, - {213, 145, 170, 238, 173, 255, 237, 252}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {216, 148, 173, 240, 176, 255, 238, 253}, - {217, 148, 173, 240, 176, 255, 238, 253}, - {218, 149, 173, 241, 177, 255, 239, 253}, - {218, 149, 173, 241, 178, 255, 239, 253}, - {219, 150, 174, 241, 179, 255, 239, 253}, - {219, 151, 174, 241, 179, 255, 239, 253}, - {220, 152, 175, 242, 180, 255, 240, 253}, - {221, 152, 175, 242, 180, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {225, 156, 178, 244, 184, 255, 241, 253}, - {225, 157, 178, 244, 184, 255, 241, 253}, - {226, 158, 179, 244, 185, 255, 242, 253}, - {227, 158, 179, 244, 185, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {230, 161, 182, 246, 188, 255, 243, 253}, - {230, 162, 182, 246, 188, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {234, 166, 185, 247, 192, 255, 244, 253}, - {234, 167, 185, 247, 192, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {237, 171, 189, 249, 196, 255, 245, 254}, - {237, 172, 189, 249, 196, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {240, 175, 192, 249, 199, 255, 246, 254}, - {240, 176, 192, 249, 199, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {242, 179, 195, 250, 202, 255, 246, 254}, - {242, 180, 195, 250, 202, 255, 246, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {243, 182, 197, 251, 204, 255, 247, 254}, - {243, 183, 197, 251, 204, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {245, 186, 200, 251, 207, 255, 247, 254}, - {245, 187, 200, 251, 207, 255, 247, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 189, 202, 252, 208, 255, 248, 254}, - {246, 190, 202, 252, 208, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 192, 204, 252, 210, 255, 248, 254}, - {247, 193, 204, 252, 210, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 195, 206, 252, 212, 255, 249, 254}, - {248, 196, 206, 252, 212, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 198, 208, 253, 214, 255, 249, 254}, - {249, 199, 209, 253, 214, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 201, 211, 253, 215, 255, 249, 254}, - {250, 202, 211, 253, 215, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {251, 204, 213, 253, 217, 255, 250, 254}, - {251, 205, 213, 253, 217, 255, 250, 254}, - {251, 206, 214, 254, 218, 255, 250, 254}, - {251, 206, 215, 254, 218, 255, 250, 254}, - {252, 207, 216, 254, 219, 255, 250, 254}, - {252, 208, 216, 254, 219, 255, 250, 254}, - {252, 209, 217, 254, 220, 255, 250, 254}, - {252, 210, 217, 254, 220, 255, 250, 254}, - {252, 211, 218, 254, 221, 255, 250, 254}, - {252, 212, 218, 254, 221, 255, 250, 254}, - {253, 213, 219, 254, 222, 255, 250, 254}, - {253, 213, 220, 254, 222, 255, 250, 254}, - {253, 214, 221, 254, 223, 255, 250, 254}, - {253, 215, 221, 254, 223, 255, 250, 254}, - {253, 216, 222, 254, 224, 255, 251, 254}, - {253, 217, 223, 254, 224, 255, 251, 254}, - {253, 218, 224, 254, 225, 255, 251, 254}, - {253, 219, 224, 254, 225, 255, 251, 254}, - {254, 220, 225, 254, 225, 255, 251, 254}, - {254, 221, 226, 254, 225, 255, 251, 254}, - {254, 222, 227, 255, 226, 255, 251, 254}, - {254, 223, 227, 255, 226, 255, 251, 254}, - {254, 224, 228, 255, 227, 255, 251, 254}, - {254, 225, 229, 255, 227, 255, 251, 254}, - {254, 226, 230, 255, 228, 255, 251, 254}, - {254, 227, 230, 255, 229, 255, 251, 254}, - {255, 228, 231, 255, 230, 255, 251, 254}, - {255, 229, 232, 255, 230, 255, 251, 254}, - {255, 230, 233, 255, 231, 255, 252, 254}, - {255, 231, 234, 255, 231, 255, 252, 254}, - {255, 232, 235, 255, 232, 255, 252, 254}, - {255, 233, 236, 255, 232, 255, 252, 254}, - {255, 235, 237, 255, 233, 255, 252, 254}, - {255, 236, 238, 255, 234, 255, 252, 254}, - {255, 238, 240, 255, 235, 255, 252, 255}, - {255, 239, 241, 255, 235, 255, 252, 254}, - {255, 241, 243, 255, 236, 255, 252, 254}, - {255, 243, 245, 255, 237, 255, 252, 254}, - {255, 246, 247, 255, 239, 255, 253, 255}, + { 3, 86, 128, 6, 86, 23, 88, 29 }, + { 6, 86, 128, 11, 87, 42, 91, 52 }, + { 9, 86, 129, 17, 88, 61, 94, 76 }, + { 12, 86, 129, 22, 88, 77, 97, 93 }, + { 15, 87, 129, 28, 89, 93, 100, 110 }, + { 17, 87, 129, 33, 90, 105, 103, 123 }, + { 20, 88, 130, 38, 91, 118, 106, 136 }, + { 23, 88, 130, 43, 91, 128, 108, 146 }, + { 26, 89, 131, 48, 92, 139, 111, 156 }, + { 28, 89, 131, 53, 93, 147, 114, 163 }, + { 31, 90, 131, 58, 94, 156, 117, 171 }, + { 34, 90, 131, 62, 94, 163, 119, 177 }, + { 37, 90, 132, 66, 95, 171, 122, 184 }, + { 39, 90, 132, 70, 96, 177, 124, 189 }, + { 42, 91, 132, 75, 97, 183, 127, 194 }, + { 44, 91, 132, 79, 97, 188, 129, 198 }, + { 47, 92, 133, 83, 98, 193, 132, 202 }, + { 49, 92, 133, 86, 99, 197, 134, 205 }, + { 52, 93, 133, 90, 100, 201, 137, 208 }, + { 54, 93, 133, 94, 100, 204, 139, 211 }, + { 57, 94, 134, 98, 101, 208, 142, 214 }, + { 59, 94, 134, 101, 102, 211, 144, 216 }, + { 62, 94, 135, 105, 103, 214, 146, 218 }, + { 64, 94, 135, 108, 103, 216, 148, 220 }, + { 66, 95, 135, 111, 104, 219, 151, 222 }, + { 68, 95, 135, 114, 105, 221, 153, 223 }, + { 71, 96, 136, 117, 106, 224, 155, 225 }, + { 73, 96, 136, 120, 106, 225, 157, 226 }, + { 76, 97, 136, 123, 107, 227, 159, 228 }, + { 78, 97, 136, 126, 108, 229, 160, 229 }, + { 80, 98, 137, 129, 109, 231, 162, 231 }, + { 82, 98, 137, 131, 109, 232, 164, 232 }, + { 84, 98, 138, 134, 110, 234, 166, 233 }, + { 86, 98, 138, 137, 111, 235, 168, 234 }, + { 89, 99, 138, 140, 112, 236, 170, 235 }, + { 91, 99, 138, 142, 112, 237, 171, 235 }, + { 93, 100, 139, 145, 113, 238, 173, 236 }, + { 95, 100, 139, 147, 114, 239, 174, 237 }, + { 97, 101, 140, 149, 115, 240, 176, 238 }, + { 99, 101, 140, 151, 115, 241, 177, 238 }, + { 101, 102, 140, 154, 116, 242, 179, 239 }, + { 103, 102, 140, 156, 117, 242, 180, 239 }, + { 105, 103, 141, 158, 118, 243, 182, 240 }, + { 107, 103, 141, 160, 118, 243, 183, 240 }, + { 109, 104, 141, 162, 119, 244, 185, 241 }, + { 111, 104, 141, 164, 119, 244, 186, 241 }, + { 113, 104, 142, 166, 120, 245, 187, 242 }, + { 114, 104, 142, 168, 121, 245, 188, 242 }, + { 116, 105, 143, 170, 122, 246, 190, 243 }, + { 118, 105, 143, 171, 122, 246, 191, 243 }, + { 120, 106, 143, 173, 123, 247, 192, 244 }, + { 121, 106, 143, 175, 124, 247, 193, 244 }, + { 123, 107, 144, 177, 125, 248, 195, 244 }, + { 125, 107, 144, 178, 125, 248, 196, 244 }, + { 127, 108, 145, 180, 126, 249, 197, 245 }, + { 128, 108, 145, 181, 127, 249, 198, 245 }, + { 130, 109, 145, 183, 128, 249, 199, 245 }, + { 132, 109, 145, 184, 128, 249, 200, 245 }, + { 134, 110, 146, 186, 129, 250, 201, 246 }, + { 135, 110, 146, 187, 130, 250, 202, 246 }, + { 137, 111, 147, 189, 131, 251, 203, 246 }, + { 138, 111, 147, 190, 131, 251, 204, 246 }, + { 140, 112, 147, 192, 132, 251, 205, 247 }, + { 141, 112, 147, 193, 132, 251, 206, 247 }, + { 143, 113, 148, 194, 133, 251, 207, 247 }, + { 144, 113, 148, 195, 134, 251, 207, 247 }, + { 146, 114, 149, 197, 135, 252, 208, 248 }, + { 147, 114, 149, 198, 135, 252, 209, 248 }, + { 149, 115, 149, 199, 136, 252, 210, 248 }, + { 150, 115, 149, 200, 137, 252, 210, 248 }, + { 152, 115, 150, 201, 138, 252, 211, 248 }, + { 153, 115, 150, 202, 138, 252, 212, 248 }, + { 155, 116, 151, 204, 139, 253, 213, 249 }, + { 156, 116, 151, 205, 139, 253, 213, 249 }, + { 158, 117, 151, 206, 140, 253, 214, 249 }, + { 159, 117, 151, 207, 141, 253, 215, 249 }, + { 161, 118, 152, 208, 142, 253, 216, 249 }, + { 162, 118, 152, 209, 142, 253, 216, 249 }, + { 163, 119, 153, 210, 143, 253, 217, 249 }, + { 164, 119, 153, 211, 143, 253, 217, 249 }, + { 166, 120, 153, 212, 144, 254, 218, 250 }, + { 167, 120, 153, 212, 145, 254, 219, 250 }, + { 168, 121, 154, 213, 146, 254, 220, 250 }, + { 169, 121, 154, 214, 146, 254, 220, 250 }, + { 171, 122, 155, 215, 147, 254, 221, 250 }, + { 172, 122, 155, 216, 147, 254, 221, 250 }, + { 173, 123, 155, 217, 148, 254, 222, 250 }, + { 174, 123, 155, 217, 149, 254, 222, 250 }, + { 176, 124, 156, 218, 150, 254, 223, 250 }, + { 177, 124, 156, 219, 150, 254, 223, 250 }, + { 178, 125, 157, 220, 151, 254, 224, 251 }, + { 179, 125, 157, 220, 151, 254, 224, 251 }, + { 180, 126, 157, 221, 152, 254, 225, 251 }, + { 181, 126, 157, 221, 152, 254, 225, 251 }, + { 183, 127, 158, 222, 153, 254, 226, 251 }, + { 184, 127, 158, 223, 154, 254, 226, 251 }, + { 185, 128, 159, 224, 155, 255, 227, 251 }, + { 186, 128, 159, 224, 155, 255, 227, 251 }, + { 187, 129, 160, 225, 156, 255, 228, 251 }, + { 188, 130, 160, 225, 156, 255, 228, 251 }, + { 189, 131, 160, 226, 157, 255, 228, 251 }, + { 190, 131, 160, 226, 158, 255, 228, 251 }, + { 191, 132, 161, 227, 159, 255, 229, 251 }, + { 192, 132, 161, 227, 159, 255, 229, 251 }, + { 193, 133, 162, 228, 160, 255, 230, 252 }, + { 194, 133, 162, 229, 160, 255, 230, 252 }, + { 195, 134, 163, 230, 161, 255, 231, 252 }, + { 196, 134, 163, 230, 161, 255, 231, 252 }, + { 197, 135, 163, 231, 162, 255, 231, 252 }, + { 198, 135, 163, 231, 162, 255, 231, 252 }, + { 199, 136, 164, 232, 163, 255, 232, 252 }, + { 200, 136, 164, 232, 164, 255, 232, 252 }, + { 201, 137, 165, 233, 165, 255, 233, 252 }, + { 201, 137, 165, 233, 165, 255, 233, 252 }, + { 202, 138, 166, 233, 166, 255, 233, 252 }, + { 203, 138, 166, 233, 166, 255, 233, 252 }, + { 204, 139, 166, 234, 167, 255, 234, 252 }, + { 205, 139, 166, 234, 167, 255, 234, 252 }, + { 206, 140, 167, 235, 168, 255, 235, 252 }, + { 206, 140, 167, 235, 168, 255, 235, 252 }, + { 207, 141, 168, 236, 169, 255, 235, 252 }, + { 208, 141, 168, 236, 170, 255, 235, 252 }, + { 209, 142, 169, 237, 171, 255, 236, 252 }, + { 209, 143, 169, 237, 171, 255, 236, 252 }, + { 210, 144, 169, 237, 172, 255, 236, 252 }, + { 211, 144, 169, 237, 172, 255, 236, 252 }, + { 212, 145, 170, 238, 173, 255, 237, 252 }, + { 213, 145, 170, 238, 173, 255, 237, 252 }, + { 214, 146, 171, 239, 174, 255, 237, 253 }, + { 214, 146, 171, 239, 174, 255, 237, 253 }, + { 215, 147, 172, 240, 175, 255, 238, 253 }, + { 215, 147, 172, 240, 175, 255, 238, 253 }, + { 216, 148, 173, 240, 176, 255, 238, 253 }, + { 217, 148, 173, 240, 176, 255, 238, 253 }, + { 218, 149, 173, 241, 177, 255, 239, 253 }, + { 218, 149, 173, 241, 178, 255, 239, 253 }, + { 219, 150, 174, 241, 179, 255, 239, 253 }, + { 219, 151, 174, 241, 179, 255, 239, 253 }, + { 220, 152, 175, 242, 180, 255, 240, 253 }, + { 221, 152, 175, 242, 180, 255, 240, 253 }, + { 222, 153, 176, 242, 181, 255, 240, 253 }, + { 222, 153, 176, 242, 181, 255, 240, 253 }, + { 223, 154, 177, 243, 182, 255, 240, 253 }, + { 223, 154, 177, 243, 182, 255, 240, 253 }, + { 224, 155, 178, 244, 183, 255, 241, 253 }, + { 224, 155, 178, 244, 183, 255, 241, 253 }, + { 225, 156, 178, 244, 184, 255, 241, 253 }, + { 225, 157, 178, 244, 184, 255, 241, 253 }, + { 226, 158, 179, 244, 185, 255, 242, 253 }, + { 227, 158, 179, 244, 185, 255, 242, 253 }, + { 228, 159, 180, 245, 186, 255, 242, 253 }, + { 228, 159, 180, 245, 186, 255, 242, 253 }, + { 229, 160, 181, 245, 187, 255, 242, 253 }, + { 229, 160, 181, 245, 187, 255, 242, 253 }, + { 230, 161, 182, 246, 188, 255, 243, 253 }, + { 230, 162, 182, 246, 188, 255, 243, 253 }, + { 231, 163, 183, 246, 189, 255, 243, 253 }, + { 231, 163, 183, 246, 189, 255, 243, 253 }, + { 232, 164, 184, 247, 190, 255, 243, 253 }, + { 232, 164, 184, 247, 190, 255, 243, 253 }, + { 233, 165, 185, 247, 191, 255, 244, 253 }, + { 233, 165, 185, 247, 191, 255, 244, 253 }, + { 234, 166, 185, 247, 192, 255, 244, 253 }, + { 234, 167, 185, 247, 192, 255, 244, 253 }, + { 235, 168, 186, 248, 193, 255, 244, 253 }, + { 235, 168, 186, 248, 193, 255, 244, 253 }, + { 236, 169, 187, 248, 194, 255, 244, 253 }, + { 236, 169, 187, 248, 194, 255, 244, 253 }, + { 236, 170, 188, 248, 195, 255, 245, 253 }, + { 236, 170, 188, 248, 195, 255, 245, 253 }, + { 237, 171, 189, 249, 196, 255, 245, 254 }, + { 237, 172, 189, 249, 196, 255, 245, 254 }, + { 238, 173, 190, 249, 197, 255, 245, 254 }, + { 238, 173, 190, 249, 197, 255, 245, 254 }, + { 239, 174, 191, 249, 198, 255, 245, 254 }, + { 239, 174, 191, 249, 198, 255, 245, 254 }, + { 240, 175, 192, 249, 199, 255, 246, 254 }, + { 240, 176, 192, 249, 199, 255, 246, 254 }, + { 240, 177, 193, 250, 200, 255, 246, 254 }, + { 240, 177, 193, 250, 200, 255, 246, 254 }, + { 241, 178, 194, 250, 201, 255, 246, 254 }, + { 241, 178, 194, 250, 201, 255, 246, 254 }, + { 242, 179, 195, 250, 202, 255, 246, 254 }, + { 242, 180, 195, 250, 202, 255, 246, 254 }, + { 242, 181, 196, 250, 203, 255, 247, 254 }, + { 242, 181, 196, 250, 203, 255, 247, 254 }, + { 243, 182, 197, 251, 204, 255, 247, 254 }, + { 243, 183, 197, 251, 204, 255, 247, 254 }, + { 244, 184, 198, 251, 205, 255, 247, 254 }, + { 244, 184, 198, 251, 205, 255, 247, 254 }, + { 244, 185, 199, 251, 206, 255, 247, 254 }, + { 244, 185, 199, 251, 206, 255, 247, 254 }, + { 245, 186, 200, 251, 207, 255, 247, 254 }, + { 245, 187, 200, 251, 207, 255, 247, 254 }, + { 246, 188, 201, 252, 207, 255, 248, 254 }, + { 246, 188, 201, 252, 207, 255, 248, 254 }, + { 246, 189, 202, 252, 208, 255, 248, 254 }, + { 246, 190, 202, 252, 208, 255, 248, 254 }, + { 247, 191, 203, 252, 209, 255, 248, 254 }, + { 247, 191, 203, 252, 209, 255, 248, 254 }, + { 247, 192, 204, 252, 210, 255, 248, 254 }, + { 247, 193, 204, 252, 210, 255, 248, 254 }, + { 248, 194, 205, 252, 211, 255, 248, 254 }, + { 248, 194, 205, 252, 211, 255, 248, 254 }, + { 248, 195, 206, 252, 212, 255, 249, 254 }, + { 248, 196, 206, 252, 212, 255, 249, 254 }, + { 249, 197, 207, 253, 213, 255, 249, 254 }, + { 249, 197, 207, 253, 213, 255, 249, 254 }, + { 249, 198, 208, 253, 214, 255, 249, 254 }, + { 249, 199, 209, 253, 214, 255, 249, 254 }, + { 250, 200, 210, 253, 215, 255, 249, 254 }, + { 250, 200, 210, 253, 215, 255, 249, 254 }, + { 250, 201, 211, 253, 215, 255, 249, 254 }, + { 250, 202, 211, 253, 215, 255, 249, 254 }, + { 250, 203, 212, 253, 216, 255, 249, 254 }, + { 250, 203, 212, 253, 216, 255, 249, 254 }, + { 251, 204, 213, 253, 217, 255, 250, 254 }, + { 251, 205, 213, 253, 217, 255, 250, 254 }, + { 251, 206, 214, 254, 218, 255, 250, 254 }, + { 251, 206, 215, 254, 218, 255, 250, 254 }, + { 252, 207, 216, 254, 219, 255, 250, 254 }, + { 252, 208, 216, 254, 219, 255, 250, 254 }, + { 252, 209, 217, 254, 220, 255, 250, 254 }, + { 252, 210, 217, 254, 220, 255, 250, 254 }, + { 252, 211, 218, 254, 221, 255, 250, 254 }, + { 252, 212, 218, 254, 221, 255, 250, 254 }, + { 253, 213, 219, 254, 222, 255, 250, 254 }, + { 253, 213, 220, 254, 222, 255, 250, 254 }, + { 253, 214, 221, 254, 223, 255, 250, 254 }, + { 253, 215, 221, 254, 223, 255, 250, 254 }, + { 253, 216, 222, 254, 224, 255, 251, 254 }, + { 253, 217, 223, 254, 224, 255, 251, 254 }, + { 253, 218, 224, 254, 225, 255, 251, 254 }, + { 253, 219, 224, 254, 225, 255, 251, 254 }, + { 254, 220, 225, 254, 225, 255, 251, 254 }, + { 254, 221, 226, 254, 225, 255, 251, 254 }, + { 254, 222, 227, 255, 226, 255, 251, 254 }, + { 254, 223, 227, 255, 226, 255, 251, 254 }, + { 254, 224, 228, 255, 227, 255, 251, 254 }, + { 254, 225, 229, 255, 227, 255, 251, 254 }, + { 254, 226, 230, 255, 228, 255, 251, 254 }, + { 254, 227, 230, 255, 229, 255, 251, 254 }, + { 255, 228, 231, 255, 230, 255, 251, 254 }, + { 255, 229, 232, 255, 230, 255, 251, 254 }, + { 255, 230, 233, 255, 231, 255, 252, 254 }, + { 255, 231, 234, 255, 231, 255, 252, 254 }, + { 255, 232, 235, 255, 232, 255, 252, 254 }, + { 255, 233, 236, 255, 232, 255, 252, 254 }, + { 255, 235, 237, 255, 233, 255, 252, 254 }, + { 255, 236, 238, 255, 234, 255, 252, 254 }, + { 255, 238, 240, 255, 235, 255, 252, 255 }, + { 255, 239, 241, 255, 235, 255, 252, 254 }, + { 255, 241, 243, 255, 236, 255, 252, 254 }, + { 255, 243, 245, 255, 237, 255, 252, 254 }, + { 255, 246, 247, 255, 239, 255, 253, 255 }, }; #if CONFIG_ANS @@ -446,266 +413,266 @@ const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { // beta = 8 // Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here. // ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder. -const AnsP10 vp10_pareto8_token_probs[COEFF_PROB_MODELS] - [ENTROPY_TOKENS - 2] = { -{ 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 }, -{ 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 }, -{ 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 }, -{ 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 }, -{ 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 }, -{ 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 }, -{ 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 }, -{ 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 }, -{ 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 }, -{ 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 }, -{ 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 }, -{ 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 }, -{ 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 }, -{ 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 }, -{ 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 }, -{ 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 }, -{ 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 }, -{ 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 }, -{ 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 }, -{ 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 }, -{ 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 }, -{ 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 }, -{ 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 }, -{ 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 }, -{ 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 }, -{ 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 }, -{ 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 }, -{ 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 }, -{ 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 }, -{ 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 }, -{ 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 }, -{ 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 }, -{ 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 }, -{ 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 }, -{ 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 }, -{ 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 }, -{ 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 }, -{ 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 }, -{ 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 }, -{ 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 }, -{ 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 }, -{ 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 }, -{ 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 }, -{ 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 }, -{ 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 }, -{ 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 }, -{ 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 }, -{ 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 }, -{ 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 }, -{ 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 }, -{ 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 }, -{ 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 }, -{ 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 }, -{ 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 }, -{ 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 }, -{ 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 }, -{ 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 }, -{ 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 }, -{ 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 }, -{ 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 }, -{ 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 }, -{ 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 }, -{ 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 }, -{ 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 }, -{ 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 }, -{ 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 }, -{ 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 }, -{ 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 }, -{ 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 }, -{ 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 }, -{ 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 }, -{ 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 }, -{ 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 }, -{ 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 }, -{ 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 }, -{ 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 }, -{ 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 }, -{ 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 }, -{ 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 }, -{ 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 }, -{ 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 }, -{ 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 }, -{ 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 }, -{ 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 }, -{ 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 }, -{ 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 }, -{ 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 }, -{ 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 }, -{ 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 }, -{ 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 }, -{ 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 }, -{ 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 }, -{ 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 }, -{ 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 }, -{ 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 }, -{ 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 }, -{ 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 }, -{ 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 }, -{ 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 }, -{ 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 }, -{ 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 }, -{ 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 }, -{ 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 }, -{ 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 }, -{ 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 }, -{ 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 }, -{ 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 }, -{ 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 }, -{ 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 }, -{ 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 }, -{ 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 }, -{ 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 }, -{ 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 }, -{ 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 }, -{ 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 }, -{ 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 }, -{ 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 }, -{ 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 }, -{ 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 }, -{ 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 }, -{ 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 }, -{ 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 }, -{ 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 }, -{ 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 }, -{ 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 }, -{ 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 }, -{ 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 }, -{ 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 }, -{ 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 }, -{ 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 }, -{ 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 }, -{ 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 }, -{ 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 }, -{ 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 }, -{ 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 }, -{ 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 }, -{ 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 }, -{ 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 }, -{ 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 }, -{ 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 }, -{ 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 }, -{ 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 }, -{ 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 }, -{ 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 }, -{ 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 }, -{ 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 }, -{ 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 }, -{ 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 }, -{ 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 }, -{ 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 }, -{ 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 }, -{ 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 }, -{ 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 }, -{ 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 }, -{ 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 }, -{ 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 }, -{ 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 }, -{ 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 }, -{ 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 }, -{ 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 }, -{ 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 }, -{ 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 }, -{ 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 }, -{ 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 }, -{ 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 }, -{ 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 }, -{ 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 }, -{ 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 }, -{ 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 }, -{ 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 }, -{ 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 }, -{ 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 }, -{ 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 }, -{ 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 }, -{ 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 }, -{ 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 }, -{ 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 }, -{ 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 }, -{ 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 }, -{ 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 }, -{ 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 }, -{ 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 }, -{ 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 }, -{ 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 }, -{ 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 }, -{ 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 }, -{ 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 }, -{ 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 }, -{ 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 }, -{ 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 }, -{ 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 }, -{ 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 }, -{ 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 }, -{ 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 }, -{ 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 }, -{ 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 }, -{ 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 }, -{ 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 }, -{ 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 }, -{ 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 }, -{ 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 }, -{ 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 }, -{ 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 }, -{ 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 }, -{ 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 }, -{ 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 }, -{ 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 }, -{ 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 }, -{ 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 }, -{ 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 }, -{ 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 }, -{ 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 }, -{ 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 }, -{ 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 }, -{ 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 }, -{ 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 }, -{ 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 }, -{ 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 }, -{ 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 }, -{ 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 }, -{ 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 }, -{ 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 }, -{ 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 }, -{ 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 }, -{ 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 }, -{ 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 }, -{ 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 }, -{ 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 }, -{ 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 }, -{ 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 }, -{ 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 }, -{ 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 }, -{ 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 }, -{ 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 }, -{ 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 }, -{ 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 }, -{ 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 }, -{ 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 }, -{ 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 }, -{ 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 }, -{ 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 }, -{ 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 }, -{ 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 }, -{ 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 }, -{ 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 }, -{ 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 }, -{ 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 }, -{ 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 }, -{ 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 }, -{ 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 }, -{ 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 }, -{ 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 }, -{ 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 }, -{ 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, -{ 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, +const AnsP10 vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2] = { + { 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 }, + { 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 }, + { 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 }, + { 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 }, + { 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 }, + { 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 }, + { 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 }, + { 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 }, + { 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 }, + { 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 }, + { 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 }, + { 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 }, + { 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 }, + { 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 }, + { 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 }, + { 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 }, + { 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 }, + { 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 }, + { 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 }, + { 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 }, + { 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 }, + { 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 }, + { 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 }, + { 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 }, + { 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 }, + { 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 }, + { 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 }, + { 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 }, + { 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 }, + { 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 }, + { 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 }, + { 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 }, + { 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 }, + { 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 }, + { 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 }, + { 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 }, + { 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 }, + { 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 }, + { 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 }, + { 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 }, + { 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 }, + { 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 }, + { 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 }, + { 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 }, + { 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 }, + { 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 }, + { 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 }, + { 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 }, + { 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 }, + { 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 }, + { 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 }, + { 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 }, + { 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 }, + { 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 }, + { 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 }, + { 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 }, + { 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 }, + { 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 }, + { 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 }, + { 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 }, + { 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 }, + { 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 }, + { 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 }, + { 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 }, + { 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 }, + { 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 }, + { 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 }, + { 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 }, + { 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 }, + { 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 }, + { 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 }, + { 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 }, + { 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 }, + { 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 }, + { 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 }, + { 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 }, + { 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 }, + { 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 }, + { 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 }, + { 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 }, + { 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 }, + { 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 }, + { 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 }, + { 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 }, + { 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 }, + { 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 }, + { 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 }, + { 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 }, + { 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 }, + { 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 }, + { 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 }, + { 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 }, + { 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 }, + { 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 }, + { 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 }, + { 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 }, + { 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 }, + { 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 }, + { 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 }, + { 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 }, + { 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 }, + { 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 }, + { 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 }, + { 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 }, + { 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 }, + { 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 }, + { 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 }, + { 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 }, + { 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 }, + { 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 }, + { 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 }, + { 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 }, + { 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 }, + { 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 }, + { 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 }, + { 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 }, + { 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 }, + { 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 }, + { 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 }, + { 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 }, + { 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 }, + { 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 }, + { 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 }, + { 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 }, + { 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 }, + { 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 }, + { 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 }, + { 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 }, + { 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 }, + { 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 }, + { 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 }, + { 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 }, + { 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 }, + { 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 }, + { 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 }, + { 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 }, + { 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 }, + { 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 }, + { 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 }, + { 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 }, + { 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 }, + { 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 }, + { 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 }, + { 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 }, + { 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 }, + { 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 }, + { 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 }, + { 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 }, + { 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 }, + { 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 }, + { 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 }, + { 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 }, + { 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 }, + { 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 }, + { 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 }, + { 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 }, + { 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 }, + { 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 }, + { 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 }, + { 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 }, + { 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 }, + { 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 }, + { 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 }, + { 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 }, + { 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 }, + { 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 }, + { 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 }, + { 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 }, + { 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 }, + { 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 }, + { 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 }, + { 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 }, + { 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 }, + { 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 }, + { 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 }, + { 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 }, + { 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 }, + { 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 }, + { 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 }, + { 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 }, + { 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 }, + { 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 }, + { 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 }, + { 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 }, + { 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 }, + { 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 }, + { 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 }, + { 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 }, + { 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 }, + { 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 }, + { 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 }, + { 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 }, + { 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 }, + { 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 }, + { 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 }, + { 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 }, + { 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 }, + { 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 }, + { 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 }, + { 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 }, + { 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 }, + { 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 }, + { 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 }, + { 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 }, + { 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 }, + { 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 }, + { 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 }, + { 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 }, + { 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 }, + { 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 }, + { 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 }, + { 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 }, + { 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 }, + { 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 }, + { 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 }, + { 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 }, + { 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 }, + { 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 }, + { 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 }, + { 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 }, + { 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 }, + { 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 }, + { 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 }, + { 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 }, + { 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 }, + { 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 }, + { 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 }, + { 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 }, + { 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 }, + { 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 }, + { 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 }, + { 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 }, + { 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 }, + { 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 }, + { 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 }, + { 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 }, + { 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 }, + { 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 }, + { 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 }, + { 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 }, + { 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 }, + { 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 }, + { 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 }, + { 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 }, + { 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 }, + { 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 }, + { 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 }, + { 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 }, + { 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 }, + { 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, }; #endif // CONFIG_ANS +/* clang-format off */ #if CONFIG_ENTROPY const vp10_coeff_probs_model default_qctx_coef_probs[QCTX_BINS][TX_SIZES][PLANE_TYPES] = { @@ -2815,6 +2782,7 @@ static const vp10_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { } }; #endif // CONFIG_ENTROPY +/* clang-format on */ static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { assert(p != 0); @@ -2854,9 +2822,8 @@ void vp10_coef_pareto_cdfs(FRAME_CONTEXT *fc) { void vp10_default_coef_probs(VP10_COMMON *cm) { #if CONFIG_ENTROPY - const int index = - VPXMIN(ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS), - QCTX_BINS - 1); + const int index = VPXMIN( + ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS), QCTX_BINS - 1); vp10_copy(cm->fc->coef_probs, default_qctx_coef_probs[index]); #else vp10_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4); @@ -2875,17 +2842,18 @@ static void adapt_coef_probs(VP10_COMMON *cm, TX_SIZE tx_size, const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; vp10_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size]; #if CONFIG_ENTROPY - const vp10_coeff_probs_model *const pre_probs = cm->partial_prob_update ? - (const vp10_coeff_probs_model *)cm->starting_coef_probs[tx_size] : - pre_fc->coef_probs[tx_size]; + const vp10_coeff_probs_model *const pre_probs = + cm->partial_prob_update + ? (const vp10_coeff_probs_model *)cm->starting_coef_probs[tx_size] + : pre_fc->coef_probs[tx_size]; #else const vp10_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; #endif // CONFIG_ENTROPY const vp10_coeff_count_model *const counts = (const vp10_coeff_count_model *)cm->counts.coef[tx_size]; - const unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = - (const unsigned int (*)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]) - cm->counts.eob_branch[tx_size]; + const unsigned int(*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = + (const unsigned int(*)[ + REF_TYPES][COEF_BANDS][COEFF_CONTEXTS])cm->counts.eob_branch[tx_size]; int i, j, k, l, m; for (i = 0; i < PLANE_TYPES; ++i) @@ -2897,14 +2865,12 @@ static void adapt_coef_probs(VP10_COMMON *cm, TX_SIZE tx_size, const int n2 = counts[i][j][k][l][TWO_TOKEN]; const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { - { neob, eob_counts[i][j][k][l] - neob }, - { n0, n1 + n2 }, - { n1, n2 } + { neob, eob_counts[i][j][k][l] - neob }, { n0, n1 + n2 }, { n1, n2 } }; for (m = 0; m < UNCONSTRAINED_NODES; ++m) - probs[i][j][k][l][m] = vp10_merge_probs(pre_probs[i][j][k][l][m], - branch_ct[m], - count_sat, update_factor); + probs[i][j][k][l][m] = + vp10_merge_probs(pre_probs[i][j][k][l][m], branch_ct[m], + count_sat, update_factor); } } @@ -2914,7 +2880,7 @@ void vp10_adapt_coef_probs(VP10_COMMON *cm) { #if CONFIG_ENTROPY if (cm->last_frame_type == KEY_FRAME) { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */ + update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */ count_sat = COEF_COUNT_SAT_AFTER_KEY_BITS; } else { update_factor = COEF_MAX_UPDATE_FACTOR_BITS; @@ -2925,7 +2891,7 @@ void vp10_adapt_coef_probs(VP10_COMMON *cm) { } #else if (cm->last_frame_type == KEY_FRAME) { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ + update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ count_sat = COEF_COUNT_SAT_AFTER_KEY; } else { update_factor = COEF_MAX_UPDATE_FACTOR; diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h index baaa5151f230a599f5298e82aa612f4766d851fa..19ff33526eb9f452d893005018504103fbc54bb7 100644 --- a/vp10/common/entropy.h +++ b/vp10/common/entropy.h @@ -24,7 +24,7 @@ extern "C" { #endif -#define DIFF_UPDATE_PROB 252 +#define DIFF_UPDATE_PROB 252 #define GROUP_DIFF_UPDATE_PROB 252 #if CONFIG_ENTROPY @@ -34,18 +34,18 @@ extern "C" { #endif // CONFIG_ENTROPY // Coefficient token alphabet -#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 -#define ONE_TOKEN 1 // 1 Extra Bits 0+1 -#define TWO_TOKEN 2 // 2 Extra Bits 0+1 -#define THREE_TOKEN 3 // 3 Extra Bits 0+1 -#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 +#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 +#define ONE_TOKEN 1 // 1 Extra Bits 0+1 +#define TWO_TOKEN 2 // 2 Extra Bits 0+1 +#define THREE_TOKEN 3 // 3 Extra Bits 0+1 +#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 #define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 #define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 #define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 #define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 #define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 #define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 -#define EOB_TOKEN 11 // EOB Extra Bits 0+0 +#define EOB_TOKEN 11 // EOB Extra Bits 0+0 #define ENTROPY_TOKENS 12 @@ -53,12 +53,12 @@ extern "C" { DECLARE_ALIGNED(16, extern const uint8_t, vp10_pt_energy_class[ENTROPY_TOKENS]); -#define CAT1_MIN_VAL 5 -#define CAT2_MIN_VAL 7 -#define CAT3_MIN_VAL 11 -#define CAT4_MIN_VAL 19 -#define CAT5_MIN_VAL 35 -#define CAT6_MIN_VAL 67 +#define CAT1_MIN_VAL 5 +#define CAT2_MIN_VAL 7 +#define CAT3_MIN_VAL 11 +#define CAT4_MIN_VAL 19 +#define CAT5_MIN_VAL 35 +#define CAT6_MIN_VAL 67 // Extra bit probabilities. DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob[1]); @@ -100,10 +100,10 @@ extern const vp10_extra_bit vp10_extra_bits_high10[ENTROPY_TOKENS]; extern const vp10_extra_bit vp10_extra_bits_high12[ENTROPY_TOKENS]; #endif // CONFIG_VP9_HIGHBITDEPTH -#define DCT_MAX_VALUE 16384 +#define DCT_MAX_VALUE 16384 #if CONFIG_VP9_HIGHBITDEPTH -#define DCT_MAX_VALUE_HIGH10 65536 -#define DCT_MAX_VALUE_HIGH12 262144 +#define DCT_MAX_VALUE_HIGH10 65536 +#define DCT_MAX_VALUE_HIGH12 262144 #endif // CONFIG_VP9_HIGHBITDEPTH /* Coefficients are predicted via a 3-dimensional probability table. */ @@ -134,13 +134,13 @@ extern const vp10_extra_bit vp10_extra_bits_high12[ENTROPY_TOKENS]; // #define ENTROPY_STATS -typedef unsigned int vp10_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] - [ENTROPY_TOKENS]; -typedef unsigned int vp10_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] - [ENTROPY_NODES][2]; +typedef unsigned int + vp10_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][ENTROPY_TOKENS]; +typedef unsigned int + vp10_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][ENTROPY_NODES][2]; -#define SUBEXP_PARAM 4 /* Subexponential code parameter */ -#define MODULUS_PARAM 13 /* Modulus parameter */ +#define SUBEXP_PARAM 4 /* Subexponential code parameter */ +#define MODULUS_PARAM 13 /* Modulus parameter */ struct VP10Common; void vp10_default_coef_probs(struct VP10Common *cm); @@ -160,22 +160,18 @@ DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x4_4x8[32]); #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]); -DECLARE_ALIGNED(16, extern const uint16_t, - band_count_table[TX_SIZES_ALL][8]); +DECLARE_ALIGNED(16, extern const uint16_t, band_count_table[TX_SIZES_ALL][8]); DECLARE_ALIGNED(16, extern const uint16_t, band_cum_count_table[TX_SIZES_ALL][8]); static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { switch (tx_size) { - case TX_4X4: - return vp10_coefband_trans_4x4; + case TX_4X4: return vp10_coefband_trans_4x4; #if CONFIG_EXT_TX case TX_4X8: - case TX_8X4: - return vp10_coefband_trans_8x4_4x8; + case TX_8X4: return vp10_coefband_trans_8x4_4x8; #endif // CONFIG_EXT_TX - default: - return vp10_coefband_trans_8x8plus; + default: return vp10_coefband_trans_8x8plus; } } @@ -185,9 +181,9 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { #define COEFF_PROB_MODELS 255 -#define UNCONSTRAINED_NODES 3 +#define UNCONSTRAINED_NODES 3 -#define PIVOT_NODE 2 // which node is pivot +#define PIVOT_NODE 2 // which node is pivot #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) extern const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; @@ -199,12 +195,11 @@ extern const AnsP10 typedef rans_dec_lut coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]; #endif // CONFIG_ANS -typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS][UNCONSTRAINED_NODES]; +typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [UNCONSTRAINED_NODES]; -typedef unsigned int vp10_coeff_count_model[REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS] - [UNCONSTRAINED_NODES + 1]; +typedef unsigned int vp10_coeff_count_model + [REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; void vp10_model_to_full_probs(const vpx_prob *model, vpx_prob *full); @@ -215,8 +210,7 @@ static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, return (a != 0) + (b != 0); } -static INLINE int get_entropy_context(TX_SIZE tx_size, - const ENTROPY_CONTEXT *a, +static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) { ENTROPY_CONTEXT above_ec = 0, left_ec = 0; @@ -237,19 +231,17 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, #endif // CONFIG_EXT_TX case TX_8X8: above_ec = !!*(const uint16_t *)a; - left_ec = !!*(const uint16_t *)l; + left_ec = !!*(const uint16_t *)l; break; case TX_16X16: above_ec = !!*(const uint32_t *)a; - left_ec = !!*(const uint32_t *)l; + left_ec = !!*(const uint32_t *)l; break; case TX_32X32: above_ec = !!*(const uint64_t *)a; - left_ec = !!*(const uint64_t *)l; - break; - default: - assert(0 && "Invalid transform size."); + left_ec = !!*(const uint64_t *)l; break; + default: assert(0 && "Invalid transform size."); break; } return combine_entropy_contexts(above_ec, left_ec); } @@ -260,12 +252,12 @@ void vp10_coef_pareto_cdfs(struct frame_contexts *fc); #endif // CONFIG_ANS #if CONFIG_ENTROPY -#define COEF_COUNT_SAT_BITS 5 -#define COEF_MAX_UPDATE_FACTOR_BITS 7 -#define COEF_COUNT_SAT_AFTER_KEY_BITS 5 +#define COEF_COUNT_SAT_BITS 5 +#define COEF_MAX_UPDATE_FACTOR_BITS 7 +#define COEF_COUNT_SAT_AFTER_KEY_BITS 5 #define COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS 7 -#define MODE_MV_COUNT_SAT_BITS 5 -#define MODE_MV_MAX_UPDATE_FACTOR_BITS 7 +#define MODE_MV_COUNT_SAT_BITS 5 +#define MODE_MV_MAX_UPDATE_FACTOR_BITS 7 #else @@ -284,8 +276,7 @@ static INLINE vpx_prob vp10_merge_probs(vpx_prob pre_prob, const vpx_prob prob = get_binary_prob(ct[0], ct[1]); const unsigned int count = VPXMIN(ct[0] + ct[1], (unsigned int)(1 << count_sat)); - const unsigned int factor = - count << (max_update_factor - count_sat); + const unsigned int factor = count << (max_update_factor - count_sat); return weighted_prob(pre_prob, prob, factor); #else return merge_probs(pre_prob, ct, count_sat, max_update_factor); @@ -295,8 +286,7 @@ static INLINE vpx_prob vp10_merge_probs(vpx_prob pre_prob, static INLINE vpx_prob vp10_mode_mv_merge_probs(vpx_prob pre_prob, const unsigned int ct[2]) { #if CONFIG_ENTROPY - return vp10_merge_probs(pre_prob, ct, - MODE_MV_COUNT_SAT_BITS, + return vp10_merge_probs(pre_prob, ct, MODE_MV_COUNT_SAT_BITS, MODE_MV_MAX_UPDATE_FACTOR_BITS); #else return mode_mv_merge_probs(pre_prob, ct); diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index 8fb88b21c8b95f72fcce746caff342bdec6ac106..411f4fb08fb86287f5990172dd149a974e58a425 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -14,259 +14,276 @@ #include "vp10/common/onyxc_int.h" #include "vp10/common/seg_common.h" -const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { - { // above = dc - { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc - { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v - { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h - { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 - { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 - { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 - { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 - { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 - { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 - { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm - }, { // above = v - { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc - { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v - { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h - { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 - { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 - { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 - { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 - { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 - { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 - { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm - }, { // above = h - { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc - { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v - { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h - { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 - { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 - { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 - { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 - { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 - { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 - { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm - }, { // above = d45 - { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc - { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v - { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h - { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 - { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 - { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 - { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 - { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 - { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 - { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm - }, { // above = d135 - { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc - { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v - { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h - { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 - { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 - { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 - { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 - { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 - { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 - { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm - }, { // above = d117 - { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc - { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v - { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h - { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 - { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 - { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 - { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 - { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 - { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 - { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm - }, { // above = d153 - { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc - { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v - { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h - { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 - { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 - { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 - { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 - { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 - { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 - { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm - }, { // above = d207 - { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc - { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v - { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h - { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 - { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 - { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 - { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 - { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 - { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 - { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm - }, { // above = d63 - { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc - { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v - { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h - { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 - { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 - { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 - { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 - { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207 - { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 - { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm - }, { // above = tm - { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc - { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v - { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h - { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 - { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 - { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 - { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 - { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 - { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 - { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm - } -}; +const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = + { { + // above = dc + { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc + { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v + { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h + { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 + { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 + { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 + { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 + { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 + { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 + { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm + }, + { + // above = v + { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc + { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v + { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h + { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 + { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 + { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 + { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 + { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 + { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 + { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm + }, + { + // above = h + { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc + { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v + { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h + { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 + { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 + { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 + { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 + { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 + { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 + { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm + }, + { + // above = d45 + { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc + { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v + { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h + { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 + { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 + { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 + { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 + { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 + { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 + { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm + }, + { + // above = d135 + { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc + { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v + { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h + { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 + { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 + { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 + { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 + { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 + { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 + { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm + }, + { + // above = d117 + { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc + { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v + { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h + { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 + { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 + { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 + { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 + { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 + { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 + { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm + }, + { + // above = d153 + { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc + { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v + { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h + { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 + { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 + { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 + { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 + { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 + { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 + { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm + }, + { + // above = d207 + { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc + { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v + { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h + { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 + { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 + { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 + { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 + { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 + { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 + { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm + }, + { + // above = d63 + { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc + { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v + { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h + { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 + { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 + { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 + { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 + { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207 + { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 + { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm + }, + { + // above = tm + { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc + { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v + { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h + { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 + { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 + { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 + { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 + { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 + { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 + { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm + } }; static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 - { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 - { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 + { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 + { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 + { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 }; static const vpx_prob default_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { - { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc - { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v - { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h - { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 - { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 - { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 - { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 - { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 - { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 - { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm + { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc + { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v + { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h + { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 + { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 + { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 + { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 + { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 + { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 + { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm }; #if CONFIG_EXT_PARTITION_TYPES -static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] - [EXT_PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 199, 122, 141, 128, 128, 128, 128 }, // a/l both not split - { 147, 63, 159, 128, 128, 128, 128 }, // a split, l not split - { 148, 133, 118, 128, 128, 128, 128 }, // l split, a not split - { 121, 104, 114, 128, 128, 128, 128 }, // a/l both split - // 16x16 -> 8x8 - { 174, 73, 87, 128, 128, 128, 128 }, // a/l both not split - { 92, 41, 83, 128, 128, 128, 128 }, // a split, l not split - { 82, 99, 50, 128, 128, 128, 128 }, // l split, a not split - { 53, 39, 39, 128, 128, 128, 128 }, // a/l both split - // 32x32 -> 16x16 - { 177, 58, 59, 128, 128, 128, 128 }, // a/l both not split - { 68, 26, 63, 128, 128, 128, 128 }, // a split, l not split - { 52, 79, 25, 128, 128, 128, 128 }, // l split, a not split - { 17, 14, 12, 128, 128, 128, 128 }, // a/l both split - // 64x64 -> 32x32 - { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split - { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split - { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split - { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split +static const vpx_prob + default_partition_probs[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 199, 122, 141, 128, 128, 128, 128 }, // a/l both not split + { 147, 63, 159, 128, 128, 128, 128 }, // a split, l not split + { 148, 133, 118, 128, 128, 128, 128 }, // l split, a not split + { 121, 104, 114, 128, 128, 128, 128 }, // a/l both split + // 16x16 -> 8x8 + { 174, 73, 87, 128, 128, 128, 128 }, // a/l both not split + { 92, 41, 83, 128, 128, 128, 128 }, // a split, l not split + { 82, 99, 50, 128, 128, 128, 128 }, // l split, a not split + { 53, 39, 39, 128, 128, 128, 128 }, // a/l both split + // 32x32 -> 16x16 + { 177, 58, 59, 128, 128, 128, 128 }, // a/l both not split + { 68, 26, 63, 128, 128, 128, 128 }, // a split, l not split + { 52, 79, 25, 128, 128, 128, 128 }, // l split, a not split + { 17, 14, 12, 128, 128, 128, 128 }, // a/l both split + // 64x64 -> 32x32 + { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split + { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split + { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split + { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split #if CONFIG_EXT_PARTITION - // 128x128 -> 64x64 - { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split - { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split - { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split - { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split -#endif // CONFIG_EXT_PARTITION -}; + // 128x128 -> 64x64 + { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split + { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split + { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split + { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split +#endif // CONFIG_EXT_PARTITION + }; #else -static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 199, 122, 141 }, // a/l both not split - { 147, 63, 159 }, // a split, l not split - { 148, 133, 118 }, // l split, a not split - { 121, 104, 114 }, // a/l both split - // 16x16 -> 8x8 - { 174, 73, 87 }, // a/l both not split - { 92, 41, 83 }, // a split, l not split - { 82, 99, 50 }, // l split, a not split - { 53, 39, 39 }, // a/l both split - // 32x32 -> 16x16 - { 177, 58, 59 }, // a/l both not split - { 68, 26, 63 }, // a split, l not split - { 52, 79, 25 }, // l split, a not split - { 17, 14, 12 }, // a/l both split - // 64x64 -> 32x32 - { 222, 34, 30 }, // a/l both not split - { 72, 16, 44 }, // a split, l not split - { 58, 32, 12 }, // l split, a not split - { 10, 7, 6 }, // a/l both split +static const vpx_prob + default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 199, 122, 141 }, // a/l both not split + { 147, 63, 159 }, // a split, l not split + { 148, 133, 118 }, // l split, a not split + { 121, 104, 114 }, // a/l both split + // 16x16 -> 8x8 + { 174, 73, 87 }, // a/l both not split + { 92, 41, 83 }, // a split, l not split + { 82, 99, 50 }, // l split, a not split + { 53, 39, 39 }, // a/l both split + // 32x32 -> 16x16 + { 177, 58, 59 }, // a/l both not split + { 68, 26, 63 }, // a split, l not split + { 52, 79, 25 }, // l split, a not split + { 17, 14, 12 }, // a/l both split + // 64x64 -> 32x32 + { 222, 34, 30 }, // a/l both not split + { 72, 16, 44 }, // a split, l not split + { 58, 32, 12 }, // l split, a not split + { 10, 7, 6 }, // a/l both split #if CONFIG_EXT_PARTITION - // 128x128 -> 64x64 - { 222, 34, 30 }, // a/l both not split - { 72, 16, 44 }, // a split, l not split - { 58, 32, 12 }, // l split, a not split - { 10, 7, 6 }, // a/l both split + // 128x128 -> 64x64 + { 222, 34, 30 }, // a/l both not split + { 72, 16, 44 }, // a split, l not split + { 58, 32, 12 }, // l split, a not split + { 10, 7, 6 }, // a/l both split #endif // CONFIG_EXT_PARTITION -}; + }; #endif // CONFIG_EXT_PARTITION_TYPES #if CONFIG_REF_MV static const vpx_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = { - 200, 180, 150, 150, 110, 70, 60, + 200, 180, 150, 150, 110, 70, 60, }; static const vpx_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS] = { - 192, 64, + 192, 64, }; static const vpx_prob default_refmv_prob[REFMV_MODE_CONTEXTS] = { - 220, 220, 200, 200, 180, 128, 30, 220, 30, + 220, 220, 200, 200, 180, 128, 30, 220, 30, }; -static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = { - 128, 160, 180, 128, 160 -}; +static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = { 128, 160, 180, + 128, 160 }; #if CONFIG_EXT_INTER static const vpx_prob default_new2mv_prob = 180; #endif // CONFIG_EXT_INTER #endif // CONFIG_REF_MV -static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] - [INTER_MODES - 1] = { +static const vpx_prob + default_inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] = { #if CONFIG_EXT_INTER - // TODO(zoeliu): To adjust the initial default probs - {2, 173, 34, 173}, // 0 = both zero mv - {7, 145, 85, 145}, // 1 = one zero mv + one a predicted mv - {7, 166, 63, 166}, // 2 = two predicted mvs - {7, 94, 66, 128}, // 3 = one predicted/zero and one new mv - {8, 64, 46, 128}, // 4 = two new mvs - {17, 81, 31, 128}, // 5 = one intra neighbour + x - {25, 29, 30, 96}, // 6 = two intra neighbours + // TODO(zoeliu): To adjust the initial default probs + { 2, 173, 34, 173 }, // 0 = both zero mv + { 7, 145, 85, 145 }, // 1 = one zero mv + one a predicted mv + { 7, 166, 63, 166 }, // 2 = two predicted mvs + { 7, 94, 66, 128 }, // 3 = one predicted/zero and one new mv + { 8, 64, 46, 128 }, // 4 = two new mvs + { 17, 81, 31, 128 }, // 5 = one intra neighbour + x + { 25, 29, 30, 96 }, // 6 = two intra neighbours #else - {2, 173, 34}, // 0 = both zero mv - {7, 145, 85}, // 1 = one zero mv + one a predicted mv - {7, 166, 63}, // 2 = two predicted mvs - {7, 94, 66}, // 3 = one predicted/zero and one new mv - {8, 64, 46}, // 4 = two new mvs - {17, 81, 31}, // 5 = one intra neighbour + x - {25, 29, 30}, // 6 = two intra neighbours + { 2, 173, 34 }, // 0 = both zero mv + { 7, 145, 85 }, // 1 = one zero mv + one a predicted mv + { 7, 166, 63 }, // 2 = two predicted mvs + { 7, 94, 66 }, // 3 = one predicted/zero and one new mv + { 8, 64, 46 }, // 4 = two new mvs + { 17, 81, 31 }, // 5 = one intra neighbour + x + { 25, 29, 30 }, // 6 = two intra neighbours #endif // CONFIG_EXT_INTER -}; + }; #if CONFIG_EXT_INTER static const vpx_prob default_inter_compound_mode_probs - [INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = { - { 2, 173, 68, 192, 64, 192, 128, 180, 180}, // 0 = both zero mv - { 7, 145, 160, 192, 64, 192, 128, 180, 180}, // 1 = 1 zero + 1 predicted - { 7, 166, 126, 192, 64, 192, 128, 180, 180}, // 2 = two predicted mvs - { 7, 94, 132, 192, 64, 192, 128, 180, 180}, // 3 = 1 pred/zero, 1 new - { 8, 64, 64, 192, 64, 192, 128, 180, 180}, // 4 = two new mvs - {17, 81, 52, 192, 64, 192, 128, 180, 180}, // 5 = one intra neighbour - {25, 29, 50, 192, 64, 192, 128, 180, 180}, // 6 = two intra neighbours -}; + [INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = { + { 2, 173, 68, 192, 64, 192, 128, 180, 180 }, // 0 = both zero mv + { 7, 145, 160, 192, 64, 192, 128, 180, 180 }, // 1 = 1 zero + 1 predicted + { 7, 166, 126, 192, 64, 192, 128, 180, 180 }, // 2 = two predicted mvs + { 7, 94, 132, 192, 64, 192, 128, 180, 180 }, // 3 = 1 pred/zero, 1 new + { 8, 64, 64, 192, 64, 192, 128, 180, 180 }, // 4 = two new mvs + { 17, 81, 52, 192, 64, 192, 128, 180, 180 }, // 5 = one intra neighbour + { 25, 29, 50, 192, 64, 192, 128, 180, 180 }, // 6 = two intra neighbours + }; static const vpx_prob default_interintra_prob[BLOCK_SIZE_GROUPS] = { 208, 208, 208, 208, @@ -274,11 +291,11 @@ static const vpx_prob default_interintra_prob[BLOCK_SIZE_GROUPS] = { static const vpx_prob default_interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 - { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 - { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 -}; + { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 + { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 + { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 + }; static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = { 208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240, @@ -300,79 +317,71 @@ static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = { const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = { -SIMPLE_TRANSLATION, -OBMC_CAUSAL }; -static -const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] = { - {255}, - {255}, {255}, {151}, - {153}, {144}, {178}, - {165}, {160}, {207}, - {195}, {168}, {244}, +static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] = + { + { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 }, + { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 }, #if CONFIG_EXT_PARTITION - {252}, {252}, {252}, + { 252 }, { 252 }, { 252 }, #endif // CONFIG_EXT_PARTITION -}; + }; #elif !CONFIG_OBMC && CONFIG_WARPED_MOTION const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = { -SIMPLE_TRANSLATION, -WARPED_CAUSAL }; -static -const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] = { - {255}, - {255}, {255}, {151}, - {153}, {144}, {178}, - {165}, {160}, {207}, - {195}, {168}, {244}, +static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] = + { + { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 }, + { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 }, #if CONFIG_EXT_PARTITION - {252}, {252}, {252}, + { 252 }, { 252 }, { 252 }, #endif // CONFIG_EXT_PARTITION -}; + }; #elif CONFIG_OBMC && CONFIG_WARPED_MOTION const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = { - -SIMPLE_TRANSLATION, 2, - -OBMC_CAUSAL, -WARPED_CAUSAL, + -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -WARPED_CAUSAL, }; -static -const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] = { - {255, 200}, - {255, 200}, {255, 200}, {151, 200}, - {153, 200}, {144, 200}, {178, 200}, - {165, 200}, {160, 200}, {207, 200}, - {195, 200}, {168, 200}, {244, 200}, +static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] = + { + { 255, 200 }, { 255, 200 }, { 255, 200 }, { 151, 200 }, { 153, 200 }, + { 144, 200 }, { 178, 200 }, { 165, 200 }, { 160, 200 }, { 207, 200 }, + { 195, 200 }, { 168, 200 }, { 244, 200 }, #if CONFIG_EXT_PARTITION - {252, 200}, {252, 200}, {252, 200}, + { 252, 200 }, { 252, 200 }, { 252, 200 }, #endif // CONFIG_EXT_PARTITION -}; + }; #endif // CONFIG_OBMC || !CONFIG_WARPED_MOTION /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { - -DC_PRED, 2, /* 0 = DC_NODE */ - -TM_PRED, 4, /* 1 = TM_NODE */ - -V_PRED, 6, /* 2 = V_NODE */ - 8, 12, /* 3 = COM_NODE */ - -H_PRED, 10, /* 4 = H_NODE */ - -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ - -D45_PRED, 14, /* 6 = D45_NODE */ - -D63_PRED, 16, /* 7 = D63_NODE */ - -D153_PRED, -D207_PRED /* 8 = D153_NODE */ + -DC_PRED, 2, /* 0 = DC_NODE */ + -TM_PRED, 4, /* 1 = TM_NODE */ + -V_PRED, 6, /* 2 = V_NODE */ + 8, 12, /* 3 = COM_NODE */ + -H_PRED, 10, /* 4 = H_NODE */ + -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ + -D45_PRED, 14, /* 6 = D45_NODE */ + -D63_PRED, 16, /* 7 = D63_NODE */ + -D153_PRED, -D207_PRED /* 8 = D153_NODE */ }; const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { - -INTER_OFFSET(ZEROMV), 2, + -INTER_OFFSET(ZEROMV), 2, -INTER_OFFSET(NEARESTMV), 4, #if CONFIG_EXT_INTER - -INTER_OFFSET(NEARMV), 6, - -INTER_OFFSET(NEWMV), -INTER_OFFSET(NEWFROMNEARMV) + -INTER_OFFSET(NEARMV), 6, + -INTER_OFFSET(NEWMV), -INTER_OFFSET(NEWFROMNEARMV) #else - -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) + -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) #endif // CONFIG_EXT_INTER }; #if CONFIG_EXT_INTER +/* clang-format off */ const vpx_tree_index vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = { -II_DC_PRED, 2, /* 0 = II_DC_NODE */ -II_TM_PRED, 4, /* 1 = II_TM_NODE */ @@ -386,7 +395,7 @@ const vpx_tree_index vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = { }; const vpx_tree_index vp10_inter_compound_mode_tree - [TREE_SIZE(INTER_COMPOUND_MODES)] = { + [TREE_SIZE(INTER_COMPOUND_MODES)] = { -INTER_COMPOUND_OFFSET(ZERO_ZEROMV), 2, -INTER_COMPOUND_OFFSET(NEAREST_NEARESTMV), 4, 6, -INTER_COMPOUND_OFFSET(NEW_NEWMV), @@ -398,15 +407,15 @@ const vpx_tree_index vp10_inter_compound_mode_tree -INTER_COMPOUND_OFFSET(NEAREST_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARESTMV), -INTER_COMPOUND_OFFSET(NEAR_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARMV) }; +/* clang-format on */ #endif // CONFIG_EXT_INTER const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { - -PARTITION_NONE, 2, - -PARTITION_HORZ, 4, - -PARTITION_VERT, -PARTITION_SPLIT + -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT }; #if CONFIG_EXT_PARTITION_TYPES +/* clang-format off */ const vpx_tree_index vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)] = { -PARTITION_NONE, 2, 6, 4, @@ -416,6 +425,7 @@ const vpx_tree_index vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)] = { -PARTITION_HORZ_A, -PARTITION_HORZ_B, -PARTITION_VERT_A, -PARTITION_VERT_B }; +/* clang-format on */ #endif // CONFIG_EXT_PARTITION_TYPES static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { @@ -423,15 +433,14 @@ static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { }; static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 + 239, 183, 119, 96, 41 }; - #if CONFIG_EXT_REFS static const vpx_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1] = { // TODO(zoeliu): To adjust the initial prob values. - { 33, 16, 16 }, - { 77, 74, 74 }, + { 33, 16, 16 }, + { 77, 74, 74 }, { 142, 142, 142 }, { 172, 170, 170 }, { 238, 247, 247 } @@ -447,431 +456,411 @@ static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = { static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = { #if CONFIG_EXT_REFS - { 33, 16, 16, 16, 16 }, - { 77, 74, 74, 74, 74 }, + { 33, 16, 16, 16, 16 }, + { 77, 74, 74, 74, 74 }, { 142, 142, 142, 142, 142 }, { 172, 170, 170, 170, 170 }, { 238, 247, 247, 247, 247 } #else - { 33, 16 }, - { 77, 74 }, - { 142, 142 }, - { 172, 170 }, - { 238, 247 } + { 33, 16 }, { 77, 74 }, { 142, 142 }, { 172, 170 }, { 238, 247 } #endif // CONFIG_EXT_REFS }; const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = { - -TWO_COLORS, 2, - -THREE_COLORS, 4, - -FOUR_COLORS, 6, - -FIVE_COLORS, 8, - -SIX_COLORS, 10, - -SEVEN_COLORS, -EIGHT_COLORS, + -TWO_COLORS, 2, -THREE_COLORS, 4, -FOUR_COLORS, 6, + -FIVE_COLORS, 8, -SIX_COLORS, 10, -SEVEN_COLORS, -EIGHT_COLORS, }; // TODO(huisu): tune these probs const vpx_prob -vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { - { 96, 89, 100, 64, 77, 130}, - { 22, 15, 44, 16, 34, 82}, - { 30, 19, 57, 18, 38, 86}, - { 94, 36, 104, 23, 43, 92}, - { 116, 76, 107, 46, 65, 105}, - { 112, 82, 94, 40, 70, 112}, - { 147, 124, 123, 58, 69, 103}, - { 180, 113, 136, 49, 45, 114}, - { 107, 70, 87, 49, 154, 156}, - { 98, 105, 142, 63, 64, 152}, + vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { + { 96, 89, 100, 64, 77, 130 }, { 22, 15, 44, 16, 34, 82 }, + { 30, 19, 57, 18, 38, 86 }, { 94, 36, 104, 23, 43, 92 }, + { 116, 76, 107, 46, 65, 105 }, { 112, 82, 94, 40, 70, 112 }, + { 147, 124, 123, 58, 69, 103 }, { 180, 113, 136, 49, 45, 114 }, + { 107, 70, 87, 49, 154, 156 }, { 98, 105, 142, 63, 64, 152 }, #if CONFIG_EXT_PARTITION - { 98, 105, 142, 63, 64, 152}, - { 98, 105, 142, 63, 64, 152}, - { 98, 105, 142, 63, 64, 152}, + { 98, 105, 142, 63, 64, 152 }, { 98, 105, 142, 63, 64, 152 }, + { 98, 105, 142, 63, 64, 152 }, #endif // CONFIG_EXT_PARTITION -}; + }; const vpx_prob -vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = { - { 160, 196, 228, 213, 175, 230}, - { 87, 148, 208, 141, 166, 163}, - { 72, 151, 204, 139, 155, 161}, - { 78, 135, 171, 104, 120, 173}, - { 59, 92, 131, 78, 92, 142}, - { 75, 118, 149, 84, 90, 128}, - { 89, 87, 92, 66, 66, 128}, - { 67, 53, 54, 55, 66, 93}, - { 120, 130, 83, 171, 75, 214}, - { 72, 55, 66, 68, 79, 107}, + vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = + { + { 160, 196, 228, 213, 175, 230 }, { 87, 148, 208, 141, 166, 163 }, + { 72, 151, 204, 139, 155, 161 }, { 78, 135, 171, 104, 120, 173 }, + { 59, 92, 131, 78, 92, 142 }, { 75, 118, 149, 84, 90, 128 }, + { 89, 87, 92, 66, 66, 128 }, { 67, 53, 54, 55, 66, 93 }, + { 120, 130, 83, 171, 75, 214 }, { 72, 55, 66, 68, 79, 107 }, #if CONFIG_EXT_PARTITION - { 72, 55, 66, 68, 79, 107}, - { 72, 55, 66, 68, 79, 107}, - { 72, 55, 66, 68, 79, 107}, + { 72, 55, 66, 68, 79, 107 }, { 72, 55, 66, 68, 79, 107 }, + { 72, 55, 66, 68, 79, 107 }, #endif // CONFIG_EXT_PARTITION -}; - -const vpx_prob -vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] - = { - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, + }; + +const vpx_prob vp10_default_palette_y_mode_prob + [PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] = { + { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 }, + { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 }, + { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 }, + { 240, 180, 100 }, #if CONFIG_EXT_PARTITION - { 240, 180, 100, }, - { 240, 180, 100, }, - { 240, 180, 100, }, + { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 }, #endif // CONFIG_EXT_PARTITION -}; - + }; -const vpx_prob vp10_default_palette_uv_mode_prob[2] = { - 253, 229 -}; +const vpx_prob vp10_default_palette_uv_mode_prob[2] = { 253, 229 }; const vpx_tree_index -vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)] = { - { // 2 colors - -PALETTE_COLOR_ONE, -PALETTE_COLOR_TWO, - }, - { // 3 colors - -PALETTE_COLOR_ONE, 2, - -PALETTE_COLOR_TWO, -PALETTE_COLOR_THREE, - }, - { // 4 colors - -PALETTE_COLOR_ONE, 2, - -PALETTE_COLOR_TWO, 4, - -PALETTE_COLOR_THREE, -PALETTE_COLOR_FOUR, - }, - { // 5 colors - -PALETTE_COLOR_ONE, 2, - -PALETTE_COLOR_TWO, 4, - -PALETTE_COLOR_THREE, 6, - -PALETTE_COLOR_FOUR, -PALETTE_COLOR_FIVE, - }, - { // 6 colors - -PALETTE_COLOR_ONE, 2, - -PALETTE_COLOR_TWO, 4, - -PALETTE_COLOR_THREE, 6, - -PALETTE_COLOR_FOUR, 8, - -PALETTE_COLOR_FIVE, -PALETTE_COLOR_SIX, - }, - { // 7 colors - -PALETTE_COLOR_ONE, 2, - -PALETTE_COLOR_TWO, 4, - -PALETTE_COLOR_THREE, 6, - -PALETTE_COLOR_FOUR, 8, - -PALETTE_COLOR_FIVE, 10, - -PALETTE_COLOR_SIX, -PALETTE_COLOR_SEVEN, - }, - { // 8 colors - -PALETTE_COLOR_ONE, 2, - -PALETTE_COLOR_TWO, 4, - -PALETTE_COLOR_THREE, 6, - -PALETTE_COLOR_FOUR, 8, - -PALETTE_COLOR_FIVE, 10, - -PALETTE_COLOR_SIX, 12, - -PALETTE_COLOR_SEVEN, -PALETTE_COLOR_EIGHT, - }, -}; + vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)] = { + { // 2 colors + -PALETTE_COLOR_ONE, -PALETTE_COLOR_TWO }, + { // 3 colors + -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, -PALETTE_COLOR_THREE }, + { // 4 colors + -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, + -PALETTE_COLOR_FOUR }, + { // 5 colors + -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, -PALETTE_COLOR_FIVE }, + { // 6 colors + -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, -PALETTE_COLOR_SIX }, + { // 7 colors + -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, 10, -PALETTE_COLOR_SIX, + -PALETTE_COLOR_SEVEN }, + { // 8 colors + -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6, + -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, 10, -PALETTE_COLOR_SIX, 12, + -PALETTE_COLOR_SEVEN, -PALETTE_COLOR_EIGHT }, + }; const vpx_prob vp10_default_palette_y_color_prob -[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = { - { // 2 colors - { 230, 255, 128, 128, 128, 128, 128 }, - { 214, 255, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 240, 255, 128, 128, 128, 128, 128 }, - { 73, 255, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 130, 255, 128, 128, 128, 128, 128 }, - { 227, 255, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 188, 255, 128, 128, 128, 128, 128 }, - { 75, 255, 128, 128, 128, 128, 128 }, - { 250, 255, 128, 128, 128, 128, 128 }, - { 223, 255, 128, 128, 128, 128, 128 }, - { 252, 255, 128, 128, 128, 128, 128 }, - }, { // 3 colors - { 229, 137, 255, 128, 128, 128, 128 }, - { 197, 120, 255, 128, 128, 128, 128 }, - { 107, 195, 255, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 27, 151, 255, 128, 128, 128, 128 }, - { 230, 130, 255, 128, 128, 128, 128 }, - { 37, 230, 255, 128, 128, 128, 128 }, - { 67, 221, 255, 128, 128, 128, 128 }, - { 124, 230, 255, 128, 128, 128, 128 }, - { 195, 109, 255, 128, 128, 128, 128 }, - { 99, 122, 255, 128, 128, 128, 128 }, - { 205, 208, 255, 128, 128, 128, 128 }, - { 40, 235, 255, 128, 128, 128, 128 }, - { 251, 132, 255, 128, 128, 128, 128 }, - { 237, 186, 255, 128, 128, 128, 128 }, - { 253, 112, 255, 128, 128, 128, 128 }, - }, { // 4 colors - { 195, 87, 128, 255, 128, 128, 128 }, - { 143, 100, 123, 255, 128, 128, 128 }, - { 94, 124, 119, 255, 128, 128, 128 }, - { 77, 91, 130, 255, 128, 128, 128 }, - { 39, 114, 178, 255, 128, 128, 128 }, - { 222, 94, 125, 255, 128, 128, 128 }, - { 44, 203, 132, 255, 128, 128, 128 }, - { 68, 175, 122, 255, 128, 128, 128 }, - { 110, 187, 124, 255, 128, 128, 128 }, - { 152, 91, 128, 255, 128, 128, 128 }, - { 70, 109, 181, 255, 128, 128, 128 }, - { 133, 113, 164, 255, 128, 128, 128 }, - { 47, 205, 133, 255, 128, 128, 128 }, - { 247, 94, 136, 255, 128, 128, 128 }, - { 205, 122, 146, 255, 128, 128, 128 }, - { 251, 100, 141, 255, 128, 128, 128 }, - }, { // 5 colors - { 195, 65, 84, 125, 255, 128, 128 }, - { 150, 76, 84, 121, 255, 128, 128 }, - { 94, 110, 81, 117, 255, 128, 128 }, - { 79, 85, 91, 139, 255, 128, 128 }, - { 26, 102, 139, 127, 255, 128, 128 }, - { 220, 73, 91, 119, 255, 128, 128 }, - { 38, 203, 86, 127, 255, 128, 128 }, - { 61, 186, 72, 124, 255, 128, 128 }, - { 132, 199, 84, 128, 255, 128, 128 }, - { 172, 52, 62, 120, 255, 128, 128 }, - { 102, 89, 121, 122, 255, 128, 128 }, - { 182, 48, 69, 186, 255, 128, 128 }, - { 36, 206, 87, 126, 255, 128, 128 }, - { 249, 55, 67, 122, 255, 128, 128 }, - { 218, 88, 75, 122, 255, 128, 128 }, - { 253, 64, 80, 119, 255, 128, 128 }, - }, { // 6 colors - { 182, 54, 64, 75, 118, 255, 128 }, - { 126, 67, 70, 76, 116, 255, 128 }, - { 79, 92, 67, 85, 120, 255, 128 }, - { 63, 61, 81, 118, 132, 255, 128 }, - { 21, 80, 105, 83, 119, 255, 128 }, - { 215, 72, 74, 74, 111, 255, 128 }, - { 50, 176, 63, 79, 120, 255, 128 }, - { 72, 148, 66, 77, 120, 255, 128 }, - { 105, 177, 57, 78, 130, 255, 128 }, - { 150, 66, 66, 80, 127, 255, 128 }, - { 81, 76, 109, 85, 116, 255, 128 }, - { 113, 81, 62, 96, 148, 255, 128 }, - { 54, 179, 69, 82, 121, 255, 128 }, - { 244, 47, 48, 67, 118, 255, 128 }, - { 198, 83, 53, 65, 121, 255, 128 }, - { 250, 42, 51, 69, 110, 255, 128 }, - }, { // 7 colors - { 182, 45, 54, 62, 74, 113, 255 }, - { 124, 63, 57, 62, 77, 114, 255 }, - { 77, 80, 56, 66, 76, 117, 255 }, - { 63, 57, 69, 98, 85, 131, 255 }, - { 19, 81, 98, 63, 80, 116, 255 }, - { 215, 56, 60, 63, 68, 105, 255 }, - { 50, 174, 50, 60, 79, 118, 255 }, - { 68, 151, 50, 58, 73, 117, 255 }, - { 104, 182, 53, 57, 79, 127, 255 }, - { 156, 50, 51, 63, 77, 111, 255 }, - { 88, 67, 97, 59, 82, 120, 255 }, - { 114, 81, 46, 65, 103, 132, 255 }, - { 55, 166, 57, 66, 82, 120, 255 }, - { 245, 34, 38, 43, 63, 114, 255 }, - { 203, 68, 45, 47, 60, 118, 255 }, - { 250, 35, 37, 47, 66, 110, 255 }, - }, { // 8 colors - { 180, 43, 46, 50, 56, 69, 109 }, - { 116, 53, 51, 49, 57, 73, 115 }, - { 79, 70, 49, 50, 59, 74, 117 }, - { 60, 54, 57, 70, 62, 83, 129 }, - { 20, 73, 85, 52, 66, 81, 119 }, - { 213, 56, 52, 49, 53, 62, 104 }, - { 48, 161, 41, 45, 56, 77, 116 }, - { 68, 139, 40, 47, 54, 71, 116 }, - { 123, 166, 42, 43, 52, 76, 130 }, - { 153, 44, 44, 47, 54, 79, 129 }, - { 87, 64, 83, 49, 60, 75, 127 }, - { 131, 68, 43, 48, 73, 96, 130 }, - { 55, 152, 45, 51, 64, 77, 113 }, - { 243, 30, 28, 33, 41, 65, 114 }, - { 202, 56, 35, 36, 42, 63, 123 }, - { 249, 31, 29, 32, 45, 68, 111 }, - } -}; + [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = { + { + // 2 colors + { 230, 255, 128, 128, 128, 128, 128 }, + { 214, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 240, 255, 128, 128, 128, 128, 128 }, + { 73, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 130, 255, 128, 128, 128, 128, 128 }, + { 227, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 188, 255, 128, 128, 128, 128, 128 }, + { 75, 255, 128, 128, 128, 128, 128 }, + { 250, 255, 128, 128, 128, 128, 128 }, + { 223, 255, 128, 128, 128, 128, 128 }, + { 252, 255, 128, 128, 128, 128, 128 }, + }, + { + // 3 colors + { 229, 137, 255, 128, 128, 128, 128 }, + { 197, 120, 255, 128, 128, 128, 128 }, + { 107, 195, 255, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 27, 151, 255, 128, 128, 128, 128 }, + { 230, 130, 255, 128, 128, 128, 128 }, + { 37, 230, 255, 128, 128, 128, 128 }, + { 67, 221, 255, 128, 128, 128, 128 }, + { 124, 230, 255, 128, 128, 128, 128 }, + { 195, 109, 255, 128, 128, 128, 128 }, + { 99, 122, 255, 128, 128, 128, 128 }, + { 205, 208, 255, 128, 128, 128, 128 }, + { 40, 235, 255, 128, 128, 128, 128 }, + { 251, 132, 255, 128, 128, 128, 128 }, + { 237, 186, 255, 128, 128, 128, 128 }, + { 253, 112, 255, 128, 128, 128, 128 }, + }, + { + // 4 colors + { 195, 87, 128, 255, 128, 128, 128 }, + { 143, 100, 123, 255, 128, 128, 128 }, + { 94, 124, 119, 255, 128, 128, 128 }, + { 77, 91, 130, 255, 128, 128, 128 }, + { 39, 114, 178, 255, 128, 128, 128 }, + { 222, 94, 125, 255, 128, 128, 128 }, + { 44, 203, 132, 255, 128, 128, 128 }, + { 68, 175, 122, 255, 128, 128, 128 }, + { 110, 187, 124, 255, 128, 128, 128 }, + { 152, 91, 128, 255, 128, 128, 128 }, + { 70, 109, 181, 255, 128, 128, 128 }, + { 133, 113, 164, 255, 128, 128, 128 }, + { 47, 205, 133, 255, 128, 128, 128 }, + { 247, 94, 136, 255, 128, 128, 128 }, + { 205, 122, 146, 255, 128, 128, 128 }, + { 251, 100, 141, 255, 128, 128, 128 }, + }, + { + // 5 colors + { 195, 65, 84, 125, 255, 128, 128 }, + { 150, 76, 84, 121, 255, 128, 128 }, + { 94, 110, 81, 117, 255, 128, 128 }, + { 79, 85, 91, 139, 255, 128, 128 }, + { 26, 102, 139, 127, 255, 128, 128 }, + { 220, 73, 91, 119, 255, 128, 128 }, + { 38, 203, 86, 127, 255, 128, 128 }, + { 61, 186, 72, 124, 255, 128, 128 }, + { 132, 199, 84, 128, 255, 128, 128 }, + { 172, 52, 62, 120, 255, 128, 128 }, + { 102, 89, 121, 122, 255, 128, 128 }, + { 182, 48, 69, 186, 255, 128, 128 }, + { 36, 206, 87, 126, 255, 128, 128 }, + { 249, 55, 67, 122, 255, 128, 128 }, + { 218, 88, 75, 122, 255, 128, 128 }, + { 253, 64, 80, 119, 255, 128, 128 }, + }, + { + // 6 colors + { 182, 54, 64, 75, 118, 255, 128 }, + { 126, 67, 70, 76, 116, 255, 128 }, + { 79, 92, 67, 85, 120, 255, 128 }, + { 63, 61, 81, 118, 132, 255, 128 }, + { 21, 80, 105, 83, 119, 255, 128 }, + { 215, 72, 74, 74, 111, 255, 128 }, + { 50, 176, 63, 79, 120, 255, 128 }, + { 72, 148, 66, 77, 120, 255, 128 }, + { 105, 177, 57, 78, 130, 255, 128 }, + { 150, 66, 66, 80, 127, 255, 128 }, + { 81, 76, 109, 85, 116, 255, 128 }, + { 113, 81, 62, 96, 148, 255, 128 }, + { 54, 179, 69, 82, 121, 255, 128 }, + { 244, 47, 48, 67, 118, 255, 128 }, + { 198, 83, 53, 65, 121, 255, 128 }, + { 250, 42, 51, 69, 110, 255, 128 }, + }, + { + // 7 colors + { 182, 45, 54, 62, 74, 113, 255 }, + { 124, 63, 57, 62, 77, 114, 255 }, + { 77, 80, 56, 66, 76, 117, 255 }, + { 63, 57, 69, 98, 85, 131, 255 }, + { 19, 81, 98, 63, 80, 116, 255 }, + { 215, 56, 60, 63, 68, 105, 255 }, + { 50, 174, 50, 60, 79, 118, 255 }, + { 68, 151, 50, 58, 73, 117, 255 }, + { 104, 182, 53, 57, 79, 127, 255 }, + { 156, 50, 51, 63, 77, 111, 255 }, + { 88, 67, 97, 59, 82, 120, 255 }, + { 114, 81, 46, 65, 103, 132, 255 }, + { 55, 166, 57, 66, 82, 120, 255 }, + { 245, 34, 38, 43, 63, 114, 255 }, + { 203, 68, 45, 47, 60, 118, 255 }, + { 250, 35, 37, 47, 66, 110, 255 }, + }, + { + // 8 colors + { 180, 43, 46, 50, 56, 69, 109 }, + { 116, 53, 51, 49, 57, 73, 115 }, + { 79, 70, 49, 50, 59, 74, 117 }, + { 60, 54, 57, 70, 62, 83, 129 }, + { 20, 73, 85, 52, 66, 81, 119 }, + { 213, 56, 52, 49, 53, 62, 104 }, + { 48, 161, 41, 45, 56, 77, 116 }, + { 68, 139, 40, 47, 54, 71, 116 }, + { 123, 166, 42, 43, 52, 76, 130 }, + { 153, 44, 44, 47, 54, 79, 129 }, + { 87, 64, 83, 49, 60, 75, 127 }, + { 131, 68, 43, 48, 73, 96, 130 }, + { 55, 152, 45, 51, 64, 77, 113 }, + { 243, 30, 28, 33, 41, 65, 114 }, + { 202, 56, 35, 36, 42, 63, 123 }, + { 249, 31, 29, 32, 45, 68, 111 }, + } + }; const vpx_prob vp10_default_palette_uv_color_prob -[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = { - { // 2 colors - { 228, 255, 128, 128, 128, 128, 128 }, - { 195, 255, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 228, 255, 128, 128, 128, 128, 128 }, - { 71, 255, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 129, 255, 128, 128, 128, 128, 128 }, - { 206, 255, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 136, 255, 128, 128, 128, 128, 128 }, - { 98, 255, 128, 128, 128, 128, 128 }, - { 236, 255, 128, 128, 128, 128, 128 }, - { 222, 255, 128, 128, 128, 128, 128 }, - { 249, 255, 128, 128, 128, 128, 128 }, - }, { // 3 colors - { 198, 136, 255, 128, 128, 128, 128 }, - { 178, 105, 255, 128, 128, 128, 128 }, - { 100, 206, 255, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128 }, - { 12, 136, 255, 128, 128, 128, 128 }, - { 219, 134, 255, 128, 128, 128, 128 }, - { 50, 198, 255, 128, 128, 128, 128 }, - { 61, 231, 255, 128, 128, 128, 128 }, - { 110, 209, 255, 128, 128, 128, 128 }, - { 173, 106, 255, 128, 128, 128, 128 }, - { 145, 166, 255, 128, 128, 128, 128 }, - { 156, 175, 255, 128, 128, 128, 128 }, - { 69, 183, 255, 128, 128, 128, 128 }, - { 241, 163, 255, 128, 128, 128, 128 }, - { 224, 160, 255, 128, 128, 128, 128 }, - { 246, 154, 255, 128, 128, 128, 128 }, - }, { // 4 colors - { 173, 88, 143, 255, 128, 128, 128 }, - { 146, 81, 127, 255, 128, 128, 128 }, - { 84, 134, 102, 255, 128, 128, 128 }, - { 69, 138, 140, 255, 128, 128, 128 }, - { 31, 103, 200, 255, 128, 128, 128 }, - { 217, 101, 139, 255, 128, 128, 128 }, - { 51, 174, 121, 255, 128, 128, 128 }, - { 64, 177, 109, 255, 128, 128, 128 }, - { 96, 179, 145, 255, 128, 128, 128 }, - { 164, 77, 114, 255, 128, 128, 128 }, - { 87, 94, 156, 255, 128, 128, 128 }, - { 105, 57, 173, 255, 128, 128, 128 }, - { 63, 158, 137, 255, 128, 128, 128 }, - { 236, 102, 156, 255, 128, 128, 128 }, - { 197, 115, 153, 255, 128, 128, 128 }, - { 245, 106, 154, 255, 128, 128, 128 }, - }, { // 5 colors - { 179, 64, 97, 129, 255, 128, 128 }, - { 137, 56, 88, 125, 255, 128, 128 }, - { 82, 107, 61, 118, 255, 128, 128 }, - { 59, 113, 86, 115, 255, 128, 128 }, - { 23, 88, 118, 130, 255, 128, 128 }, - { 213, 66, 90, 125, 255, 128, 128 }, - { 37, 181, 103, 121, 255, 128, 128 }, - { 47, 188, 61, 131, 255, 128, 128 }, - { 104, 185, 103, 144, 255, 128, 128 }, - { 163, 39, 76, 112, 255, 128, 128 }, - { 94, 74, 131, 126, 255, 128, 128 }, - { 142, 42, 103, 163, 255, 128, 128 }, - { 53, 162, 99, 149, 255, 128, 128 }, - { 239, 54, 84, 108, 255, 128, 128 }, - { 203, 84, 110, 147, 255, 128, 128 }, - { 248, 70, 105, 151, 255, 128, 128 }, - }, { // 6 colors - { 189, 50, 67, 90, 130, 255, 128 }, - { 114, 50, 55, 90, 123, 255, 128 }, - { 66, 76, 54, 82, 128, 255, 128 }, - { 43, 69, 69, 80, 129, 255, 128 }, - { 22, 59, 87, 88, 141, 255, 128 }, - { 203, 49, 68, 87, 122, 255, 128 }, - { 43, 157, 74, 104, 146, 255, 128 }, - { 54, 138, 51, 95, 138, 255, 128 }, - { 82, 171, 58, 102, 146, 255, 128 }, - { 129, 38, 59, 64, 168, 255, 128 }, - { 56, 67, 119, 92, 112, 255, 128 }, - { 96, 62, 53, 132, 82, 255, 128 }, - { 60, 147, 77, 108, 145, 255, 128 }, - { 238, 76, 73, 93, 148, 255, 128 }, - { 189, 86, 73, 103, 157, 255, 128 }, - { 246, 62, 75, 83, 167, 255, 128 }, - }, { // 7 colors - { 179, 42, 51, 73, 99, 134, 255 }, - { 119, 52, 52, 61, 64, 114, 255 }, - { 53, 77, 35, 65, 71, 131, 255 }, - { 38, 70, 51, 68, 89, 144, 255 }, - { 23, 65, 128, 73, 97, 131, 255 }, - { 210, 47, 52, 63, 81, 143, 255 }, - { 42, 159, 57, 68, 98, 143, 255 }, - { 49, 153, 45, 82, 93, 143, 255 }, - { 81, 169, 52, 72, 113, 151, 255 }, - { 136, 46, 35, 56, 75, 96, 255 }, - { 57, 84, 109, 47, 107, 131, 255 }, - { 128, 78, 57, 36, 128, 85, 255 }, - { 54, 149, 68, 77, 94, 153, 255 }, - { 243, 58, 50, 71, 81, 167, 255 }, - { 189, 92, 64, 70, 121, 173, 255 }, - { 248, 35, 38, 51, 82, 201, 255 }, - }, { // 8 colors - { 201, 40, 36, 42, 64, 92, 123 }, - { 116, 43, 33, 43, 73, 102, 128 }, - { 46, 77, 37, 69, 62, 78, 150 }, - { 40, 65, 52, 50, 76, 89, 133 }, - { 28, 48, 91, 17, 64, 77, 133 }, - { 218, 43, 43, 37, 56, 72, 163 }, - { 41, 155, 44, 83, 82, 129, 180 }, - { 44, 141, 29, 55, 64, 89, 147 }, - { 92, 166, 48, 45, 59, 126, 179 }, - { 169, 35, 49, 41, 36, 99, 139 }, - { 55, 77, 77, 56, 60, 75, 156 }, - { 155, 81, 51, 64, 57, 182, 255 }, - { 60, 134, 49, 49, 93, 128, 174 }, - { 244, 98, 51, 46, 22, 73, 238 }, - { 189, 70, 40, 87, 93, 79, 201 }, - { 248, 54, 49, 40, 29, 42, 227 }, - } -}; + [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = { + { + // 2 colors + { 228, 255, 128, 128, 128, 128, 128 }, + { 195, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 228, 255, 128, 128, 128, 128, 128 }, + { 71, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 129, 255, 128, 128, 128, 128, 128 }, + { 206, 255, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 136, 255, 128, 128, 128, 128, 128 }, + { 98, 255, 128, 128, 128, 128, 128 }, + { 236, 255, 128, 128, 128, 128, 128 }, + { 222, 255, 128, 128, 128, 128, 128 }, + { 249, 255, 128, 128, 128, 128, 128 }, + }, + { + // 3 colors + { 198, 136, 255, 128, 128, 128, 128 }, + { 178, 105, 255, 128, 128, 128, 128 }, + { 100, 206, 255, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, + { 12, 136, 255, 128, 128, 128, 128 }, + { 219, 134, 255, 128, 128, 128, 128 }, + { 50, 198, 255, 128, 128, 128, 128 }, + { 61, 231, 255, 128, 128, 128, 128 }, + { 110, 209, 255, 128, 128, 128, 128 }, + { 173, 106, 255, 128, 128, 128, 128 }, + { 145, 166, 255, 128, 128, 128, 128 }, + { 156, 175, 255, 128, 128, 128, 128 }, + { 69, 183, 255, 128, 128, 128, 128 }, + { 241, 163, 255, 128, 128, 128, 128 }, + { 224, 160, 255, 128, 128, 128, 128 }, + { 246, 154, 255, 128, 128, 128, 128 }, + }, + { + // 4 colors + { 173, 88, 143, 255, 128, 128, 128 }, + { 146, 81, 127, 255, 128, 128, 128 }, + { 84, 134, 102, 255, 128, 128, 128 }, + { 69, 138, 140, 255, 128, 128, 128 }, + { 31, 103, 200, 255, 128, 128, 128 }, + { 217, 101, 139, 255, 128, 128, 128 }, + { 51, 174, 121, 255, 128, 128, 128 }, + { 64, 177, 109, 255, 128, 128, 128 }, + { 96, 179, 145, 255, 128, 128, 128 }, + { 164, 77, 114, 255, 128, 128, 128 }, + { 87, 94, 156, 255, 128, 128, 128 }, + { 105, 57, 173, 255, 128, 128, 128 }, + { 63, 158, 137, 255, 128, 128, 128 }, + { 236, 102, 156, 255, 128, 128, 128 }, + { 197, 115, 153, 255, 128, 128, 128 }, + { 245, 106, 154, 255, 128, 128, 128 }, + }, + { + // 5 colors + { 179, 64, 97, 129, 255, 128, 128 }, + { 137, 56, 88, 125, 255, 128, 128 }, + { 82, 107, 61, 118, 255, 128, 128 }, + { 59, 113, 86, 115, 255, 128, 128 }, + { 23, 88, 118, 130, 255, 128, 128 }, + { 213, 66, 90, 125, 255, 128, 128 }, + { 37, 181, 103, 121, 255, 128, 128 }, + { 47, 188, 61, 131, 255, 128, 128 }, + { 104, 185, 103, 144, 255, 128, 128 }, + { 163, 39, 76, 112, 255, 128, 128 }, + { 94, 74, 131, 126, 255, 128, 128 }, + { 142, 42, 103, 163, 255, 128, 128 }, + { 53, 162, 99, 149, 255, 128, 128 }, + { 239, 54, 84, 108, 255, 128, 128 }, + { 203, 84, 110, 147, 255, 128, 128 }, + { 248, 70, 105, 151, 255, 128, 128 }, + }, + { + // 6 colors + { 189, 50, 67, 90, 130, 255, 128 }, + { 114, 50, 55, 90, 123, 255, 128 }, + { 66, 76, 54, 82, 128, 255, 128 }, + { 43, 69, 69, 80, 129, 255, 128 }, + { 22, 59, 87, 88, 141, 255, 128 }, + { 203, 49, 68, 87, 122, 255, 128 }, + { 43, 157, 74, 104, 146, 255, 128 }, + { 54, 138, 51, 95, 138, 255, 128 }, + { 82, 171, 58, 102, 146, 255, 128 }, + { 129, 38, 59, 64, 168, 255, 128 }, + { 56, 67, 119, 92, 112, 255, 128 }, + { 96, 62, 53, 132, 82, 255, 128 }, + { 60, 147, 77, 108, 145, 255, 128 }, + { 238, 76, 73, 93, 148, 255, 128 }, + { 189, 86, 73, 103, 157, 255, 128 }, + { 246, 62, 75, 83, 167, 255, 128 }, + }, + { + // 7 colors + { 179, 42, 51, 73, 99, 134, 255 }, + { 119, 52, 52, 61, 64, 114, 255 }, + { 53, 77, 35, 65, 71, 131, 255 }, + { 38, 70, 51, 68, 89, 144, 255 }, + { 23, 65, 128, 73, 97, 131, 255 }, + { 210, 47, 52, 63, 81, 143, 255 }, + { 42, 159, 57, 68, 98, 143, 255 }, + { 49, 153, 45, 82, 93, 143, 255 }, + { 81, 169, 52, 72, 113, 151, 255 }, + { 136, 46, 35, 56, 75, 96, 255 }, + { 57, 84, 109, 47, 107, 131, 255 }, + { 128, 78, 57, 36, 128, 85, 255 }, + { 54, 149, 68, 77, 94, 153, 255 }, + { 243, 58, 50, 71, 81, 167, 255 }, + { 189, 92, 64, 70, 121, 173, 255 }, + { 248, 35, 38, 51, 82, 201, 255 }, + }, + { + // 8 colors + { 201, 40, 36, 42, 64, 92, 123 }, + { 116, 43, 33, 43, 73, 102, 128 }, + { 46, 77, 37, 69, 62, 78, 150 }, + { 40, 65, 52, 50, 76, 89, 133 }, + { 28, 48, 91, 17, 64, 77, 133 }, + { 218, 43, 43, 37, 56, 72, 163 }, + { 41, 155, 44, 83, 82, 129, 180 }, + { 44, 141, 29, 55, 64, 89, 147 }, + { 92, 166, 48, 45, 59, 126, 179 }, + { 169, 35, 49, 41, 36, 99, 139 }, + { 55, 77, 77, 56, 60, 75, 156 }, + { 155, 81, 51, 64, 57, 182, 255 }, + { 60, 134, 49, 49, 93, 128, 174 }, + { 244, 98, 51, 46, 22, 73, 238 }, + { 189, 70, 40, 87, 93, 79, 201 }, + { 248, 54, 49, 40, 29, 42, 227 }, + } + }; static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = { - // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2), - 3993, 4235, 4378, 4380, - // (4, 3, 3, 0), (5, 0, 0, 0), (5, 3, 0, 0), (5, 3, 2, 0), - 5720, 6655, 7018, 7040, - // (5, 5, 0, 0), (6, 2, 0, 0), (6, 2, 2, 0), (6, 4, 0, 0), - 7260, 8228, 8250, 8470, - // (7, 3, 0, 0), (8, 0, 0, 0), (8, 2, 0, 0), (10, 0, 0, 0) - 9680, 10648, 10890, 13310 + // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2), + 3993, 4235, 4378, 4380, + // (4, 3, 3, 0), (5, 0, 0, 0), (5, 3, 0, 0), (5, 3, 2, 0), + 5720, 6655, 7018, 7040, + // (5, 5, 0, 0), (6, 2, 0, 0), (6, 2, 2, 0), (6, 4, 0, 0), + 7260, 8228, 8250, 8470, + // (7, 3, 0, 0), (8, 0, 0, 0), (8, 2, 0, 0), (10, 0, 0, 0) + 9680, 10648, 10890, 13310 }; const vpx_tree_index vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = { - { // Max tx_size is 8X8 - -TX_4X4, -TX_8X8, - }, - { // Max tx_size is 16X16 - -TX_4X4, 2, - -TX_8X8, -TX_16X16, - }, - { // Max tx_size is 32X32 - -TX_4X4, 2, - -TX_8X8, 4, - -TX_16X16, -TX_32X32, - }, + { + // Max tx_size is 8X8 + -TX_4X4, -TX_8X8, + }, + { + // Max tx_size is 16X16 + -TX_4X4, 2, -TX_8X8, -TX_16X16, + }, + { + // Max tx_size is 32X32 + -TX_4X4, 2, -TX_8X8, 4, -TX_16X16, -TX_32X32, + }, }; static const vpx_prob -default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1] = { - { // Max tx_size is 8X8 - { 100, }, { 66, }, - }, - { // Max tx_size is 16X16 - { 20, 152, }, { 15, 101, }, - }, - { // Max tx_size is 32X32 - { 3, 136, 37 }, { 5, 52, 13 }, - }, -}; - -int vp10_get_palette_color_context(const uint8_t *color_map, int cols, - int r, int c, int n, int *color_order) { + default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1] = { + { + // Max tx_size is 8X8 + { 100 }, + { 66 }, + }, + { + // Max tx_size is 16X16 + { 20, 152 }, + { 15, 101 }, + }, + { + // Max tx_size is 32X32 + { 3, 136, 37 }, + { 5, 52, 13 }, + }, + }; + +int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r, + int c, int n, int *color_order) { int i, j, max, max_idx, temp; int scores[PALETTE_MAX_SIZE + 10]; - int weights[4] = {3, 2, 3, 2}; + int weights[4] = { 3, 2, 3, 2 }; int color_ctx = 0; int color_neighbors[4]; @@ -894,12 +883,10 @@ int vp10_get_palette_color_context(const uint8_t *color_map, int cols, else color_neighbors[3] = -1; - for (i = 0; i < PALETTE_MAX_SIZE; ++i) - color_order[i] = i; + for (i = 0; i < PALETTE_MAX_SIZE; ++i) color_order[i] = i; memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0])); for (i = 0; i < 4; ++i) { - if (color_neighbors[i] >= 0) - scores[color_neighbors[i]] += weights[i]; + if (color_neighbors[i] >= 0) scores[color_neighbors[i]] += weights[i]; } for (i = 0; i < 4; ++i) { @@ -925,8 +912,7 @@ int vp10_get_palette_color_context(const uint8_t *color_map, int cols, } } - for (i = 0; i < 4; ++i) - color_ctx = color_ctx * 11 + scores[i]; + for (i = 0; i < 4; ++i) color_ctx = color_ctx * 11 + scores[i]; for (i = 0; i < PALETTE_COLOR_CONTEXTS; ++i) if (color_ctx == palette_color_context_lookup[i]) { @@ -934,98 +920,61 @@ int vp10_get_palette_color_context(const uint8_t *color_map, int cols, break; } - if (color_ctx >= PALETTE_COLOR_CONTEXTS) - color_ctx = 0; + if (color_ctx >= PALETTE_COLOR_CONTEXTS) color_ctx = 0; return color_ctx; } #if CONFIG_VAR_TX static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = { - 192, 128, 64, 192, 128, 64, 192, 128, 64, + 192, 128, 64, 192, 128, 64, 192, 128, 64, }; #endif -static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { - 192, 128, 64 -}; +static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 }; #if CONFIG_EXT_INTERP -static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1] = { +static const vpx_prob default_switchable_interp_prob + [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = { #if CONFIG_DUAL_FILTER - { 235, 192, 128, 128}, - { 36, 243, 208, 128}, - { 34, 16, 128, 128}, - { 36, 243, 48, 128}, - { 34, 16, 128, 128}, - { 149, 160, 128, 128}, - - { 235, 192, 128, 128}, - { 36, 243, 208, 128}, - { 34, 16, 128, 128}, - { 36, 243, 48, 128}, - { 34, 16, 128, 128}, - { 149, 160, 128, 128}, - - { 235, 192, 128, 128}, - { 36, 243, 208, 128}, - { 34, 16, 128, 128}, - { 36, 243, 48, 128}, - { 34, 16, 128, 128}, - { 149, 160, 128, 128}, - - { 235, 192, 128, 128}, - { 36, 243, 208, 128}, - { 34, 16, 128, 128}, - { 36, 243, 48, 128}, - { 34, 16, 128, 128}, - { 149, 160, 128, 128}, + { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 }, + { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 }, + + { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 }, + { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 }, + + { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 }, + { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 }, + + { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 }, + { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 }, #else - { 235, 192, 128, 128}, - { 36, 243, 208, 128}, - { 34, 16, 128, 128}, - { 36, 243, 48, 128}, - { 34, 16, 128, 128}, - { 149, 160, 128, 128}, + { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 }, + { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 }, #endif -}; + }; #else // CONFIG_EXT_INTERP #if CONFIG_DUAL_FILTER -static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, - - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 10, 3, }, - - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, - - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 10, 3, }, -}; +static const vpx_prob default_switchable_interp_prob + [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = { + { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 }, + + { 235, 162 }, { 36, 255 }, { 34, 3 }, { 10, 3 }, + + { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 }, + + { 235, 162 }, { 36, 255 }, { 34, 3 }, { 10, 3 }, + }; #else -static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, -}; +static const vpx_prob default_switchable_interp_prob + [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = { + { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 }, + }; #endif #endif // CONFIG_EXT_INTERP #if CONFIG_EXT_TX +/* clang-format off */ const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER] [TREE_SIZE(TX_TYPES)] = { { // ToDo(yaowu): remove used entry 0. @@ -1081,207 +1030,256 @@ const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA] -ADST_DCT, -DCT_ADST, } }; +/* clang-format on */ static const vpx_prob -default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = { - { // ToDo(yaowu): remove unused entry 0. - { 0 }, - { 0 }, - { 0 }, + default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = { + { + // ToDo(yaowu): remove unused entry 0. + { 0 }, + { 0 }, + { 0 }, #if EXT_TX_SIZES == 4 - { 0 }, + { 0 }, #endif - }, { - { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, - { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, - { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, + }, + { + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, + 128 }, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, + 128 }, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, + 128 }, #if EXT_TX_SIZES == 4 - { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128}, + { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, + 128 }, #endif - }, { - { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, - { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, - { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, + }, + { + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, #if EXT_TX_SIZES == 4 - { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, + { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 }, #endif - }, { - { 12, }, - { 12, }, - { 12, }, + }, + { + { 12 }, + { 12 }, + { 12 }, #if EXT_TX_SIZES == 4 - { 12, }, + { 12 }, #endif - } -}; - -static const vpx_prob -default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES] - [INTRA_MODES][TX_TYPES - 1] = { - { // ToDo(yaowu): remove unused entry 0. - { - { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, - }, { - { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, - }, { - { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, + } + }; + +static const vpx_prob default_intra_ext_tx_prob + [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES - 1] = { + { + // ToDo(yaowu): remove unused entry 0. + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + }, + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + }, + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, #if EXT_TX_SIZES == 4 - }, { - { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, + }, + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, + { 0 }, #endif - }, - }, { - { - { 8, 224, 32, 128, 64, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 9, 200, 32, 128, 64, 128, }, - { 8, 8, 32, 128, 224, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 32, 32, 128, 16, 64, }, - }, { - { 8, 224, 32, 128, 64, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 9, 200, 32, 128, 64, 128, }, - { 8, 8, 32, 128, 224, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 32, 32, 128, 16, 64, }, - }, { - { 8, 224, 32, 128, 64, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 9, 200, 32, 128, 64, 128, }, - { 8, 8, 32, 128, 224, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 32, 32, 128, 16, 64, }, + }, + }, + { + { + { 8, 224, 32, 128, 64, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 9, 200, 32, 128, 64, 128 }, + { 8, 8, 32, 128, 224, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 32, 32, 128, 16, 64 }, + }, + { + { 8, 224, 32, 128, 64, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 9, 200, 32, 128, 64, 128 }, + { 8, 8, 32, 128, 224, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 32, 32, 128, 16, 64 }, + }, + { + { 8, 224, 32, 128, 64, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 9, 200, 32, 128, 64, 128 }, + { 8, 8, 32, 128, 224, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 32, 32, 128, 16, 64 }, #if EXT_TX_SIZES == 4 - }, { - { 8, 224, 32, 128, 64, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 9, 200, 32, 128, 64, 128, }, - { 8, 8, 32, 128, 224, 128, }, - { 10, 32, 32, 128, 16, 192, }, - { 10, 32, 32, 128, 16, 64, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 23, 32, 128, 80, 176, }, - { 10, 32, 32, 128, 16, 64, }, + }, + { + { 8, 224, 32, 128, 64, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 9, 200, 32, 128, 64, 128 }, + { 8, 8, 32, 128, 224, 128 }, + { 10, 32, 32, 128, 16, 192 }, + { 10, 32, 32, 128, 16, 64 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 23, 32, 128, 80, 176 }, + { 10, 32, 32, 128, 16, 64 }, #endif - }, - }, { - { - { 8, 224, 64, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 9, 200, 64, 128, }, - { 8, 8, 224, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 10, 23, 80, 176, }, - { 10, 23, 80, 176, }, - { 10, 32, 16, 64, }, - }, { - { 8, 224, 64, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 9, 200, 64, 128, }, - { 8, 8, 224, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 10, 23, 80, 176, }, - { 10, 23, 80, 176, }, - { 10, 32, 16, 64, }, - }, { - { 8, 224, 64, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 9, 200, 64, 128, }, - { 8, 8, 224, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 10, 23, 80, 176, }, - { 10, 23, 80, 176, }, - { 10, 32, 16, 64, }, + }, + }, + { + { + { 8, 224, 64, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 9, 200, 64, 128 }, + { 8, 8, 224, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 10, 23, 80, 176 }, + { 10, 23, 80, 176 }, + { 10, 32, 16, 64 }, + }, + { + { 8, 224, 64, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 9, 200, 64, 128 }, + { 8, 8, 224, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 10, 23, 80, 176 }, + { 10, 23, 80, 176 }, + { 10, 32, 16, 64 }, + }, + { + { 8, 224, 64, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 9, 200, 64, 128 }, + { 8, 8, 224, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 10, 23, 80, 176 }, + { 10, 23, 80, 176 }, + { 10, 32, 16, 64 }, #if EXT_TX_SIZES == 4 - }, { - { 8, 224, 64, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 9, 200, 64, 128, }, - { 8, 8, 224, 128, }, - { 10, 32, 16, 192, }, - { 10, 32, 16, 64, }, - { 10, 23, 80, 176, }, - { 10, 23, 80, 176, }, - { 10, 32, 16, 64, }, + }, + { + { 8, 224, 64, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 9, 200, 64, 128 }, + { 8, 8, 224, 128 }, + { 10, 32, 16, 192 }, + { 10, 32, 16, 64 }, + { 10, 23, 80, 176 }, + { 10, 23, 80, 176 }, + { 10, 32, 16, 64 }, #endif - }, - }, -}; + }, + }, + }; #else +/* clang-format off */ const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = { -DCT_DCT, 2, -ADST_ADST, 4, -ADST_DCT, -DCT_ADST }; +/* clang-format on */ -static const vpx_prob default_intra_ext_tx_prob[EXT_TX_SIZES] - [TX_TYPES][TX_TYPES - 1] = { - {{240, 85, 128}, {4, 1, 248}, {4, 1, 8}, {4, 248, 128}}, - {{244, 85, 128}, {8, 2, 248}, {8, 2, 8}, {8, 248, 128}}, - {{248, 85, 128}, {16, 4, 248}, {16, 4, 8}, {16, 248, 128}}, -}; - -static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES] - [TX_TYPES - 1] = { - {160, 85, 128}, - {176, 85, 128}, - {192, 85, 128}, +static const vpx_prob + default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = { + { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } }, + { { 244, 85, 128 }, { 8, 2, 248 }, { 8, 2, 8 }, { 8, 248, 128 } }, + { { 248, 85, 128 }, { 16, 4, 248 }, { 16, 4, 8 }, { 16, 248, 128 } }, + }; + +static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = { + { 160, 85, 128 }, { 176, 85, 128 }, { 192, 85, 128 }, }; #endif // CONFIG_EXT_TX #if CONFIG_EXT_INTRA static const vpx_prob -default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = { - { 98, 63, 60, }, - { 98, 82, 80, }, - { 94, 65, 103, }, - { 49, 25, 24, }, - { 72, 38, 50, }, -}; -static const vpx_prob default_ext_intra_probs[2] = {230, 230}; + default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = { + { 98, 63, 60 }, { 98, 82, 80 }, { 94, 65, 103 }, + { 49, 25, 24 }, { 72, 38, 50 }, + }; +static const vpx_prob default_ext_intra_probs[2] = { 230, 230 }; const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)] = { - -INTRA_FILTER_LINEAR, 2, - -INTRA_FILTER_8TAP, 4, - -INTRA_FILTER_8TAP_SHARP, -INTRA_FILTER_8TAP_SMOOTH, + -INTRA_FILTER_LINEAR, 2, -INTRA_FILTER_8TAP, 4, -INTRA_FILTER_8TAP_SHARP, + -INTRA_FILTER_8TAP_SMOOTH, }; #endif // CONFIG_EXT_INTRA #if CONFIG_SUPERTX -static const vpx_prob default_supertx_prob[PARTITION_SUPERTX_CONTEXTS] - [TX_SIZES] = { - { 1, 160, 160, 170 }, - { 1, 200, 200, 210 }, -}; +static const vpx_prob + default_supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES] = { + { 1, 160, 160, 170 }, { 1, 200, 200, 210 }, + }; #endif // CONFIG_SUPERTX // FIXME(someone) need real defaults here static const struct segmentation_probs default_seg_probs = { - { 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128 }, }; static void init_mode_probs(FRAME_CONTEXT *fc) { @@ -1335,19 +1333,21 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { } #if CONFIG_EXT_INTERP -const vpx_tree_index vp10_switchable_interp_tree -[TREE_SIZE(SWITCHABLE_FILTERS)] = { - -EIGHTTAP_REGULAR, 2, - 4, 6, - -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2, - -MULTITAP_SHARP, -MULTITAP_SHARP2, -}; +const vpx_tree_index + vp10_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] = { + -EIGHTTAP_REGULAR, + 2, + 4, + 6, + -EIGHTTAP_SMOOTH, + -EIGHTTAP_SMOOTH2, + -MULTITAP_SHARP, + -MULTITAP_SHARP2, + }; #else -const vpx_tree_index vp10_switchable_interp_tree -[TREE_SIZE(SWITCHABLE_FILTERS)] = { - -EIGHTTAP_REGULAR, 2, - -EIGHTTAP_SMOOTH, -MULTITAP_SHARP -}; +const vpx_tree_index vp10_switchable_interp_tree[TREE_SIZE( + SWITCHABLE_FILTERS)] = { -EIGHTTAP_REGULAR, 2, -EIGHTTAP_SMOOTH, + -MULTITAP_SHARP }; #endif // CONFIG_EXT_INTERP void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { @@ -1366,8 +1366,8 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { #if CONFIG_EXT_REFS for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < (FWD_REFS - 1); j++) - fc->comp_ref_prob[i][j] = mode_mv_merge_probs( - pre_fc->comp_ref_prob[i][j], counts->comp_ref[i][j]); + fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j], + counts->comp_ref[i][j]); for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < (BWD_REFS - 1); j++) fc->comp_bwdref_prob[i][j] = mode_mv_merge_probs( @@ -1375,8 +1375,8 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { #else for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < (COMP_REFS - 1); j++) - fc->comp_ref_prob[i][j] = mode_mv_merge_probs( - pre_fc->comp_ref_prob[i][j], counts->comp_ref[i][j]); + fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j], + counts->comp_ref[i][j]); #endif // CONFIG_EXT_REFS for (i = 0; i < REF_CONTEXTS; i++) @@ -1386,21 +1386,21 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { #if CONFIG_REF_MV for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) - fc->newmv_prob[i] = vp10_mode_mv_merge_probs(pre_fc->newmv_prob[i], - counts->newmv_mode[i]); + fc->newmv_prob[i] = + vp10_mode_mv_merge_probs(pre_fc->newmv_prob[i], counts->newmv_mode[i]); for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) fc->zeromv_prob[i] = vp10_mode_mv_merge_probs(pre_fc->zeromv_prob[i], counts->zeromv_mode[i]); for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) - fc->refmv_prob[i] = vp10_mode_mv_merge_probs(pre_fc->refmv_prob[i], - counts->refmv_mode[i]); + fc->refmv_prob[i] = + vp10_mode_mv_merge_probs(pre_fc->refmv_prob[i], counts->refmv_mode[i]); for (i = 0; i < DRL_MODE_CONTEXTS; ++i) - fc->drl_prob[i] = vp10_mode_mv_merge_probs(pre_fc->drl_prob[i], - counts->drl_mode[i]); + fc->drl_prob[i] = + vp10_mode_mv_merge_probs(pre_fc->drl_prob[i], counts->drl_mode[i]); #if CONFIG_EXT_INTER - fc->new2mv_prob = vp10_mode_mv_merge_probs(pre_fc->new2mv_prob, - counts->new2mv_mode); + fc->new2mv_prob = + vp10_mode_mv_merge_probs(pre_fc->new2mv_prob, counts->new2mv_mode); #endif // CONFIG_EXT_INTER #else for (i = 0; i < INTER_MODE_CONTEXTS; i++) @@ -1426,10 +1426,9 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { #if CONFIG_EXT_INTER for (i = 0; i < INTER_MODE_CONTEXTS; i++) - vpx_tree_merge_probs(vp10_inter_compound_mode_tree, - pre_fc->inter_compound_mode_probs[i], - counts->inter_compound_mode[i], - fc->inter_compound_mode_probs[i]); + vpx_tree_merge_probs( + vp10_inter_compound_mode_tree, pre_fc->inter_compound_mode_probs[i], + counts->inter_compound_mode[i], fc->inter_compound_mode_probs[i]); for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) { if (is_interintra_allowed_bsize_group(i)) fc->interintra_prob[i] = vp10_mode_mv_merge_probs( @@ -1454,14 +1453,13 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { for (i = 0; i < BLOCK_SIZE_GROUPS; i++) vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i], - counts->y_mode[i], fc->y_mode_prob[i]); + counts->y_mode[i], fc->y_mode_prob[i]); if (cm->interp_filter == SWITCHABLE) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - vpx_tree_merge_probs(vp10_switchable_interp_tree, - pre_fc->switchable_interp_prob[i], - counts->switchable_interp[i], - fc->switchable_interp_prob[i]); + vpx_tree_merge_probs( + vp10_switchable_interp_tree, pre_fc->switchable_interp_prob[i], + counts->switchable_interp[i], fc->switchable_interp_prob[i]); } } @@ -1474,60 +1472,52 @@ void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) { if (cm->tx_mode == TX_MODE_SELECT) { for (i = 0; i < TX_SIZES - 1; ++i) { for (j = 0; j < TX_SIZE_CONTEXTS; ++j) - vpx_tree_merge_probs(vp10_tx_size_tree[i], - pre_fc->tx_size_probs[i][j], - counts->tx_size[i][j], - fc->tx_size_probs[i][j]); + vpx_tree_merge_probs(vp10_tx_size_tree[i], pre_fc->tx_size_probs[i][j], + counts->tx_size[i][j], fc->tx_size_probs[i][j]); } } #if CONFIG_VAR_TX if (cm->tx_mode == TX_MODE_SELECT) for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) - fc->txfm_partition_prob[i] = - vp10_mode_mv_merge_probs(pre_fc->txfm_partition_prob[i], - counts->txfm_partition[i]); + fc->txfm_partition_prob[i] = vp10_mode_mv_merge_probs( + pre_fc->txfm_partition_prob[i], counts->txfm_partition[i]); #endif for (i = 0; i < SKIP_CONTEXTS; ++i) - fc->skip_probs[i] = vp10_mode_mv_merge_probs( - pre_fc->skip_probs[i], counts->skip[i]); + fc->skip_probs[i] = + vp10_mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]); #if CONFIG_EXT_TX for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { int s; for (s = 1; s < EXT_TX_SETS_INTER; ++s) { if (use_inter_ext_tx_for_txsize[s][i]) { - vpx_tree_merge_probs(vp10_ext_tx_inter_tree[s], - pre_fc->inter_ext_tx_prob[s][i], - counts->inter_ext_tx[s][i], - fc->inter_ext_tx_prob[s][i]); + vpx_tree_merge_probs( + vp10_ext_tx_inter_tree[s], pre_fc->inter_ext_tx_prob[s][i], + counts->inter_ext_tx[s][i], fc->inter_ext_tx_prob[s][i]); } } for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { if (use_intra_ext_tx_for_txsize[s][i]) { int j; for (j = 0; j < INTRA_MODES; ++j) - vpx_tree_merge_probs(vp10_ext_tx_intra_tree[s], - pre_fc->intra_ext_tx_prob[s][i][j], - counts->intra_ext_tx[s][i][j], - fc->intra_ext_tx_prob[s][i][j]); + vpx_tree_merge_probs( + vp10_ext_tx_intra_tree[s], pre_fc->intra_ext_tx_prob[s][i][j], + counts->intra_ext_tx[s][i][j], fc->intra_ext_tx_prob[s][i][j]); } } } #else for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { for (j = 0; j < TX_TYPES; ++j) - vpx_tree_merge_probs(vp10_ext_tx_tree, - pre_fc->intra_ext_tx_prob[i][j], + vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->intra_ext_tx_prob[i][j], counts->intra_ext_tx[i][j], fc->intra_ext_tx_prob[i][j]); } for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - vpx_tree_merge_probs(vp10_ext_tx_tree, - pre_fc->inter_ext_tx_prob[i], - counts->inter_ext_tx[i], - fc->inter_ext_tx_prob[i]); + vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->inter_ext_tx_prob[i], + counts->inter_ext_tx[i], fc->inter_ext_tx_prob[i]); } #endif // CONFIG_EXT_TX @@ -1562,7 +1552,7 @@ void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) { #if CONFIG_EXT_INTRA for (i = 0; i < PLANE_TYPES; ++i) { fc->ext_intra_probs[i] = vp10_mode_mv_merge_probs( - pre_fc->ext_intra_probs[i], counts->ext_intra[i]); + pre_fc->ext_intra_probs[i], counts->ext_intra[i]); } for (i = 0; i < INTRA_FILTERS + 1; ++i) @@ -1623,8 +1613,7 @@ void vp10_setup_past_independence(VP10_COMMON *cm) { if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode || cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) { // Reset all frame contexts. - for (i = 0; i < FRAME_CONTEXTS; ++i) - cm->frame_contexts[i] = *cm->fc; + for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc; } else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) { // Reset only the frame context specified in the frame header. cm->frame_contexts[cm->frame_context_idx] = *cm->fc; diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index a898038a839aed223a56c89654169f3294daf912..dc6958bdfed4135146f5764356c7e5d0defe7a9c 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -25,9 +25,9 @@ extern "C" { #define TX_SIZE_CONTEXTS 2 -#define INTER_OFFSET(mode) ((mode) - NEARESTMV) +#define INTER_OFFSET(mode) ((mode)-NEARESTMV) #if CONFIG_EXT_INTER -#define INTER_COMPOUND_OFFSET(mode) ((mode) - NEAREST_NEARESTMV) +#define INTER_COMPOUND_OFFSET(mode) ((mode)-NEAREST_NEARESTMV) #endif // CONFIG_EXT_INTER #define PALETTE_COLOR_CONTEXTS 16 @@ -56,8 +56,9 @@ typedef struct frame_contexts { #if CONFIG_ANS coeff_cdf_model coef_cdfs[TX_SIZES][PLANE_TYPES]; #endif - vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1]; + vpx_prob + switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - + 1]; #if CONFIG_REF_MV vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS]; @@ -72,8 +73,8 @@ typedef struct frame_contexts { vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; #if CONFIG_EXT_INTER - vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS] - [INTER_COMPOUND_MODES - 1]; + vpx_prob + inter_compound_mode_probs[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1]; vpx_prob interintra_prob[BLOCK_SIZE_GROUPS]; vpx_prob interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1]; vpx_prob wedge_interintra_prob[BLOCK_SIZES]; @@ -84,12 +85,12 @@ typedef struct frame_contexts { #endif // CONFIG_OBMC || CONFIG_WARPED_MOTION vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1]; + vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS - 1]; #if CONFIG_EXT_REFS - vpx_prob comp_ref_prob[REF_CONTEXTS][FWD_REFS-1]; - vpx_prob comp_bwdref_prob[REF_CONTEXTS][BWD_REFS-1]; + vpx_prob comp_ref_prob[REF_CONTEXTS][FWD_REFS - 1]; + vpx_prob comp_bwdref_prob[REF_CONTEXTS][BWD_REFS - 1]; #else - vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS-1]; + vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1]; #endif // CONFIG_EXT_REFS vpx_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1]; #if CONFIG_VAR_TX @@ -104,8 +105,9 @@ typedef struct frame_contexts { int initialized; #if CONFIG_EXT_TX vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1]; - vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] - [TX_TYPES - 1]; + vpx_prob + intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES - + 1]; #else vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1]; vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1]; @@ -135,10 +137,10 @@ typedef struct FRAME_COUNTS { unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; #endif vp10_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; - unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] - [COEF_BANDS][COEFF_CONTEXTS]; - unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS]; + unsigned int + eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]; + unsigned int + switchable_interp[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; #if CONFIG_REF_MV unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2]; unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2]; @@ -162,12 +164,12 @@ typedef struct FRAME_COUNTS { #endif // CONFIG_OBMC || CONFIG_WARPED_MOTION unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2]; + unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2]; #if CONFIG_EXT_REFS - unsigned int comp_ref[REF_CONTEXTS][FWD_REFS-1][2]; - unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS-1][2]; + unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2]; + unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2]; #else - unsigned int comp_ref[REF_CONTEXTS][COMP_REFS-1][2]; + unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2]; #endif // CONFIG_EXT_REFS unsigned int tx_size_totals[TX_SIZES]; unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; @@ -182,8 +184,8 @@ typedef struct FRAME_COUNTS { #endif #if CONFIG_EXT_TX unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; - unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] - [TX_TYPES]; + unsigned int + intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES]; #else unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES]; unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES]; @@ -199,40 +201,40 @@ typedef struct FRAME_COUNTS { #endif // CONFIG_EXT_INTRA } FRAME_COUNTS; -extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] - [INTRA_MODES - 1]; extern const vpx_prob -vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]; + vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1]; +extern const vpx_prob vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES] + [PALETTE_Y_MODE_CONTEXTS]; extern const vpx_prob vp10_default_palette_uv_mode_prob[2]; extern const vpx_prob -vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1]; + vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1]; extern const vpx_prob -vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1]; + vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1]; extern const vpx_prob vp10_default_palette_y_color_prob -[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1]; + [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1]; extern const vpx_prob vp10_default_palette_uv_color_prob -[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1]; + [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1]; extern const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; extern const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)]; #if CONFIG_EXT_INTER -extern const vpx_tree_index vp10_interintra_mode_tree - [TREE_SIZE(INTERINTRA_MODES)]; -extern const vpx_tree_index vp10_inter_compound_mode_tree - [TREE_SIZE(INTER_COMPOUND_MODES)]; +extern const vpx_tree_index + vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)]; +extern const vpx_tree_index + vp10_inter_compound_mode_tree[TREE_SIZE(INTER_COMPOUND_MODES)]; #endif // CONFIG_EXT_INTER extern const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)]; #if CONFIG_EXT_PARTITION_TYPES -extern const vpx_tree_index vp10_ext_partition_tree - [TREE_SIZE(EXT_PARTITION_TYPES)]; +extern const vpx_tree_index + vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)]; #endif -extern const vpx_tree_index vp10_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)]; +extern const vpx_tree_index + vp10_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)]; extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)]; extern const vpx_tree_index -vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)]; + vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)]; extern const vpx_tree_index -vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)]; + vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)]; #if CONFIG_EXT_INTRA extern const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)]; #endif // CONFIG_EXT_INTRA @@ -242,8 +244,7 @@ extern const vpx_tree_index extern const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)]; #else -extern const vpx_tree_index - vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)]; +extern const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)]; #endif // CONFIG_EXT_TX #if CONFIG_OBMC || CONFIG_WARPED_MOTION extern const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)]; @@ -263,8 +264,8 @@ static INLINE int vp10_ceil_log2(int n) { return i; } -int vp10_get_palette_color_context(const uint8_t *color_map, int cols, - int r, int c, int n, int *color_order); +int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r, + int c, int n, int *color_order); #ifdef __cplusplus } // extern "C" diff --git a/vp10/common/entropymv.c b/vp10/common/entropymv.c index 925cc1e1806f676ec650ed5c16841638b882541d..3fa43291c2a866058969c7a2b457b96ecb3400af 100644 --- a/vp10/common/entropymv.c +++ b/vp10/common/entropymv.c @@ -15,11 +15,10 @@ #define COMPANDED_MVREF_THRESH 8 const vpx_tree_index vp10_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { - -MV_JOINT_ZERO, 2, - -MV_JOINT_HNZVZ, 4, - -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ + -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ }; +/* clang-format off */ const vpx_tree_index vp10_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { -MV_CLASS_0, 2, -MV_CLASS_1, 4, @@ -32,104 +31,97 @@ const vpx_tree_index vp10_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, }; +/* clang-format on */ const vpx_tree_index vp10_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1, }; -const vpx_tree_index vp10_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { - -0, 2, - -1, 4, - -2, -3 -}; +const vpx_tree_index vp10_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1, + 4, -2, -3 }; static const nmv_context default_nmv_context = { #if CONFIG_REF_MV - {1, 64, 96}, + { 1, 64, 96 }, 128, #else - {32, 64, 96}, + { 32, 64, 96 }, #endif - { - { // Vertical component - 128, // sign - {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class - {216}, // class0 - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits - {{128, 128, 64}, {96, 112, 64}}, // class0_fp - {64, 96, 64}, // fp - 160, // class0_hp bit - 128, // hp + { { + // Vertical component + 128, // sign + { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, // class + { 216 }, // class0 + { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits + { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp + { 64, 96, 64 }, // fp + 160, // class0_hp bit + 128, // hp }, - { // Horizontal component - 128, // sign - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class - {208}, // class0 - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits - {{128, 128, 64}, {96, 112, 64}}, // class0_fp - {64, 96, 64}, // fp - 160, // class0_hp bit - 128, // hp - } - }, + { + // Horizontal component + 128, // sign + { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, // class + { 208 }, // class0 + { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits + { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp + { 64, 96, 64 }, // fp + 160, // class0_hp bit + 128, // hp + } }, }; static const uint8_t log_in_base_2[] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 }; #if CONFIG_GLOBAL_MOTION -const vpx_tree_index vp10_global_motion_types_tree - [TREE_SIZE(GLOBAL_MOTION_TYPES)] = { - -GLOBAL_ZERO, 2, - -GLOBAL_TRANSLATION, 4, - -GLOBAL_ROTZOOM, -GLOBAL_AFFINE -}; +const vpx_tree_index + vp10_global_motion_types_tree[TREE_SIZE(GLOBAL_MOTION_TYPES)] = { + -GLOBAL_ZERO, 2, -GLOBAL_TRANSLATION, 4, -GLOBAL_ROTZOOM, -GLOBAL_AFFINE + }; -static const vpx_prob default_global_motion_types_prob - [GLOBAL_MOTION_TYPES - 1] = {224, 128, 128}; +static const vpx_prob default_global_motion_types_prob[GLOBAL_MOTION_TYPES - + 1] = { 224, 128, 128 }; #endif // CONFIG_GLOBAL_MOTION static INLINE int mv_class_base(MV_CLASS_TYPE c) { @@ -137,48 +129,45 @@ static INLINE int mv_class_base(MV_CLASS_TYPE c) { } MV_CLASS_TYPE vp10_get_mv_class(int z, int *offset) { - const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? - MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; - if (offset) - *offset = z - mv_class_base(c); + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) + ? MV_CLASS_10 + : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; + if (offset) *offset = z - mv_class_base(c); return c; } // TODO(jingning): This idle function is intentionally left as is for // experimental purpose. int vp10_use_mv_hp(const MV *ref) { - (void) ref; + (void)ref; return 1; } -static void inc_mv_component(int v, nmv_component_counts *comp_counts, - int incr, int usehp) { +static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, + int usehp) { int s, z, c, o, d, e, f; - assert(v != 0); /* should not be zero */ + assert(v != 0); /* should not be zero */ s = v < 0; comp_counts->sign[s] += incr; - z = (s ? -v : v) - 1; /* magnitude - 1 */ + z = (s ? -v : v) - 1; /* magnitude - 1 */ c = vp10_get_mv_class(z, &o); comp_counts->classes[c] += incr; - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ if (c == MV_CLASS_0) { comp_counts->class0[d] += incr; comp_counts->class0_fp[d][f] += incr; - if (usehp) - comp_counts->class0_hp[e] += incr; + if (usehp) comp_counts->class0_hp[e] += incr; } else { int i; int b = c + CLASS0_BITS - 1; // number of bits - for (i = 0; i < b; ++i) - comp_counts->bits[i][((d >> i) & 1)] += incr; + for (i = 0; i < b; ++i) comp_counts->bits[i][((d >> i) & 1)] += incr; comp_counts->fp[f] += incr; - if (usehp) - comp_counts->hp[e] += incr; + if (usehp) comp_counts->hp[e] += incr; } } @@ -188,8 +177,7 @@ void vp10_inc_mv(const MV *mv, nmv_context_counts *counts, const int usehp) { #if CONFIG_REF_MV ++counts->zero_rmv[j == MV_JOINT_ZERO]; - if (j == MV_JOINT_ZERO) - return; + if (j == MV_JOINT_ZERO) return; #endif ++counts->joints[j]; @@ -238,8 +226,8 @@ void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) { vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); if (allow_hp) { - comp->class0_hp = vp10_mode_mv_merge_probs(pre_comp->class0_hp, - c->class0_hp); + comp->class0_hp = + vp10_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); comp->hp = vp10_mode_mv_merge_probs(pre_comp->hp, c->hp); } } @@ -273,8 +261,8 @@ void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) { vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); if (allow_hp) { - comp->class0_hp = vp10_mode_mv_merge_probs( - pre_comp->class0_hp, c->class0_hp); + comp->class0_hp = + vp10_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); comp->hp = vp10_mode_mv_merge_probs(pre_comp->hp, c->hp); } } @@ -284,8 +272,7 @@ void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) { void vp10_init_mv_probs(VP10_COMMON *cm) { #if CONFIG_REF_MV int i; - for (i = 0; i < NMV_CONTEXTS; ++i) - cm->fc->nmvc[i] = default_nmv_context; + for (i = 0; i < NMV_CONTEXTS; ++i) cm->fc->nmvc[i] = default_nmv_context; #else cm->fc->nmvc = default_nmv_context; #endif diff --git a/vp10/common/entropymv.h b/vp10/common/entropymv.h index af69a2daa53440ed90240889e835ea7f1da76b68..93eb50cffcaf2e8151476e4b9230887982800dda 100644 --- a/vp10/common/entropymv.h +++ b/vp10/common/entropymv.h @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP10_COMMON_ENTROPYMV_H_ #define VP10_COMMON_ENTROPYMV_H_ @@ -32,12 +31,12 @@ int vp10_use_mv_hp(const MV *ref); #define MV_UPDATE_PROB 252 /* Symbols for coding which components are zero jointly */ -#define MV_JOINTS 4 +#define MV_JOINTS 4 typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ + MV_JOINT_ZERO = 0, /* Zero vector */ + MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ + MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ + MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ } MV_JOINT_TYPE; static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { @@ -49,33 +48,33 @@ static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { } /* Symbols for coding magnitude class of nonzero components */ -#define MV_CLASSES 11 +#define MV_CLASSES 11 typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - MV_CLASS_10 = 10, /* (1024,2048] integer pel */ + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ + MV_CLASS_8 = 8, /* (256, 512] integer pel */ + MV_CLASS_9 = 9, /* (512, 1024] integer pel */ + MV_CLASS_10 = 10, /* (1024,2048] integer pel */ } MV_CLASS_TYPE; -#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ -#define CLASS0_SIZE (1 << CLASS0_BITS) +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) #define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) #define MV_FP_SIZE 4 -#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) -#define MV_MAX ((1 << MV_MAX_BITS) - 1) -#define MV_VALS ((MV_MAX << 1) + 1) +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) #define MV_IN_USE_BITS 14 -#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) -#define MV_LOW (-(1 << MV_IN_USE_BITS)) +#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) +#define MV_LOW (-(1 << MV_IN_USE_BITS)) extern const vpx_tree_index vp10_mv_joint_tree[]; extern const vpx_tree_index vp10_mv_class_tree[]; @@ -133,8 +132,8 @@ typedef struct { void vp10_inc_mv(const MV *mv, nmv_context_counts *mvctx, const int usehp); #if CONFIG_GLOBAL_MOTION -extern const vpx_tree_index vp10_global_motion_types_tree - [TREE_SIZE(GLOBAL_MOTION_TYPES)]; +extern const vpx_tree_index + vp10_global_motion_types_tree[TREE_SIZE(GLOBAL_MOTION_TYPES)]; #endif // CONFIG_GLOBAL_MOTION #ifdef __cplusplus diff --git a/vp10/common/enums.h b/vp10/common/enums.h index f2c49913a3b5a79e8107258bd77e58df81c0bb30..e02fa0c80d188bd3cf12b113e2cc68cf0f586b77 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -22,38 +22,38 @@ extern "C" { // Max superblock size #if CONFIG_EXT_PARTITION -# define MAX_SB_SIZE_LOG2 7 +#define MAX_SB_SIZE_LOG2 7 #else -# define MAX_SB_SIZE_LOG2 6 +#define MAX_SB_SIZE_LOG2 6 #endif // CONFIG_EXT_PARTITION -#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) +#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2) #define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE) // Min superblock size #define MIN_SB_SIZE_LOG2 6 // Pixels per Mode Info (MI) unit -#define MI_SIZE_LOG2 3 -#define MI_SIZE (1 << MI_SIZE_LOG2) +#define MI_SIZE_LOG2 3 +#define MI_SIZE (1 << MI_SIZE_LOG2) // MI-units per max superblock (MI Block - MIB) #define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2) -#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2) +#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2) // MI-units per min superblock #define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2) // Mask to extract MI offset within max MIB -#define MAX_MIB_MASK (MAX_MIB_SIZE - 1) -#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1) +#define MAX_MIB_MASK (MAX_MIB_SIZE - 1) +#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1) // Maximum number of tile rows and tile columns #if CONFIG_EXT_TILE -# define MAX_TILE_ROWS 1024 -# define MAX_TILE_COLS 1024 +#define MAX_TILE_ROWS 1024 +#define MAX_TILE_COLS 1024 #else -# define MAX_TILE_ROWS 4 -# define MAX_TILE_COLS 64 +#define MAX_TILE_ROWS 4 +#define MAX_TILE_COLS 64 #endif // CONFIG_EXT_TILE // Bitstream profiles indicated by 2-3 bits in the uncompressed header. @@ -71,26 +71,26 @@ typedef enum BITSTREAM_PROFILE { MAX_PROFILES } BITSTREAM_PROFILE; -#define BLOCK_4X4 0 -#define BLOCK_4X8 1 -#define BLOCK_8X4 2 -#define BLOCK_8X8 3 -#define BLOCK_8X16 4 -#define BLOCK_16X8 5 -#define BLOCK_16X16 6 -#define BLOCK_16X32 7 -#define BLOCK_32X16 8 -#define BLOCK_32X32 9 -#define BLOCK_32X64 10 -#define BLOCK_64X32 11 -#define BLOCK_64X64 12 +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 +#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 #if !CONFIG_EXT_PARTITION -# define BLOCK_SIZES 13 +#define BLOCK_SIZES 13 #else -# define BLOCK_64X128 13 -# define BLOCK_128X64 14 -# define BLOCK_128X128 15 -# define BLOCK_SIZES 16 +#define BLOCK_64X128 13 +#define BLOCK_128X64 14 +#define BLOCK_128X128 15 +#define BLOCK_SIZES 16 #endif // !CONFIG_EXT_PARTITION #define BLOCK_INVALID BLOCK_SIZES #define BLOCK_LARGEST (BLOCK_SIZES - 1) @@ -122,50 +122,49 @@ typedef enum PARTITION_TYPE { #endif // CONFIG_EXT_PARTITION_TYPES typedef char PARTITION_CONTEXT; -#define PARTITION_PLOFFSET 4 // number of probability models per block size +#define PARTITION_PLOFFSET 4 // number of probability models per block size #if CONFIG_EXT_PARTITION -# define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET) +#define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET) #else -# define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) +#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) #endif // CONFIG_EXT_PARTITION // block transform size typedef uint8_t TX_SIZE; -#define TX_4X4 ((TX_SIZE)0) // 4x4 transform -#define TX_8X8 ((TX_SIZE)1) // 8x8 transform -#define TX_16X16 ((TX_SIZE)2) // 16x16 transform -#define TX_32X32 ((TX_SIZE)3) // 32x32 transform +#define TX_4X4 ((TX_SIZE)0) // 4x4 transform +#define TX_8X8 ((TX_SIZE)1) // 8x8 transform +#define TX_16X16 ((TX_SIZE)2) // 16x16 transform +#define TX_32X32 ((TX_SIZE)3) // 32x32 transform #define TX_SIZES ((TX_SIZE)4) #if CONFIG_EXT_TX -#define TX_4X8 ((TX_SIZE)4) // 4x8 transform -#define TX_8X4 ((TX_SIZE)5) // 8x4 transform +#define TX_4X8 ((TX_SIZE)4) // 4x8 transform +#define TX_8X4 ((TX_SIZE)5) // 8x4 transform #define TX_SIZES_ALL ((TX_SIZE)6) // Includes rectangular transforms #else #define TX_SIZES_ALL ((TX_SIZE)4) #endif // CONFIG_EXT_TX -#define MAX_TX_SIZE_LOG2 5 -#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2) -#define MIN_TX_SIZE_LOG2 2 -#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2) -#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE) +#define MAX_TX_SIZE_LOG2 5 +#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2) +#define MIN_TX_SIZE_LOG2 2 +#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2) +#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE) // Number of maxium size transform blocks in the maximum size superblock -#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 \ - ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2) +#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2) #define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2) -#define MAX_NUM_TXB (1 << (MAX_SB_SIZE_LOG2 - MIN_TX_SIZE_LOG2)) +#define MAX_NUM_TXB (1 << (MAX_SB_SIZE_LOG2 - MIN_TX_SIZE_LOG2)) // frame transform mode typedef enum { - ONLY_4X4 = 0, // only 4x4 transform used - ALLOW_8X8 = 1, // allow block transform size up to 8x8 - ALLOW_16X16 = 2, // allow block transform size up to 16x16 - ALLOW_32X32 = 3, // allow block transform size up to 32x32 - TX_MODE_SELECT = 4, // transform specified for each block - TX_MODES = 5, + ONLY_4X4 = 0, // only 4x4 transform used + ALLOW_8X8 = 1, // allow block transform size up to 8x8 + ALLOW_16X16 = 2, // allow block transform size up to 16x16 + ALLOW_32X32 = 3, // allow block transform size up to 32x32 + TX_MODE_SELECT = 4, // transform specified for each block + TX_MODES = 5, } TX_MODE; // 1D tx types @@ -178,10 +177,10 @@ typedef enum { } TX_TYPE_1D; typedef enum { - DCT_DCT = 0, // DCT in both horizontal and vertical - ADST_DCT = 1, // ADST in vertical, DCT in horizontal - DCT_ADST = 2, // DCT in vertical, ADST in horizontal - ADST_ADST = 3, // ADST in both directions + DCT_DCT = 0, // DCT in both horizontal and vertical + ADST_DCT = 1, // ADST in vertical, DCT in horizontal + DCT_ADST = 2, // DCT in vertical, ADST in horizontal + ADST_ADST = 3, // ADST in both directions #if CONFIG_EXT_TX FLIPADST_DCT = 4, DCT_FLIPADST = 5, @@ -200,12 +199,12 @@ typedef enum { } TX_TYPE; #if CONFIG_EXT_TX -#define EXT_TX_SIZES 4 // number of sizes that use extended transforms -#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER -#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA +#define EXT_TX_SIZES 4 // number of sizes that use extended transforms +#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER +#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA #else -#define EXT_TX_SIZES 3 // number of sizes that use extended transforms -#endif // CONFIG_EXT_TX +#define EXT_TX_SIZES 3 // number of sizes that use extended transforms +#endif // CONFIG_EXT_TX typedef enum { VPX_LAST_FLAG = 1 << 0, @@ -223,11 +222,7 @@ typedef enum { #endif // CONFIG_EXT_REFS } VPX_REFFRAME; -typedef enum { - PLANE_TYPE_Y = 0, - PLANE_TYPE_UV = 1, - PLANE_TYPES -} PLANE_TYPE; +typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE; typedef enum { TWO_COLORS, @@ -252,35 +247,35 @@ typedef enum { PALETTE_COLORS } PALETTE_COLOR; -#define DC_PRED 0 // Average of above and left pixels -#define V_PRED 1 // Vertical -#define H_PRED 2 // Horizontal -#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) -#define D135_PRED 4 // Directional 135 deg = 180 - 45 -#define D117_PRED 5 // Directional 117 deg = 180 - 63 -#define D153_PRED 6 // Directional 153 deg = 180 - 27 -#define D207_PRED 7 // Directional 207 deg = 180 + 27 -#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) -#define TM_PRED 9 // True-motion +#define DC_PRED 0 // Average of above and left pixels +#define V_PRED 1 // Vertical +#define H_PRED 2 // Horizontal +#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) +#define D135_PRED 4 // Directional 135 deg = 180 - 45 +#define D117_PRED 5 // Directional 117 deg = 180 - 63 +#define D153_PRED 6 // Directional 153 deg = 180 - 27 +#define D207_PRED 7 // Directional 207 deg = 180 + 27 +#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) +#define TM_PRED 9 // True-motion #define NEARESTMV 10 -#define NEARMV 11 -#define ZEROMV 12 -#define NEWMV 13 +#define NEARMV 11 +#define ZEROMV 12 +#define NEWMV 13 #if CONFIG_EXT_INTER -#define NEWFROMNEARMV 14 +#define NEWFROMNEARMV 14 #define NEAREST_NEARESTMV 15 -#define NEAREST_NEARMV 16 -#define NEAR_NEARESTMV 17 -#define NEAR_NEARMV 18 -#define NEAREST_NEWMV 19 -#define NEW_NEARESTMV 20 -#define NEAR_NEWMV 21 -#define NEW_NEARMV 22 -#define ZERO_ZEROMV 23 -#define NEW_NEWMV 24 -#define MB_MODE_COUNT 25 +#define NEAREST_NEARMV 16 +#define NEAR_NEARESTMV 17 +#define NEAR_NEARMV 18 +#define NEAREST_NEWMV 19 +#define NEW_NEARESTMV 20 +#define NEAR_NEWMV 21 +#define NEW_NEARMV 22 +#define ZERO_ZEROMV 23 +#define NEW_NEWMV 24 +#define MB_MODE_COUNT 25 #else -#define MB_MODE_COUNT 14 +#define MB_MODE_COUNT 14 #endif // CONFIG_EXT_INTER typedef uint8_t PREDICTION_MODE; @@ -289,11 +284,11 @@ typedef uint8_t PREDICTION_MODE; typedef enum { SIMPLE_TRANSLATION = 0, #if CONFIG_OBMC - OBMC_CAUSAL, // 2-sided OBMC -#endif // CONFIG_OBMC + OBMC_CAUSAL, // 2-sided OBMC +#endif // CONFIG_OBMC #if CONFIG_WARPED_MOTION WARPED_CAUSAL, // 2-sided WARPED -#endif // CONFIG_WARPED_MOTION +#endif // CONFIG_WARPED_MOTION MOTION_VARIATIONS } MOTION_VARIATION; @@ -348,21 +343,21 @@ typedef enum { #if CONFIG_REF_MV #define NMV_CONTEXTS 3 -#define NEWMV_MODE_CONTEXTS 7 +#define NEWMV_MODE_CONTEXTS 7 #define ZEROMV_MODE_CONTEXTS 2 -#define REFMV_MODE_CONTEXTS 9 -#define DRL_MODE_CONTEXTS 5 +#define REFMV_MODE_CONTEXTS 9 +#define DRL_MODE_CONTEXTS 5 #define ZEROMV_OFFSET 3 -#define REFMV_OFFSET 4 +#define REFMV_OFFSET 4 #define NEWMV_CTX_MASK ((1 << ZEROMV_OFFSET) - 1) #define ZEROMV_CTX_MASK ((1 << (REFMV_OFFSET - ZEROMV_OFFSET)) - 1) #define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1) -#define ALL_ZERO_FLAG_OFFSET 8 -#define SKIP_NEARESTMV_OFFSET 9 -#define SKIP_NEARMV_OFFSET 10 +#define ALL_ZERO_FLAG_OFFSET 8 +#define SKIP_NEARESTMV_OFFSET 9 +#define SKIP_NEARMV_OFFSET 10 #define SKIP_NEARESTMV_SUB8X8_OFFSET 11 #endif @@ -389,20 +384,20 @@ typedef enum { typedef TX_SIZE TXFM_CONTEXT; #endif -#define NONE -1 -#define INTRA_FRAME 0 -#define LAST_FRAME 1 +#define NONE -1 +#define INTRA_FRAME 0 +#define LAST_FRAME 1 #if CONFIG_EXT_REFS -#define LAST2_FRAME 2 -#define LAST3_FRAME 3 -#define GOLDEN_FRAME 4 -#define BWDREF_FRAME 5 -#define ALTREF_FRAME 6 +#define LAST2_FRAME 2 +#define LAST3_FRAME 3 +#define GOLDEN_FRAME 4 +#define BWDREF_FRAME 5 +#define ALTREF_FRAME 6 #define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1) #else -#define GOLDEN_FRAME 2 -#define ALTREF_FRAME 3 +#define GOLDEN_FRAME 2 +#define ALTREF_FRAME 3 #endif // CONFIG_EXT_REFS #define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1) @@ -419,7 +414,7 @@ typedef TX_SIZE TXFM_CONTEXT; #endif // CONFIG_EXT_REFS #define SINGLE_REFS (FWD_REFS + BWD_REFS) -#define COMP_REFS (FWD_REFS * BWD_REFS) +#define COMP_REFS (FWD_REFS * BWD_REFS) #if CONFIG_REF_MV #define MODE_CTX_REF_FRAMES (TOTAL_REFS_PER_FRAME + COMP_REFS) diff --git a/vp10/common/filter.c b/vp10/common/filter.c index 8427237c1a4fda0252af974401fe03e082596c00..b2d8aeb9743412190947d870d3b49b0d4a1e4a9a 100644 --- a/vp10/common/filter.c +++ b/vp10/common/filter.c @@ -14,44 +14,36 @@ DECLARE_ALIGNED(256, static const InterpKernel, bilinear_filters[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, - { 0, 0, 0, 120, 8, 0, 0, 0 }, - { 0, 0, 0, 112, 16, 0, 0, 0 }, - { 0, 0, 0, 104, 24, 0, 0, 0 }, - { 0, 0, 0, 96, 32, 0, 0, 0 }, - { 0, 0, 0, 88, 40, 0, 0, 0 }, - { 0, 0, 0, 80, 48, 0, 0, 0 }, - { 0, 0, 0, 72, 56, 0, 0, 0 }, - { 0, 0, 0, 64, 64, 0, 0, 0 }, - { 0, 0, 0, 56, 72, 0, 0, 0 }, - { 0, 0, 0, 48, 80, 0, 0, 0 }, - { 0, 0, 0, 40, 88, 0, 0, 0 }, - { 0, 0, 0, 32, 96, 0, 0, 0 }, - { 0, 0, 0, 24, 104, 0, 0, 0 }, - { 0, 0, 0, 16, 112, 0, 0, 0 }, - { 0, 0, 0, 8, 120, 0, 0, 0 } + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, + { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 }, + { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 }, + { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 }, + { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 }, + { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 }, + { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 }, + { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 } }; #if USE_TEMPORALFILTER_12TAP DECLARE_ALIGNED(16, static const int16_t, sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = { // intfilt 0.8 - {0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0}, - {0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0}, - {0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0}, - {-1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1}, - {-1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1}, - {-1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1}, - {-1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1}, - {-1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1}, - {-1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1}, - {-1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1}, - {-1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1}, - {-1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1}, - {-1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1}, - {-1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1}, - {0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0}, - {0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0}, + { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 }, + { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0 }, + { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0 }, + { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1 }, + { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1 }, + { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1 }, + { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1 }, + { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1 }, + { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1 }, + { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1 }, + { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1 }, + { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1 }, + { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1 }, + { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1 }, + { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0 }, + { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0 }, }; #endif // USE_TEMPORALFILTER_12TAP @@ -59,193 +51,137 @@ DECLARE_ALIGNED(16, static const int16_t, DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8[SUBPEL_SHIFTS]) = { // intfilt 0.575 - {0, 0, 0, 128, 0, 0, 0, 0}, - {0, 1, -5, 126, 8, -3, 1, 0}, - {-1, 3, -10, 123, 18, -6, 2, -1}, - {-1, 4, -14, 118, 27, -9, 3, 0}, - {-1, 5, -16, 112, 37, -12, 4, -1}, - {-1, 5, -18, 105, 48, -14, 4, -1}, - {-1, 6, -19, 97, 58, -17, 5, -1}, - {-1, 6, -20, 88, 68, -18, 6, -1}, - {-1, 6, -19, 78, 78, -19, 6, -1}, - {-1, 6, -18, 68, 88, -20, 6, -1}, - {-1, 5, -17, 58, 97, -19, 6, -1}, - {-1, 4, -14, 48, 105, -18, 5, -1}, - {-1, 4, -12, 37, 112, -16, 5, -1}, - {0, 3, -9, 27, 118, -14, 4, -1}, - {-1, 2, -6, 18, 123, -10, 3, -1}, - {0, 1, -3, 8, 126, -5, 1, 0}, + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 }, + { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 4, -14, 118, 27, -9, 3, 0 }, + { -1, 5, -16, 112, 37, -12, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 }, + { -1, 6, -19, 97, 58, -17, 5, -1 }, { -1, 6, -20, 88, 68, -18, 6, -1 }, + { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 6, -18, 68, 88, -20, 6, -1 }, + { -1, 5, -17, 58, 97, -19, 6, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 }, + { -1, 4, -12, 37, 112, -16, 5, -1 }, { 0, 3, -9, 27, 118, -14, 4, -1 }, + { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 }, }; #if CONFIG_EXT_INTRA DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = { // intfilt 0.8 - {0, 0, 0, 128, 0, 0, 0, 0}, - {-1, 2, -6, 127, 9, -4, 2, -1}, - {-2, 5, -12, 124, 18, -7, 4, -2}, - {-2, 7, -16, 119, 28, -11, 5, -2}, - {-3, 8, -19, 114, 38, -14, 7, -3}, - {-3, 9, -22, 107, 49, -17, 8, -3}, - {-4, 10, -23, 99, 60, -20, 10, -4}, - {-4, 11, -23, 90, 70, -22, 10, -4}, - {-4, 11, -23, 80, 80, -23, 11, -4}, - {-4, 10, -22, 70, 90, -23, 11, -4}, - {-4, 10, -20, 60, 99, -23, 10, -4}, - {-3, 8, -17, 49, 107, -22, 9, -3}, - {-3, 7, -14, 38, 114, -19, 8, -3}, - {-2, 5, -11, 28, 119, -16, 7, -2}, - {-2, 4, -7, 18, 124, -12, 5, -2}, - {-1, 2, -4, 9, 127, -6, 2, -1}, + { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 2, -6, 127, 9, -4, 2, -1 }, + { -2, 5, -12, 124, 18, -7, 4, -2 }, { -2, 7, -16, 119, 28, -11, 5, -2 }, + { -3, 8, -19, 114, 38, -14, 7, -3 }, { -3, 9, -22, 107, 49, -17, 8, -3 }, + { -4, 10, -23, 99, 60, -20, 10, -4 }, { -4, 11, -23, 90, 70, -22, 10, -4 }, + { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -22, 70, 90, -23, 11, -4 }, + { -4, 10, -20, 60, 99, -23, 10, -4 }, { -3, 8, -17, 49, 107, -22, 9, -3 }, + { -3, 7, -14, 38, 114, -19, 8, -3 }, { -2, 5, -11, 28, 119, -16, 7, -2 }, + { -2, 4, -7, 18, 124, -12, 5, -2 }, { -1, 2, -4, 9, 127, -6, 2, -1 }, }; #endif // CONFIG_EXT_INTRA DECLARE_ALIGNED(256, static const int16_t, sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]) = { // intfilt 0.77 - {0, 0, 0, 0, 128, 0, 0, 0, 0, 0}, - {0, -1, 3, -6, 127, 8, -4, 2, -1, 0}, - {1, -2, 5, -12, 124, 18, -7, 3, -2, 0}, - {1, -3, 7, -17, 119, 28, -11, 5, -2, 1}, - {1, -4, 8, -20, 114, 38, -14, 7, -3, 1}, - {1, -4, 9, -22, 107, 49, -17, 8, -4, 1}, - {2, -5, 10, -24, 99, 59, -20, 9, -4, 2}, - {2, -5, 10, -24, 90, 70, -22, 10, -5, 2}, - {2, -5, 10, -23, 80, 80, -23, 10, -5, 2}, - {2, -5, 10, -22, 70, 90, -24, 10, -5, 2}, - {2, -4, 9, -20, 59, 99, -24, 10, -5, 2}, - {1, -4, 8, -17, 49, 107, -22, 9, -4, 1}, - {1, -3, 7, -14, 38, 114, -20, 8, -4, 1}, - {1, -2, 5, -11, 28, 119, -17, 7, -3, 1}, - {0, -2, 3, -7, 18, 124, -12, 5, -2, 1}, - {0, -1, 2, -4, 8, 127, -6, 3, -1, 0}, + { 0, 0, 0, 0, 128, 0, 0, 0, 0, 0 }, + { 0, -1, 3, -6, 127, 8, -4, 2, -1, 0 }, + { 1, -2, 5, -12, 124, 18, -7, 3, -2, 0 }, + { 1, -3, 7, -17, 119, 28, -11, 5, -2, 1 }, + { 1, -4, 8, -20, 114, 38, -14, 7, -3, 1 }, + { 1, -4, 9, -22, 107, 49, -17, 8, -4, 1 }, + { 2, -5, 10, -24, 99, 59, -20, 9, -4, 2 }, + { 2, -5, 10, -24, 90, 70, -22, 10, -5, 2 }, + { 2, -5, 10, -23, 80, 80, -23, 10, -5, 2 }, + { 2, -5, 10, -22, 70, 90, -24, 10, -5, 2 }, + { 2, -4, 9, -20, 59, 99, -24, 10, -5, 2 }, + { 1, -4, 8, -17, 49, 107, -22, 9, -4, 1 }, + { 1, -3, 7, -14, 38, 114, -20, 8, -4, 1 }, + { 1, -2, 5, -11, 28, 119, -17, 7, -3, 1 }, + { 0, -2, 3, -7, 18, 124, -12, 5, -2, 1 }, + { 0, -1, 2, -4, 8, 127, -6, 3, -1, 0 }, }; DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = { -// freqmultiplier = 0.35 - {0, 0, 0, 128, 0, 0, 0, 0}, - {-1, 8, 31, 47, 34, 10, 0, -1}, - {-1, 7, 29, 46, 36, 12, 0, -1}, - {-1, 6, 28, 46, 37, 13, 0, -1}, - {-1, 5, 26, 46, 38, 14, 1, -1}, - {-1, 4, 25, 45, 39, 16, 1, -1}, - {-1, 4, 23, 44, 41, 17, 1, -1}, - {-1, 3, 21, 44, 42, 18, 2, -1}, - {-1, 2, 20, 43, 43, 20, 2, -1}, - {-1, 2, 18, 42, 44, 21, 3, -1}, - {-1, 1, 17, 41, 44, 23, 4, -1}, - {-1, 1, 16, 39, 45, 25, 4, -1}, - {-1, 1, 14, 38, 46, 26, 5, -1}, - {-1, 0, 13, 37, 46, 28, 6, -1}, - {-1, 0, 12, 36, 46, 29, 7, -1}, - {-1, 0, 10, 34, 47, 31, 8, -1}, + // freqmultiplier = 0.35 + { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 8, 31, 47, 34, 10, 0, -1 }, + { -1, 7, 29, 46, 36, 12, 0, -1 }, { -1, 6, 28, 46, 37, 13, 0, -1 }, + { -1, 5, 26, 46, 38, 14, 1, -1 }, { -1, 4, 25, 45, 39, 16, 1, -1 }, + { -1, 4, 23, 44, 41, 17, 1, -1 }, { -1, 3, 21, 44, 42, 18, 2, -1 }, + { -1, 2, 20, 43, 43, 20, 2, -1 }, { -1, 2, 18, 42, 44, 21, 3, -1 }, + { -1, 1, 17, 41, 44, 23, 4, -1 }, { -1, 1, 16, 39, 45, 25, 4, -1 }, + { -1, 1, 14, 38, 46, 26, 5, -1 }, { -1, 0, 13, 37, 46, 28, 6, -1 }, + { -1, 0, 12, 36, 46, 29, 7, -1 }, { -1, 0, 10, 34, 47, 31, 8, -1 }, }; DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = { -// freqmultiplier = 0.75 - {0, 0, 0, 128, 0, 0, 0, 0}, - {2, -10, 19, 95, 31, -11, 2, 0}, - {2, -9, 14, 94, 37, -12, 2, 0}, - {2, -8, 9, 92, 43, -12, 1, 1}, - {2, -7, 5, 90, 49, -12, 1, 0}, - {2, -5, 1, 86, 55, -12, 0, 1}, - {1, -4, -2, 82, 61, -11, 0, 1}, - {1, -3, -5, 77, 67, -9, -1, 1}, - {1, -2, -7, 72, 72, -7, -2, 1}, - {1, -1, -9, 67, 77, -5, -3, 1}, - {1, 0, -11, 61, 82, -2, -4, 1}, - {1, 0, -12, 55, 86, 1, -5, 2}, - {0, 1, -12, 49, 90, 5, -7, 2}, - {1, 1, -12, 43, 92, 9, -8, 2}, - {0, 2, -12, 37, 94, 14, -9, 2}, - {0, 2, -11, 31, 95, 19, -10, 2}, + // freqmultiplier = 0.75 + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 2, -10, 19, 95, 31, -11, 2, 0 }, + { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 9, 92, 43, -12, 1, 1 }, + { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -5, 1, 86, 55, -12, 0, 1 }, + { 1, -4, -2, 82, 61, -11, 0, 1 }, { 1, -3, -5, 77, 67, -9, -1, 1 }, + { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -1, -9, 67, 77, -5, -3, 1 }, + { 1, 0, -11, 61, 82, -2, -4, 1 }, { 1, 0, -12, 55, 86, 1, -5, 2 }, + { 0, 1, -12, 49, 90, 5, -7, 2 }, { 1, 1, -12, 43, 92, 9, -8, 2 }, + { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -11, 31, 95, 19, -10, 2 }, }; DECLARE_ALIGNED(16, static const int16_t, sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = { // intfilt 0.85 - {0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0}, - {0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0}, - {-1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1}, - {-1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1}, - {-1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1}, - {-2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2}, - {-2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2}, - {-2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2}, - {-2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2}, - {-2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2}, - {-2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2}, - {-2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2}, - {-1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1}, - {-1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1}, - {-1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1}, - {0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0}, + { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 }, + { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0 }, + { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1 }, + { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1 }, + { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1 }, + { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2 }, + { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2 }, + { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2 }, + { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2 }, + { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2 }, + { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2 }, + { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2 }, + { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1 }, + { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1 }, + { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1 }, + { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0 }, }; -#else // CONFIG_EXT_INTERP +#else // CONFIG_EXT_INTERP DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8[SUBPEL_SHIFTS]) = { // Lagrangian interpolation filter - { 0, 0, 0, 128, 0, 0, 0, 0}, - { 0, 1, -5, 126, 8, -3, 1, 0}, - { -1, 3, -10, 122, 18, -6, 2, 0}, - { -1, 4, -13, 118, 27, -9, 3, -1}, - { -1, 4, -16, 112, 37, -11, 4, -1}, - { -1, 5, -18, 105, 48, -14, 4, -1}, - { -1, 5, -19, 97, 58, -16, 5, -1}, - { -1, 6, -19, 88, 68, -18, 5, -1}, - { -1, 6, -19, 78, 78, -19, 6, -1}, - { -1, 5, -18, 68, 88, -19, 6, -1}, - { -1, 5, -16, 58, 97, -19, 5, -1}, - { -1, 4, -14, 48, 105, -18, 5, -1}, - { -1, 4, -11, 37, 112, -16, 4, -1}, - { -1, 3, -9, 27, 118, -13, 4, -1}, - { 0, 2, -6, 18, 122, -10, 3, -1}, - { 0, 1, -3, 8, 126, -5, 1, 0} + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 }, + { -1, 3, -10, 122, 18, -6, 2, 0 }, { -1, 4, -13, 118, 27, -9, 3, -1 }, + { -1, 4, -16, 112, 37, -11, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 }, + { -1, 5, -19, 97, 58, -16, 5, -1 }, { -1, 6, -19, 88, 68, -18, 5, -1 }, + { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 5, -18, 68, 88, -19, 6, -1 }, + { -1, 5, -16, 58, 97, -19, 5, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 }, + { -1, 4, -11, 37, 112, -16, 4, -1 }, { -1, 3, -9, 27, 118, -13, 4, -1 }, + { 0, 2, -6, 18, 122, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 } }; DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = { // DCT based filter - {0, 0, 0, 128, 0, 0, 0, 0}, - {-1, 3, -7, 127, 8, -3, 1, 0}, - {-2, 5, -13, 125, 17, -6, 3, -1}, - {-3, 7, -17, 121, 27, -10, 5, -2}, - {-4, 9, -20, 115, 37, -13, 6, -2}, - {-4, 10, -23, 108, 48, -16, 8, -3}, - {-4, 10, -24, 100, 59, -19, 9, -3}, - {-4, 11, -24, 90, 70, -21, 10, -4}, - {-4, 11, -23, 80, 80, -23, 11, -4}, - {-4, 10, -21, 70, 90, -24, 11, -4}, - {-3, 9, -19, 59, 100, -24, 10, -4}, - {-3, 8, -16, 48, 108, -23, 10, -4}, - {-2, 6, -13, 37, 115, -20, 9, -4}, - {-2, 5, -10, 27, 121, -17, 7, -3}, - {-1, 3, -6, 17, 125, -13, 5, -2}, - {0, 1, -3, 8, 127, -7, 3, -1} + { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 3, -7, 127, 8, -3, 1, 0 }, + { -2, 5, -13, 125, 17, -6, 3, -1 }, { -3, 7, -17, 121, 27, -10, 5, -2 }, + { -4, 9, -20, 115, 37, -13, 6, -2 }, { -4, 10, -23, 108, 48, -16, 8, -3 }, + { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 }, + { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 }, + { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 }, + { -2, 6, -13, 37, 115, -20, 9, -4 }, { -2, 5, -10, 27, 121, -17, 7, -3 }, + { -1, 3, -6, 17, 125, -13, 5, -2 }, { 0, 1, -3, 8, 127, -7, 3, -1 } }; DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = { -// freqmultiplier = 0.5 - { 0, 0, 0, 128, 0, 0, 0, 0}, - {-3, -1, 32, 64, 38, 1, -3, 0}, - {-2, -2, 29, 63, 41, 2, -3, 0}, - {-2, -2, 26, 63, 43, 4, -4, 0}, - {-2, -3, 24, 62, 46, 5, -4, 0}, - {-2, -3, 21, 60, 49, 7, -4, 0}, - {-1, -4, 18, 59, 51, 9, -4, 0}, - {-1, -4, 16, 57, 53, 12, -4, -1}, - {-1, -4, 14, 55, 55, 14, -4, -1}, - {-1, -4, 12, 53, 57, 16, -4, -1}, - { 0, -4, 9, 51, 59, 18, -4, -1}, - { 0, -4, 7, 49, 60, 21, -3, -2}, - { 0, -4, 5, 46, 62, 24, -3, -2}, - { 0, -4, 4, 43, 63, 26, -2, -2}, - { 0, -3, 2, 41, 63, 29, -2, -2}, - { 0, -3, 1, 38, 64, 32, -1, -3} + // freqmultiplier = 0.5 + { 0, 0, 0, 128, 0, 0, 0, 0 }, { -3, -1, 32, 64, 38, 1, -3, 0 }, + { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 26, 63, 43, 4, -4, 0 }, + { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 21, 60, 49, 7, -4, 0 }, + { -1, -4, 18, 59, 51, 9, -4, 0 }, { -1, -4, 16, 57, 53, 12, -4, -1 }, + { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 }, + { 0, -4, 9, 51, 59, 18, -4, -1 }, { 0, -4, 7, 49, 60, 21, -3, -2 }, + { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 4, 43, 63, 26, -2, -2 }, + { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 } }; #endif // CONFIG_EXT_INTERP @@ -260,27 +196,27 @@ const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS] = { #if CONFIG_EXT_INTERP static const InterpFilterParams -vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = { - {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS}, - {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS}, - {(const int16_t*)sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS}, - {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS}, - {(const int16_t*)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS}, - {(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS} -}; + vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = { + { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS }, + { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS }, + { (const int16_t *)sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS }, + { (const int16_t *)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS }, + { (const int16_t *)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS }, + { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS } + }; #else static const InterpFilterParams -vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = { - {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS}, - {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS}, - {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS}, - {(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS} -}; + vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = { + { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS }, + { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS }, + { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS }, + { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS } + }; #endif // CONFIG_EXT_INTERP #if USE_TEMPORALFILTER_12TAP static const InterpFilterParams vp10_interp_temporalfilter_12tap = { - (const int16_t*)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS + (const int16_t *)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS }; #endif // USE_TEMPORALFILTER_12TAP @@ -299,12 +235,12 @@ const int16_t *vp10_get_interp_filter_kernel( if (interp_filter == TEMPORALFILTER_12TAP) return vp10_interp_temporalfilter_12tap.filter_ptr; #endif // USE_TEMPORALFILTER_12TAP - return (const int16_t*) - vp10_interp_filter_params_list[interp_filter].filter_ptr; + return (const int16_t *)vp10_interp_filter_params_list[interp_filter] + .filter_ptr; } -SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir( - const InterpFilterParams p, int index) { +SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(const InterpFilterParams p, + int index) { #if CONFIG_EXT_INTERP && HAVE_SSSE3 if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) { return &sub_pel_filters_12sharp_signal_dir[index][0]; diff --git a/vp10/common/filter.h b/vp10/common/filter.h index 5ebf2a5f43604eb034cf921379544a3ec8e4db62..5f9a6d9b6a3523ca18d4502b15bcb99a2a3a5690 100644 --- a/vp10/common/filter.h +++ b/vp10/common/filter.h @@ -16,26 +16,25 @@ #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" - #ifdef __cplusplus extern "C" { #endif -#define EIGHTTAP_REGULAR 0 -#define EIGHTTAP_SMOOTH 1 -#define MULTITAP_SHARP 2 +#define EIGHTTAP_REGULAR 0 +#define EIGHTTAP_SMOOTH 1 +#define MULTITAP_SHARP 2 #if CONFIG_EXT_INTERP -#define EIGHTTAP_SMOOTH2 3 -#define MULTITAP_SHARP2 4 +#define EIGHTTAP_SMOOTH2 3 +#define MULTITAP_SHARP2 4 -#define MAX_SUBPEL_TAPS 12 +#define MAX_SUBPEL_TAPS 12 -#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn it on for experimentation */ -#define SWITCHABLE_FILTERS 5 /* Number of switchable filters */ +#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn on for experimentation */ +#define SWITCHABLE_FILTERS 5 /* Number of switchable filters */ #else -#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ -#endif // CONFIG_EXT_INTERP +#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ +#endif // CONFIG_EXT_INTERP #define USE_TEMPORALFILTER_12TAP 1 #if USE_TEMPORALFILTER_12TAP @@ -45,12 +44,12 @@ extern "C" { // The codec can operate in four possible inter prediction filter mode: // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. -#define BILINEAR (SWITCHABLE_FILTERS) -#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */ +#define BILINEAR (SWITCHABLE_FILTERS) +#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */ #if CONFIG_DUAL_FILTER #define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4) -#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1) -#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2) +#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1) +#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2) #else #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) #endif @@ -70,7 +69,7 @@ extern const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS]; #endif // CONFIG_EXT_INTRA typedef struct InterpFilterParams { - const int16_t* filter_ptr; + const int16_t *filter_ptr; uint16_t taps; uint16_t subpel_shifts; } InterpFilterParams; @@ -78,10 +77,9 @@ typedef struct InterpFilterParams { InterpFilterParams vp10_get_interp_filter_params( const INTERP_FILTER interp_filter); -const int16_t *vp10_get_interp_filter_kernel( - const INTERP_FILTER interp_filter); +const int16_t *vp10_get_interp_filter_kernel(const INTERP_FILTER interp_filter); -static INLINE const int16_t* vp10_get_interp_filter_subpel_kernel( +static INLINE const int16_t *vp10_get_interp_filter_subpel_kernel( const InterpFilterParams filter_params, const int subpel) { return filter_params.filter_ptr + filter_params.taps * subpel; } @@ -96,8 +94,8 @@ static INLINE int vp10_is_interpolating_filter( extern const int8_t sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]; extern const int8_t sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]; #if CONFIG_VP9_HIGHBITDEPTH -extern const -int16_t sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]; +extern const int16_t + sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]; #endif #endif @@ -117,8 +115,8 @@ typedef const int8_t (*SubpelFilterCoeffs)[16]; typedef const int16_t (*HbdSubpelFilterCoeffs)[8]; #endif -SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir( - const InterpFilterParams p, int index); +SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(const InterpFilterParams p, + int index); SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir( const InterpFilterParams p, int index); diff --git a/vp10/common/frame_buffers.c b/vp10/common/frame_buffers.c index 31ff9602a60ee19ff98b59ffa44901759234deaf..564fa9c2acc7b9d0a5c3101192168be694745d1c 100644 --- a/vp10/common/frame_buffers.c +++ b/vp10/common/frame_buffers.c @@ -19,9 +19,8 @@ int vp10_alloc_internal_frame_buffers(InternalFrameBufferList *list) { list->num_internal_frame_buffers = VPX_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; - list->int_fb = - (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, - sizeof(*list->int_fb)); + list->int_fb = (InternalFrameBuffer *)vpx_calloc( + list->num_internal_frame_buffers, sizeof(*list->int_fb)); return (list->int_fb == NULL); } @@ -39,27 +38,23 @@ void vp10_free_internal_frame_buffers(InternalFrameBufferList *list) { } int vp10_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb) { + vpx_codec_frame_buffer_t *fb) { int i; InternalFrameBufferList *const int_fb_list = (InternalFrameBufferList *)cb_priv; - if (int_fb_list == NULL) - return -1; + if (int_fb_list == NULL) return -1; // Find a free frame buffer. for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { - if (!int_fb_list->int_fb[i].in_use) - break; + if (!int_fb_list->int_fb[i].in_use) break; } - if (i == int_fb_list->num_internal_frame_buffers) - return -1; + if (i == int_fb_list->num_internal_frame_buffers) return -1; if (int_fb_list->int_fb[i].size < min_size) { int_fb_list->int_fb[i].data = (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size); - if (!int_fb_list->int_fb[i].data) - return -1; + if (!int_fb_list->int_fb[i].data) return -1; // This memset is needed for fixing valgrind error from C loop filter // due to access uninitialized memory in frame border. It could be @@ -80,7 +75,6 @@ int vp10_get_frame_buffer(void *cb_priv, size_t min_size, int vp10_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; (void)cb_priv; - if (int_fb) - int_fb->in_use = 0; + if (int_fb) int_fb->in_use = 0; return 0; } diff --git a/vp10/common/frame_buffers.h b/vp10/common/frame_buffers.h index 729ebafb023f900bf0fc17c10ce4e82ef831594e..fa6a2e2592157d709ef6e1cb9b968e702485fe39 100644 --- a/vp10/common/frame_buffers.h +++ b/vp10/common/frame_buffers.h @@ -40,7 +40,7 @@ void vp10_free_internal_frame_buffers(InternalFrameBufferList *list); // |min_size| is the minimum size in bytes needed to decode the next frame. // |fb| pointer to the frame buffer. int vp10_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb); + vpx_codec_frame_buffer_t *fb); // Callback used by libvpx when there are no references to the frame buffer. // |cb_priv| is not used. |fb| pointer to the frame buffer. diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 22eff655bf657ff3ffdf7c5ca504ccdd9c4ba958..5f05b70387f370ddbd663c4609f9f18bbdd9ed88 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -21,7 +21,7 @@ int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_SIZE tx_size) { - (void) tx_type; + (void)tx_type; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { return tx_size == TX_32X32; @@ -41,8 +41,7 @@ static void iidtx4_c(const tran_low_t *input, tran_low_t *output) { static void iidtx8_c(const tran_low_t *input, tran_low_t *output) { int i; - for (i = 0; i < 8; ++i) - output[i] = input[i] * 2; + for (i = 0; i < 8; ++i) output[i] = input[i] * 2; } static void iidtx16_c(const tran_low_t *input, tran_low_t *output) { @@ -53,8 +52,7 @@ static void iidtx16_c(const tran_low_t *input, tran_low_t *output) { static void iidtx32_c(const tran_low_t *input, tran_low_t *output) { int i; - for (i = 0; i < 32; ++i) - output[i] = input[i] * 4; + for (i = 0; i < 32; ++i) output[i] = input[i] * 4; } // For use in lieu of DST @@ -77,32 +75,30 @@ static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output, int bd) { int i; for (i = 0; i < 4; ++i) - output[i] = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[i] * Sqrt2), bd); + output[i] = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd); } static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output, int bd) { int i; - (void) bd; - for (i = 0; i < 8; ++i) - output[i] = input[i] * 2; + (void)bd; + for (i = 0; i < 8; ++i) output[i] = input[i] * 2; } static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output, - int bd) { + int bd) { int i; for (i = 0; i < 16; ++i) - output[i] = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd); + output[i] = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd); } static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output, int bd) { int i; - (void) bd; - for (i = 0; i < 32; ++i) - output[i] = input[i] * 4; + (void)bd; + for (i = 0; i < 32; ++i) output[i] = input[i] * 4; } static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output, @@ -114,8 +110,8 @@ static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output, } // Multiply input by sqrt(2) for (i = 0; i < 16; ++i) { - inputhalf[i] = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[i] * Sqrt2), bd); + inputhalf[i] = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd); } vpx_highbd_idct16_c(inputhalf, output + 16, bd); // Note overall scaling factor is 4 times orthogonal @@ -137,14 +133,15 @@ static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride, } } -#define FLIPUD_PTR(dest, stride, size) do { \ - (dest) = (dest) + ((size) - 1) * (stride); \ - (stride) = - (stride); \ -} while (0) +#define FLIPUD_PTR(dest, stride, size) \ + do { \ + (dest) = (dest) + ((size)-1) * (stride); \ + (stride) = -(stride); \ + } while (0) -static void maybe_flip_strides(uint8_t **dst, int *dstride, - tran_low_t **src, int *sstride, - int tx_type, int sizey, int sizex) { +static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src, + int *sstride, int tx_type, int sizey, + int sizex) { // Note that the transpose of src will be added to dst. In order to LR // flip the addends (in dst coordinates), we UD flip the src. To UD flip // the addends, we UD flip the dst. @@ -157,8 +154,7 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, case V_DCT: case H_DCT: case V_ADST: - case H_ADST: - break; + case H_ADST: break; case FLIPADST_DCT: case FLIPADST_ADST: case V_FLIPADST: @@ -177,9 +173,7 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, // flip LR FLIPUD_PTR(*src, *sstride, sizex); break; - default: - assert(0); - break; + default: assert(0); break; } } @@ -187,7 +181,7 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step[4]; tran_high_t temp1, temp2; - (void) bd; + (void)bd; // stage 1 temp1 = (input[3] + input[1]) * cospi_16_64; temp2 = (input[3] - input[1]) * cospi_16_64; @@ -208,7 +202,7 @@ void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) { void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[8], step2[8]; tran_high_t temp1, temp2; - (void) bd; + (void)bd; // stage 1 step1[0] = input[7]; step1[2] = input[3]; @@ -264,7 +258,7 @@ void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) { // vp9_highbd_igentx16(input, output, bd, Tx16); tran_low_t step1[16], step2[16]; tran_high_t temp1, temp2; - (void) bd; + (void)bd; // stage 1 step1[0] = input[15]; @@ -443,9 +437,9 @@ static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8, } } -static void maybe_flip_strides16(uint16_t **dst, int *dstride, - tran_low_t **src, int *sstride, - int tx_type, int sizey, int sizex) { +static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src, + int *sstride, int tx_type, int sizey, + int sizex) { // Note that the transpose of src will be added to dst. In order to LR // flip the addends (in dst coordinates), we UD flip the src. To UD flip // the addends, we UD flip the dst. @@ -458,8 +452,7 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, case V_DCT: case H_DCT: case V_ADST: - case H_ADST: - break; + case H_ADST: break; case FLIPADST_DCT: case FLIPADST_ADST: case V_FLIPADST: @@ -478,9 +471,7 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, // flip LR FLIPUD_PTR(*src, *sstride, sizex); break; - default: - assert(0); - break; + default: assert(0); break; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -489,24 +480,24 @@ static void maybe_flip_strides16(uint16_t **dst, int *dstride, void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_4[] = { - { idct4_c, idct4_c }, // DCT_DCT - { iadst4_c, idct4_c }, // ADST_DCT - { idct4_c, iadst4_c }, // DCT_ADST + { idct4_c, idct4_c }, // DCT_DCT + { iadst4_c, idct4_c }, // ADST_DCT + { idct4_c, iadst4_c }, // DCT_ADST { iadst4_c, iadst4_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst4_c, idct4_c }, // FLIPADST_DCT - { idct4_c, iadst4_c }, // DCT_FLIPADST + { iadst4_c, idct4_c }, // FLIPADST_DCT + { idct4_c, iadst4_c }, // DCT_FLIPADST { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST { iadst4_c, iadst4_c }, // ADST_FLIPADST { iadst4_c, iadst4_c }, // FLIPADST_ADST { iidtx4_c, iidtx4_c }, // IDTX - { idct4_c, iidtx4_c }, // V_DCT - { iidtx4_c, idct4_c }, // H_DCT + { idct4_c, iidtx4_c }, // V_DCT + { iidtx4_c, idct4_c }, // H_DCT { iadst4_c, iidtx4_c }, // V_ADST { iidtx4_c, iadst4_c }, // H_ADST { iadst4_c, iidtx4_c }, // V_FLIPADST { iidtx4_c, iadst4_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif // CONFIG_EXT_TX }; int i, j; @@ -518,13 +509,13 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, // inverse transform row vectors for (i = 0; i < 4; ++i) { IHT_4[tx_type].rows(input, out[i]); - input += 4; + input += 4; } // transpose - for (i = 1 ; i < 4; i++) { + for (i = 1; i < 4; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -552,24 +543,24 @@ void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_4x8[] = { - { idct8_c, idct4_c }, // DCT_DCT - { iadst8_c, idct4_c }, // ADST_DCT - { idct8_c, iadst4_c }, // DCT_ADST + { idct8_c, idct4_c }, // DCT_DCT + { iadst8_c, idct4_c }, // ADST_DCT + { idct8_c, iadst4_c }, // DCT_ADST { iadst8_c, iadst4_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst8_c, idct4_c }, // FLIPADST_DCT - { idct8_c, iadst4_c }, // DCT_FLIPADST + { iadst8_c, idct4_c }, // FLIPADST_DCT + { idct8_c, iadst4_c }, // DCT_FLIPADST { iadst8_c, iadst4_c }, // FLIPADST_FLIPADST { iadst8_c, iadst4_c }, // ADST_FLIPADST { iadst8_c, iadst4_c }, // FLIPADST_ADST { iidtx8_c, iidtx4_c }, // IDTX - { idct8_c, iidtx4_c }, // V_DCT - { iidtx8_c, idct4_c }, // H_DCT + { idct8_c, iidtx4_c }, // V_DCT + { iidtx8_c, idct4_c }, // H_DCT { iadst8_c, iidtx4_c }, // V_ADST { iidtx8_c, iadst4_c }, // H_ADST { iadst8_c, iidtx4_c }, // V_FLIPADST { iidtx8_c, iadst4_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif // CONFIG_EXT_TX }; int i, j; @@ -582,7 +573,7 @@ void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, IHT_4x8[tx_type].rows(input, outtmp); for (j = 0; j < 4; ++j) out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); - input += 4; + input += 4; } // inverse transform column vectors @@ -607,24 +598,24 @@ void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_8x4[] = { - { idct4_c, idct8_c }, // DCT_DCT - { iadst4_c, idct8_c }, // ADST_DCT - { idct4_c, iadst8_c }, // DCT_ADST + { idct4_c, idct8_c }, // DCT_DCT + { iadst4_c, idct8_c }, // ADST_DCT + { idct4_c, iadst8_c }, // DCT_ADST { iadst4_c, iadst8_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst4_c, idct8_c }, // FLIPADST_DCT - { idct4_c, iadst8_c }, // DCT_FLIPADST + { iadst4_c, idct8_c }, // FLIPADST_DCT + { idct4_c, iadst8_c }, // DCT_FLIPADST { iadst4_c, iadst8_c }, // FLIPADST_FLIPADST { iadst4_c, iadst8_c }, // ADST_FLIPADST { iadst4_c, iadst8_c }, // FLIPADST_ADST { iidtx4_c, iidtx8_c }, // IDTX - { idct4_c, iidtx8_c }, // V_DCT - { iidtx4_c, idct8_c }, // H_DCT + { idct4_c, iidtx8_c }, // V_DCT + { iidtx4_c, idct8_c }, // H_DCT { iadst4_c, iidtx8_c }, // V_ADST { iidtx4_c, iadst8_c }, // H_ADST { iadst4_c, iidtx8_c }, // V_FLIPADST { iidtx4_c, iadst8_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif // CONFIG_EXT_TX }; int i, j; @@ -637,7 +628,7 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, IHT_8x4[tx_type].rows(input, outtmp); for (j = 0; j < 8; ++j) out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2); - input += 8; + input += 8; } // inverse transform column vectors @@ -660,26 +651,26 @@ void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { + int tx_type) { static const transform_2d IHT_8[] = { - { idct8_c, idct8_c }, // DCT_DCT - { iadst8_c, idct8_c }, // ADST_DCT - { idct8_c, iadst8_c }, // DCT_ADST + { idct8_c, idct8_c }, // DCT_DCT + { iadst8_c, idct8_c }, // ADST_DCT + { idct8_c, iadst8_c }, // DCT_ADST { iadst8_c, iadst8_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst8_c, idct8_c }, // FLIPADST_DCT - { idct8_c, iadst8_c }, // DCT_FLIPADST + { iadst8_c, idct8_c }, // FLIPADST_DCT + { idct8_c, iadst8_c }, // DCT_FLIPADST { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST { iadst8_c, iadst8_c }, // ADST_FLIPADST { iadst8_c, iadst8_c }, // FLIPADST_ADST { iidtx8_c, iidtx8_c }, // IDTX - { idct8_c, iidtx8_c }, // V_DCT - { iidtx8_c, idct8_c }, // H_DCT + { idct8_c, iidtx8_c }, // V_DCT + { iidtx8_c, idct8_c }, // H_DCT { iadst8_c, iidtx8_c }, // V_ADST { iidtx8_c, iadst8_c }, // H_ADST { iadst8_c, iidtx8_c }, // V_FLIPADST { iidtx8_c, iadst8_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif // CONFIG_EXT_TX }; int i, j; @@ -691,13 +682,13 @@ void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, // inverse transform row vectors for (i = 0; i < 8; ++i) { IHT_8[tx_type].rows(input, out[i]); - input += 8; + input += 8; } // transpose - for (i = 1 ; i < 8; i++) { + for (i = 1; i < 8; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -723,26 +714,26 @@ void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { + int tx_type) { static const transform_2d IHT_16[] = { - { idct16_c, idct16_c }, // DCT_DCT - { iadst16_c, idct16_c }, // ADST_DCT - { idct16_c, iadst16_c }, // DCT_ADST + { idct16_c, idct16_c }, // DCT_DCT + { iadst16_c, idct16_c }, // ADST_DCT + { idct16_c, iadst16_c }, // DCT_ADST { iadst16_c, iadst16_c }, // ADST_ADST #if CONFIG_EXT_TX - { iadst16_c, idct16_c }, // FLIPADST_DCT - { idct16_c, iadst16_c }, // DCT_FLIPADST + { iadst16_c, idct16_c }, // FLIPADST_DCT + { idct16_c, iadst16_c }, // DCT_FLIPADST { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST { iadst16_c, iadst16_c }, // ADST_FLIPADST { iadst16_c, iadst16_c }, // FLIPADST_ADST { iidtx16_c, iidtx16_c }, // IDTX - { idct16_c, iidtx16_c }, // V_DCT - { iidtx16_c, idct16_c }, // H_DCT + { idct16_c, iidtx16_c }, // V_DCT + { iidtx16_c, idct16_c }, // H_DCT { iadst16_c, iidtx16_c }, // V_ADST { iidtx16_c, iadst16_c }, // H_ADST { iadst16_c, iidtx16_c }, // V_FLIPADST { iidtx16_c, iadst16_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX +#endif // CONFIG_EXT_TX }; int i, j; @@ -754,13 +745,13 @@ void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, // inverse transform row vectors for (i = 0; i < 16; ++i) { IHT_16[tx_type].rows(input, out[i]); - input += 16; + input += 16; } // transpose - for (i = 1 ; i < 16; i++) { + for (i = 1; i < 16; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -789,22 +780,22 @@ void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const transform_2d IHT_32[] = { - { idct32_c, idct32_c }, // DCT_DCT - { ihalfright32_c, idct32_c }, // ADST_DCT - { idct32_c, ihalfright32_c }, // DCT_ADST - { ihalfright32_c, ihalfright32_c }, // ADST_ADST - { ihalfright32_c, idct32_c }, // FLIPADST_DCT - { idct32_c, ihalfright32_c }, // DCT_FLIPADST - { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST - { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST - { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST - { iidtx32_c, iidtx32_c }, // IDTX - { idct32_c, iidtx32_c }, // V_DCT - { iidtx32_c, idct32_c }, // H_DCT - { ihalfright32_c, iidtx16_c }, // V_ADST - { iidtx16_c, ihalfright32_c }, // H_ADST - { ihalfright32_c, iidtx16_c }, // V_FLIPADST - { iidtx16_c, ihalfright32_c }, // H_FLIPADST + { idct32_c, idct32_c }, // DCT_DCT + { ihalfright32_c, idct32_c }, // ADST_DCT + { idct32_c, ihalfright32_c }, // DCT_ADST + { ihalfright32_c, ihalfright32_c }, // ADST_ADST + { ihalfright32_c, idct32_c }, // FLIPADST_DCT + { idct32_c, ihalfright32_c }, // DCT_FLIPADST + { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST + { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST + { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST + { iidtx32_c, iidtx32_c }, // IDTX + { idct32_c, iidtx32_c }, // V_DCT + { iidtx32_c, idct32_c }, // H_DCT + { ihalfright32_c, iidtx16_c }, // V_ADST + { iidtx16_c, ihalfright32_c }, // H_ADST + { ihalfright32_c, iidtx16_c }, // V_FLIPADST + { iidtx16_c, ihalfright32_c }, // H_FLIPADST }; int i, j; @@ -816,13 +807,13 @@ void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, // inverse transform row vectors for (i = 0; i < 32; ++i) { IHT_32[tx_type].rows(input, out[i]); - input += 32; + input += 32; } // transpose - for (i = 1 ; i < 32; i++) { + for (i = 1; i < 32; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -848,16 +839,15 @@ void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, // idct void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { + int eob) { if (eob > 1) vpx_idct4x4_16_add(input, dest, stride); else vpx_idct4x4_1_add(input, dest, stride); } - void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { + int eob) { if (eob > 1) vpx_iwht4x4_16_add(input, dest, stride); else @@ -865,7 +855,7 @@ void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { + int eob) { // If dc is 1, then input[0] is the reconstructed value, do not need // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. @@ -883,11 +873,10 @@ void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { + int eob) { /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ - if (eob == 1) - /* DC only DCT coefficient. */ + if (eob == 1) /* DC only DCT coefficient. */ vpx_idct16x16_1_add(input, dest, stride); else if (eob <= 10) vpx_idct16x16_10_add(input, dest, stride); @@ -896,7 +885,7 @@ void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { + int eob) { if (eob == 1) vpx_idct32x32_1_add(input, dest, stride); else if (eob <= 34) @@ -906,8 +895,8 @@ void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, vpx_idct32x32_1024_add(input, dest, stride); } -void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type, int lossless) { +void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type, int lossless) { if (lossless) { assert(tx_type == DCT_DCT); vp10_iwht4x4_add(input, dest, stride, eob); @@ -915,22 +904,16 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, } switch (tx_type) { - case DCT_DCT: - vp10_idct4x4_add(input, dest, stride, eob); - break; + case DCT_DCT: vp10_idct4x4_add(input, dest, stride, eob); break; case ADST_DCT: case DCT_ADST: - case ADST_ADST: - vp10_iht4x4_16_add(input, dest, stride, tx_type); - break; + case ADST_ADST: vp10_iht4x4_16_add(input, dest, stride, tx_type); break; #if CONFIG_EXT_TX case FLIPADST_DCT: case DCT_FLIPADST: case FLIPADST_FLIPADST: case ADST_FLIPADST: - case FLIPADST_ADST: - vp10_iht4x4_16_add(input, dest, stride, tx_type); - break; + case FLIPADST_ADST: vp10_iht4x4_16_add(input, dest, stride, tx_type); break; case V_DCT: case H_DCT: case V_ADST: @@ -940,49 +923,39 @@ void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, // Use C version since DST only exists in C code vp10_iht4x4_16_add_c(input, dest, stride, tx_type); break; - case IDTX: - inv_idtx_add_c(input, dest, stride, 4, tx_type); - break; + case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } #if CONFIG_EXT_TX -void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type) { - (void) eob; +void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type) { + (void)eob; vp10_iht8x4_32_add(input, dest, stride, tx_type); } -void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type) { - (void) eob; +void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type) { + (void)eob; vp10_iht4x8_32_add(input, dest, stride, tx_type); } #endif // CONFIG_EXT_TX -void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type) { +void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type) { switch (tx_type) { - case DCT_DCT: - vp10_idct8x8_add(input, dest, stride, eob); - break; + case DCT_DCT: vp10_idct8x8_add(input, dest, stride, eob); break; case ADST_DCT: case DCT_ADST: - case ADST_ADST: - vp10_iht8x8_64_add(input, dest, stride, tx_type); - break; + case ADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break; #if CONFIG_EXT_TX case FLIPADST_DCT: case DCT_FLIPADST: case FLIPADST_FLIPADST: case ADST_FLIPADST: - case FLIPADST_ADST: - vp10_iht8x8_64_add(input, dest, stride, tx_type); - break; + case FLIPADST_ADST: vp10_iht8x8_64_add(input, dest, stride, tx_type); break; case V_DCT: case H_DCT: case V_ADST: @@ -992,27 +965,19 @@ void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, // Use C version since DST only exists in C code vp10_iht8x8_64_add_c(input, dest, stride, tx_type); break; - case IDTX: - inv_idtx_add_c(input, dest, stride, 8, tx_type); - break; + case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } -void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type) { +void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type) { switch (tx_type) { - case DCT_DCT: - vp10_idct16x16_add(input, dest, stride, eob); - break; + case DCT_DCT: vp10_idct16x16_add(input, dest, stride, eob); break; case ADST_DCT: case DCT_ADST: - case ADST_ADST: - vp10_iht16x16_256_add(input, dest, stride, tx_type); - break; + case ADST_ADST: vp10_iht16x16_256_add(input, dest, stride, tx_type); break; #if CONFIG_EXT_TX case FLIPADST_DCT: case DCT_FLIPADST: @@ -1030,22 +995,16 @@ void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, // Use C version since DST only exists in C code vp10_iht16x16_256_add_c(input, dest, stride, tx_type); break; - case IDTX: - inv_idtx_add_c(input, dest, stride, 16, tx_type); - break; + case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } -void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type) { +void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type) { switch (tx_type) { - case DCT_DCT: - vp10_idct32x32_add(input, dest, stride, eob); - break; + case DCT_DCT: vp10_idct32x32_add(input, dest, stride, eob); break; #if CONFIG_EXT_TX case ADST_DCT: case DCT_ADST: @@ -1063,38 +1022,34 @@ void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, case H_FLIPADST: vp10_iht32x32_1024_add_c(input, dest, stride, tx_type); break; - case IDTX: - inv_idtx_add_c(input, dest, stride, 32, tx_type); - break; + case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { + int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST #if CONFIG_EXT_TX - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST - { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX - { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT - { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT - { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_ADST - { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_ADST - { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_FLIPADST - { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX + { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX + { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT + { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT + { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_ADST + { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_ADST + { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_FLIPADST + { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_FLIPADST +#endif // CONFIG_EXT_TX }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1108,13 +1063,13 @@ void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, // inverse transform row vectors for (i = 0; i < 4; ++i) { HIGH_IHT_4[tx_type].rows(input, out[i], bd); - input += 4; + input += 4; } // transpose - for (i = 1 ; i < 4; i++) { + for (i = 1; i < 4; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -1134,32 +1089,32 @@ void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, for (j = 0; j < 4; ++j) { int d = i * stride + j; int s = j * outstride + i; - dest[d] = highbd_clip_pixel_add(dest[d], - ROUND_POWER_OF_TWO(outp[s], 4), bd); + dest[d] = + highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd); } } } #if CONFIG_EXT_TX void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { + int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_4x8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct4_c }, // DCT_DCT - { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // ADST_DCT - { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_ADST + { vpx_highbd_idct8_c, vpx_highbd_idct4_c }, // DCT_DCT + { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // ADST_DCT + { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_ADST { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // ADST_ADST - { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // FLIPADST_DCT - { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST + { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // FLIPADST_DCT + { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST - { highbd_iidtx8_c, highbd_iidtx4_c }, // IDTX - { vpx_highbd_idct8_c, highbd_iidtx4_c }, // V_DCT - { highbd_iidtx8_c, vpx_highbd_idct4_c }, // H_DCT - { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_ADST - { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_ADST - { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST - { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_FLIPADST + { highbd_iidtx8_c, highbd_iidtx4_c }, // IDTX + { vpx_highbd_idct8_c, highbd_iidtx4_c }, // V_DCT + { highbd_iidtx8_c, vpx_highbd_idct4_c }, // H_DCT + { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_ADST + { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_ADST + { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST + { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_FLIPADST }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1173,10 +1128,10 @@ void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 8; ++i) { HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd); for (j = 0; j < 4; ++j) { - out[j][i] = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd); + out[j][i] = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd); } - input += 4; + input += 4; } // inverse transform column vectors @@ -1191,31 +1146,31 @@ void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8, for (j = 0; j < 4; ++j) { int d = i * stride + j; int s = j * outstride + i; - dest[d] = highbd_clip_pixel_add(dest[d], - ROUND_POWER_OF_TWO(outp[s], 5), bd); + dest[d] = + highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd); } } } void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { + int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_8x4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct8_c }, // DCT_DCT - { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // ADST_DCT - { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_ADST + { vpx_highbd_idct4_c, vpx_highbd_idct8_c }, // DCT_DCT + { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // ADST_DCT + { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_ADST { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // ADST_ADST - { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // FLIPADST_DCT - { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST + { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // FLIPADST_DCT + { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST - { highbd_iidtx4_c, highbd_iidtx8_c }, // IDTX - { vpx_highbd_idct4_c, highbd_iidtx8_c }, // V_DCT - { highbd_iidtx4_c, vpx_highbd_idct8_c }, // H_DCT - { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_ADST - { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_ADST - { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST - { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_FLIPADST + { highbd_iidtx4_c, highbd_iidtx8_c }, // IDTX + { vpx_highbd_idct4_c, highbd_iidtx8_c }, // V_DCT + { highbd_iidtx4_c, vpx_highbd_idct8_c }, // H_DCT + { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_ADST + { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_ADST + { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST + { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_FLIPADST }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1229,10 +1184,10 @@ void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8, for (i = 0; i < 4; ++i) { HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd); for (j = 0; j < 8; ++j) { - out[j][i] = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd); + out[j][i] = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd); } - input += 8; + input += 8; } // inverse transform column vectors @@ -1247,34 +1202,34 @@ void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8, for (j = 0; j < 8; ++j) { int d = i * stride + j; int s = j * outstride + i; - dest[d] = highbd_clip_pixel_add(dest[d], - ROUND_POWER_OF_TWO(outp[s], 5), bd); + dest[d] = + highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd); } } } #endif // CONFIG_EXT_TX void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { + int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST #if CONFIG_EXT_TX - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST - { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX - { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT - { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT - { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST - { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_ADST - { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST - { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX + { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX + { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT + { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT + { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST + { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_ADST + { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST + { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_FLIPADST +#endif // CONFIG_EXT_TX }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1288,13 +1243,13 @@ void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, // inverse transform row vectors for (i = 0; i < 8; ++i) { HIGH_IHT_8[tx_type].rows(input, out[i], bd); - input += 8; + input += 8; } // transpose - for (i = 1 ; i < 8; i++) { + for (i = 1; i < 8; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -1314,33 +1269,33 @@ void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, for (j = 0; j < 8; ++j) { int d = i * stride + j; int s = j * outstride + i; - dest[d] = highbd_clip_pixel_add(dest[d], - ROUND_POWER_OF_TWO(outp[s], 5), bd); + dest[d] = + highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd); } } } void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { + int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_16[] = { - { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST #if CONFIG_EXT_TX - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST - { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX - { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT - { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT - { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST - { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_ADST - { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST - { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_FLIPADST -#endif // CONFIG_EXT_TX + { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX + { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT + { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT + { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST + { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_ADST + { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST + { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_FLIPADST +#endif // CONFIG_EXT_TX }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1354,13 +1309,13 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, // inverse transform row vectors for (i = 0; i < 16; ++i) { HIGH_IHT_16[tx_type].rows(input, out[i], bd); - input += 16; + input += 16; } // transpose - for (i = 1 ; i < 16; i++) { + for (i = 1; i < 16; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -1380,8 +1335,8 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, for (j = 0; j < 16; ++j) { int d = i * stride + j; int s = j * outstride + i; - dest[d] = highbd_clip_pixel_add(dest[d], - ROUND_POWER_OF_TWO(outp[s], 6), bd); + dest[d] = + highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd); } } } @@ -1390,22 +1345,22 @@ void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, int stride, int tx_type, int bd) { static const highbd_transform_2d HIGH_IHT_32[] = { - { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT - { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT - { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST - { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT - { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST - { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST - { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX - { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT - { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT - { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST - { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST - { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST - { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST + { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT + { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT + { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST + { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT + { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST + { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST + { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX + { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT + { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT + { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST + { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST + { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST + { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST }; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); @@ -1419,13 +1374,13 @@ void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, // inverse transform row vectors for (i = 0; i < 32; ++i) { HIGH_IHT_32[tx_type].rows(input, out[i], bd); - input += 32; + input += 32; } // transpose - for (i = 1 ; i < 32; i++) { + for (i = 1; i < 32; i++) { for (j = 0; j < i; j++) { - tmp = out[i][j]; + tmp = out[i][j]; out[i][j] = out[j][i]; out[j][i] = tmp; } @@ -1443,8 +1398,8 @@ void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, for (j = 0; j < 32; ++j) { int d = i * stride + j; int s = j * outstride + i; - dest[d] = highbd_clip_pixel_add(dest[d], - ROUND_POWER_OF_TWO(outp[s], 6), bd); + dest[d] = + highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd); } } } @@ -1452,16 +1407,15 @@ void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, // idct void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { + int eob, int bd) { if (eob > 1) vpx_highbd_idct4x4_16_add(input, dest, stride, bd); else vpx_highbd_idct4x4_1_add(input, dest, stride, bd); } - void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { + int eob, int bd) { if (eob > 1) vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); else @@ -1469,7 +1423,7 @@ void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { + int eob, int bd) { // If dc is 1, then input[0] is the reconstructed value, do not need // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. @@ -1488,7 +1442,7 @@ void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, } void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd) { + int stride, int eob, int bd) { // The calculation can be simplified if there are not many non-zero dct // coefficients. Use eobs to separate different cases. // DC only DCT coefficient. @@ -1502,7 +1456,7 @@ void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, } void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd) { + int stride, int eob, int bd) { // Non-zero coeff only in upper-left 8x8 if (eob == 1) { vpx_highbd_idct32x32_1_add(input, dest, stride, bd); @@ -1527,8 +1481,8 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case ADST_DCT: case DCT_ADST: case ADST_ADST: - vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); + vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type, + bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -1536,8 +1490,8 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); + vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type, + bd); break; case V_DCT: case H_DCT: @@ -1552,9 +1506,7 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } @@ -1562,14 +1514,14 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd, TX_TYPE tx_type) { - (void) eob; + (void)eob; vp10_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd); } void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd, TX_TYPE tx_type) { - (void) eob; + (void)eob; vp10_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd); } #endif // CONFIG_EXT_TX @@ -1583,8 +1535,8 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case ADST_DCT: case DCT_ADST: case ADST_ADST: - vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); + vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type, + bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -1592,8 +1544,8 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, case FLIPADST_FLIPADST: case ADST_FLIPADST: case FLIPADST_ADST: - vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, - tx_type, bd); + vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type, + bd); break; case V_DCT: case H_DCT: @@ -1608,9 +1560,7 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } @@ -1648,9 +1598,7 @@ void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } @@ -1684,9 +1632,7 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -1720,12 +1666,9 @@ void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, // this is like vp10_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - vp10_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, - lossless); - break; - default: - assert(0 && "Invalid transform size"); + vp10_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless); break; + default: assert(0 && "Invalid transform size"); break; } } @@ -1763,9 +1706,7 @@ void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, vp10_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type, lossless); break; - default: - assert(0 && "Invalid transform size"); - break; + default: assert(0 && "Invalid transform size"); break; } } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp10/common/idct.h b/vp10/common/idct.h index f20a154ad2cb0c9ee8137700300ac057f3aa8ba7..144bfe572eaf23edb18617cef586e32468ab6e1b 100644 --- a/vp10/common/idct.h +++ b/vp10/common/idct.h @@ -35,14 +35,14 @@ typedef struct INV_TXFM_PARAM { #endif } INV_TXFM_PARAM; -typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); +typedef void (*transform_1d)(const tran_low_t *, tran_low_t *); typedef struct { transform_1d cols, rows; // vertical and horizontal } transform_2d; #if CONFIG_VP9_HIGHBITDEPTH -typedef void (*highbd_transform_1d)(const tran_low_t*, tran_low_t*, int bd); +typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd); typedef struct { highbd_transform_1d cols, rows; // vertical and horizontal @@ -54,9 +54,9 @@ int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_SIZE tx_size); void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); + int eob); void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); + int eob); void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, @@ -64,33 +64,33 @@ void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); -void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type, int lossless); +void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type, int lossless); #if CONFIG_EXT_TX -void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type); -void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type); +void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type); +void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type); #endif // CONFIG_EXT_TX -void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type); -void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type); -void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type); +void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type); +void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type); +void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride, + int eob, TX_TYPE tx_type); void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride, INV_TXFM_PARAM *inv_txfm_param); #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); + int eob, int bd); void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); + int eob, int bd); void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); + int eob, int bd); void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd); + int stride, int eob, int bd); void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd); + int stride, int eob, int bd); void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd, TX_TYPE tx_type, int lossless); diff --git a/vp10/common/intra_filters.h b/vp10/common/intra_filters.h index 664a7d6d1e651e0ff200052c541202469a04adbf..021fb8edef6f0eee84c171e6ad313775f6a20a27 100644 --- a/vp10/common/intra_filters.h +++ b/vp10/common/intra_filters.h @@ -14,54 +14,54 @@ #define FILTER_INTRA_PREC_BITS (10) static int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = { - { - {735, 881, -537, -54}, - {1005, 519, -488, -11}, - {383, 990, -343, -6}, - {442, 805, -542, 319}, - {658, 616, -133, -116}, - {875, 442, -141, -151}, - {386, 741, -23, -80}, - {390, 1027, -446, 51}, - {679, 606, -523, 262}, - {903, 922, -778, -23}, - }, - { - {648, 803, -444, 16}, - {972, 620, -576, 7}, - {561, 967, -499, -5}, - {585, 762, -468, 144}, - {596, 619, -182, -9}, - {895, 459, -176, -153}, - {557, 722, -126, -129}, - {601, 839, -523, 105}, - {562, 709, -499, 251}, - {803, 872, -695, 43}, - }, - { - {423, 728, -347, 111}, - {963, 685, -665, 23}, - {281, 1024, -480, 216}, - {640, 596, -437, 78}, - {429, 669, -259, 99}, - {740, 646, -415, 23}, - {568, 771, -346, 40}, - {404, 833, -486, 209}, - {398, 712, -423, 307}, - {939, 935, -887, 17}, - }, - { - {477, 737, -393, 150}, - {881, 630, -546, 67}, - {506, 984, -443, -20}, - {114, 459, -270, 528}, - {433, 528, 14, 3}, - {837, 470, -301, -30}, - {181, 777, 89, -107}, - {-29, 716, -232, 259}, - {589, 646, -495, 255}, - {740, 884, -728, 77}, - }, + { + { 735, 881, -537, -54 }, + { 1005, 519, -488, -11 }, + { 383, 990, -343, -6 }, + { 442, 805, -542, 319 }, + { 658, 616, -133, -116 }, + { 875, 442, -141, -151 }, + { 386, 741, -23, -80 }, + { 390, 1027, -446, 51 }, + { 679, 606, -523, 262 }, + { 903, 922, -778, -23 }, + }, + { + { 648, 803, -444, 16 }, + { 972, 620, -576, 7 }, + { 561, 967, -499, -5 }, + { 585, 762, -468, 144 }, + { 596, 619, -182, -9 }, + { 895, 459, -176, -153 }, + { 557, 722, -126, -129 }, + { 601, 839, -523, 105 }, + { 562, 709, -499, 251 }, + { 803, 872, -695, 43 }, + }, + { + { 423, 728, -347, 111 }, + { 963, 685, -665, 23 }, + { 281, 1024, -480, 216 }, + { 640, 596, -437, 78 }, + { 429, 669, -259, 99 }, + { 740, 646, -415, 23 }, + { 568, 771, -346, 40 }, + { 404, 833, -486, 209 }, + { 398, 712, -423, 307 }, + { 939, 935, -887, 17 }, + }, + { + { 477, 737, -393, 150 }, + { 881, 630, -546, 67 }, + { 506, 984, -443, -20 }, + { 114, 459, -270, 528 }, + { 433, 528, 14, 3 }, + { 837, 470, -301, -30 }, + { 181, 777, 89, -107 }, + { -29, 716, -232, 259 }, + { 589, 646, -495, 255 }, + { 740, 884, -728, 77 }, + }, }; #endif // VP10_COMMON_INTRA_FILTERS_H_ diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 708acd87b946f3fbf5e3fccd5352075fc7d7e7d4..17e6ac4d92590a0189329608a02a2c746ed4ccd0 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -39,7 +39,7 @@ // 10101010 // // A loopfilter should be applied to every other 8x8 horizontally. -static const uint64_t left_64x64_txform_mask[TX_SIZES]= { +static const uint64_t left_64x64_txform_mask[TX_SIZES] = { 0xffffffffffffffffULL, // TX_4X4 0xffffffffffffffffULL, // TX_8x8 0x5555555555555555ULL, // TX_16x16 @@ -63,7 +63,7 @@ static const uint64_t left_64x64_txform_mask[TX_SIZES]= { // 00000000 // // A loopfilter should be applied to every other 4 the row vertically. -static const uint64_t above_64x64_txform_mask[TX_SIZES]= { +static const uint64_t above_64x64_txform_mask[TX_SIZES] = { 0xffffffffffffffffULL, // TX_4X4 0xffffffffffffffffULL, // TX_8x8 0x00ff00ff00ff00ffULL, // TX_16x16 @@ -137,18 +137,18 @@ static const uint64_t size_mask[BLOCK_SIZES] = { }; // These are used for masking the left and above borders. -static const uint64_t left_border = 0x1111111111111111ULL; +static const uint64_t left_border = 0x1111111111111111ULL; static const uint64_t above_border = 0x000000ff000000ffULL; // 16 bit masks for uv transform sizes. -static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= { +static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = { 0xffff, // TX_4X4 0xffff, // TX_8x8 0x5555, // TX_16x16 0x1111, // TX_32x32 }; -static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= { +static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = { 0xffff, // TX_4X4 0xffff, // TX_8x8 0x0f0f, // TX_16x16 @@ -204,16 +204,17 @@ static const uint16_t size_mask_uv[BLOCK_SIZES] = { 0x00ff, // BLOCK_64X32, 0xffff, // BLOCK_64X64 }; -static const uint16_t left_border_uv = 0x1111; +static const uint16_t left_border_uv = 0x1111; static const uint16_t above_border_uv = 0x000f; static const int mode_lf_lut[MB_MODE_COUNT] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) #if CONFIG_EXT_INTER - , 1, // NEWFROMNEARMV mode - 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0) -#endif // CONFIG_EXT_INTER + , + 1, // NEWFROMNEARMV mode + 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0) +#endif // CONFIG_EXT_INTER }; static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { @@ -229,8 +230,7 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { block_inside_limit = (9 - sharpness_lvl); } - if (block_inside_limit < 1) - block_inside_limit = 1; + if (block_inside_limit < 1) block_inside_limit = 1; memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), @@ -242,8 +242,8 @@ static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, const MB_MODE_INFO *mbmi) { #if CONFIG_SUPERTX const int segment_id = VPXMIN(mbmi->segment_id, mbmi->segment_id_supertx); - assert(IMPLIES(supertx_enabled(mbmi), - mbmi->segment_id_supertx != MAX_SEGMENTS)); + assert( + IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS)); assert(IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx <= mbmi->segment_id)); #else @@ -286,9 +286,9 @@ void vp10_loop_filter_frame_init(VP10_COMMON *cm, int default_filt_lvl) { int lvl_seg = default_filt_lvl; if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); - lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? - data : default_filt_lvl + data, - 0, MAX_LOOP_FILTER); + lvl_seg = clamp( + seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0, + MAX_LOOP_FILTER); } if (!lf->mode_ref_delta_enabled) { @@ -302,8 +302,8 @@ void vp10_loop_filter_frame_init(VP10_COMMON *cm, int default_filt_lvl) { for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) { for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { - const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale - + lf->mode_deltas[mode] * scale; + const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + + lf->mode_deltas[mode] * scale; lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); } } @@ -311,9 +311,8 @@ void vp10_loop_filter_frame_init(VP10_COMMON *cm, int default_filt_lvl) { } } -static void filter_selectively_vert_row2(int subsampling_factor, - uint8_t *s, int pitch, - unsigned int mask_16x16_l, +static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s, + int pitch, unsigned int mask_16x16_l, unsigned int mask_8x8_l, unsigned int mask_4x4_l, unsigned int mask_4x4_int_l, @@ -345,11 +344,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else if (mask_16x16_0 & 1) { - vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr); } else { - vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + vpx_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); } } @@ -408,14 +406,11 @@ static void filter_selectively_vert_row2(int subsampling_factor, } #if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert_row2(int subsampling_factor, - uint16_t *s, int pitch, - unsigned int mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl, int bd) { +static void highbd_filter_selectively_vert_row2( + int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l, + unsigned int mask_8x8_l, unsigned int mask_4x4_l, + unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n, + const uint8_t *lfl, int bd) { const int mask_shift = subsampling_factor ? 4 : 8; const int mask_cutoff = subsampling_factor ? 0xf : 0xff; const int lfl_forward = subsampling_factor ? 4 : 8; @@ -431,7 +426,7 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, unsigned int mask; for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; + mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; mask; mask >>= 1) { const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); @@ -445,7 +440,7 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, + vpx_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, lfi1->hev_thr, bd); } } @@ -507,18 +502,15 @@ static void highbd_filter_selectively_vert_row2(int subsampling_factor, } #endif // CONFIG_VP9_HIGHBITDEPTH -static void filter_selectively_horiz(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl) { +static void filter_selectively_horiz( + uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_info_n *lfi_n, const uint8_t *lfl) { unsigned int mask; int count; - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= count) { + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= count) { const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; count = 1; @@ -604,18 +596,15 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } #if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl, int bd) { +static void highbd_filter_selectively_horiz( + uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) { unsigned int mask; int count; - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= count) { + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= count) { const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; count = 1; @@ -639,10 +628,9 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, lfin->hev_thr, bd); if ((mask_4x4_int & 3) == 3) { - vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr, bd); + vpx_highbd_lpf_horizontal_4_dual( + s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr, bd); } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, @@ -671,10 +659,9 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); if ((mask_4x4_int & 3) == 3) { - vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr, bd); + vpx_highbd_lpf_horizontal_4_dual( + s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr, bd); } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, @@ -718,8 +705,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, // TODO(JBB) Need another function for different resolution color.. static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, - const int shift_uv, - LOOP_FILTER_MASK *lfm) { + const int shift_uv, LOOP_FILTER_MASK *lfm) { const MB_MODE_INFO *mbmi = &mi->mbmi; const BLOCK_SIZE block_size = mbmi->sb_type; // TODO(debargha): Check if masks can be setup correctly when @@ -745,8 +731,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, const int row = (shift_y >> MAX_MIB_SIZE_LOG2); const int col = shift_y - (row << MAX_MIB_SIZE_LOG2); - for (i = 0; i < h; i++) - memset(&lfm->lfl_y[row + i][col], filter_level, w); + for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w); } // These set 1 in the current block size for the block size edges. @@ -768,22 +753,22 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) - return; + if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return; // Here we are adding a mask for the transform size. The transform // size mask is set to be correct for a 64x64 prediction block size. We // mask to match the size of the block we are working on and then shift it // into place.. - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - *above_uv |= (size_mask_uv[block_size] & - above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) + << shift_y; + *above_uv |= + (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv]) + << shift_uv; - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - *left_uv |= (size_mask_uv[block_size] & - left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) + << shift_y; + *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv]) + << shift_uv; // Here we are trying to determine what to do with the internal 4x4 block // boundaries. These differ from the 4x4 boundaries on the outside edge of @@ -827,21 +812,19 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, const int row = (shift_y >> MAX_MIB_SIZE_LOG2); const int col = shift_y - (row << MAX_MIB_SIZE_LOG2); - for (i = 0; i < h; i++) - memset(&lfm->lfl_y[row + i][col], filter_level, w); + for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w); } *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; - if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) - return; + if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return; - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; + *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) + << shift_y; - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; + *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) + << shift_y; if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y; @@ -851,8 +834,8 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, // by mi_row, mi_col. // TODO(JBB): This function only works for yv12. void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi, const int mode_info_stride, - LOOP_FILTER_MASK *lfm) { + MODE_INFO **mi, const int mode_info_stride, + LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; MODE_INFO **mip = mi; @@ -862,21 +845,21 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, // added to the mi ptr as we go through each loop. It helps us to avoid // setting up special row and column counters for each index. The last step // brings us out back to the starting position. - const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, - -(mode_info_stride << 2) - 4}; - const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, - -(mode_info_stride << 1) - 2}; - const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; + const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4, + -(mode_info_stride << 2) - 4 }; + const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2, + -(mode_info_stride << 1) - 2 }; + const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 }; // Following variables represent shifts to position the current block // mask over the appropriate block. A shift of 36 to the left will move // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left // 4 rows to the appropriate spot. - const int shift_32_y[] = {0, 4, 32, 36}; - const int shift_16_y[] = {0, 2, 16, 18}; - const int shift_8_y[] = {0, 1, 8, 9}; - const int shift_32_uv[] = {0, 2, 8, 10}; - const int shift_16_uv[] = {0, 1, 4, 5}; + const int shift_32_y[] = { 0, 4, 32, 36 }; + const int shift_16_y[] = { 0, 2, 16, 18 }; + const int shift_8_y[] = { 0, 1, 8, 9 }; + const int shift_32_uv[] = { 0, 2, 8, 10 }; + const int shift_16_uv[] = { 0, 1, 4, 5 }; int i; const int max_rows = VPXMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE); const int max_cols = VPXMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE); @@ -891,21 +874,17 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, // loop and storing lfm in the mbmi structure so that we don't have to go // through the recursive loop structure multiple times. switch (mip[0]->mbmi.sb_type) { - case BLOCK_64X64: - build_masks(lfi_n, mip[0] , 0, 0, lfm); - break; + case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break; case BLOCK_64X32: build_masks(lfi_n, mip[0], 0, 0, lfm); mip2 = mip + mode_info_stride * 4; - if (4 >= max_rows) - break; + if (4 >= max_rows) break; build_masks(lfi_n, mip2[0], 32, 8, lfm); break; case BLOCK_32X64: build_masks(lfi_n, mip[0], 0, 0, lfm); mip2 = mip + 4; - if (4 >= max_cols) - break; + if (4 >= max_cols) break; build_masks(lfi_n, mip2[0], 4, 2, lfm); break; default: @@ -920,25 +899,19 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, case BLOCK_32X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; - case BLOCK_32X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); #if CONFIG_SUPERTX - if (supertx_enabled(&mip[0]->mbmi)) - break; + if (supertx_enabled(&mip[0]->mbmi)) break; #endif - if (mi_32_row_offset + 2 >= max_rows) - continue; + if (mi_32_row_offset + 2 >= max_rows) continue; mip2 = mip + mode_info_stride * 2; build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); break; - case BLOCK_16X32: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); #if CONFIG_SUPERTX - if (supertx_enabled(&mip[0]->mbmi)) - break; + if (supertx_enabled(&mip[0]->mbmi)) break; #endif - if (mi_32_col_offset + 2 >= max_cols) - continue; + if (mi_32_col_offset + 2 >= max_cols) continue; mip2 = mip + 2; build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); break; @@ -952,10 +925,10 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; - const int mi_16_col_offset = mi_32_col_offset + - ((idx_16 & 1) << 1); - const int mi_16_row_offset = mi_32_row_offset + - ((idx_16 >> 1) << 1); + const int mi_16_col_offset = + mi_32_col_offset + ((idx_16 & 1) << 1); + const int mi_16_row_offset = + mi_32_row_offset + ((idx_16 >> 1) << 1); if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) continue; @@ -966,14 +939,12 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, break; case BLOCK_16X8: #if CONFIG_SUPERTX - if (supertx_enabled(&mip[0]->mbmi)) - break; + if (supertx_enabled(&mip[0]->mbmi)) break; #endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_16_row_offset + 1 >= max_rows) - continue; + if (mi_16_row_offset + 1 >= max_rows) continue; mip2 = mip + mode_info_stride; - build_y_mask(lfi_n, mip2[0], shift_y+8, + build_y_mask(lfi_n, mip2[0], shift_y + 8, #if CONFIG_SUPERTX 0, #endif @@ -981,23 +952,20 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, break; case BLOCK_8X16: #if CONFIG_SUPERTX - if (supertx_enabled(&mip[0]->mbmi)) - break; + if (supertx_enabled(&mip[0]->mbmi)) break; #endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_16_col_offset +1 >= max_cols) - continue; + if (mi_16_col_offset + 1 >= max_cols) continue; mip2 = mip + 1; - build_y_mask(lfi_n, mip2[0], shift_y+1, + build_y_mask(lfi_n, mip2[0], shift_y + 1, #if CONFIG_SUPERTX 0, #endif lfm); break; default: { - const int shift_y = shift_32_y[idx_32] + - shift_16_y[idx_16] + - shift_8_y[0]; + const int shift_y = + shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0]; #if CONFIG_SUPERTX if (mip[0]->mbmi.tx_size == TX_16X16) { build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); @@ -1008,12 +976,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, mip += offset[0]; for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { const int shift_y = shift_32_y[idx_32] + - shift_16_y[idx_16] + - shift_8_y[idx_8]; - const int mi_8_col_offset = mi_16_col_offset + - ((idx_8 & 1)); - const int mi_8_row_offset = mi_16_row_offset + - ((idx_8 >> 1)); + shift_16_y[idx_16] + shift_8_y[idx_8]; + const int mi_8_col_offset = + mi_16_col_offset + ((idx_8 & 1)); + const int mi_8_row_offset = + mi_16_row_offset + ((idx_8 >> 1)); if (mi_8_col_offset >= max_cols || mi_8_row_offset >= max_rows) @@ -1057,10 +1024,9 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, const uint64_t rows = cm->mi_rows - mi_row; // Each pixel inside the border gets a 1, - const uint64_t mask_y = - (((uint64_t) 1 << (rows << MAX_MIB_SIZE_LOG2)) - 1); + const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1); const uint16_t mask_uv = - (((uint16_t) 1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1); + (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1); // Remove values completely outside our border. for (i = 0; i < TX_32X32; i++) { @@ -1089,7 +1055,7 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, // Each pixel inside the border gets a 1, the multiply copies the border // to where we need it. - const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; + const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; // Internal edges are not applied on the last column of the image so @@ -1131,7 +1097,7 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); - assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); + assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8])); assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16])); @@ -1145,17 +1111,14 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16])); } -static void filter_selectively_vert(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl) { +static void filter_selectively_vert( + uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_info_n *lfi_n, const uint8_t *lfl) { unsigned int mask; - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= 1) { const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; if (mask & 1) { @@ -1179,29 +1142,26 @@ static void filter_selectively_vert(uint8_t *s, int pitch, } #if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert(uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, - const uint8_t *lfl, int bd) { +static void highbd_filter_selectively_vert( + uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, + unsigned int mask_4x4, unsigned int mask_4x4_int, + const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) { unsigned int mask; - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; + mask >>= 1) { const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { - vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); + vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + bd); } else if (mask_8x8 & 1) { - vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); + vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + bd); } else if (mask_4x4 & 1) { - vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); + vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, + bd); } } if (mask_4x4_int & 1) @@ -1219,18 +1179,17 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, void vp10_filter_block_plane_non420(VP10_COMMON *cm, struct macroblockd_plane *plane, - MODE_INFO **mib, - int mi_row, int mi_col) { + MODE_INFO **mib, int mi_row, int mi_col) { const int ss_x = plane->subsampling_x; const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_y; const int col_step = 1 << ss_x; struct buf_2d *const dst = &plane->dst; - uint8_t* const dst0 = dst->buf; - unsigned int mask_16x16[MAX_MIB_SIZE] = {0}; - unsigned int mask_8x8[MAX_MIB_SIZE] = {0}; - unsigned int mask_4x4[MAX_MIB_SIZE] = {0}; - unsigned int mask_4x4_int[MAX_MIB_SIZE] = {0}; + uint8_t *const dst0 = dst->buf; + unsigned int mask_16x16[MAX_MIB_SIZE] = { 0 }; + unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 }; + unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 }; + unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 }; uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE]; int r, c; @@ -1250,21 +1209,22 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1); // left edge of current unit is block/partition edge -> no skip - const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? - !blk_col : 1; + const int block_edge_left = + (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1; const int skip_this_c = skip_this && !block_edge_left; // top edge of current unit is block/partition edge -> no skip - const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? - !blk_row : 1; + const int block_edge_above = + (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1; const int skip_this_r = skip_this && !block_edge_above; #if CONFIG_VAR_TX TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) - ? get_uv_tx_size(mbmi, plane) : mbmi->tx_size; -#else - const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) ? get_uv_tx_size(mbmi, plane) : mbmi->tx_size; +#else + const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) + ? get_uv_tx_size(mbmi, plane) + : mbmi->tx_size; #endif const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; @@ -1275,8 +1235,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, int tx_size_mask = 0; // Filter level can vary per MI - if (!(lfl[r][c >> ss_x] = get_filter_level(&cm->lf_info, mbmi))) - continue; + if (!(lfl[r][c >> ss_x] = get_filter_level(&cm->lf_info, mbmi))) continue; if (tx_size == TX_32X32) tx_size_mask = 3; @@ -1287,15 +1246,15 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, #if CONFIG_VAR_TX if (is_inter_block(mbmi) && !mbmi->skip) - tx_size = (plane->plane_type == PLANE_TYPE_UV) ? - get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row][ blk_col], - sb_type, ss_x, ss_y) : - mbmi->inter_tx_size[blk_row][blk_col]; + tx_size = + (plane->plane_type == PLANE_TYPE_UV) + ? get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row][blk_col], + sb_type, ss_x, ss_y) + : mbmi->inter_tx_size[blk_row][blk_col]; - tx_size_r = VPXMIN(tx_size, - cm->above_txfm_context[mi_col + c]); - tx_size_c = VPXMIN(tx_size, - cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]); + tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]); + tx_size_c = + VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]); cm->above_txfm_context[mi_col + c] = tx_size; cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] = tx_size; @@ -1366,29 +1325,20 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_vert( - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - &cm->lf_info, &lfl[r][0], + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, + mask_16x16_c & border_mask, mask_8x8_c & border_mask, + mask_4x4_c & border_mask, mask_4x4_int[r], &cm->lf_info, &lfl[r][0], (int)cm->bit_depth); } else { - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, + filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], + mask_4x4_c & border_mask, mask_4x4_int[r], &cm->lf_info, &lfl[r][0]); } #else - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - &cm->lf_info, &lfl[r][0]); + filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, + mask_8x8_c & border_mask, mask_4x4_c & border_mask, + mask_4x4_int[r], &cm->lf_info, &lfl[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += MI_SIZE * dst->stride; mib += row_step * cm->mi_stride; @@ -1415,39 +1365,27 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm, } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz( - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r][0], - (int)cm->bit_depth); + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfl[r][0], (int)cm->bit_depth); } else { - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r][0]); + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfl[r][0]); } #else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r][0]); + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + &lfl[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += MI_SIZE * dst->stride; } } void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; int r; @@ -1475,14 +1413,12 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, } else { filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r][0]); + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]); } #else filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r][0]); + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 2 * MI_SIZE * dst->stride; mask_16x16 >>= 2 * MI_SIZE; @@ -1517,8 +1453,7 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, if (cm->use_highbitdepth) { highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r][0], + mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0], (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, @@ -1540,9 +1475,8 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm, } void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; int r, c; @@ -1576,16 +1510,15 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_uv[r >> 1][0], (int)cm->bit_depth); } else { - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r >> 1][0]); + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, + dst->stride, mask_16x16_l, mask_8x8_l, + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, + &lfm->lfl_uv[r >> 1][0]); } #else filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_uv[r >> 1][0]); + plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, + mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_uv[r >> 1][0]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 2 * MI_SIZE * dst->stride; @@ -1623,11 +1556,10 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfm->lfl_uv[r >> 1][0], - (int)cm->bit_depth); + highbd_filter_selectively_horiz( + CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0], + (int)cm->bit_depth); } else { filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, @@ -1647,30 +1579,29 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, } } -void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, - VP10_COMMON *cm, +void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP10_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES const int num_planes = y_only ? 1 : MAX_MB_PLANE; int mi_row, mi_col; -# if CONFIG_VAR_TX +#if CONFIG_VAR_TX memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols); -# endif // CONFIG_VAR_TX +#endif // CONFIG_VAR_TX for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; -# if CONFIG_VAR_TX +#if CONFIG_VAR_TX memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE); -# endif // CONFIG_VAR_TX +#endif // CONFIG_VAR_TX for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) { int plane; vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); for (plane = 0; plane < num_planes; ++plane) - vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, + mi_col); } } #else @@ -1709,7 +1640,7 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, break; case LF_PATH_SLOW: vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + mi_row, mi_col); break; } } @@ -1718,10 +1649,9 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, #endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES } -void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, - VP10_COMMON *cm, MACROBLOCKD *xd, - int frame_filter_level, - int y_only, int partial_frame) { +void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, + MACROBLOCKD *xd, int frame_filter_level, int y_only, + int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; if (!frame_filter_level) return; start_mi_row = 0; @@ -1733,9 +1663,7 @@ void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, } end_mi_row = start_mi_row + mi_rows_to_filter; vp10_loop_filter_frame_init(cm, frame_filter_level); - vp10_loop_filter_rows(frame, cm, xd->plane, - start_mi_row, end_mi_row, - y_only); + vp10_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); } void vp10_loop_filter_data_reset( @@ -1753,6 +1681,6 @@ void vp10_loop_filter_data_reset( int vp10_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { (void)unused; vp10_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only); + lf_data->start, lf_data->stop, lf_data->y_only); return 1; } diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 9eaa5ed96a501561ab8adb86da67495e903df3f6..97165668e480a1f2e961e16fb92f9ef5b734fe15 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h @@ -27,7 +27,7 @@ extern "C" { #define SIMD_WIDTH 16 -#define MAX_MODE_LF_DELTAS 2 +#define MAX_MODE_LF_DELTAS 2 enum lf_path { LF_PATH_420, @@ -95,37 +95,32 @@ struct VP10LfSyncData; // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. -void vp10_setup_mask(struct VP10Common *const cm, - const int mi_row, const int mi_col, - MODE_INFO **mi_8x8, const int mode_info_stride, - LOOP_FILTER_MASK *lfm); +void vp10_setup_mask(struct VP10Common *const cm, const int mi_row, + const int mi_col, MODE_INFO **mi_8x8, + const int mode_info_stride, LOOP_FILTER_MASK *lfm); void vp10_filter_block_plane_ss00(struct VP10Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm); void vp10_filter_block_plane_ss11(struct VP10Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); + struct macroblockd_plane *const plane, + int mi_row, LOOP_FILTER_MASK *lfm); void vp10_filter_block_plane_non420(struct VP10Common *cm, - struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, - int mi_row, int mi_col); + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, int mi_row, int mi_col); void vp10_loop_filter_init(struct VP10Common *cm); // Update the loop filter for the current frame. -// This should be called before vp10_loop_filter_rows(), vp10_loop_filter_frame() +// This should be called before vp10_loop_filter_rows(), +// vp10_loop_filter_frame() // calls this function directly. void vp10_loop_filter_frame_init(struct VP10Common *cm, int default_filt_lvl); -void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, - struct VP10Common *cm, - struct macroblockd *mbd, - int filter_level, +void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm, + struct macroblockd *mbd, int filter_level, int y_only, int partial_frame); // Apply the loop filter to [start, stop) macro block rows in frame_buffer. diff --git a/vp10/common/mips/dspr2/itrans16_dspr2.c b/vp10/common/mips/dspr2/itrans16_dspr2.c index 3d1bd3d906d60328475ed0821a0ea30dea4fd4d9..0fcae870ed5ad4b532db111cf7b50458e036235f 100644 --- a/vp10/common/mips/dspr2/itrans16_dspr2.c +++ b/vp10/common/mips/dspr2/itrans16_dspr2.c @@ -21,27 +21,23 @@ #include "vpx_ports/mem.h" #if HAVE_DSPR2 -void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, - int pitch, int tx_type) { +void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch, + int tx_type) { int i, j; - DECLARE_ALIGNED(32, int16_t, out[16 * 16]); + DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *outptr = out; int16_t temp_out[16]; uint32_t pos = 45; /* bit positon for extract from acc */ - __asm__ __volatile__ ( - "wrdsp %[pos], 1 \n\t" - : - : [pos] "r" (pos) - ); + __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); switch (tx_type) { - case DCT_DCT: // DCT in both horizontal and vertical + case DCT_DCT: // DCT in both horizontal and vertical idct16_rows_dspr2(input, outptr, 16); idct16_cols_add_blk_dspr2(out, dest, pitch); break; - case ADST_DCT: // ADST in vertical, DCT in horizontal + case ADST_DCT: // ADST in vertical, DCT in horizontal idct16_rows_dspr2(input, outptr, 16); outptr = out; @@ -50,13 +46,12 @@ void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, iadst16_dspr2(outptr, temp_out); for (j = 0; j < 16; ++j) - dest[j * pitch + i] = - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * pitch + i]); + dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * pitch + i]); outptr += 16; } break; - case DCT_ADST: // DCT in vertical, ADST in horizontal + case DCT_ADST: // DCT in vertical, ADST in horizontal { int16_t temp_in[16 * 16]; @@ -70,13 +65,11 @@ void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, } for (i = 0; i < 16; ++i) - for (j = 0; j < 16; ++j) - temp_in[j * 16 + i] = out[i * 16 + j]; + for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j]; idct16_cols_add_blk_dspr2(temp_in, dest, pitch); - } - break; - case ADST_ADST: // ADST in both directions + } break; + case ADST_ADST: // ADST in both directions { int16_t temp_in[16]; @@ -90,19 +83,14 @@ void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, } for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; iadst16_dspr2(temp_in, temp_out); for (j = 0; j < 16; ++j) - dest[j * pitch + i] = - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) - + dest[j * pitch + i]); + dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + + dest[j * pitch + i]); } - } - break; - default: - printf("vp10_short_iht16x16_add_dspr2 : Invalid tx_type\n"); - break; + } break; + default: printf("vp10_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break; } } #endif // #if HAVE_DSPR2 diff --git a/vp10/common/mips/dspr2/itrans4_dspr2.c b/vp10/common/mips/dspr2/itrans4_dspr2.c index 5249287b85ecdd3238ec49a9cfd0e2499670724c..9d10d5e83b722ae0aa8a1f541fefa78e8b346189 100644 --- a/vp10/common/mips/dspr2/itrans4_dspr2.c +++ b/vp10/common/mips/dspr2/itrans4_dspr2.c @@ -22,7 +22,7 @@ #if HAVE_DSPR2 void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride, int tx_type) { + int dest_stride, int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[4 * 4]); int16_t *outptr = out; @@ -30,14 +30,12 @@ void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, uint32_t pos = 45; /* bit positon for extract from acc */ - __asm__ __volatile__ ( - "wrdsp %[pos], 1 \n\t" - : - : [pos] "r" (pos) - ); + __asm__ __volatile__("wrdsp %[pos], 1 \n\t" + : + : [pos] "r"(pos)); switch (tx_type) { - case DCT_DCT: // DCT in both horizontal and vertical + case DCT_DCT: // DCT in both horizontal and vertical vpx_idct4_rows_dspr2(input, outptr); vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); break; @@ -50,9 +48,8 @@ void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, iadst4_dspr2(outptr, temp_out); for (j = 0; j < 4; ++j) - dest[j * dest_stride + i] = - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) - + dest[j * dest_stride + i]); + dest[j * dest_stride + i] = clip_pixel( + ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]); outptr += 4; } @@ -60,7 +57,7 @@ void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, case DCT_ADST: // DCT in vertical, ADST in horizontal for (i = 0; i < 4; ++i) { iadst4_dspr2(input, outptr); - input += 4; + input += 4; outptr += 4; } @@ -74,24 +71,20 @@ void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, case ADST_ADST: // ADST in both directions for (i = 0; i < 4; ++i) { iadst4_dspr2(input, outptr); - input += 4; + input += 4; outptr += 4; } for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; + for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; iadst4_dspr2(temp_in, temp_out); for (j = 0; j < 4; ++j) - dest[j * dest_stride + i] = - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) - + dest[j * dest_stride + i]); + dest[j * dest_stride + i] = clip_pixel( + ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]); } break; - default: - printf("vp10_short_iht4x4_add_dspr2 : Invalid tx_type\n"); - break; + default: printf("vp10_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break; } } #endif // #if HAVE_DSPR2 diff --git a/vp10/common/mips/dspr2/itrans8_dspr2.c b/vp10/common/mips/dspr2/itrans8_dspr2.c index b25b93aee0552fe86e62c88775e4645766f9d7d7..3ebf8ce90cbcec09b2b0b893fb29de4eb8f5e4a9 100644 --- a/vp10/common/mips/dspr2/itrans8_dspr2.c +++ b/vp10/common/mips/dspr2/itrans8_dspr2.c @@ -21,7 +21,7 @@ #if HAVE_DSPR2 void vp10_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, - int dest_stride, int tx_type) { + int dest_stride, int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; @@ -29,30 +29,25 @@ void vp10_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, uint32_t pos = 45; /* bit positon for extract from acc */ - __asm__ __volatile__ ( - "wrdsp %[pos], 1 \n\t" - : - : [pos] "r" (pos) - ); + __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); switch (tx_type) { - case DCT_DCT: // DCT in both horizontal and vertical + case DCT_DCT: // DCT in both horizontal and vertical idct8_rows_dspr2(input, outptr, 8); idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); break; - case ADST_DCT: // ADST in vertical, DCT in horizontal + case ADST_DCT: // ADST in vertical, DCT in horizontal idct8_rows_dspr2(input, outptr, 8); for (i = 0; i < 8; ++i) { iadst8_dspr2(&out[i * 8], temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) - + dest[j * dest_stride + i]); + dest[j * dest_stride + i] = clip_pixel( + ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]); } break; - case DCT_ADST: // DCT in vertical, ADST in horizontal + case DCT_ADST: // DCT in vertical, ADST in horizontal for (i = 0; i < 8; ++i) { iadst8_dspr2(input, outptr); input += 8; @@ -66,7 +61,7 @@ void vp10_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, } idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); break; - case ADST_ADST: // ADST in both directions + case ADST_ADST: // ADST in both directions for (i = 0; i < 8; ++i) { iadst8_dspr2(input, outptr); input += 8; @@ -74,20 +69,16 @@ void vp10_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, } for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; iadst8_dspr2(temp_in, temp_out); for (j = 0; j < 8; ++j) - dest[j * dest_stride + i] = - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) - + dest[j * dest_stride + i]); + dest[j * dest_stride + i] = clip_pixel( + ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]); } break; - default: - printf("vp10_short_iht8x8_add_dspr2 : Invalid tx_type\n"); - break; + default: printf("vp10_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break; } } #endif // #if HAVE_DSPR2 diff --git a/vp10/common/mips/msa/idct16x16_msa.c b/vp10/common/mips/msa/idct16x16_msa.c index a89e41b3dd93af0d83ca5ba590049e43719008b8..c73ef3764b5158e3ac7201416a0257773bb684e5 100644 --- a/vp10/common/mips/msa/idct16x16_msa.c +++ b/vp10/common/mips/msa/idct16x16_msa.c @@ -14,7 +14,7 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" void vp10_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, - int32_t dst_stride, int32_t tx_type) { + int32_t dst_stride, int32_t tx_type) { int32_t i; DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *out_ptr = &out[0]; @@ -74,8 +74,6 @@ void vp10_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst, (dst + (i << 3)), dst_stride); } break; - default: - assert(0); - break; + default: assert(0); break; } } diff --git a/vp10/common/mips/msa/idct4x4_msa.c b/vp10/common/mips/msa/idct4x4_msa.c index e38889f27539c67c17c99cae6369beb3bf3b46ca..ea4091b6a68b2ce9123ef9c328c5fa85569ce084 100644 --- a/vp10/common/mips/msa/idct4x4_msa.c +++ b/vp10/common/mips/msa/idct4x4_msa.c @@ -14,7 +14,7 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" void vp10_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst, - int32_t dst_stride, int32_t tx_type) { + int32_t dst_stride, int32_t tx_type) { v8i16 in0, in1, in2, in3; /* load vector elements of 4x4 block */ @@ -50,9 +50,7 @@ void vp10_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst, TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); break; - default: - assert(0); - break; + default: assert(0); break; } /* final rounding (add 2^3, divide by 2^4) and shift */ diff --git a/vp10/common/mips/msa/idct8x8_msa.c b/vp10/common/mips/msa/idct8x8_msa.c index ede6751c4ee359d0880a69b9787aa5877ecbc230..c62e82d59343cf64926fed3a59eabacdc5de58e5 100644 --- a/vp10/common/mips/msa/idct8x8_msa.c +++ b/vp10/common/mips/msa/idct8x8_msa.c @@ -14,59 +14,57 @@ #include "vpx_dsp/mips/inv_txfm_msa.h" void vp10_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst, - int32_t dst_stride, int32_t tx_type) { + int32_t dst_stride, int32_t tx_type) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; /* load vector elements of 8x8 block */ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); switch (tx_type) { case DCT_DCT: /* DCT in horizontal */ - VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); /* DCT in vertical */ - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); break; case ADST_DCT: /* DCT in horizontal */ - VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); /* ADST in vertical */ - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); break; case DCT_ADST: /* ADST in horizontal */ - VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); /* DCT in vertical */ - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); break; case ADST_ADST: /* ADST in horizontal */ - VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); /* ADST in vertical */ - TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - break; - default: - assert(0); + TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, + in3, in4, in5, in6, in7); + VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, + in5, in6, in7); break; + default: assert(0); break; } /* final rounding (add 2^4, divide by 2^5) and shift */ diff --git a/vp10/common/mv.h b/vp10/common/mv.h index aee31098e44abbc2d12e1a944805bd44a70f7b3f..123d0132f5c4839dccaa32bc80e527658e73f191 100644 --- a/vp10/common/mv.h +++ b/vp10/common/mv.h @@ -61,23 +61,23 @@ typedef struct mv32 { // // XX_MIN, XX_MAX are also computed to avoid repeated computation -#define GM_TRANS_PREC_BITS 5 -#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS) -#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF) -#define GM_TRANS_ENCODE_FACTOR (1 / (GM_TRANS_DECODE_FACTOR)) +#define GM_TRANS_PREC_BITS 5 +#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS) +#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF) +#define GM_TRANS_ENCODE_FACTOR (1 / (GM_TRANS_DECODE_FACTOR)) -#define GM_ALPHA_PREC_BITS 5 -#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS) -#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF) -#define GM_ALPHA_ENCODE_FACTOR (1 / (GM_ALPHA_DECODE_FACTOR)) +#define GM_ALPHA_PREC_BITS 5 +#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS) +#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF) +#define GM_ALPHA_ENCODE_FACTOR (1 / (GM_ALPHA_DECODE_FACTOR)) -#define GM_ABS_ALPHA_BITS 8 -#define GM_ABS_TRANS_BITS 8 +#define GM_ABS_ALPHA_BITS 8 +#define GM_ABS_TRANS_BITS 8 -#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS) -#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS) -#define GM_TRANS_MIN -GM_TRANS_MAX -#define GM_ALPHA_MIN -GM_ALPHA_MAX +#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS) +#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS) +#define GM_TRANS_MIN -GM_TRANS_MAX +#define GM_ALPHA_MIN -GM_ALPHA_MAX typedef enum { GLOBAL_ZERO = 0, @@ -94,20 +94,11 @@ typedef struct { static INLINE TransformationType gm_to_trans_type(GLOBAL_MOTION_TYPE gmtype) { switch (gmtype) { - case GLOBAL_ZERO: - return UNKNOWN_TRANSFORM; - break; - case GLOBAL_TRANSLATION: - return TRANSLATION; - break; - case GLOBAL_ROTZOOM: - return ROTZOOM; - break; - case GLOBAL_AFFINE: - return AFFINE; - break; - default: - assert(0); + case GLOBAL_ZERO: return UNKNOWN_TRANSFORM; break; + case GLOBAL_TRANSLATION: return TRANSLATION; break; + case GLOBAL_ROTZOOM: return ROTZOOM; break; + case GLOBAL_AFFINE: return AFFINE; break; + default: assert(0); } return UNKNOWN_TRANSFORM; } @@ -115,8 +106,9 @@ static INLINE TransformationType gm_to_trans_type(GLOBAL_MOTION_TYPE gmtype) { static INLINE GLOBAL_MOTION_TYPE get_gmtype(const Global_Motion_Params *gm) { if (gm->motion_params.wmmat[4] == 0 && gm->motion_params.wmmat[5] == 0) { if (gm->motion_params.wmmat[2] == 0 && gm->motion_params.wmmat[3] == 0) { - return ((gm->motion_params.wmmat[0] | gm->motion_params.wmmat[1]) ? - GLOBAL_TRANSLATION : GLOBAL_ZERO); + return ((gm->motion_params.wmmat[0] | gm->motion_params.wmmat[1]) + ? GLOBAL_TRANSLATION + : GLOBAL_ZERO); } else { return GLOBAL_ROTZOOM; } @@ -140,11 +132,11 @@ static INLINE int is_zero_mv(const MV *mv) { } static INLINE int is_equal_mv(const MV *a, const MV *b) { - return *((const uint32_t *)a) == *((const uint32_t *)b); + return *((const uint32_t *)a) == *((const uint32_t *)b); } -static INLINE void clamp_mv(MV *mv, int min_col, int max_col, - int min_row, int max_row) { +static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row, + int max_row) { mv->col = clamp(mv->col, min_col, max_col); mv->row = clamp(mv->row, min_row, max_row); } diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c index eed150899e568e00f0e6a5e876b1830fca58b959..c5f5c085441329a7cc217a5750e934c998a74cd3 100644 --- a/vp10/common/mvref_common.c +++ b/vp10/common/mvref_common.c @@ -13,13 +13,10 @@ #if CONFIG_REF_MV -static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, - const MB_MODE_INFO *const candidate, - const MV_REFERENCE_FRAME rf[2], - uint8_t *refmv_count, - CANDIDATE_MV *ref_mv_stack, - const int use_hp, - int len, int block, int col) { +static uint8_t add_ref_mv_candidate( + const MODE_INFO *const candidate_mi, const MB_MODE_INFO *const candidate, + const MV_REFERENCE_FRAME rf[2], uint8_t *refmv_count, + CANDIDATE_MV *ref_mv_stack, const int use_hp, int len, int block, int col) { const int weight = len; int index = 0, ref; int newmv_count = 0; @@ -30,16 +27,13 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, // single reference frame for (ref = 0; ref < 2; ++ref) { if (candidate->ref_frame[ref] == rf[0]) { - int_mv this_refmv = - get_sub_block_mv(candidate_mi, ref, col, block); + int_mv this_refmv = get_sub_block_mv(candidate_mi, ref, col, block); lower_mv_precision(&this_refmv.as_mv, use_hp); for (index = 0; index < *refmv_count; ++index) - if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) - break; + if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break; - if (index < *refmv_count) - ref_mv_stack[index].weight += 2 * weight; + if (index < *refmv_count) ref_mv_stack[index].weight += 2 * weight; // Add a new item to the list. if (index == *refmv_count) { @@ -59,16 +53,13 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) { int alt_block = 3 - block; - this_refmv = - get_sub_block_mv(candidate_mi, ref, col, alt_block); + this_refmv = get_sub_block_mv(candidate_mi, ref, col, alt_block); lower_mv_precision(&this_refmv.as_mv, use_hp); for (index = 0; index < *refmv_count; ++index) - if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) - break; + if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break; - if (index < *refmv_count) - ref_mv_stack[index].weight += weight; + if (index < *refmv_count) ref_mv_stack[index].weight += weight; // Add a new item to the list. if (index == *refmv_count) { @@ -79,7 +70,7 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, ++(*refmv_count); #if CONFIG_EXT_INTER - if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV) + if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV) #else if (candidate->mode == NEWMV) #endif // CONFIG_EXT_INTER @@ -90,8 +81,7 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, } } else { // compound reference frame - if (candidate->ref_frame[0] == rf[0] && - candidate->ref_frame[1] == rf[1]) { + if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) { int_mv this_refmv[2]; for (ref = 0; ref < 2; ++ref) { @@ -104,8 +94,7 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)) break; - if (index < *refmv_count) - ref_mv_stack[index].weight += 2 * weight; + if (index < *refmv_count) ref_mv_stack[index].weight += 2 * weight; // Add a new item to the list. if (index == *refmv_count) { @@ -135,8 +124,7 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int) break; - if (index < *refmv_count) - ref_mv_stack[index].weight += weight; + if (index < *refmv_count) ref_mv_stack[index].weight += weight; // Add a new item to the list. if (index == *refmv_count) { @@ -160,10 +148,8 @@ static uint8_t add_ref_mv_candidate(const MODE_INFO *const candidate_mi, static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, const int mi_row, const int mi_col, int block, - const MV_REFERENCE_FRAME rf[2], - int row_offset, - CANDIDATE_MV *ref_mv_stack, - uint8_t *refmv_count) { + const MV_REFERENCE_FRAME rf[2], int row_offset, + CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) { const TileInfo *const tile = &xd->tile; int i; uint8_t newmv_count = 0; @@ -177,13 +163,12 @@ static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, const MODE_INFO *const candidate_mi = xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; - const int len = VPXMIN(xd->n8_w, - num_8x8_blocks_wide_lookup[candidate->sb_type]); + const int len = + VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[candidate->sb_type]); - newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf, - refmv_count, ref_mv_stack, - cm->allow_high_precision_mv, - len, block, mi_pos.col); + newmv_count += add_ref_mv_candidate( + candidate_mi, candidate, rf, refmv_count, ref_mv_stack, + cm->allow_high_precision_mv, len, block, mi_pos.col); i += len; } else { ++i; @@ -195,10 +180,8 @@ static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, const int mi_row, const int mi_col, int block, - const MV_REFERENCE_FRAME rf[2], - int col_offset, - CANDIDATE_MV *ref_mv_stack, - uint8_t *refmv_count) { + const MV_REFERENCE_FRAME rf[2], int col_offset, + CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) { const TileInfo *const tile = &xd->tile; int i; uint8_t newmv_count = 0; @@ -212,13 +195,12 @@ static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, const MODE_INFO *const candidate_mi = xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; - const int len = VPXMIN(xd->n8_h, - num_8x8_blocks_high_lookup[candidate->sb_type]); + const int len = + VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[candidate->sb_type]); - newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf, - refmv_count, ref_mv_stack, - cm->allow_high_precision_mv, - len, block, mi_pos.col); + newmv_count += add_ref_mv_candidate( + candidate_mi, candidate, rf, refmv_count, ref_mv_stack, + cm->allow_high_precision_mv, len, block, mi_pos.col); i += len; } else { ++i; @@ -230,9 +212,8 @@ static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, const int mi_row, const int mi_col, int block, - const MV_REFERENCE_FRAME rf[2], - int row_offset, int col_offset, - CANDIDATE_MV *ref_mv_stack, + const MV_REFERENCE_FRAME rf[2], int row_offset, + int col_offset, CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) { const TileInfo *const tile = &xd->tile; POSITION mi_pos; @@ -248,16 +229,15 @@ static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd, const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; const int len = 1; - newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf, - refmv_count, ref_mv_stack, - cm->allow_high_precision_mv, - len, block, mi_pos.col); + newmv_count += add_ref_mv_candidate( + candidate_mi, candidate, rf, refmv_count, ref_mv_stack, + cm->allow_high_precision_mv, len, block, mi_pos.col); } // Analyze a single 8x8 block motion information. return newmv_count; } -static int has_top_right(const MACROBLOCKD *xd, - int mi_row, int mi_col, int bs) { +static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col, + int bs) { // In a split partition all apart from the bottom right has a top right int has_tr = !((mi_row & bs) && (mi_col & bs)); @@ -282,27 +262,24 @@ static int has_top_right(const MACROBLOCKD *xd, // The left hand of two vertical rectangles always has a top right (as the // block above will have been decoded) if (xd->n8_w < xd->n8_h) - if (!xd->is_sec_rect) - has_tr = 1; + if (!xd->is_sec_rect) has_tr = 1; // The bottom of two horizontal rectangles never has a top right (as the block // to the right won't have been decoded) if (xd->n8_w > xd->n8_h) - if (xd->is_sec_rect) - has_tr = 0; + if (xd->is_sec_rect) has_tr = 0; #if CONFIG_EXT_PARTITION_TYPES // The bottom left square of a Vertical A does not have a top right as it is // decoded before the right hand rectangle of the partition if (xd->mi[0]->mbmi.partition == PARTITION_VERT_A) - if ((mi_row & bs) && !(mi_col & bs)) - has_tr = 0; + if ((mi_row & bs) && !(mi_col & bs)) has_tr = 0; #endif // CONFIG_EXT_PARTITION_TYPES return has_tr; } -static void handle_sec_rect_block(const MB_MODE_INFO * const candidate, +static void handle_sec_rect_block(const MB_MODE_INFO *const candidate, uint8_t refmv_count, CANDIDATE_MV *ref_mv_stack, MV_REFERENCE_FRAME ref_frame, @@ -315,8 +292,7 @@ static void handle_sec_rect_block(const MB_MODE_INFO * const candidate, const int_mv pred_mv = candidate->mv[rf]; for (idx = 0; idx < list_range; ++idx) - if (pred_mv.as_int == ref_mv_stack[idx].this_mv.as_int) - break; + if (pred_mv.as_int == ref_mv_stack[idx].this_mv.as_int) break; if (idx < list_range) { if (idx == 0) @@ -330,19 +306,19 @@ static void handle_sec_rect_block(const MB_MODE_INFO * const candidate, static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame, - uint8_t *refmv_count, - CANDIDATE_MV *ref_mv_stack, - int_mv *mv_ref_list, - int block, int mi_row, int mi_col, - int16_t *mode_context) { + uint8_t *refmv_count, CANDIDATE_MV *ref_mv_stack, + int_mv *mv_ref_list, int block, int mi_row, + int mi_col, int16_t *mode_context) { int idx, nearest_refmv_count = 0; uint8_t newmv_count = 0; CANDIDATE_MV tmp_mv; int len, nr_len; - const MV_REF *const prev_frame_mvs_base = cm->use_prev_frame_mvs ? - cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + const MV_REF *const prev_frame_mvs_base = + cm->use_prev_frame_mvs + ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col + : NULL; int bs = VPXMAX(xd->n8_w, xd->n8_h); int has_tr = has_top_right(xd, mi_row, mi_col, bs); @@ -354,16 +330,16 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, *refmv_count = 0; // Scan the first above row mode info. - newmv_count = scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, - -1, ref_mv_stack, refmv_count); + newmv_count = scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, + ref_mv_stack, refmv_count); // Scan the first left column mode info. - newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, - -1, ref_mv_stack, refmv_count); + newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, + ref_mv_stack, refmv_count); // Check top-right boundary if (has_tr) - newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, - -1, 1, ref_mv_stack, refmv_count); + newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 1, + ref_mv_stack, refmv_count); nearest_refmv_count = *refmv_count; @@ -373,8 +349,8 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, ref_mv_stack[idx].weight += REF_CAT_LEVEL; } - if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame - && rf[1] == NONE) { + if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame && + rf[1] == NONE) { int ref; int blk_row, blk_col; @@ -387,24 +363,19 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, mi_pos.row = blk_row; mi_pos.col = blk_col; - if (!is_inside(&xd->tile, mi_col, mi_row, &mi_pos)) - continue; + if (!is_inside(&xd->tile, mi_col, mi_row, &mi_pos)) continue; for (ref = 0; ref < 2; ++ref) { if (prev_frame_mvs->ref_frame[ref] == ref_frame) { int_mv this_refmv = prev_frame_mvs->mv[ref]; - lower_mv_precision(&this_refmv.as_mv, - cm->allow_high_precision_mv); + lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv); for (idx = 0; idx < *refmv_count; ++idx) - if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) - break; + if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break; - if (idx < *refmv_count) - ref_mv_stack[idx].weight += 2; + if (idx < *refmv_count) ref_mv_stack[idx].weight += 2; - if (idx == *refmv_count && - *refmv_count < MAX_REF_MV_STACK_SIZE) { + if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) { ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int; ref_mv_stack[idx].weight = 2; ++(*refmv_count); @@ -423,33 +394,32 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, mode_context[ref_frame] |= (1 << ZEROMV_OFFSET); // Analyze the top-left corner block mode info. -// scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, -// -1, -1, ref_mv_stack, refmv_count); + // scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame, + // -1, -1, ref_mv_stack, refmv_count); // Scan the second outer area. - scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, - -2, ref_mv_stack, refmv_count); - scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, - -2, ref_mv_stack, refmv_count); + scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -2, ref_mv_stack, + refmv_count); + scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -2, ref_mv_stack, + refmv_count); // Scan the third outer area. - scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, - -3, ref_mv_stack, refmv_count); - scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, - -3, ref_mv_stack, refmv_count); + scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -3, ref_mv_stack, + refmv_count); + scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -3, ref_mv_stack, + refmv_count); // Scan the fourth outer area. - scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, - -4, ref_mv_stack, refmv_count); + scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack, + refmv_count); // Scan the third left row mode info. - scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, - -4, ref_mv_stack, refmv_count); + scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack, + refmv_count); switch (nearest_refmv_count) { case 0: mode_context[ref_frame] |= 0; - if (*refmv_count >= 1) - mode_context[ref_frame] |= 1; + if (*refmv_count >= 1) mode_context[ref_frame] |= 1; if (*refmv_count == 1) mode_context[ref_frame] |= (1 << REFMV_OFFSET); @@ -526,16 +496,15 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, if (rf[1] > NONE) { for (idx = 0; idx < *refmv_count; ++idx) { - clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, - xd->n8_w << 3 , xd->n8_h << 3, xd); - clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv, - xd->n8_w << 3 , xd->n8_h << 3, xd); + clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << 3, + xd->n8_h << 3, xd); + clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv, xd->n8_w << 3, + xd->n8_h << 3, xd); } } else { for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) { mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int; - clamp_mv_ref(&mv_ref_list[idx].as_mv, - xd->n8_w << 3, xd->n8_h << 3, xd); + clamp_mv_ref(&mv_ref_list[idx].as_mv, xd->n8_w << 3, xd->n8_h << 3, xd); } } } @@ -545,17 +514,18 @@ static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd, // to try and find candidate reference vectors. static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int block, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, - int16_t *mode_context) { + int_mv *mv_ref_list, int block, int mi_row, + int mi_col, find_mv_refs_sync sync, + void *const data, int16_t *mode_context) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; int different_ref_found = 0; int context_counter = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? - cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + const MV_REF *const prev_frame_mvs = + cm->use_prev_frame_mvs + ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col + : NULL; const TileInfo *const tile = &xd->tile; const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3; const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3; @@ -566,8 +536,8 @@ static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]; + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; @@ -588,27 +558,27 @@ static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd, for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]->mbmi; + const MB_MODE_INFO *const candidate = + &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi; different_ref_found = 1; if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, - bw, bh, xd, Done); + ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, bw, bh, xd, + Done); else if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, - bw, bh, xd, Done); + ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, bw, bh, xd, + Done); } } - // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast - // on windows platform. The sync here is unncessary if use_perv_frame_mvs - // is 0. But after removing it, there will be hang in the unit test on windows - // due to several threads waiting for a thread's signal. +// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast +// on windows platform. The sync here is unncessary if use_perv_frame_mvs +// is 0. But after removing it, there will be hang in the unit test on windows +// due to several threads waiting for a thread's signal. #if defined(_WIN32) && !HAVE_PTHREAD_H - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } #endif // Check the last frame's mode and mv info. @@ -619,11 +589,11 @@ static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd, } if (prev_frame_mvs->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, - bw, bh, xd, Done); + ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, bw, bh, + xd, Done); } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { - ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, - bw, bh, xd, Done); + ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, bw, bh, + xd, Done); } } @@ -634,8 +604,8 @@ static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride]->mbmi; + const MB_MODE_INFO *const candidate = + &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi; // If the candidate is INTRA we don't want to consider its mv. IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias, @@ -673,14 +643,13 @@ Done: if (mode_context) mode_context[ref_frame] = counter_to_context[context_counter]; for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i) - mv_ref_list[i].as_int = 0; + mv_ref_list[i].as_int = 0; } #if CONFIG_EXT_INTER // This function keeps a mode count for a given MB/SB -void vp10_update_mv_context(const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, +void vp10_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi, + MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int block, int mi_row, int mi_col, int16_t *mode_context) { int i, refmv_count = 0; @@ -715,7 +684,7 @@ void vp10_update_mv_context(const MACROBLOCKD *xd, } } - Done: +Done: if (mode_context) mode_context[ref_frame] = counter_to_context[context_counter]; @@ -723,52 +692,46 @@ void vp10_update_mv_context(const MACROBLOCKD *xd, #endif // CONFIG_EXT_INTER void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, #if CONFIG_REF_MV - uint8_t *ref_mv_count, - CANDIDATE_MV *ref_mv_stack, + uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack, #if CONFIG_EXT_INTER - int16_t *compound_mode_context, + int16_t *compound_mode_context, #endif // CONFIG_EXT_INTER #endif - int_mv *mv_ref_list, - int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, - int16_t *mode_context) { + int_mv *mv_ref_list, int mi_row, int mi_col, + find_mv_refs_sync sync, void *const data, + int16_t *mode_context) { #if CONFIG_REF_MV int idx, all_zero = 1; #endif #if CONFIG_EXT_INTER - vp10_update_mv_context(xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, + vp10_update_mv_context(xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, #if CONFIG_REF_MV compound_mode_context); #else mode_context); #endif // CONFIG_REF_MV - find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, sync, data, NULL); + find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, sync, + data, NULL); #else - find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, sync, data, mode_context); + find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, sync, + data, mode_context); #endif // CONFIG_EXT_INTER #if CONFIG_REF_MV - setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, - mv_ref_list, -1, mi_row, mi_col, mode_context); + setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list, + -1, mi_row, mi_col, mode_context); for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) - if (mv_ref_list[idx].as_int != 0) - all_zero = 0; + if (mv_ref_list[idx].as_int != 0) all_zero = 0; - if (all_zero) - mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET); + if (all_zero) mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET); #endif } -void vp10_find_best_ref_mvs(int allow_hp, - int_mv *mvlist, int_mv *nearest_mv, - int_mv *near_mv) { +void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv, + int_mv *near_mv) { int i; // Make sure all the candidates are properly clamped etc for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { @@ -778,8 +741,8 @@ void vp10_find_best_ref_mvs(int allow_hp, *near_mv = mvlist[1]; } -void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, +void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, int block, + int ref, int mi_row, int mi_col, #if CONFIG_REF_MV CANDIDATE_MV *ref_mv_stack, uint8_t *ref_mv_count, @@ -804,16 +767,16 @@ void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); - find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, - mi_row, mi_col, NULL, NULL, NULL); + find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, mi_row, + mi_col, NULL, NULL, NULL); #if CONFIG_REF_MV - scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, - -1, 0, ref_mv_stack, ref_mv_count); + scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 0, ref_mv_stack, + ref_mv_count); above_count = *ref_mv_count; - scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, - 0, -1, ref_mv_stack, ref_mv_count); + scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, 0, -1, ref_mv_stack, + ref_mv_count); left_count = *ref_mv_count - above_count; if (above_count > 1 && left_count > 0) { @@ -823,8 +786,8 @@ void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, } for (idx = 0; idx < *ref_mv_count; ++idx) - clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, - xd->n8_w << 3, xd->n8_h << 3, xd); + clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, + xd); for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx) mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int; @@ -860,7 +823,6 @@ void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, } break; } - default: - assert(0 && "Invalid block index."); + default: assert(0 && "Invalid block index."); } } diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h index 06b1e9f09900009a2c82be00c74ca55c088ce127..4b25dc7e620f59099045939c91e122aaa795fe79 100644 --- a/vp10/common/mvref_common.h +++ b/vp10/common/mvref_common.h @@ -56,17 +56,17 @@ static const int mode_2_counter[MB_MODE_COUNT] = { 3, // ZEROMV 1, // NEWMV #if CONFIG_EXT_INTER - 1, // NEWFROMNEARMV - 0, // NEAREST_NEARESTMV - 0, // NEAREST_NEARMV - 0, // NEAR_NEARESTMV - 0, // NEAR_NEARMV - 1, // NEAREST_NEWMV - 1, // NEW_NEARESTMV - 1, // NEAR_NEWMV - 1, // NEW_NEARMV - 3, // ZERO_ZEROMV - 1, // NEW_NEWMV + 1, // NEWFROMNEARMV + 0, // NEAREST_NEARESTMV + 0, // NEAREST_NEARMV + 0, // NEAR_NEARESTMV + 0, // NEAR_NEARMV + 1, // NEAREST_NEWMV + 1, // NEW_NEARESTMV + 1, // NEAR_NEWMV + 1, // NEW_NEARMV + 3, // ZERO_ZEROMV + 1, // NEW_NEWMV #endif // CONFIG_EXT_INTER }; @@ -74,84 +74,193 @@ static const int mode_2_counter[MB_MODE_COUNT] = { // 2. However the actual count can never be greater than 2 so the highest // counter we need is 18. 9 is an invalid counter that's never used. static const int counter_to_context[19] = { - BOTH_PREDICTED, // 0 - NEW_PLUS_NON_INTRA, // 1 - BOTH_NEW, // 2 - ZERO_PLUS_PREDICTED, // 3 - NEW_PLUS_NON_INTRA, // 4 - INVALID_CASE, // 5 - BOTH_ZERO, // 6 - INVALID_CASE, // 7 - INVALID_CASE, // 8 + BOTH_PREDICTED, // 0 + NEW_PLUS_NON_INTRA, // 1 + BOTH_NEW, // 2 + ZERO_PLUS_PREDICTED, // 3 + NEW_PLUS_NON_INTRA, // 4 + INVALID_CASE, // 5 + BOTH_ZERO, // 6 + INVALID_CASE, // 7 + INVALID_CASE, // 8 INTRA_PLUS_NON_INTRA, // 9 INTRA_PLUS_NON_INTRA, // 10 - INVALID_CASE, // 11 + INVALID_CASE, // 11 INTRA_PLUS_NON_INTRA, // 12 - INVALID_CASE, // 13 - INVALID_CASE, // 14 - INVALID_CASE, // 15 - INVALID_CASE, // 16 - INVALID_CASE, // 17 - BOTH_INTRA // 18 + INVALID_CASE, // 13 + INVALID_CASE, // 14 + INVALID_CASE, // 15 + INVALID_CASE, // 16 + INVALID_CASE, // 17 + BOTH_INTRA // 18 }; static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { // 4X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, // 4X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, // 8X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, // 8X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, + { { -1, 0 }, + { 0, -1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -2, -1 }, + { -1, -2 }, + { -2, -2 } }, // 8X16 - {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, + { { 0, -1 }, + { -1, 0 }, + { 1, -1 }, + { -1, -1 }, + { 0, -2 }, + { -2, 0 }, + { -2, -1 }, + { -1, -2 } }, // 16X8 - {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, + { { -1, 0 }, + { 0, -1 }, + { -1, 1 }, + { -1, -1 }, + { -2, 0 }, + { 0, -2 }, + { -1, -2 }, + { -2, -1 } }, // 16X16 - {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + { { -1, 0 }, + { 0, -1 }, + { -1, 1 }, + { 1, -1 }, + { -1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -3, -3 } }, // 16X32 - {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, + { { 0, -1 }, + { -1, 0 }, + { 2, -1 }, + { -1, -1 }, + { -1, 1 }, + { 0, -3 }, + { -3, 0 }, + { -3, -3 } }, // 32X16 - {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + { { -1, 0 }, + { 0, -1 }, + { -1, 2 }, + { -1, -1 }, + { 1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -3, -3 } }, // 32X32 - {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, + { { -1, 1 }, + { 1, -1 }, + { -1, 2 }, + { 2, -1 }, + { -1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -3, -3 } }, // 32X64 - {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, + { { 0, -1 }, + { -1, 0 }, + { 4, -1 }, + { -1, 2 }, + { -1, -1 }, + { 0, -3 }, + { -3, 0 }, + { 2, -1 } }, // 64X32 - {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, + { { -1, 0 }, + { 0, -1 }, + { -1, 4 }, + { 2, -1 }, + { -1, -1 }, + { -3, 0 }, + { 0, -3 }, + { -1, 2 } }, // 64X64 - {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}, + { { -1, 3 }, + { 3, -1 }, + { -1, 4 }, + { 4, -1 }, + { -1, -1 }, + { -1, 0 }, + { 0, -1 }, + { -1, 6 } }, #if CONFIG_EXT_PARTITION // TODO(debargha/jingning) Making them twice the 32x64, .. ones above // 64x128 - {{0, -2}, {-2, 0}, {8, -2}, {-2, 4}, {-2, -2}, {0, -6}, {-6, 0}, {4, -2}}, + { { 0, -2 }, + { -2, 0 }, + { 8, -2 }, + { -2, 4 }, + { -2, -2 }, + { 0, -6 }, + { -6, 0 }, + { 4, -2 } }, // 128x64 - {{-2, 0}, {0, -2}, {-2, 8}, {4, -2}, {-2, -2}, {-6, 0}, {0, -6}, {-2, 4}}, + { { -2, 0 }, + { 0, -2 }, + { -2, 8 }, + { 4, -2 }, + { -2, -2 }, + { -6, 0 }, + { 0, -6 }, + { -2, 4 } }, // 128x128 - {{-2, 6}, {6, -2}, {-2, 8}, {8, -2}, {-2, -2}, {-2, 0}, {0, -2}, {-2, 12}}, + { { -2, 6 }, + { 6, -2 }, + { -2, 8 }, + { 8, -2 }, + { -2, -2 }, + { -2, 0 }, + { 0, -2 }, + { -2, 12 } }, #endif // CONFIG_EXT_PARTITION }; static const int idx_n_column_to_subblock[4][2] = { - {1, 2}, - {1, 3}, - {3, 2}, - {3, 3} + { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 } }; // clamp_mv_ref #if CONFIG_EXT_PARTITION -# define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units +#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units #else -# define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units -#endif // CONFIG_EXT_PARTITION +#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units +#endif // CONFIG_EXT_PARTITION static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER, - xd->mb_to_right_edge + bw * 8 + MV_BORDER, - xd->mb_to_top_edge - bh * 8 - MV_BORDER, - xd->mb_to_bottom_edge + bh * 8 + MV_BORDER); + xd->mb_to_right_edge + bw * 8 + MV_BORDER, + xd->mb_to_top_edge - bh * 8 - MV_BORDER, + xd->mb_to_bottom_edge + bh * 8 + MV_BORDER); } // This function returns either the appropriate sub block or block's mv @@ -159,19 +268,21 @@ static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) { static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, int search_col, int block_idx) { return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8 - ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] - .as_mv[which_mv] - : candidate->mbmi.mv[which_mv]; + ? candidate + ->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .as_mv[which_mv] + : candidate->mbmi.mv[which_mv]; } #if CONFIG_REF_MV static INLINE int_mv get_sub_block_pred_mv(const MODE_INFO *candidate, - int which_mv, - int search_col, int block_idx) { + int which_mv, int search_col, + int block_idx) { return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8 - ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] - .pred_mv_s8[which_mv] - : candidate->mbmi.pred_mv[which_mv]; + ? candidate + ->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] + .pred_mv_s8[which_mv] + : candidate->mbmi.pred_mv[which_mv]; } #endif @@ -192,38 +303,35 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector it will also // skip all additional processing and jump to done! -#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done) \ - do { \ - (mv_ref_list)[(refmv_count)] = (mv); \ - CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd)); \ +#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done) \ + do { \ + (mv_ref_list)[(refmv_count)] = (mv); \ + CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd)); \ if (refmv_count && (mv_ref_list)[1].as_int != (mv_ref_list)[0].as_int) { \ - (refmv_count) = 2; \ - goto Done; \ - } \ - (refmv_count) = 1; \ + (refmv_count) = 2; \ + goto Done; \ + } \ + (refmv_count) = 1; \ } while (0) // If either reference frame is different, not INTRA, and they // are different from each other scale and add the mv to our list. #define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \ - mv_ref_list, bw, bh, xd, Done) \ - do { \ - if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, bw, bh, xd, Done); \ - if (has_second_ref(mbmi) && \ - (mbmi)->ref_frame[1] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, bw, bh, xd, Done); \ - } \ + mv_ref_list, bw, bh, xd, Done) \ + do { \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, bw, bh, xd, Done); \ + if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ + refmv_count, mv_ref_list, bw, bh, xd, Done); \ + } \ } while (0) - // Checks that the given mi_row, mi_col and search point // are inside the borders of the tile. -static INLINE int is_inside(const TileInfo *const tile, - int mi_col, int mi_row, +static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row, const POSITION *mi_pos) { return !(mi_row + mi_pos->row < tile->mi_row_start || mi_col + mi_pos->col < tile->mi_col_start || @@ -234,10 +342,8 @@ static INLINE int is_inside(const TileInfo *const tile, static INLINE void lower_mv_precision(MV *mv, int allow_hp) { const int use_hp = allow_hp && vp10_use_mv_hp(mv); if (!use_hp) { - if (mv->row & 1) - mv->row += (mv->row > 0 ? -1 : 1); - if (mv->col & 1) - mv->col += (mv->col > 0 ? -1 : 1); + if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1); + if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1); } } @@ -247,12 +353,11 @@ static INLINE int vp10_nmv_ctx(const uint8_t ref_mv_count, #if CONFIG_EXT_INTER return 0; #endif - if (ref_mv_stack[0].weight > REF_CAT_LEVEL && - ref_mv_count > 0) { + if (ref_mv_stack[0].weight > REF_CAT_LEVEL && ref_mv_count > 0) { if (abs(ref_mv_stack[0].this_mv.as_mv.row - - ref_mv_stack[0].pred_mv.as_mv.row) <= 4 && - abs(ref_mv_stack[0].this_mv.as_mv.col - - ref_mv_stack[0].pred_mv.as_mv.col) <= 4) + ref_mv_stack[0].pred_mv.as_mv.row) <= 4 && + abs(ref_mv_stack[0].this_mv.as_mv.col - + ref_mv_stack[0].pred_mv.as_mv.col) <= 4) return 2; else return 1; @@ -263,7 +368,7 @@ static INLINE int vp10_nmv_ctx(const uint8_t ref_mv_count, static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) { if (rf[1] > INTRA_FRAME) { return TOTAL_REFS_PER_FRAME + FWD_RF_OFFSET(rf[0]) + - BWD_RF_OFFSET(rf[1]) * FWD_REFS; + BWD_RF_OFFSET(rf[1]) * FWD_REFS; } return rf[0]; @@ -271,18 +376,13 @@ static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) { static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = { #if CONFIG_EXT_REFS - {LAST_FRAME, BWDREF_FRAME}, - {LAST2_FRAME, BWDREF_FRAME}, - {LAST3_FRAME, BWDREF_FRAME}, - {GOLDEN_FRAME, BWDREF_FRAME}, - - {LAST_FRAME, ALTREF_FRAME}, - {LAST2_FRAME, ALTREF_FRAME}, - {LAST3_FRAME, ALTREF_FRAME}, - {GOLDEN_FRAME, ALTREF_FRAME} + { LAST_FRAME, BWDREF_FRAME }, { LAST2_FRAME, BWDREF_FRAME }, + { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME }, + + { LAST_FRAME, ALTREF_FRAME }, { LAST2_FRAME, ALTREF_FRAME }, + { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME } #else - {LAST_FRAME, ALTREF_FRAME}, - {GOLDEN_FRAME, ALTREF_FRAME} + { LAST_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME } #endif }; @@ -350,8 +450,7 @@ typedef void (*find_mv_refs_sync)(void *const data, int mi_row); void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, #if CONFIG_REF_MV - uint8_t *ref_mv_count, - CANDIDATE_MV *ref_mv_stack, + uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack, #if CONFIG_EXT_INTER int16_t *compound_mode_context, #endif // CONFIG_EXT_INTER @@ -363,11 +462,11 @@ void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd, // check a list of motion vectors by sad score using a number rows of pixels // above and a number cols of pixels in the left to select the one with best // score to use as ref motion vector -void vp10_find_best_ref_mvs(int allow_hp, - int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv); +void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv, + int_mv *near_mv); -void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, +void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, int block, + int ref, int mi_row, int mi_col, #if CONFIG_REF_MV CANDIDATE_MV *ref_mv_stack, uint8_t *ref_mv_count, @@ -379,9 +478,8 @@ void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, #if CONFIG_EXT_INTER // This function keeps a mode count for a given MB/SB -void vp10_update_mv_context(const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, +void vp10_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi, + MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int block, int mi_row, int mi_col, int16_t *mode_context); #endif // CONFIG_EXT_INTER diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index f16e76a614a7d525baffba6133ba21793afdac52..91eeefdee38b66f9e80452c0a16380a3f191d41d 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -52,10 +52,10 @@ extern "C" { #define NUM_PING_PONG_BUFFERS 2 typedef enum { - SINGLE_REFERENCE = 0, - COMPOUND_REFERENCE = 1, + SINGLE_REFERENCE = 0, + COMPOUND_REFERENCE = 1, REFERENCE_MODE_SELECT = 2, - REFERENCE_MODES = 3, + REFERENCE_MODES = 3, } REFERENCE_MODE; typedef enum { @@ -104,9 +104,9 @@ typedef struct { } RefCntBuffer; typedef struct BufferPool { - // Protect BufferPool from being accessed by several FrameWorkers at - // the same time during frame parallel decode. - // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. +// Protect BufferPool from being accessed by several FrameWorkers at +// the same time during frame parallel decode. +// TODO(hkuang): Try to use atomic variable instead of locking the whole pool. #if CONFIG_MULTITHREAD pthread_mutex_t pool_mutex; #endif @@ -124,7 +124,7 @@ typedef struct BufferPool { } BufferPool; typedef struct VP10Common { - struct vpx_internal_error_info error; + struct vpx_internal_error_info error; vpx_color_space_t color_space; int color_range; int width; @@ -169,7 +169,7 @@ typedef struct VP10Common { YV12_BUFFER_CONFIG tmp_loop_buf; #endif // CONFIG_LOOP_RESTORATION - FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ + FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ #if CONFIG_EXT_REFS // frame type of the frame before last frame FRAME_TYPE last2_frame_type; @@ -266,14 +266,14 @@ typedef struct VP10Common { // a frame decode REFRESH_FRAME_CONTEXT_MODE refresh_frame_context; - int ref_frame_sign_bias[TOTAL_REFS_PER_FRAME]; /* Two state 0, 1 */ + int ref_frame_sign_bias[TOTAL_REFS_PER_FRAME]; /* Two state 0, 1 */ struct loopfilter lf; struct segmentation seg; int frame_parallel_decode; // frame-based threading. - // Context probabilities for reference frame prediction +// Context probabilities for reference frame prediction #if CONFIG_EXT_REFS MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS]; MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS]; @@ -283,9 +283,9 @@ typedef struct VP10Common { #endif // CONFIG_EXT_REFS REFERENCE_MODE reference_mode; - FRAME_CONTEXT *fc; /* this frame entropy */ - FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS - unsigned int frame_context_idx; /* Context to use/update */ + FRAME_CONTEXT *fc; /* this frame entropy */ + FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS + unsigned int frame_context_idx; /* Context to use/update */ FRAME_COUNTS counts; #if CONFIG_ENTROPY @@ -345,9 +345,9 @@ typedef struct VP10Common { Global_Motion_Params global_motion[TOTAL_REFS_PER_FRAME]; #endif - BLOCK_SIZE sb_size; // Size of the superblock used for this frame - int mib_size; // Size of the superblock in units of MI blocks - int mib_size_log2; // Log 2 of above. + BLOCK_SIZE sb_size; // Size of the superblock used for this frame + int mib_size; // Size of the superblock in units of MI blocks + int mib_size_log2; // Log 2 of above. } VP10_COMMON; // TODO(hkuang): Don't need to lock the whole pool after implementing atomic @@ -369,10 +369,8 @@ static void unlock_buffer_pool(BufferPool *const pool) { } static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP10_COMMON *cm, int index) { - if (index < 0 || index >= REF_FRAMES) - return NULL; - if (cm->ref_frame_map[index] < 0) - return NULL; + if (index < 0 || index >= REF_FRAMES) return NULL; + if (cm->ref_frame_map[index] < 0) return NULL; assert(cm->ref_frame_map[index] < FRAME_BUFFERS); return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; } @@ -388,8 +386,7 @@ static INLINE int get_free_fb(VP10_COMMON *cm) { lock_buffer_pool(cm->buffer_pool); for (i = 0; i < FRAME_BUFFERS; ++i) - if (frame_bufs[i].ref_count == 0) - break; + if (frame_bufs[i].ref_count == 0) break; if (i != FRAME_BUFFERS) { frame_bufs[i].ref_count = 1; @@ -426,7 +423,7 @@ static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) { } static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd, - tran_low_t *dqcoeff) { + tran_low_t *dqcoeff) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) { xd->plane[i].dqcoeff = dqcoeff; @@ -472,17 +469,16 @@ static INLINE int calc_mi_size(int len) { } static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, - int mi_row, int bh, - int mi_col, int bw, + int mi_row, int bh, int mi_col, int bw, int mi_rows, int mi_cols) { - xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; - xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); - xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; // Are edges available for intra prediction? - xd->up_available = (mi_row > tile->mi_row_start); - xd->left_available = (mi_col > tile->mi_col_start); + xd->up_available = (mi_row > tile->mi_row_start); + xd->left_available = (mi_col > tile->mi_col_start); if (xd->up_available) { xd->above_mi = xd->mi[-xd->mi_stride]; // above_mi may be NULL in encoder's first pass. @@ -506,12 +502,10 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, #if CONFIG_REF_MV xd->is_sec_rect = 0; if (xd->n8_w < xd->n8_h) - if (mi_col & (xd->n8_h - 1)) - xd->is_sec_rect = 1; + if (mi_col & (xd->n8_h - 1)) xd->is_sec_rect = 1; if (xd->n8_w > xd->n8_h) - if (mi_row & (xd->n8_w - 1)) - xd->is_sec_rect = 1; + if (mi_row & (xd->n8_w - 1)) xd->is_sec_rect = 1; #endif } @@ -525,13 +519,12 @@ static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm, return cm->kf_y_prob[above][left]; } -static INLINE void update_partition_context(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE subsize, +static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row, + int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) { PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; PARTITION_CONTEXT *const left_ctx = - xd->left_seg_context + (mi_row & MAX_MIB_MASK); + xd->left_seg_context + (mi_row & MAX_MIB_MASK); #if CONFIG_EXT_PARTITION_TYPES const int bw = num_8x8_blocks_wide_lookup[bsize]; @@ -551,9 +544,8 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, } #if CONFIG_EXT_PARTITION_TYPES -static INLINE void update_ext_partition_context(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE subsize, +static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row, + int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize, PARTITION_TYPE partition) { if (bsize >= BLOCK_8X8) { @@ -561,8 +553,7 @@ static INLINE void update_ext_partition_context(MACROBLOCKD *xd, BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); switch (partition) { case PARTITION_SPLIT: - if (bsize != BLOCK_8X8) - break; + if (bsize != BLOCK_8X8) break; case PARTITION_NONE: case PARTITION_HORZ: case PARTITION_VERT: @@ -584,21 +575,19 @@ static INLINE void update_ext_partition_context(MACROBLOCKD *xd, update_partition_context(xd, mi_row, mi_col, subsize, subsize); update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize); break; - default: - assert(0 && "Invalid partition type"); + default: assert(0 && "Invalid partition type"); } } } #endif // CONFIG_EXT_PARTITION_TYPES -static INLINE int partition_plane_context(const MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { +static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row, + int mi_col, BLOCK_SIZE bsize) { const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; const PARTITION_CONTEXT *left_ctx = - xd->left_seg_context + (mi_row & MAX_MIB_MASK); + xd->left_seg_context + (mi_row & MAX_MIB_MASK); const int bsl = mi_width_log2_lookup[bsize]; - int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; + int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); assert(bsl >= 0); @@ -607,7 +596,7 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, } static INLINE void vp10_zero_above_context(VP10_COMMON *const cm, - int mi_col_start, int mi_col_end) { + int mi_col_start, int mi_col_end) { const int width = mi_col_end - mi_col_start; const int offset_y = 2 * mi_col_start; @@ -635,12 +624,10 @@ static INLINE void vp10_zero_left_context(MACROBLOCKD *const xd) { } #if CONFIG_VAR_TX -static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, - TX_SIZE tx_size, +static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, TX_SIZE tx_size, int len) { int i; - for (i = 0; i < len; ++i) - txfm_ctx[i] = tx_size; + for (i = 0; i < len; ++i) txfm_ctx[i] = tx_size; } static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx, @@ -665,8 +652,7 @@ static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx, #endif static INLINE PARTITION_TYPE get_partition(const VP10_COMMON *const cm, - const int mi_row, - const int mi_col, + const int mi_row, const int mi_col, const BLOCK_SIZE bsize) { if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { return PARTITION_INVALID; @@ -683,10 +669,8 @@ static INLINE PARTITION_TYPE get_partition(const VP10_COMMON *const cm, assert(cm->mi_grid_visible[offset] == &cm->mi[offset]); - if (partition != PARTITION_NONE && - bsize > BLOCK_8X8 && - mi_row + hbs < cm->mi_rows && - mi_col + hbs < cm->mi_cols) { + if (partition != PARTITION_NONE && bsize > BLOCK_8X8 && + mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) { const BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A); const BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A); const MB_MODE_INFO *const mbmi_right = &mi[hbs]->mbmi; diff --git a/vp10/common/pred_common.c b/vp10/common/pred_common.c index dd9be87323374d9dcf5e554a539f92ca5e3a9d12..dc0cd901b3e635a3cd04fe36568ae59bf7a666bf 100644 --- a/vp10/common/pred_common.c +++ b/vp10/common/pred_common.c @@ -17,14 +17,13 @@ // Returns a context number for the given MB prediction signal #if CONFIG_DUAL_FILTER static INTERP_FILTER get_ref_filter_type(const MODE_INFO *mi, - const MACROBLOCKD *xd, - int dir, + const MACROBLOCKD *xd, int dir, MV_REFERENCE_FRAME ref_frame) { INTERP_FILTER ref_type = SWITCHABLE_FILTERS; const MB_MODE_INFO *ref_mbmi = &mi->mbmi; int use_subpel[2] = { - has_subpel_mv_component(mi, xd, dir), - has_subpel_mv_component(mi, xd, dir + 2), + has_subpel_mv_component(mi, xd, dir), + has_subpel_mv_component(mi, xd, dir + 2), }; if (ref_mbmi->ref_frame[0] == ref_frame && use_subpel[0]) @@ -39,8 +38,8 @@ int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) { const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx_offset = (mbmi->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET; - MV_REFERENCE_FRAME ref_frame = (dir < 2) ? - mbmi->ref_frame[0] : mbmi->ref_frame[1]; + MV_REFERENCE_FRAME ref_frame = + (dir < 2) ? mbmi->ref_frame[0] : mbmi->ref_frame[1]; // Note: // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. @@ -53,8 +52,8 @@ int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) { left_type = get_ref_filter_type(xd->mi[-1], xd, dir, ref_frame); if (xd->up_available) - above_type = get_ref_filter_type(xd->mi[-xd->mi_stride], xd, - dir, ref_frame); + above_type = + get_ref_filter_type(xd->mi[-xd->mi_stride], xd, dir, ref_frame); if (left_type == above_type) filter_type_ctx += left_type; @@ -74,11 +73,13 @@ int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int left_type = xd->left_available && is_inter_block(left_mbmi) ? - left_mbmi->interp_filter : SWITCHABLE_FILTERS; + const int left_type = xd->left_available && is_inter_block(left_mbmi) + ? left_mbmi->interp_filter + : SWITCHABLE_FILTERS; const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const int above_type = xd->up_available && is_inter_block(above_mbmi) ? - above_mbmi->interp_filter : SWITCHABLE_FILTERS; + const int above_type = xd->up_available && is_inter_block(above_mbmi) + ? above_mbmi->interp_filter + : SWITCHABLE_FILTERS; if (left_type == above_type) return left_type; @@ -104,25 +105,16 @@ static INTRA_FILTER get_ref_intra_filter(const MB_MODE_INFO *ref_mbmi) { #else switch (ref_mbmi->interp_filter) { #endif - case EIGHTTAP_REGULAR: - ref_type = INTRA_FILTER_8TAP; - break; - case EIGHTTAP_SMOOTH: - ref_type = INTRA_FILTER_8TAP_SMOOTH; - break; - case MULTITAP_SHARP: - ref_type = INTRA_FILTER_8TAP_SHARP; - break; - case BILINEAR: - ref_type = INTRA_FILTERS; - break; - default: - break; + case EIGHTTAP_REGULAR: ref_type = INTRA_FILTER_8TAP; break; + case EIGHTTAP_SMOOTH: ref_type = INTRA_FILTER_8TAP_SMOOTH; break; + case MULTITAP_SHARP: ref_type = INTRA_FILTER_8TAP_SHARP; break; + case BILINEAR: ref_type = INTRA_FILTERS; break; + default: break; } } else { if (mode != DC_PRED && mode != TM_PRED) { - int p_angle = mode_to_angle_map[mode] + - ref_mbmi->angle_delta[0] * ANGLE_STEP; + int p_angle = + mode_to_angle_map[mode] + ref_mbmi->angle_delta[0] * ANGLE_STEP; if (vp10_is_intra_filter_switchable(p_angle)) { ref_type = ref_mbmi->intra_filter; } @@ -135,11 +127,9 @@ static INTRA_FILTER get_ref_intra_filter(const MB_MODE_INFO *ref_mbmi) { int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd) { int left_type = INTRA_FILTERS, above_type = INTRA_FILTERS; - if (xd->left_available) - left_type = get_ref_intra_filter(xd->left_mbmi); + if (xd->left_available) left_type = get_ref_intra_filter(xd->left_mbmi); - if (xd->up_available) - above_type = get_ref_intra_filter(xd->above_mbmi); + if (xd->up_available) above_type = get_ref_intra_filter(xd->above_mbmi); if (left_type == above_type) return left_type; @@ -168,8 +158,7 @@ int vp10_get_intra_inter_context(const MACROBLOCKD *xd) { if (has_above && has_left) { // both edges available const int above_intra = !is_inter_block(above_mbmi); const int left_intra = !is_inter_block(left_mbmi); - return left_intra && above_intra ? 3 - : left_intra || above_intra; + return left_intra && above_intra ? 3 : left_intra || above_intra; } else if (has_above || has_left) { // one edge available return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi); } else { @@ -315,18 +304,21 @@ int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; if (!has_second_ref(edge_mbmi)) // single pred (1/3) - pred_context = 1 + - 2 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0])); + pred_context = + 1 + 2 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0])); else // comp pred (1/3) - pred_context = 1 + 2 * - (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[fwd_ref_sign_idx])); + pred_context = 1 + + 2 * (!CHECK_GOLDEN_OR_LAST3( + edge_mbmi->ref_frame[fwd_ref_sign_idx])); } else { // inter/inter const int l_sg = !has_second_ref(left_mbmi); const int a_sg = !has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME frfa = a_sg ? - above_mbmi->ref_frame[0] : above_mbmi->ref_frame[fwd_ref_sign_idx]; - const MV_REFERENCE_FRAME frfl = l_sg ? - left_mbmi->ref_frame[0] : left_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME frfa = + a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME frfl = + l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[fwd_ref_sign_idx]; if (frfa == frfl && CHECK_GOLDEN_OR_LAST3(frfa)) { pred_context = 0; @@ -334,8 +326,7 @@ int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, if ((CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_LAST_OR_LAST2(frfl)) || (CHECK_BWDREF_OR_ALTREF(frfl) && CHECK_LAST_OR_LAST2(frfa))) { pred_context = 4; - } else if (CHECK_GOLDEN_OR_LAST3(frfa) || - CHECK_GOLDEN_OR_LAST3(frfl)) { + } else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl)) { pred_context = 1; } else { pred_context = 3; @@ -367,7 +358,8 @@ int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, pred_context = 2; } else { if (has_second_ref(edge_mbmi)) - pred_context = 4 * + pred_context = + 4 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[fwd_ref_sign_idx])); else pred_context = 3 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0])); @@ -414,15 +406,17 @@ int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm, if (!has_second_ref(edge_mbmi)) // single pred (1/3) pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST_FRAME); else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] - != LAST_FRAME); + pred_context = + 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME); } else { // inter/inter const int l_sg = !has_second_ref(left_mbmi); const int a_sg = !has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME frfa = a_sg ? - above_mbmi->ref_frame[0] : above_mbmi->ref_frame[fwd_ref_sign_idx]; - const MV_REFERENCE_FRAME frfl = l_sg ? - left_mbmi->ref_frame[0] : left_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME frfa = + a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME frfl = + l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[fwd_ref_sign_idx]; if (frfa == frfl && frfa == LAST_FRAME) pred_context = 0; @@ -445,14 +439,14 @@ int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm, else if (rfs == LAST_FRAME && frfc != LAST_FRAME) pred_context = 2; else - pred_context = 3 + - (frfc == LAST2_FRAME || CHECK_GOLDEN_OR_LAST3(rfs)); + pred_context = + 3 + (frfc == LAST2_FRAME || CHECK_GOLDEN_OR_LAST3(rfs)); } else { // comp/comp if (frfa == LAST_FRAME || frfl == LAST_FRAME) pred_context = 2; else - pred_context = 3 + - (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl)); + pred_context = + 3 + (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl)); } } } else if (above_in_image || left_in_image) { // one edge available @@ -462,8 +456,8 @@ int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm, pred_context = 2; } else { if (has_second_ref(edge_mbmi)) { - pred_context = 4 * - (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME); + pred_context = + 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME); } else { if (edge_mbmi->ref_frame[0] == LAST_FRAME) pred_context = 0; @@ -513,15 +507,17 @@ int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm, if (!has_second_ref(edge_mbmi)) // single pred (1/3) pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != GOLDEN_FRAME); else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] - != GOLDEN_FRAME); + pred_context = + 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME); } else { // inter/inter const int l_sg = !has_second_ref(left_mbmi); const int a_sg = !has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME frfa = a_sg ? - above_mbmi->ref_frame[0] : above_mbmi->ref_frame[fwd_ref_sign_idx]; - const MV_REFERENCE_FRAME frfl = l_sg ? - left_mbmi->ref_frame[0] : left_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME frfa = + a_sg ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME frfl = + l_sg ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[fwd_ref_sign_idx]; if (frfa == frfl && frfa == GOLDEN_FRAME) pred_context = 0; @@ -544,14 +540,13 @@ int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm, else if (rfs == GOLDEN_FRAME && frfc != GOLDEN_FRAME) pred_context = 2; else - pred_context = 3 + - (frfc == LAST3_FRAME || CHECK_LAST_OR_LAST2(rfs)); + pred_context = 3 + (frfc == LAST3_FRAME || CHECK_LAST_OR_LAST2(rfs)); } else { // comp/comp if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME) pred_context = 2; else - pred_context = 3 + - (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl)); + pred_context = + 3 + (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl)); } } } else if (above_in_image || left_in_image) { // one edge available @@ -561,8 +556,8 @@ int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm, pred_context = 2; } else { if (has_second_ref(edge_mbmi)) { - pred_context = 4 * - (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME); + pred_context = + 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME); } else { if (edge_mbmi->ref_frame[0] == GOLDEN_FRAME) pred_context = 0; @@ -607,21 +602,24 @@ int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm, if (!has_second_ref(edge_mbmi)) // single pred (1/3) pred_context = 1 + 2 * (edge_mbmi->ref_frame[1] != cm->comp_bwd_ref[1]); else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] - != cm->comp_bwd_ref[1]); + pred_context = + 1 + + 2 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]); } else { // inter/inter const int l_comp = has_second_ref(left_mbmi); const int a_comp = has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME l_brf = l_comp ? - left_mbmi->ref_frame[bwd_ref_sign_idx] : NONE; - const MV_REFERENCE_FRAME a_brf = a_comp ? - above_mbmi->ref_frame[bwd_ref_sign_idx] : NONE; + const MV_REFERENCE_FRAME l_brf = + l_comp ? left_mbmi->ref_frame[bwd_ref_sign_idx] : NONE; + const MV_REFERENCE_FRAME a_brf = + a_comp ? above_mbmi->ref_frame[bwd_ref_sign_idx] : NONE; - const MV_REFERENCE_FRAME l_frf = !l_comp ? - left_mbmi->ref_frame[0] : left_mbmi->ref_frame[fwd_ref_sign_idx]; - const MV_REFERENCE_FRAME a_frf = !a_comp ? - above_mbmi->ref_frame[0] : above_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME l_frf = + !l_comp ? left_mbmi->ref_frame[0] + : left_mbmi->ref_frame[fwd_ref_sign_idx]; + const MV_REFERENCE_FRAME a_frf = + !a_comp ? above_mbmi->ref_frame[0] + : above_mbmi->ref_frame[fwd_ref_sign_idx]; if (l_comp && a_comp) { // comp/comp if (l_brf == a_brf && l_brf == cm->comp_bwd_ref[1]) { @@ -643,8 +641,8 @@ int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm, } else if (l_frf == a_frf) { pred_context = 3; } else { - assert(l_frf != a_frf && - l_frf != cm->comp_bwd_ref[1] && a_frf != cm->comp_bwd_ref[1]); + assert(l_frf != a_frf && l_frf != cm->comp_bwd_ref[1] && + a_frf != cm->comp_bwd_ref[1]); pred_context = 4; } } else { // comp/single @@ -672,8 +670,8 @@ int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm, pred_context = 2; } else { if (has_second_ref(edge_mbmi)) { - pred_context = 4 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] - != cm->comp_bwd_ref[1]); + pred_context = + 4 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]); } else { pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]); } @@ -716,15 +714,15 @@ int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, if (!has_second_ref(edge_mbmi)) // single pred (1/3) pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]); else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] - != cm->comp_var_ref[1]); + pred_context = + 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); } else { // inter/inter const int l_sg = !has_second_ref(left_mbmi); const int a_sg = !has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] - : above_mbmi->ref_frame[var_ref_idx]; - const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] - : left_mbmi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfa = + a_sg ? above_mbmi->ref_frame[0] : above_mbmi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = + l_sg ? left_mbmi->ref_frame[0] : left_mbmi->ref_frame[var_ref_idx]; if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { pred_context = 0; @@ -758,8 +756,8 @@ int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, pred_context = 2; } else { if (has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] - != cm->comp_var_ref[1]); + pred_context = + 4 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); else pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]); } @@ -806,7 +804,7 @@ int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { !CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[1])); } else { // inter/inter const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); + const int left_has_second = has_second_ref(left_mbmi); const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; @@ -827,8 +825,8 @@ int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { pred_context = 3 + (!CHECK_BWDREF_OR_ALTREF(crf1) || !CHECK_BWDREF_OR_ALTREF(crf2)); else - pred_context = !CHECK_BWDREF_OR_ALTREF(crf1) || - !CHECK_BWDREF_OR_ALTREF(crf2); + pred_context = + !CHECK_BWDREF_OR_ALTREF(crf1) || !CHECK_BWDREF_OR_ALTREF(crf2); } else { pred_context = 2 * (!CHECK_BWDREF_OR_ALTREF(above0)) + 2 * (!CHECK_BWDREF_OR_ALTREF(left0)); @@ -883,12 +881,13 @@ int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { else pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME); } else { - pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME || - edge_mbmi->ref_frame[1] == BWDREF_FRAME); + pred_context = 1 + + 2 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME || + edge_mbmi->ref_frame[1] == BWDREF_FRAME); } } else { // inter/inter const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); + const int left_has_second = has_second_ref(left_mbmi); const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; @@ -896,10 +895,9 @@ int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { if (above_has_second && left_has_second) { if (above0 == left0 && above1 == left1) - pred_context = 3 * (above0 == BWDREF_FRAME || - above1 == BWDREF_FRAME || - left0 == BWDREF_FRAME || - left1 == BWDREF_FRAME); + pred_context = + 3 * (above0 == BWDREF_FRAME || above1 == BWDREF_FRAME || + left0 == BWDREF_FRAME || left1 == BWDREF_FRAME); else pred_context = 2; } else if (above_has_second || left_has_second) { @@ -922,8 +920,8 @@ int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { !CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0; pred_context = 4 * (edge0 == BWDREF_FRAME); } else { - pred_context = 2 * (above0 == BWDREF_FRAME) + - 2 * (left0 == BWDREF_FRAME); + pred_context = + 2 * (above0 == BWDREF_FRAME) + 2 * (left0 == BWDREF_FRAME); } } } @@ -978,12 +976,12 @@ int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) { pred_context = 4 * CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]); } else { pred_context = 1 + - 2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) || - CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1])); + 2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) || + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1])); } } else { // inter/inter const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); + const int left_has_second = has_second_ref(left_mbmi); const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; @@ -991,10 +989,9 @@ int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) { if (above_has_second && left_has_second) { if (above0 == left0 && above1 == left1) - pred_context = 3 * (CHECK_LAST_OR_LAST2(above0) || - CHECK_LAST_OR_LAST2(above1) || - CHECK_LAST_OR_LAST2(left0) || - CHECK_LAST_OR_LAST2(left1)); + pred_context = + 3 * (CHECK_LAST_OR_LAST2(above0) || CHECK_LAST_OR_LAST2(above1) || + CHECK_LAST_OR_LAST2(left0) || CHECK_LAST_OR_LAST2(left1)); else pred_context = 2; } else if (above_has_second || left_has_second) { @@ -1003,14 +1000,14 @@ int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) { const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; if (CHECK_LAST_OR_LAST2(rfs)) - pred_context = 3 + (CHECK_LAST_OR_LAST2(crf1) || - CHECK_LAST_OR_LAST2(crf2)); + pred_context = + 3 + (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2)); else if (CHECK_GOLDEN_OR_LAST3(rfs)) - pred_context = (CHECK_LAST_OR_LAST2(crf1) || - CHECK_LAST_OR_LAST2(crf2)); + pred_context = + (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2)); else - pred_context = 1 + 2 * (CHECK_LAST_OR_LAST2(crf1) || - CHECK_LAST_OR_LAST2(crf2)); + pred_context = + 1 + 2 * (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2)); } else { if (CHECK_BWDREF_OR_ALTREF(above0) && CHECK_BWDREF_OR_ALTREF(left0)) { pred_context = 2 + (above0 == left0); @@ -1020,8 +1017,8 @@ int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) { CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0; pred_context = 4 * CHECK_LAST_OR_LAST2(edge0); } else { - pred_context = 2 * CHECK_LAST_OR_LAST2(above0) + - 2 * CHECK_LAST_OR_LAST2(left0); + pred_context = + 2 * CHECK_LAST_OR_LAST2(above0) + 2 * CHECK_LAST_OR_LAST2(left0); } } } @@ -1076,12 +1073,12 @@ int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) { pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); } else { pred_context = 1 + - 2 * (edge_mbmi->ref_frame[0] == LAST_FRAME || - edge_mbmi->ref_frame[1] == LAST_FRAME); + 2 * (edge_mbmi->ref_frame[0] == LAST_FRAME || + edge_mbmi->ref_frame[1] == LAST_FRAME); } } else { // inter/inter const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); + const int left_has_second = has_second_ref(left_mbmi); const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; @@ -1105,8 +1102,7 @@ int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) { else pred_context = 1 + 2 * (crf1 == LAST_FRAME || crf2 == LAST_FRAME); } else { - if (!CHECK_LAST_OR_LAST2(above0) && - !CHECK_LAST_OR_LAST2(left0)) { + if (!CHECK_LAST_OR_LAST2(above0) && !CHECK_LAST_OR_LAST2(left0)) { pred_context = 2 + (above0 == left0); } else if (!CHECK_LAST_OR_LAST2(above0) || !CHECK_LAST_OR_LAST2(left0)) { @@ -1169,12 +1165,12 @@ int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) { pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME); } else { pred_context = 1 + - 2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME || - edge_mbmi->ref_frame[1] == LAST3_FRAME); + 2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME || + edge_mbmi->ref_frame[1] == LAST3_FRAME); } } else { // inter/inter const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); + const int left_has_second = has_second_ref(left_mbmi); const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; @@ -1198,8 +1194,7 @@ int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) { else pred_context = 1 + 2 * (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME); } else { - if (!CHECK_GOLDEN_OR_LAST3(above0) && - !CHECK_GOLDEN_OR_LAST3(left0)) { + if (!CHECK_GOLDEN_OR_LAST3(above0) && !CHECK_GOLDEN_OR_LAST3(left0)) { pred_context = 2 + (above0 == left0); } else if (!CHECK_GOLDEN_OR_LAST3(above0) || !CHECK_GOLDEN_OR_LAST3(left0)) { @@ -1207,8 +1202,8 @@ int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) { !CHECK_GOLDEN_OR_LAST3(above0) ? left0 : above0; pred_context = 4 * (edge0 == LAST3_FRAME); } else { - pred_context = 2 * (above0 == LAST3_FRAME) + - 2 * (left0 == LAST3_FRAME); + pred_context = + 2 * (above0 == LAST3_FRAME) + 2 * (left0 == LAST3_FRAME); } } } @@ -1325,8 +1320,9 @@ int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { else pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); } else { - pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || - edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + pred_context = 1 + + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || + edge_mbmi->ref_frame[1] == GOLDEN_FRAME); } } else { // inter/inter const int above_has_second = has_second_ref(above_mbmi); @@ -1338,10 +1334,9 @@ int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { if (above_has_second && left_has_second) { if (above0 == left0 && above1 == left1) - pred_context = 3 * (above0 == GOLDEN_FRAME || - above1 == GOLDEN_FRAME || - left0 == GOLDEN_FRAME || - left1 == GOLDEN_FRAME); + pred_context = + 3 * (above0 == GOLDEN_FRAME || above1 == GOLDEN_FRAME || + left0 == GOLDEN_FRAME || left1 == GOLDEN_FRAME); else pred_context = 2; } else if (above_has_second || left_has_second) { @@ -1359,8 +1354,8 @@ int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { if (above0 == LAST_FRAME && left0 == LAST_FRAME) { pred_context = 3; } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) { - const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0 - : above0; + const MV_REFERENCE_FRAME edge0 = + (above0 == LAST_FRAME) ? left0 : above0; pred_context = 4 * (edge0 == GOLDEN_FRAME); } else { pred_context = diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h index 9b73eb27e51599f2ba761a6f2e537b117b980f20..63537b7bac7f298406db7b86854b995d41f72e80 100644 --- a/vp10/common/pred_common.h +++ b/vp10/common/pred_common.h @@ -20,8 +20,8 @@ extern "C" { #endif static INLINE int get_segment_id(const VP10_COMMON *cm, - const uint8_t *segment_ids, - BLOCK_SIZE bsize, int mi_row, int mi_col) { + const uint8_t *segment_ids, BLOCK_SIZE bsize, + int mi_row, int mi_col) { const int mi_offset = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; @@ -41,8 +41,8 @@ static INLINE int get_segment_id(const VP10_COMMON *cm, static INLINE int vp10_get_pred_context_seg_id(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; - const int above_sip = (above_mi != NULL) ? - above_mi->mbmi.seg_id_predicted : 0; + const int above_sip = + (above_mi != NULL) ? above_mi->mbmi.seg_id_predicted : 0; const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0; return above_sip + left_sip; @@ -62,7 +62,7 @@ static INLINE int vp10_get_skip_context(const MACROBLOCKD *xd) { } static INLINE vpx_prob vp10_get_skip_prob(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { return cm->fc->skip_probs[vp10_get_skip_context(xd)]; } @@ -79,7 +79,7 @@ int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd); int vp10_get_intra_inter_context(const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_intra_inter_prob(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { return cm->fc->intra_inter_prob[vp10_get_intra_inter_context(xd)]; } @@ -92,7 +92,7 @@ static INLINE vpx_prob vp10_get_reference_mode_prob(const VP10_COMMON *cm, } int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm, - const MACROBLOCKD *xd); + const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p(const VP10_COMMON *cm, const MACROBLOCKD *xd) { @@ -105,7 +105,7 @@ int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm, const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p1(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { const int pred_context = vp10_get_pred_context_comp_ref_p1(cm, xd); return cm->fc->comp_ref_prob[pred_context][1]; } @@ -114,7 +114,7 @@ int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm, const MACROBLOCKD *xd); static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p2(const VP10_COMMON *cm, - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { const int pred_context = vp10_get_pred_context_comp_ref_p2(cm, xd); return cm->fc->comp_ref_prob[pred_context][2]; } @@ -177,16 +177,16 @@ static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; const int has_above = xd->up_available; const int has_left = xd->left_available; - int above_ctx = (has_above && !above_mbmi->skip) ? - (int)txsize_sqr_map[above_mbmi->tx_size] : max_tx_size; - int left_ctx = (has_left && !left_mbmi->skip) ? - (int)txsize_sqr_map[left_mbmi->tx_size] : max_tx_size; + int above_ctx = (has_above && !above_mbmi->skip) + ? (int)txsize_sqr_map[above_mbmi->tx_size] + : max_tx_size; + int left_ctx = (has_left && !left_mbmi->skip) + ? (int)txsize_sqr_map[left_mbmi->tx_size] + : max_tx_size; assert(xd->mi[0]->mbmi.sb_type >= BLOCK_8X8); - if (!has_left) - left_ctx = above_ctx; + if (!has_left) left_ctx = above_ctx; - if (!has_above) - above_ctx = left_ctx; + if (!has_above) above_ctx = left_ctx; return (above_ctx + left_ctx) > max_tx_size; } @@ -209,8 +209,7 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, if (xd->mb_to_right_edge < 0) max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) - return; + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; if (tx_size == plane_tx_size) { ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][tx_size]; @@ -226,16 +225,14 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, const int offsetr = blk_row + ((i >> 1) << bsl); const int offsetc = blk_col + ((i & 0x01) << bsl); - if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) - continue; - update_tx_counts(cm, xd, mbmi, plane_bsize, - tx_size - 1, offsetr, offsetc, max_tx_size, ctx); + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; + update_tx_counts(cm, xd, mbmi, plane_bsize, tx_size - 1, offsetr, offsetc, + max_tx_size, ctx); } } } -static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, - MACROBLOCKD *xd, +static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, MACROBLOCKD *xd, MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize, int ctx) { diff --git a/vp10/common/quant_common.c b/vp10/common/quant_common.c index a1ce23e8bb96c0cd65158c640fb67c70de6a8e55..49e2a4bbaf6fe23d85647911b122b8012312f1f9 100644 --- a/vp10/common/quant_common.c +++ b/vp10/common/quant_common.c @@ -26,73 +26,70 @@ // TODO(sarahparker, debargha): Optimize these tables typedef struct { - uint8_t knots[NUQ_KNOTS]; // offsets - uint8_t doff; // dequantization + uint8_t knots[NUQ_KNOTS]; // offsets + uint8_t doff; // dequantization } qprofile_type; static const qprofile_type nuq_lossless[COEF_BANDS] = { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0}, // band 5 + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 }, // band 5 }; static const qprofile_type nuq[QUANT_PROFILES][QUANT_RANGES][COEF_BANDS] = { - { + { { + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 } // band 5 + }, { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0} // band 5 - }, { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0} // band 5 - } - }, + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 } // band 5 + } }, #if QUANT_PROFILES > 1 - { + { { + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 } // band 5 + }, { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0} // band 5 - }, { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0} // band 5 - } - }, + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 } // band 5 + } }, #if QUANT_PROFILES > 2 - { + { { + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 }, // band 5 + }, { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0}, // band 5 - }, { - {{64, 128, 128}, 0}, // dc, band 0 - {{64, 128, 128}, 0}, // band 1 - {{64, 128, 128}, 0}, // band 2 - {{64, 128, 128}, 0}, // band 3 - {{64, 128, 128}, 0}, // band 4 - {{64, 128, 128}, 0}, // band 5 - } - } + { { 64, 128, 128 }, 0 }, // dc, band 0 + { { 64, 128, 128 }, 0 }, // band 1 + { { 64, 128, 128 }, 0 }, // band 2 + { { 64, 128, 128 }, 0 }, // band 3 + { { 64, 128, 128 }, 0 }, // band 4 + { { 64, 128, 128 }, 0 }, // band 5 + } } #endif // QUANT_PROFILES > 2 #endif // QUANT_PROFILES > 1 }; @@ -109,8 +106,7 @@ static const uint8_t *get_nuq_knots(int qindex, int band, int q_profile) { return nuq[q_profile][qrange_from_qindex(qindex)][band].knots; } -static INLINE int16_t quant_to_doff_fixed(int qindex, int band, - int q_profile) { +static INLINE int16_t quant_to_doff_fixed(int qindex, int band, int q_profile) { if (!qindex) return nuq_lossless[band].doff; else @@ -124,15 +120,13 @@ static INLINE void get_cuml_bins_nuq(int q, int qindex, int band, int16_t cuml_knots[NUQ_KNOTS]; int i; cuml_knots[0] = knots[0]; - for (i = 1; i < NUQ_KNOTS; ++i) - cuml_knots[i] = cuml_knots[i - 1] + knots[i]; + for (i = 1; i < NUQ_KNOTS; ++i) cuml_knots[i] = cuml_knots[i - 1] + knots[i]; for (i = 0; i < NUQ_KNOTS; ++i) cuml_bins[i] = ROUND_POWER_OF_TWO(cuml_knots[i] * q, 7); } -void vp10_get_dequant_val_nuq(int q, int qindex, int band, - tran_low_t *dq, tran_low_t *cuml_bins, - int q_profile) { +void vp10_get_dequant_val_nuq(int q, int qindex, int band, tran_low_t *dq, + tran_low_t *cuml_bins, int q_profile) { const uint8_t *knots = get_nuq_knots(qindex, band, q_profile); tran_low_t cuml_bins_[NUQ_KNOTS], *cuml_bins_ptr; tran_low_t doff; @@ -143,8 +137,8 @@ void vp10_get_dequant_val_nuq(int q, int qindex, int band, for (i = 1; i < NUQ_KNOTS; ++i) { doff = quant_to_doff_fixed(qindex, band, q_profile); doff = ROUND_POWER_OF_TWO(doff * knots[i], 7); - dq[i] = cuml_bins_ptr[i - 1] + - ROUND_POWER_OF_TWO((knots[i] - doff * 2) * q, 8); + dq[i] = + cuml_bins_ptr[i - 1] + ROUND_POWER_OF_TWO((knots[i] - doff * 2) * q, 8); } doff = quant_to_doff_fixed(qindex, band, q_profile); dq[NUQ_KNOTS] = @@ -165,234 +159,166 @@ tran_low_t vp10_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) { #endif // CONFIG_NEW_QUANT static const int16_t dc_qlookup[QINDEX_RANGE] = { - 4, 8, 8, 9, 10, 11, 12, 12, - 13, 14, 15, 16, 17, 18, 19, 19, - 20, 21, 22, 23, 24, 25, 26, 26, - 27, 28, 29, 30, 31, 32, 32, 33, - 34, 35, 36, 37, 38, 38, 39, 40, - 41, 42, 43, 43, 44, 45, 46, 47, - 48, 48, 49, 50, 51, 52, 53, 53, - 54, 55, 56, 57, 57, 58, 59, 60, - 61, 62, 62, 63, 64, 65, 66, 66, - 67, 68, 69, 70, 70, 71, 72, 73, - 74, 74, 75, 76, 77, 78, 78, 79, - 80, 81, 81, 82, 83, 84, 85, 85, - 87, 88, 90, 92, 93, 95, 96, 98, - 99, 101, 102, 104, 105, 107, 108, 110, - 111, 113, 114, 116, 117, 118, 120, 121, - 123, 125, 127, 129, 131, 134, 136, 138, - 140, 142, 144, 146, 148, 150, 152, 154, - 156, 158, 161, 164, 166, 169, 172, 174, - 177, 180, 182, 185, 187, 190, 192, 195, - 199, 202, 205, 208, 211, 214, 217, 220, - 223, 226, 230, 233, 237, 240, 243, 247, - 250, 253, 257, 261, 265, 269, 272, 276, - 280, 284, 288, 292, 296, 300, 304, 309, - 313, 317, 322, 326, 330, 335, 340, 344, - 349, 354, 359, 364, 369, 374, 379, 384, - 389, 395, 400, 406, 411, 417, 423, 429, - 435, 441, 447, 454, 461, 467, 475, 482, - 489, 497, 505, 513, 522, 530, 539, 549, - 559, 569, 579, 590, 602, 614, 626, 640, - 654, 668, 684, 700, 717, 736, 755, 775, - 796, 819, 843, 869, 896, 925, 955, 988, - 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, + 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, + 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, + 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, + 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, + 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, + 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, + 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, + 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, + 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, + 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, + 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, + 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, + 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, + 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, + 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, + 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, + 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, + 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, + 1184, 1232, 1282, 1336, }; #if CONFIG_VP9_HIGHBITDEPTH static const int16_t dc_qlookup_10[QINDEX_RANGE] = { - 4, 9, 10, 13, 15, 17, 20, 22, - 25, 28, 31, 34, 37, 40, 43, 47, - 50, 53, 57, 60, 64, 68, 71, 75, - 78, 82, 86, 90, 93, 97, 101, 105, - 109, 113, 116, 120, 124, 128, 132, 136, - 140, 143, 147, 151, 155, 159, 163, 166, - 170, 174, 178, 182, 185, 189, 193, 197, - 200, 204, 208, 212, 215, 219, 223, 226, - 230, 233, 237, 241, 244, 248, 251, 255, - 259, 262, 266, 269, 273, 276, 280, 283, - 287, 290, 293, 297, 300, 304, 307, 310, - 314, 317, 321, 324, 327, 331, 334, 337, - 343, 350, 356, 362, 369, 375, 381, 387, - 394, 400, 406, 412, 418, 424, 430, 436, - 442, 448, 454, 460, 466, 472, 478, 484, - 490, 499, 507, 516, 525, 533, 542, 550, - 559, 567, 576, 584, 592, 601, 609, 617, - 625, 634, 644, 655, 666, 676, 687, 698, - 708, 718, 729, 739, 749, 759, 770, 782, - 795, 807, 819, 831, 844, 856, 868, 880, - 891, 906, 920, 933, 947, 961, 975, 988, - 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, - 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, - 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, - 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, - 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, - 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, - 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, - 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, - 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, - 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, - 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, + 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, + 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, + 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, + 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, + 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, + 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, + 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, + 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, + 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, + 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, + 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, + 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, + 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, + 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, + 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, + 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, + 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, + 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, + 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, + 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, }; static const int16_t dc_qlookup_12[QINDEX_RANGE] = { - 4, 12, 18, 25, 33, 41, 50, 60, - 70, 80, 91, 103, 115, 127, 140, 153, - 166, 180, 194, 208, 222, 237, 251, 266, - 281, 296, 312, 327, 343, 358, 374, 390, - 405, 421, 437, 453, 469, 484, 500, 516, - 532, 548, 564, 580, 596, 611, 627, 643, - 659, 674, 690, 706, 721, 737, 752, 768, - 783, 798, 814, 829, 844, 859, 874, 889, - 904, 919, 934, 949, 964, 978, 993, 1008, - 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, - 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, - 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, - 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, - 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, - 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, - 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, - 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, - 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, - 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, - 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, - 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, - 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, - 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, - 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, - 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, - 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, - 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, - 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, - 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, - 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, - 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, - 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, + 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, + 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, + 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, + 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, + 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, + 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, + 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, + 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, + 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, + 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, + 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, + 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, + 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, + 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, + 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, + 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, + 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, + 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, + 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, + 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, + 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, + 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, + 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, + 19718, 20521, 21387, }; #endif static const int16_t ac_qlookup[QINDEX_RANGE] = { - 4, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 84, 85, 86, - 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, - 104, 106, 108, 110, 112, 114, 116, 118, - 120, 122, 124, 126, 128, 130, 132, 134, - 136, 138, 140, 142, 144, 146, 148, 150, - 152, 155, 158, 161, 164, 167, 170, 173, - 176, 179, 182, 185, 188, 191, 194, 197, - 200, 203, 207, 211, 215, 219, 223, 227, - 231, 235, 239, 243, 247, 251, 255, 260, - 265, 270, 275, 280, 285, 290, 295, 300, - 305, 311, 317, 323, 329, 335, 341, 347, - 353, 359, 366, 373, 380, 387, 394, 401, - 408, 416, 424, 432, 440, 448, 456, 465, - 474, 483, 492, 501, 510, 520, 530, 540, - 550, 560, 571, 582, 593, 604, 615, 627, - 639, 651, 663, 676, 689, 702, 715, 729, - 743, 757, 771, 786, 801, 816, 832, 848, - 864, 881, 898, 915, 933, 951, 969, 988, - 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, - 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, - 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, - 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, + 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, + 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, + 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, + 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, + 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, + 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, + 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, + 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, + 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, + 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, + 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, }; #if CONFIG_VP9_HIGHBITDEPTH static const int16_t ac_qlookup_10[QINDEX_RANGE] = { - 4, 9, 11, 13, 16, 18, 21, 24, - 27, 30, 33, 37, 40, 44, 48, 51, - 55, 59, 63, 67, 71, 75, 79, 83, - 88, 92, 96, 100, 105, 109, 114, 118, - 122, 127, 131, 136, 140, 145, 149, 154, - 158, 163, 168, 172, 177, 181, 186, 190, - 195, 199, 204, 208, 213, 217, 222, 226, - 231, 235, 240, 244, 249, 253, 258, 262, - 267, 271, 275, 280, 284, 289, 293, 297, - 302, 306, 311, 315, 319, 324, 328, 332, - 337, 341, 345, 349, 354, 358, 362, 367, - 371, 375, 379, 384, 388, 392, 396, 401, - 409, 417, 425, 433, 441, 449, 458, 466, - 474, 482, 490, 498, 506, 514, 523, 531, - 539, 547, 555, 563, 571, 579, 588, 596, - 604, 616, 628, 640, 652, 664, 676, 688, - 700, 713, 725, 737, 749, 761, 773, 785, - 797, 809, 825, 841, 857, 873, 889, 905, - 922, 938, 954, 970, 986, 1002, 1018, 1038, - 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, - 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, - 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, - 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, - 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, - 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, - 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, - 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, - 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, - 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, - 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, - 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, - 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, + 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, + 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, + 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, + 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, + 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, + 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, + 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, + 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, + 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, + 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, + 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, + 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, + 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, + 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, + 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, + 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, + 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, + 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, + 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, + 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, }; static const int16_t ac_qlookup_12[QINDEX_RANGE] = { - 4, 13, 19, 27, 35, 44, 54, 64, - 75, 87, 99, 112, 126, 139, 154, 168, - 183, 199, 214, 230, 247, 263, 280, 297, - 314, 331, 349, 366, 384, 402, 420, 438, - 456, 475, 493, 511, 530, 548, 567, 586, - 604, 623, 642, 660, 679, 698, 716, 735, - 753, 772, 791, 809, 828, 846, 865, 884, - 902, 920, 939, 957, 976, 994, 1012, 1030, - 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, - 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, - 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, - 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, - 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, - 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, - 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, - 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, - 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, - 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, - 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, - 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, - 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, - 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, - 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, - 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, - 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, - 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, - 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, - 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, - 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, - 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, - 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, - 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, + 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, + 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, + 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, + 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, + 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, + 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, + 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, + 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, + 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, + 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, + 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, + 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, + 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, + 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, + 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, + 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, + 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, + 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, + 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, + 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, + 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, + 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, + 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, + 28143, 28687, 29247, }; #endif int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { - case VPX_BITS_8: - return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_10: - return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_12: - return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_8: return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; default: assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); return -1; } #else - (void) bit_depth; + (void)bit_depth; return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; #endif } @@ -400,31 +326,27 @@ int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { - case VPX_BITS_8: - return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_10: - return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_12: - return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_8: return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; default: assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); return -1; } #else - (void) bit_depth; + (void)bit_depth; return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; #endif } int vp10_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex) { + int base_qindex) { if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); - const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? - data : base_qindex + data; + const int seg_qindex = + seg->abs_delta == SEGMENT_ABSDATA ? data : base_qindex + data; return clamp(seg_qindex, 0, MAXQ); } else { return base_qindex; } } - diff --git a/vp10/common/quant_common.h b/vp10/common/quant_common.h index d9557969525b5e0416e3f4d30c6096e0a00dc6c0..a5dbe09303af9eee965d60e66bd9dfaebeef6067 100644 --- a/vp10/common/quant_common.h +++ b/vp10/common/quant_common.h @@ -27,19 +27,18 @@ int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); int vp10_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex); + int base_qindex); #if CONFIG_NEW_QUANT #define QUANT_PROFILES 3 -#define QUANT_RANGES 2 -#define NUQ_KNOTS 3 +#define QUANT_RANGES 2 +#define NUQ_KNOTS 3 typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTS + 1]; typedef tran_low_t cuml_bins_type_nuq[NUQ_KNOTS]; -void vp10_get_dequant_val_nuq(int q, int qindex, int band, - tran_low_t *dq, tran_low_t *cuml_bins, - int dq_off_index); +void vp10_get_dequant_val_nuq(int q, int qindex, int band, tran_low_t *dq, + tran_low_t *cuml_bins, int dq_off_index); tran_low_t vp10_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq); tran_low_t vp10_dequant_coeff_nuq(int v, int q, const tran_low_t *dq); diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index d7b75d6b11c3487e76ff957042e0e49f53605f66..ef9d7e243e1339565a3434c4ac421aa992027875 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c @@ -29,22 +29,15 @@ #if CONFIG_EXT_INTER -#define NSMOOTHERS 1 +#define NSMOOTHERS 1 static int get_masked_weight(int m, int smoothness) { -#define SMOOTHER_LEN 32 - static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = { - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 4, 7, 13, 21, - 32, - 43, 51, 57, 60, 62, 63, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - } - }; +#define SMOOTHER_LEN 32 + static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = { { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 4, 7, 13, 21, 32, 43, 51, 57, 60, 62, 63, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, + } }; if (m < -SMOOTHER_LEN) return 0; else if (m > SMOOTHER_LEN) @@ -53,270 +46,182 @@ static int get_masked_weight(int m, int smoothness) { return smoothfn[smoothness][m + SMOOTHER_LEN]; } - // [smoother][negative][direction] DECLARE_ALIGNED( 16, static uint8_t, - wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS] - [MASK_MASTER_SIZE * MASK_MASTER_SIZE]); + wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * + MASK_MASTER_SIZE]); -DECLARE_ALIGNED( - 16, static uint8_t, - wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]); +DECLARE_ALIGNED(16, static uint8_t, + wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]); // 3 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound // on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE. -DECLARE_ALIGNED( - 16, static uint8_t, - wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]); +DECLARE_ALIGNED(16, static uint8_t, + wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]); static wedge_masks_type wedge_masks[BLOCK_SIZES][2]; // Some unused wedge codebooks left temporarily to facilitate experiments. // To be removed when setteld. static wedge_code_type wedge_codebook_8_hgtw[8] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 6}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, + { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, }; static wedge_code_type wedge_codebook_8_hltw[8] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 6, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, + { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; static wedge_code_type wedge_codebook_8_heqw[8] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_HORIZONTAL, 4, 2}, - {WEDGE_HORIZONTAL, 4, 6}, - {WEDGE_VERTICAL, 2, 4}, - {WEDGE_VERTICAL, 6, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, + { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, }; #if !USE_LARGE_WEDGE_CODEBOOK static const wedge_code_type wedge_codebook_16_hgtw[16] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_HORIZONTAL, 4, 2}, - {WEDGE_HORIZONTAL, 4, 4}, - {WEDGE_HORIZONTAL, 4, 6}, - {WEDGE_VERTICAL, 4, 4}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 6}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 6, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, + { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, + { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, + { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, + { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; static const wedge_code_type wedge_codebook_16_hltw[16] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_VERTICAL, 2, 4}, - {WEDGE_VERTICAL, 4, 4}, - {WEDGE_VERTICAL, 6, 4}, - {WEDGE_HORIZONTAL, 4, 4}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 6}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 6, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, + { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, + { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, + { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, + { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, + { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; static const wedge_code_type wedge_codebook_16_heqw[16] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_HORIZONTAL, 4, 2}, - {WEDGE_HORIZONTAL, 4, 6}, - {WEDGE_VERTICAL, 2, 4}, - {WEDGE_VERTICAL, 6, 4}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 6}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 6, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, + { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, + { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, + { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, + { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, + { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = { - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, - {4, wedge_codebook_16_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3]}, - {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4]}, - {4, wedge_codebook_16_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5]}, - {4, wedge_codebook_16_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6]}, - {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7]}, - {4, wedge_codebook_16_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8]}, - {4, wedge_codebook_16_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9]}, - {0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10]}, - {0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11]}, - {0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12]}, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, + { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3] }, + { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4] }, + { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5] }, + { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6] }, + { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7] }, + { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8] }, + { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9] }, + { 0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10] }, + { 0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11] }, + { 0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12] }, #if CONFIG_EXT_PARTITION - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, #endif // CONFIG_EXT_PARTITION }; #else static const wedge_code_type wedge_codebook_32_hgtw[32] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_HORIZONTAL, 4, 2}, - {WEDGE_HORIZONTAL, 4, 4}, - {WEDGE_HORIZONTAL, 4, 6}, - {WEDGE_VERTICAL, 4, 4}, - {WEDGE_OBLIQUE27, 4, 1}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 3}, - {WEDGE_OBLIQUE27, 4, 5}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE27, 4, 7}, - {WEDGE_OBLIQUE153, 4, 1}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 3}, - {WEDGE_OBLIQUE153, 4, 5}, - {WEDGE_OBLIQUE153, 4, 6}, - {WEDGE_OBLIQUE153, 4, 7}, - {WEDGE_OBLIQUE63, 1, 4}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 3, 4}, - {WEDGE_OBLIQUE63, 5, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE63, 7, 4}, - {WEDGE_OBLIQUE117, 1, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 3, 4}, - {WEDGE_OBLIQUE117, 5, 4}, - {WEDGE_OBLIQUE117, 6, 4}, - {WEDGE_OBLIQUE117, 7, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, + { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 }, + { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 }, + { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 }, + { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 }, + { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 }, + { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 }, + { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, + { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 }, + { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 }, + { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, + { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 }, + { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 }, }; static const wedge_code_type wedge_codebook_32_hltw[32] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_VERTICAL, 2, 4}, - {WEDGE_VERTICAL, 4, 4}, - {WEDGE_VERTICAL, 6, 4}, - {WEDGE_HORIZONTAL, 4, 4}, - {WEDGE_OBLIQUE27, 4, 1}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 3}, - {WEDGE_OBLIQUE27, 4, 5}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE27, 4, 7}, - {WEDGE_OBLIQUE153, 4, 1}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 3}, - {WEDGE_OBLIQUE153, 4, 5}, - {WEDGE_OBLIQUE153, 4, 6}, - {WEDGE_OBLIQUE153, 4, 7}, - {WEDGE_OBLIQUE63, 1, 4}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 3, 4}, - {WEDGE_OBLIQUE63, 5, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE63, 7, 4}, - {WEDGE_OBLIQUE117, 1, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 3, 4}, - {WEDGE_OBLIQUE117, 5, 4}, - {WEDGE_OBLIQUE117, 6, 4}, - {WEDGE_OBLIQUE117, 7, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, + { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, + { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 }, + { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 }, + { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 }, + { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 }, + { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 }, + { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 }, + { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, + { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 }, + { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 }, + { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, + { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 }, + { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 }, }; static const wedge_code_type wedge_codebook_32_heqw[32] = { - {WEDGE_OBLIQUE27, 4, 4}, - {WEDGE_OBLIQUE63, 4, 4}, - {WEDGE_OBLIQUE117, 4, 4}, - {WEDGE_OBLIQUE153, 4, 4}, - {WEDGE_HORIZONTAL, 4, 2}, - {WEDGE_HORIZONTAL, 4, 6}, - {WEDGE_VERTICAL, 2, 4}, - {WEDGE_VERTICAL, 6, 4}, - {WEDGE_OBLIQUE27, 4, 1}, - {WEDGE_OBLIQUE27, 4, 2}, - {WEDGE_OBLIQUE27, 4, 3}, - {WEDGE_OBLIQUE27, 4, 5}, - {WEDGE_OBLIQUE27, 4, 6}, - {WEDGE_OBLIQUE27, 4, 7}, - {WEDGE_OBLIQUE153, 4, 1}, - {WEDGE_OBLIQUE153, 4, 2}, - {WEDGE_OBLIQUE153, 4, 3}, - {WEDGE_OBLIQUE153, 4, 5}, - {WEDGE_OBLIQUE153, 4, 6}, - {WEDGE_OBLIQUE153, 4, 7}, - {WEDGE_OBLIQUE63, 1, 4}, - {WEDGE_OBLIQUE63, 2, 4}, - {WEDGE_OBLIQUE63, 3, 4}, - {WEDGE_OBLIQUE63, 5, 4}, - {WEDGE_OBLIQUE63, 6, 4}, - {WEDGE_OBLIQUE63, 7, 4}, - {WEDGE_OBLIQUE117, 1, 4}, - {WEDGE_OBLIQUE117, 2, 4}, - {WEDGE_OBLIQUE117, 3, 4}, - {WEDGE_OBLIQUE117, 5, 4}, - {WEDGE_OBLIQUE117, 6, 4}, - {WEDGE_OBLIQUE117, 7, 4}, + { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, + { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, + { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, + { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, + { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 }, + { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 }, + { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 }, + { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 }, + { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 }, + { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 }, + { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, + { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 }, + { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 }, + { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, + { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 }, + { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 }, }; const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = { - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, - {5, wedge_codebook_32_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3]}, - {5, wedge_codebook_32_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4]}, - {5, wedge_codebook_32_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5]}, - {5, wedge_codebook_32_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6]}, - {5, wedge_codebook_32_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7]}, - {5, wedge_codebook_32_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8]}, - {5, wedge_codebook_32_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9]}, - {0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10]}, - {0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11]}, - {0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12]}, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, + { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3] }, + { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4] }, + { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5] }, + { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6] }, + { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7] }, + { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8] }, + { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9] }, + { 0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10] }, + { 0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11] }, + { 0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12] }, #if CONFIG_EXT_PARTITION - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, - {0, NULL, NULL, 0, NULL}, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, + { 0, NULL, NULL, 0, NULL }, #endif // CONFIG_EXT_PARTITION }; #endif // USE_LARGE_WEDGE_CODEBOOK -static const uint8_t *get_wedge_mask_inplace(int wedge_index, - int neg, +static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg, BLOCK_SIZE sb_type) { const uint8_t *master; const int bh = 4 << b_height_log2_lookup[sb_type]; @@ -332,20 +237,17 @@ static const uint8_t *get_wedge_mask_inplace(int wedge_index, woff = (a->x_offset * bw) >> 3; hoff = (a->y_offset * bh) >> 3; master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] + - MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + - MASK_MASTER_SIZE / 2 - woff; + MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + + MASK_MASTER_SIZE / 2 - woff; return master; } -const uint8_t *vp10_get_soft_mask(int wedge_index, - int wedge_sign, - BLOCK_SIZE sb_type, - int offset_x, +const uint8_t *vp10_get_soft_mask(int wedge_index, int wedge_sign, + BLOCK_SIZE sb_type, int offset_x, int offset_y) { const uint8_t *mask = get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type); - if (mask) - mask -= (offset_x + offset_y * MASK_MASTER_STRIDE); + if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE); return mask; } @@ -354,7 +256,7 @@ static void init_wedge_master_masks() { const int w = MASK_MASTER_SIZE; const int h = MASK_MASTER_SIZE; const int stride = MASK_MASTER_STRIDE; - const int a[2] = {2, 1}; + const int a[2] = { 2, 1 }; const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]); for (s = 0; s < NSMOOTHERS; s++) { for (i = 0; i < h; ++i) @@ -363,23 +265,23 @@ static void init_wedge_master_masks() { int y = (2 * i + 1 - h); int m = (int)rint((a[0] * x + a[1] * y) / asqrt); wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] = - wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] = - get_masked_weight(m, s); + wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] = + get_masked_weight(m, s); wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = - wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); + wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = - wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); + wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = - wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = - get_masked_weight(m, s); + wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = + get_masked_weight(m, s); wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] = - wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] = - get_masked_weight(x, s); + wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] = + get_masked_weight(x, s); wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = - wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = - (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s); + wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s); } } } @@ -401,10 +303,8 @@ static void init_wedge_signs() { for (w = 0; w < wtypes; ++w) { const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type); int sum = 0; - for (i = 0; i < bw; ++i) - sum += mask[i]; - for (i = 0; i < bh; ++i) - sum += mask[i * MASK_MASTER_STRIDE]; + for (i = 0; i < bw; ++i) sum += mask[i]; + for (i = 0; i < bh; ++i) sum += mask[i * MASK_MASTER_STRIDE]; sum = (sum + (bw + bh) / 2) / (bw + bh); wedge_params.signflip[w] = (sum < 32); } @@ -426,14 +326,14 @@ static void init_wedge_masks() { if (wbits == 0) continue; for (w = 0; w < wtypes; ++w) { mask = get_wedge_mask_inplace(w, 0, bsize); - vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, - NULL, 0, NULL, 0, bw, bh); + vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, + bh); wedge_params->masks[0][w] = dst; dst += bw * bh; mask = get_wedge_mask_inplace(w, 1, bsize); - vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, - NULL, 0, NULL, 0, bw, bh); + vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, + bh); wedge_params->masks[1][w] = dst; dst += bw * bh; } @@ -448,114 +348,87 @@ void vp10_init_wedge_masks() { init_wedge_masks(); } - #if CONFIG_SUPERTX static void build_masked_compound_wedge_extend( - uint8_t *dst, int dst_stride, - const uint8_t *src0, int src0_stride, - const uint8_t *src1, int src1_stride, - int wedge_index, - int wedge_sign, - BLOCK_SIZE sb_type, - int wedge_offset_x, int wedge_offset_y, - int h, int w) { + uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride, + const uint8_t *src1, int src1_stride, int wedge_index, int wedge_sign, + BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w) { const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; - const uint8_t *mask = vp10_get_soft_mask( - wedge_index, wedge_sign, sb_type, wedge_offset_x, wedge_offset_y); - vpx_blend_a64_mask(dst, dst_stride, - src0, src0_stride, - src1, src1_stride, - mask, MASK_MASTER_STRIDE, - h, w, subh, subw); + const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type, + wedge_offset_x, wedge_offset_y); + vpx_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride, + mask, MASK_MASTER_STRIDE, h, w, subh, subw); } #if CONFIG_VP9_HIGHBITDEPTH static void build_masked_compound_wedge_extend_highbd( - uint8_t *dst_8, int dst_stride, - const uint8_t *src0_8, int src0_stride, - const uint8_t *src1_8, int src1_stride, - int wedge_index, int wedge_sign, - BLOCK_SIZE sb_type, - int wedge_offset_x, int wedge_offset_y, - int h, int w, int bd) { + uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride, + const uint8_t *src1_8, int src1_stride, int wedge_index, int wedge_sign, + BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w, + int bd) { const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; - const uint8_t *mask = vp10_get_soft_mask( - wedge_index, wedge_sign, sb_type, wedge_offset_x, wedge_offset_y); - vpx_highbd_blend_a64_mask(dst_8, dst_stride, - src0_8, src0_stride, - src1_8, src1_stride, - mask, MASK_MASTER_STRIDE, - h, w, subh, subw, bd); + const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type, + wedge_offset_x, wedge_offset_y); + vpx_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8, + src1_stride, mask, MASK_MASTER_STRIDE, h, w, subh, + subw, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_SUPERTX -static void build_masked_compound_wedge( - uint8_t *dst, int dst_stride, - const uint8_t *src0, int src0_stride, - const uint8_t *src1, int src1_stride, - int wedge_index, int wedge_sign, - BLOCK_SIZE sb_type, - int h, int w) { +static void build_masked_compound_wedge(uint8_t *dst, int dst_stride, + const uint8_t *src0, int src0_stride, + const uint8_t *src1, int src1_stride, + int wedge_index, int wedge_sign, + BLOCK_SIZE sb_type, int h, int w) { // Derive subsampling from h and w passed in. May be refactored to // pass in subsampling factors directly. const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; - const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, - sb_type); - vpx_blend_a64_mask(dst, dst_stride, - src0, src0_stride, - src1, src1_stride, - mask, 4 * num_4x4_blocks_wide_lookup[sb_type], - h, w, subh, subw); + const uint8_t *mask = + vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type); + vpx_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride, + mask, 4 * num_4x4_blocks_wide_lookup[sb_type], h, w, subh, + subw); } #if CONFIG_VP9_HIGHBITDEPTH static void build_masked_compound_wedge_highbd( - uint8_t *dst_8, int dst_stride, - const uint8_t *src0_8, int src0_stride, - const uint8_t *src1_8, int src1_stride, - int wedge_index, int wedge_sign, - BLOCK_SIZE sb_type, - int h, int w, int bd) { + uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride, + const uint8_t *src1_8, int src1_stride, int wedge_index, int wedge_sign, + BLOCK_SIZE sb_type, int h, int w, int bd) { // Derive subsampling from h and w passed in. May be refactored to // pass in subsampling factors directly. const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; - const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, - sb_type); - vpx_highbd_blend_a64_mask(dst_8, dst_stride, - src0_8, src0_stride, - src1_8, src1_stride, - mask, 4 * num_4x4_blocks_wide_lookup[sb_type], - h, w, subh, subw, bd); + const uint8_t *mask = + vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type); + vpx_highbd_blend_a64_mask( + dst_8, dst_stride, src0_8, src0_stride, src1_8, src1_stride, mask, + 4 * num_4x4_blocks_wide_lookup[sb_type], h, w, subh, subw, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH -void vp10_make_masked_inter_predictor( - const uint8_t *pre, - int pre_stride, - uint8_t *dst, - int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, +void vp10_make_masked_inter_predictor(const uint8_t *pre, int pre_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, const int subpel_y, + const struct scale_factors *sf, int w, + int h, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - int xs, int ys, + int xs, int ys, #if CONFIG_SUPERTX - int wedge_offset_x, int wedge_offset_y, + int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX - const MACROBLOCKD *xd) { + const MACROBLOCKD *xd) { const MODE_INFO *mi = xd->mi[0]; - // The prediction filter types used here should be those for - // the second reference block. +// The prediction filter types used here should be those for +// the second reference block. #if CONFIG_DUAL_FILTER INTERP_FILTER tmp_ipf[4] = { interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3], @@ -565,89 +438,63 @@ void vp10_make_masked_inter_predictor( #endif // CONFIG_DUAL_FILTER #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]); - uint8_t *tmp_dst = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? - CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; - vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, - subpel_x, subpel_y, sf, w, h, 0, - tmp_ipf, xs, ys, xd); + uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + ? CONVERT_TO_BYTEPTR(tmp_dst_) + : tmp_dst_; + vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, + subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd); #if CONFIG_SUPERTX if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_wedge_extend_highbd( - dst, dst_stride, - dst, dst_stride, - tmp_dst, MAX_SB_SIZE, - mi->mbmi.interinter_wedge_index, - mi->mbmi.interinter_wedge_sign, - mi->mbmi.sb_type, - wedge_offset_x, wedge_offset_y, h, w, xd->bd); + dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, + mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign, + mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd); else build_masked_compound_wedge_extend( - dst, dst_stride, - dst, dst_stride, - tmp_dst, MAX_SB_SIZE, - mi->mbmi.interinter_wedge_index, - mi->mbmi.interinter_wedge_sign, - mi->mbmi.sb_type, - wedge_offset_x, wedge_offset_y, h, w); + dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, + mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign, + mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); #else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_wedge_highbd( - dst, dst_stride, - dst, dst_stride, - tmp_dst, MAX_SB_SIZE, - mi->mbmi.interinter_wedge_index, - mi->mbmi.interinter_wedge_sign, + dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, + mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign, mi->mbmi.sb_type, h, w, xd->bd); else - build_masked_compound_wedge( - dst, dst_stride, - dst, dst_stride, - tmp_dst, MAX_SB_SIZE, - mi->mbmi.interinter_wedge_index, - mi->mbmi.interinter_wedge_sign, - mi->mbmi.sb_type, h, w); + build_masked_compound_wedge(dst, dst_stride, dst, dst_stride, tmp_dst, + MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, + mi->mbmi.interinter_wedge_sign, + mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX #else // CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]); - vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, - subpel_x, subpel_y, sf, w, h, 0, - tmp_ipf, xs, ys, xd); + vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, + subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd); #if CONFIG_SUPERTX build_masked_compound_wedge_extend( - dst, dst_stride, - dst, dst_stride, - tmp_dst, MAX_SB_SIZE, - mi->mbmi.interinter_wedge_index, - mi->mbmi.interinter_wedge_sign, - mi->mbmi.sb_type, - wedge_offset_x, wedge_offset_y, h, w); + dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, + mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign, + mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); #else - build_masked_compound_wedge( - dst, dst_stride, - dst, dst_stride, - tmp_dst, MAX_SB_SIZE, - mi->mbmi.interinter_wedge_index, - mi->mbmi.interinter_wedge_sign, - mi->mbmi.sb_type, h, w); + build_masked_compound_wedge(dst, dst_stride, dst, dst_stride, tmp_dst, + MAX_SB_SIZE, mi->mbmi.interinter_wedge_index, + mi->mbmi.interinter_wedge_sign, mi->mbmi.sb_type, + h, w); #endif // CONFIG_SUPERTX #endif // CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_EXT_INTER #if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *src_mv, - const struct scale_factors *sf, - int w, int h, int ref, +void vp10_highbd_build_inter_predictor( + const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, + const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - enum mv_precision precision, - int x, int y, int bd) { + enum mv_precision precision, int x, int y, int bd) { const int is_q4 = precision == MV_PRECISION_Q4; const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, is_q4 ? src_mv->col : src_mv->col * 2 }; @@ -664,17 +511,15 @@ void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride, #endif // CONFIG_VP9_HIGHBITDEPTH void vp10_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *src_mv, - const struct scale_factors *sf, - int w, int h, int ref, + uint8_t *dst, int dst_stride, const MV *src_mv, + const struct scale_factors *sf, int w, int h, + int ref, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - enum mv_precision precision, - int x, int y) { + enum mv_precision precision, int x, int y) { const int is_q4 = precision == MV_PRECISION_Q4; const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, is_q4 ? src_mv->col : src_mv->col * 2 }; @@ -684,17 +529,16 @@ void vp10_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4); + inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, + h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4); } void build_inter_predictors(MACROBLOCKD *xd, int plane, #if CONFIG_OBMC int mi_col_offset, int mi_row_offset, #endif // CONFIG_OBMC - int block, - int bw, int bh, - int x, int y, int w, int h, + int block, int bw, int bh, int x, int y, int w, + int h, #if CONFIG_SUPERTX && CONFIG_EXT_INTER int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER @@ -712,12 +556,12 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, int is_global[2]; for (ref = 0; ref < 1 + is_compound; ++ref) { gm[ref] = &xd->global_motion[mi->mbmi.ref_frame[ref]]; - is_global[ref] = (get_y_mode(mi, block) == ZEROMV && - get_gmtype(gm[ref]) > GLOBAL_ZERO); + is_global[ref] = + (get_y_mode(mi, block) == ZEROMV && get_gmtype(gm[ref]) > GLOBAL_ZERO); } // TODO(sarahparker) remove these once gm works with all experiments - (void) gm; - (void) is_global; + (void)gm; + (void)is_global; #endif // CONFIG_GLOBAL_MOTION // TODO(sarahparker) enable the use of DUAL_FILTER in warped motion functions @@ -809,17 +653,16 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; const MV mv = mi->mbmi.sb_type < BLOCK_8X8 - ? average_split_mvs(pd, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; + ? average_split_mvs(pd, mi, ref, block) + : mi->mbmi.mv[ref].as_mv; // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling // MV. Note however that it performs the subsampling aware scaling so // that the result is always q4. // mv_precision precision is MV_PRECISION_Q4. - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); uint8_t *pre; MV32 scaled_mv; @@ -840,20 +683,19 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, subpel_x = scaled_mv.col & SUBPEL_MASK; subpel_y = scaled_mv.row & SUBPEL_MASK; - pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride - + (scaled_mv.col >> SUBPEL_BITS); + pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + + (scaled_mv.col >> SUBPEL_BITS); #if CONFIG_EXT_INTER if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) && mi->mbmi.use_wedge_interinter) - vp10_make_masked_inter_predictor( - pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, - mi->mbmi.interp_filter, xs, ys, + vp10_make_masked_inter_predictor(pre, pre_buf->stride, dst, + dst_buf->stride, subpel_x, subpel_y, sf, + w, h, mi->mbmi.interp_filter, xs, ys, #if CONFIG_SUPERTX - wedge_offset_x, wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif // CONFIG_SUPERTX - xd); + xd); else #else // CONFIG_EXT_INTER #if CONFIG_GLOBAL_MOTION @@ -875,9 +717,8 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, } } -void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, - int i, int ir, int ic, - int mi_row, int mi_col) { +void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i, + int ir, int ic, int mi_row, int mi_col) { struct macroblockd_plane *const pd = &xd->plane[plane]; MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); @@ -892,32 +733,25 @@ void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, const uint8_t *pre = &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2]; #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - vp10_highbd_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, - ref, mi->mbmi.interp_filter, - MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * ic, - mi_row * MI_SIZE + 4 * ir, xd->bd); - } else { - vp10_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, ref, - mi->mbmi.interp_filter, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * ic, - mi_row * MI_SIZE + 4 * ir); - } + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp10_highbd_build_inter_predictor( + pre, pd->pre[ref].stride, dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, + ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir, xd->bd); + } else { + vp10_build_inter_predictor( + pre, pd->pre[ref].stride, dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, + ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir); + } #else - vp10_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, ref, - mi->mbmi.interp_filter, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * ic, - mi_row * MI_SIZE + 4 * ir); + vp10_build_inter_predictor( + pre, pd->pre[ref].stride, dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, + ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic, + mi_row * MI_SIZE + 4 * ir); #endif // CONFIG_VP9_HIGHBITDEPTH } } @@ -947,23 +781,21 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - build_inter_predictors(xd, plane, + build_inter_predictors(xd, plane, #if CONFIG_OBMC - 0, 0, + 0, 0, #endif // CONFIG_OBMC - y * 2 + x, bw, bh, - 4 * x, 4 * y, pw, ph, + y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph, #if CONFIG_SUPERTX && CONFIG_EXT_INTER - 0, 0, + 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER - mi_x, mi_y); + mi_x, mi_y); } else { build_inter_predictors(xd, plane, #if CONFIG_OBMC 0, 0, #endif // CONFIG_OBMC - 0, bw, bh, - 0, 0, bw, bh, + 0, bw, bh, 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER @@ -973,14 +805,12 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, } void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) - vp10_build_interintra_predictors_sby(xd, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride, - bsize); + vp10_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf, + xd->plane[0].dst.stride, bsize); #endif // CONFIG_EXT_INTER } @@ -990,15 +820,12 @@ void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) { if (plane == 0) { - vp10_build_interintra_predictors_sby(xd, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride, - bsize); + vp10_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf, + xd->plane[0].dst.stride, bsize); } else { - vp10_build_interintra_predictors_sbc(xd, - xd->plane[plane].dst.buf, - xd->plane[plane].dst.stride, - plane, bsize); + vp10_build_interintra_predictors_sbc(xd, xd->plane[plane].dst.buf, + xd->plane[plane].dst.stride, plane, + bsize); } } #endif // CONFIG_EXT_INTER @@ -1010,124 +837,103 @@ void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, MAX_MB_PLANE - 1); #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) - vp10_build_interintra_predictors_sbuv(xd, - xd->plane[1].dst.buf, - xd->plane[2].dst.buf, - xd->plane[1].dst.stride, - xd->plane[2].dst.stride, - bsize); + vp10_build_interintra_predictors_sbuv( + xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[1].dst.stride, + xd->plane[2].dst.stride, bsize); #endif // CONFIG_EXT_INTER } void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, MAX_MB_PLANE - 1); #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) - vp10_build_interintra_predictors(xd, - xd->plane[0].dst.buf, - xd->plane[1].dst.buf, - xd->plane[2].dst.buf, - xd->plane[0].dst.stride, - xd->plane[1].dst.stride, - xd->plane[2].dst.stride, - bsize); + vp10_build_interintra_predictors( + xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf, + xd->plane[0].dst.stride, xd->plane[1].dst.stride, + xd->plane[2].dst.stride, bsize); #endif // CONFIG_EXT_INTER } void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col) { + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col) { uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, - src->v_buffer}; + src->v_buffer }; const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width, - src->uv_crop_width}; + src->uv_crop_width }; const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height, - src->uv_crop_height}; + src->uv_crop_height }; const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, - src->uv_stride}; + src->uv_stride }; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &planes[i]; - setup_pred_plane(&pd->dst, buffers[i], widths[i], - heights[i], strides[i], mi_row, mi_col, NULL, - pd->subsampling_x, pd->subsampling_y); + setup_pred_plane(&pd->dst, buffers[i], widths[i], heights[i], strides[i], + mi_row, mi_col, NULL, pd->subsampling_x, + pd->subsampling_y); } } void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *sf) { + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col, const struct scale_factors *sf) { if (src != NULL) { int i; uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, - src->v_buffer}; + src->v_buffer }; const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width, - src->uv_crop_width}; + src->uv_crop_width }; const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height, - src->uv_crop_height}; + src->uv_crop_height }; const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, - src->uv_stride}; + src->uv_stride }; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->pre[idx], buffers[i], widths[i], - heights[i], strides[i], mi_row, mi_col, - sf, pd->subsampling_x, pd->subsampling_y); + setup_pred_plane(&pd->pre[idx], buffers[i], widths[i], heights[i], + strides[i], mi_row, mi_col, sf, pd->subsampling_x, + pd->subsampling_y); } } } #if CONFIG_SUPERTX -static const uint8_t mask_8[8] = { - 64, 64, 62, 52, 12, 2, 0, 0 -}; +static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 }; -static const uint8_t mask_16[16] = { - 63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1 -}; +static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36, + 28, 21, 14, 9, 6, 4, 2, 1 }; -static const uint8_t mask_32[32] = { - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36, - 28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; +static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, + 61, 57, 52, 45, 36, 28, 19, 12, 7, 3, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -static const uint8_t mask_8_uv[8] = { - 64, 64, 62, 52, 12, 2, 0, 0 -}; +static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 }; -static const uint8_t mask_16_uv[16] = { - 64, 64, 64, 64, 61, 53, 45, 36, 28, 19, 11, 3, 0, 0, 0, 0 -}; +static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36, + 28, 19, 11, 3, 0, 0, 0, 0 }; -static const uint8_t mask_32_uv[32] = { - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 60, 54, 46, 36, - 28, 18, 10, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; +static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 60, 54, 46, 36, + 28, 18, 10, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; -static const uint8_t* get_supertx_mask(int length, int plane) { +static const uint8_t *get_supertx_mask(int length, int plane) { switch (length) { - case 8: - return plane ? mask_8_uv : mask_8; - case 16: - return plane ? mask_16_uv : mask_16; - case 32: - return plane ? mask_32_uv : mask_32; - default: - assert(0); + case 8: return plane ? mask_8_uv : mask_8; + case 16: return plane ? mask_16_uv : mask_16; + case 32: return plane ? mask_32_uv : mask_32; + default: assert(0); } return NULL; } void vp10_build_masked_inter_predictor_complex( - MACROBLOCKD *xd, - uint8_t *dst, int dst_stride, - const uint8_t *pre, int pre_stride, - int mi_row, int mi_col, - int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, - PARTITION_TYPE partition, int plane) { + MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre, + int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition, + int plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int ssx = pd->subsampling_x; const int ssy = pd->subsampling_y; @@ -1159,15 +965,11 @@ void vp10_build_masked_inter_predictor_complex( #if CONFIG_VP9_HIGHBITDEPTH if (is_hdb) - vpx_highbd_blend_a64_vmask(dst, dst_stride, - dst, dst_stride, - pre, pre_stride, - mask, h, top_w, xd->bd); + vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, + pre_stride, mask, h, top_w, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - vpx_blend_a64_vmask(dst, dst_stride, - dst, dst_stride, - pre, pre_stride, + vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride, mask, h, top_w); dst += h * dst_stride; @@ -1184,15 +986,11 @@ void vp10_build_masked_inter_predictor_complex( #if CONFIG_VP9_HIGHBITDEPTH if (is_hdb) - vpx_highbd_blend_a64_hmask(dst, dst_stride, - dst, dst_stride, - pre, pre_stride, - mask, top_h, w, xd->bd); + vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, + pre_stride, mask, top_h, w, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - vpx_blend_a64_hmask(dst, dst_stride, - dst, dst_stride, - pre, pre_stride, + vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride, mask, top_h, w); dst += w; @@ -1211,8 +1009,8 @@ void vp10_build_masked_inter_predictor_complex( #if CONFIG_VP9_HIGHBITDEPTH if (is_hdb) { - dst = (uint8_t*)CONVERT_TO_SHORTPTR(dst); - pre = (const uint8_t*)CONVERT_TO_SHORTPTR(pre); + dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst); + pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre); dst_stride *= 2; pre_stride *= 2; w_remain *= 2; @@ -1226,13 +1024,13 @@ void vp10_build_masked_inter_predictor_complex( } while (--h_remain); } -void vp10_build_inter_predictors_sb_sub8x8_extend( - MACROBLOCKD *xd, +void vp10_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd, #if CONFIG_EXT_INTER - int mi_row_ori, int mi_col_ori, + int mi_row_ori, + int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row, int mi_col, - BLOCK_SIZE bsize, int block) { + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block) { // Prediction function used in supertx: // Use the mv at current block (which is less than 8x8) // to get prediction of a block located at (mi_row, mi_col) at size of bsize @@ -1251,8 +1049,8 @@ void vp10_build_inter_predictors_sb_sub8x8_extend( int max_plane = block ? 1 : MAX_MB_PLANE; for (plane = 0; plane < max_plane; plane++) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - &xd->plane[plane]); + const BLOCK_SIZE plane_bsize = + get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int bw = 4 * num_4x4_w; @@ -1262,24 +1060,18 @@ void vp10_build_inter_predictors_sb_sub8x8_extend( #if CONFIG_OBMC 0, 0, #endif // CONFIG_OBMC - block, bw, bh, - 0, 0, bw, bh, + block, bw, bh, 0, 0, bw, bh, #if CONFIG_EXT_INTER - wedge_offset_x, - wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif // CONFIG_EXT_INTER mi_x, mi_y); } #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) - vp10_build_interintra_predictors(xd, - xd->plane[0].dst.buf, - xd->plane[1].dst.buf, - xd->plane[2].dst.buf, - xd->plane[0].dst.stride, - xd->plane[1].dst.stride, - xd->plane[2].dst.stride, - bsize); + vp10_build_interintra_predictors( + xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf, + xd->plane[0].dst.stride, xd->plane[1].dst.stride, + xd->plane[2].dst.stride, bsize); #endif // CONFIG_EXT_INTER } @@ -1297,8 +1089,8 @@ void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd, const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; #endif // CONFIG_EXT_INTER for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size( - bsize, &xd->plane[plane]); + const BLOCK_SIZE plane_bsize = + get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int bw = 4 * num_4x4_w; @@ -1309,29 +1101,25 @@ void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd, assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - build_inter_predictors( - xd, plane, + build_inter_predictors(xd, plane, #if CONFIG_OBMC - 0, 0, + 0, 0, #endif // CONFIG_OBMC - y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, + y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, #if CONFIG_EXT_INTER - wedge_offset_x, - wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif // CONFIG_EXT_INTER - mi_x, mi_y); + mi_x, mi_y); } else { - build_inter_predictors( - xd, plane, + build_inter_predictors(xd, plane, #if CONFIG_OBMC - 0, 0, + 0, 0, #endif // CONFIG_OBMC - 0, bw, bh, 0, 0, bw, bh, + 0, bw, bh, 0, 0, bw, bh, #if CONFIG_EXT_INTER - wedge_offset_x, - wedge_offset_y, + wedge_offset_x, wedge_offset_y, #endif // CONFIG_EXT_INTER - mi_x, mi_y); + mi_x, mi_y); } } } @@ -1339,30 +1127,21 @@ void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd, #if CONFIG_OBMC // obmc_mask_N[overlap_position] -static const uint8_t obmc_mask_1[1] = { - 55 -}; +static const uint8_t obmc_mask_1[1] = { 55 }; -static const uint8_t obmc_mask_2[2] = { - 45, 62 -}; +static const uint8_t obmc_mask_2[2] = { 45, 62 }; -static const uint8_t obmc_mask_4[4] = { - 39, 50, 59, 64 -}; +static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 }; -static const uint8_t obmc_mask_8[8] = { - 36, 42, 48, 53, 57, 61, 63, 64 -}; +static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 63, 64 }; -static const uint8_t obmc_mask_16[16] = { - 34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 63, 64, 64, 64 -}; +static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54, + 56, 58, 60, 61, 63, 64, 64, 64 }; -static const uint8_t obmc_mask_32[32] = { - 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, - 56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64 -}; +static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44, + 45, 47, 48, 50, 51, 52, 53, 55, + 56, 57, 58, 59, 60, 60, 61, 62, + 62, 63, 63, 64, 64, 64, 64, 64 }; #if CONFIG_EXT_PARTITION static const uint8_t obmc_mask_64[64] = { @@ -1373,28 +1152,18 @@ static const uint8_t obmc_mask_64[64] = { }; #endif // CONFIG_EXT_PARTITION - -const uint8_t* vp10_get_obmc_mask(int length) { +const uint8_t *vp10_get_obmc_mask(int length) { switch (length) { - case 1: - return obmc_mask_1; - case 2: - return obmc_mask_2; - case 4: - return obmc_mask_4; - case 8: - return obmc_mask_8; - case 16: - return obmc_mask_16; - case 32: - return obmc_mask_32; + case 1: return obmc_mask_1; + case 2: return obmc_mask_2; + case 4: return obmc_mask_4; + case 8: return obmc_mask_8; + case 16: return obmc_mask_16; + case 32: return obmc_mask_32; #if CONFIG_EXT_PARTITION - case 64: - return obmc_mask_64; + case 64: return obmc_mask_64; #endif // CONFIG_EXT_PARTITION - default: - assert(0); - return NULL; + default: assert(0); return NULL; } } @@ -1402,8 +1171,8 @@ const uint8_t* vp10_get_obmc_mask(int length) { // top/left neighboring blocks' inter predictors with the regular inter // prediction. We assume the original prediction (bmc) is stored in // xd->plane[].dst.buf -void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, - MACROBLOCKD *xd, int mi_row, int mi_col, +void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int mi_col, uint8_t *above[MAX_MB_PLANE], int above_stride[MAX_MB_PLANE], uint8_t *left[MAX_MB_PLANE], @@ -1436,8 +1205,7 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x; const int bh = overlap >> pd->subsampling_y; const int dst_stride = pd->dst.stride; - uint8_t *const dst = - &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x]; + uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x]; const int tmp_stride = above_stride[plane]; const uint8_t *const tmp = &above[plane][(i * MI_SIZE) >> pd->subsampling_x]; @@ -1445,12 +1213,12 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, #if CONFIG_VP9_HIGHBITDEPTH if (is_hbd) - vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, - tmp, tmp_stride, mask, bh, bw, xd->bd); + vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, + tmp_stride, mask, bh, bw, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, - tmp, tmp_stride, mask, bh, bw); + vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, + tmp_stride, mask, bh, bw); } } i += mi_step; @@ -1488,12 +1256,12 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, #if CONFIG_VP9_HIGHBITDEPTH if (is_hbd) - vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, - tmp, tmp_stride, mask, bh, bw, xd->bd); + vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, + tmp_stride, mask, bh, bw, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, - tmp, tmp_stride, mask, bh, bw); + vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, + tmp_stride, mask, bh, bw); } } i += mi_step; @@ -1514,8 +1282,7 @@ void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) { } #endif // CONFIG_EXT_INTER -void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, - MACROBLOCKD *xd, +void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], @@ -1525,25 +1292,21 @@ void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int i, j, mi_step, ref; - if (mi_row <= tile->mi_row_start) - return; + if (mi_row <= tile->mi_row_start) return; for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) { int mi_row_offset = -1; int mi_col_offset = i; int mi_x, mi_y, bw, bh; - MODE_INFO *above_mi = xd->mi[mi_col_offset + - mi_row_offset * xd->mi_stride]; + MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *above_mbmi = &above_mi->mbmi; #if CONFIG_EXT_INTER MB_MODE_INFO backup_mbmi; #endif // CONFIG_EXT_INTER - mi_step = VPXMIN(xd->n8_w, - num_8x8_blocks_wide_lookup[above_mbmi->sb_type]); + mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]); - if (!is_neighbor_overlappable(above_mbmi)) - continue; + if (!is_neighbor_overlappable(above_mbmi)) continue; #if CONFIG_EXT_INTER backup_mbmi = *above_mbmi; @@ -1552,11 +1315,9 @@ void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, for (j = 0; j < MAX_MB_PLANE; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; - setup_pred_plane(&pd->dst, - tmp_buf[j], tmp_width[j], - tmp_height[j], tmp_stride[j], - 0, i, NULL, - pd->subsampling_x, pd->subsampling_y); + setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j], + tmp_stride[j], 0, i, NULL, pd->subsampling_x, + pd->subsampling_y); } for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) { MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref]; @@ -1570,7 +1331,7 @@ void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, &ref_buf->sf); } - xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8); + xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8); mi_x = (mi_col + i) << MI_SIZE_LOG2; mi_y = mi_row << MI_SIZE_LOG2; @@ -1591,21 +1352,20 @@ void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) { - if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) - && y == 0 && !pd->subsampling_y) + if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y == 0 && + !pd->subsampling_y) continue; build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, - y * 2 + x, bw, bh, - 4 * x, 0, pw, bh, + y * 2 + x, bw, bh, 4 * x, 0, pw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); } } else { - build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, - 0, bw, bh, 0, 0, bw, bh, + build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, + 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER @@ -1616,11 +1376,10 @@ void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, *above_mbmi = backup_mbmi; #endif // CONFIG_EXT_INTER } - xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); } -void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, - MACROBLOCKD *xd, +void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], @@ -1630,25 +1389,21 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int i, j, mi_step, ref; - if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) - return; + if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return; for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) { int mi_row_offset = i; int mi_col_offset = -1; int mi_x, mi_y, bw, bh; - MODE_INFO *left_mi = xd->mi[mi_col_offset + - mi_row_offset * xd->mi_stride]; + MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *left_mbmi = &left_mi->mbmi; #if CONFIG_EXT_INTER MB_MODE_INFO backup_mbmi; #endif // CONFIG_EXT_INTER - mi_step = VPXMIN(xd->n8_h, - num_8x8_blocks_high_lookup[left_mbmi->sb_type]); + mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]); - if (!is_neighbor_overlappable(left_mbmi)) - continue; + if (!is_neighbor_overlappable(left_mbmi)) continue; #if CONFIG_EXT_INTER backup_mbmi = *left_mbmi; @@ -1657,11 +1412,9 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, for (j = 0; j < MAX_MB_PLANE; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; - setup_pred_plane(&pd->dst, - tmp_buf[j], tmp_width[j], - tmp_height[j], tmp_stride[j], - i, 0, NULL, - pd->subsampling_x, pd->subsampling_y); + setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j], + tmp_stride[j], i, 0, NULL, pd->subsampling_x, + pd->subsampling_y); } for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) { MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref]; @@ -1675,7 +1428,7 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, &ref_buf->sf); } - xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8); + xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8); mi_x = mi_col << MI_SIZE_LOG2; mi_y = (mi_row + i) << MI_SIZE_LOG2; @@ -1696,21 +1449,20 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) { - if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) - && x == 0 && !pd->subsampling_x) + if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x == 0 && + !pd->subsampling_x) continue; build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, - y * 2 + x, bw, bh, - 0, 4 * y, bw, ph, + y * 2 + x, bw, bh, 0, 4 * y, bw, ph, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); } } else { - build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, - bw, bh, 0, 0, bw, bh, + build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, + 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER @@ -1721,61 +1473,40 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, *left_mbmi = backup_mbmi; #endif // CONFIG_EXT_INTER } - xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); } #endif // CONFIG_OBMC #if CONFIG_EXT_INTER #if CONFIG_EXT_PARTITION static const int ii_weights1d[MAX_SB_SIZE] = { - 102, 100, 97, 95, 92, 90, 88, 86, - 84, 82, 80, 78, 76, 74, 73, 71, - 69, 68, 67, 65, 64, 62, 61, 60, - 59, 58, 57, 55, 54, 53, 52, 52, - 51, 50, 49, 48, 47, 47, 46, 45, - 45, 44, 43, 43, 42, 41, 41, 40, - 40, 39, 39, 38, 38, 38, 37, 37, - 36, 36, 36, 35, 35, 35, 34, 34, - 34, 33, 33, 33, 33, 32, 32, 32, - 32, 32, 31, 31, 31, 31, 31, 30, - 30, 30, 30, 30, 30, 30, 29, 29, - 29, 29, 29, 29, 29, 29, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, -}; -static int ii_size_scales[BLOCK_SIZES] = { - 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 + 102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, 69, 68, 67, + 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 48, 47, 47, + 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 36, + 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 31, + 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, }; +static int ii_size_scales[BLOCK_SIZES] = { 32, 16, 16, 16, 8, 8, 8, 4, + 4, 4, 2, 2, 2, 1, 1, 1 }; #else static const int ii_weights1d[MAX_SB_SIZE] = { - 102, 100, 97, 95, 92, 90, 88, 86, - 84, 82, 80, 78, 76, 74, 73, 71, - 69, 68, 67, 65, 64, 62, 61, 60, - 59, 58, 57, 55, 54, 53, 52, 52, - 51, 50, 49, 48, 47, 47, 46, 45, - 45, 44, 43, 43, 42, 41, 41, 40, - 40, 39, 39, 38, 38, 38, 37, 37, - 36, 36, 36, 35, 35, 35, 34, 34, -}; -static int ii_size_scales[BLOCK_SIZES] = { - 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1 + 102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, + 69, 68, 67, 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52, + 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, + 40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, }; +static int ii_size_scales[BLOCK_SIZES] = { 16, 8, 8, 8, 4, 4, 4, + 2, 2, 2, 1, 1, 1 }; #endif // CONFIG_EXT_PARTITION -static void combine_interintra(INTERINTRA_MODE mode, - int use_wedge_interintra, - int wedge_index, - int wedge_sign, - BLOCK_SIZE bsize, - BLOCK_SIZE plane_bsize, - uint8_t *comppred, - int compstride, - const uint8_t *interpred, - int interstride, - const uint8_t *intrapred, - int intrastride) { +static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra, + int wedge_index, int wedge_sign, + BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, + uint8_t *comppred, int compstride, + const uint8_t *interpred, int interstride, + const uint8_t *intrapred, int intrastride) { const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; const int size_scale = ii_size_scales[plane_bsize]; @@ -1783,16 +1514,13 @@ static void combine_interintra(INTERINTRA_MODE mode, if (use_wedge_interintra) { if (is_interintra_wedge_used(bsize)) { - const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index, - wedge_sign, - bsize); + const uint8_t *mask = + vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw; const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh; - vpx_blend_a64_mask(comppred, compstride, - intrapred, intrastride, - interpred, interstride, - mask, 4 * num_4x4_blocks_wide_lookup[bsize], - bh, bw, subh, subw); + vpx_blend_a64_mask( + comppred, compstride, intrapred, intrastride, interpred, interstride, + mask, 4 * num_4x4_blocks_wide_lookup[bsize], bh, bw, subh, subw); } return; } @@ -1803,8 +1531,7 @@ static void combine_interintra(INTERINTRA_MODE mode, for (j = 0; j < bw; ++j) { int scale = ii_weights1d[i * size_scale]; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1815,8 +1542,7 @@ static void combine_interintra(INTERINTRA_MODE mode, for (j = 0; j < bw; ++j) { int scale = ii_weights1d[j * size_scale]; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1827,10 +1553,10 @@ static void combine_interintra(INTERINTRA_MODE mode, for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[i * size_scale] * 3 + - ii_weights1d[j * size_scale]) >> 2; + ii_weights1d[j * size_scale]) >> + 2; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1841,10 +1567,10 @@ static void combine_interintra(INTERINTRA_MODE mode, for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[j * size_scale] * 3 + - ii_weights1d[i * size_scale]) >> 2; + ii_weights1d[i * size_scale]) >> + 2; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1855,8 +1581,7 @@ static void combine_interintra(INTERINTRA_MODE mode, for (j = 0; j < bw; ++j) { int scale = ii_weights1d[(i < j ? i : j) * size_scale]; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1865,11 +1590,11 @@ static void combine_interintra(INTERINTRA_MODE mode, case II_D45_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (ii_weights1d[i * size_scale] + - ii_weights1d[j * size_scale]) >> 1; + int scale = + (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >> + 1; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1880,9 +1605,8 @@ static void combine_interintra(INTERINTRA_MODE mode, default: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - comppred[i * compstride + j] = - VPX_BLEND_AVG(intrapred[i * intrastride + j], - interpred[i * interstride + j]); + comppred[i * compstride + j] = VPX_BLEND_AVG( + intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; @@ -1890,18 +1614,11 @@ static void combine_interintra(INTERINTRA_MODE mode, } #if CONFIG_VP9_HIGHBITDEPTH -static void combine_interintra_highbd(INTERINTRA_MODE mode, - int use_wedge_interintra, - int wedge_index, - int wedge_sign, - BLOCK_SIZE bsize, - BLOCK_SIZE plane_bsize, - uint8_t *comppred8, - int compstride, - const uint8_t *interpred8, - int interstride, - const uint8_t *intrapred8, - int intrastride, int bd) { +static void combine_interintra_highbd( + INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index, + int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, + uint8_t *comppred8, int compstride, const uint8_t *interpred8, + int interstride, const uint8_t *intrapred8, int intrastride, int bd) { const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; const int size_scale = ii_size_scales[plane_bsize]; @@ -1913,16 +1630,13 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, if (use_wedge_interintra) { if (is_interintra_wedge_used(bsize)) { - const uint8_t *mask = vp10_get_contiguous_soft_mask(wedge_index, - wedge_sign, - bsize); + const uint8_t *mask = + vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh; const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw; - vpx_highbd_blend_a64_mask(comppred8, compstride, - intrapred8, intrastride, - interpred8, interstride, - mask, bw, - bh, bw, subh, subw, bd); + vpx_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride, + interpred8, interstride, mask, bw, bh, bw, subh, + subw, bd); } return; } @@ -1933,8 +1647,7 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, for (j = 0; j < bw; ++j) { int scale = ii_weights1d[i * size_scale]; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1945,8 +1658,7 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, for (j = 0; j < bw; ++j) { int scale = ii_weights1d[j * size_scale]; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1957,10 +1669,10 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[i * size_scale] * 3 + - ii_weights1d[j * size_scale]) >> 2; + ii_weights1d[j * size_scale]) >> + 2; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1971,10 +1683,10 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[j * size_scale] * 3 + - ii_weights1d[i * size_scale]) >> 2; + ii_weights1d[i * size_scale]) >> + 2; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1985,8 +1697,7 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, for (j = 0; j < bw; ++j) { int scale = ii_weights1d[(i < j ? i : j) * size_scale]; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -1995,11 +1706,11 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, case II_D45_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - int scale = (ii_weights1d[i * size_scale] + - ii_weights1d[j * size_scale]) >> 1; + int scale = + (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >> + 1; comppred[i * compstride + j] = - VPX_BLEND_A256(scale, - intrapred[i * intrastride + j], + VPX_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } @@ -2010,9 +1721,8 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, default: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { - comppred[i * compstride + j] = - VPX_BLEND_AVG(interpred[i * interstride + j], - intrapred[i * intrastride + j]); + comppred[i * compstride + j] = VPX_BLEND_AVG( + interpred[i * interstride + j], intrapred[i * intrastride + j]); } } break; @@ -2022,13 +1732,11 @@ static void combine_interintra_highbd(INTERINTRA_MODE mode, // Break down rectangular intra prediction for joint spatio-temporal prediction // into two square intra predictions. -static void build_intra_predictors_for_interintra( - MACROBLOCKD *xd, - uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - PREDICTION_MODE mode, - BLOCK_SIZE bsize, - int plane) { +static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref, + int ref_stride, uint8_t *dst, + int dst_stride, + PREDICTION_MODE mode, + BLOCK_SIZE bsize, int plane) { BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); const int bwl = b_width_log2_lookup[plane_bsize]; const int bhl = b_height_log2_lookup[plane_bsize]; @@ -2037,16 +1745,14 @@ static void build_intra_predictors_for_interintra( TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; if (bwl == bhl) { - vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, - ref, ref_stride, dst, dst_stride, - 0, 0, plane); + vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride, + dst, dst_stride, 0, 0, plane); } else if (bwl < bhl) { uint8_t *src_2 = ref + pxbw * ref_stride; uint8_t *dst_2 = dst + pxbw * dst_stride; - vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, - ref, ref_stride, dst, dst_stride, - 0, 0, plane); + vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride, + dst, dst_stride, 0, 0, plane); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2); @@ -2058,16 +1764,14 @@ static void build_intra_predictors_for_interintra( { memcpy(src_2 - ref_stride, dst_2 - dst_stride, sizeof(*src_2) * pxbw); } - vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, - src_2, ref_stride, dst_2, dst_stride, - 0, 1 << bwl, plane); + vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride, + dst_2, dst_stride, 0, 1 << bwl, plane); } else { // bwl > bhl int i; uint8_t *src_2 = ref + pxbh; uint8_t *dst_2 = dst + pxbh; - vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, - ref, ref_stride, dst, dst_stride, - 0, 0, plane); + vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride, + dst, dst_stride, 0, 0, plane); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2); @@ -2080,77 +1784,54 @@ static void build_intra_predictors_for_interintra( for (i = 0; i < pxbh; ++i) src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1]; } - vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, - src_2, ref_stride, dst_2, dst_stride, - 1 << bhl, 0, plane); + vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride, + dst_2, dst_stride, 1 << bhl, 0, plane); } } // Mapping of interintra to intra mode for use in the intra component static const int interintra_to_intra_mode[INTERINTRA_MODES] = { - DC_PRED, - V_PRED, - H_PRED, - D45_PRED, - D135_PRED, - D117_PRED, - D153_PRED, - D207_PRED, - D63_PRED, - TM_PRED + DC_PRED, V_PRED, H_PRED, D45_PRED, D135_PRED, + D117_PRED, D153_PRED, D207_PRED, D63_PRED, TM_PRED }; -void vp10_build_intra_predictors_for_interintra( - MACROBLOCKD *xd, - BLOCK_SIZE bsize, int plane, - uint8_t *dst, int dst_stride) { +void vp10_build_intra_predictors_for_interintra(MACROBLOCKD *xd, + BLOCK_SIZE bsize, int plane, + uint8_t *dst, int dst_stride) { build_intra_predictors_for_interintra( - xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, - dst, dst_stride, - interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode], + xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, dst, + dst_stride, interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode], bsize, plane); } -void vp10_combine_interintra(MACROBLOCKD *xd, - BLOCK_SIZE bsize, int plane, +void vp10_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, const uint8_t *inter_pred, int inter_stride, const uint8_t *intra_pred, int intra_stride) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - combine_interintra_highbd(xd->mi[0]->mbmi.interintra_mode, - xd->mi[0]->mbmi.use_wedge_interintra, - xd->mi[0]->mbmi.interintra_wedge_index, - xd->mi[0]->mbmi.interintra_wedge_sign, - bsize, - plane_bsize, - xd->plane[plane].dst.buf, - xd->plane[plane].dst.stride, - inter_pred, inter_stride, - intra_pred, intra_stride, - xd->bd); + combine_interintra_highbd( + xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra, + xd->mi[0]->mbmi.interintra_wedge_index, + xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize, + xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred, + inter_stride, intra_pred, intra_stride, xd->bd); return; } #endif // CONFIG_VP9_HIGHBITDEPTH combine_interintra(xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra, xd->mi[0]->mbmi.interintra_wedge_index, - xd->mi[0]->mbmi.interintra_wedge_sign, - bsize, - plane_bsize, + xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, - inter_pred, inter_stride, - intra_pred, intra_stride); + inter_pred, inter_stride, intra_pred, intra_stride); } -void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride, - BLOCK_SIZE bsize) { +void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, + int ystride, BLOCK_SIZE bsize) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, - intrapredictor[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); vp10_build_intra_predictors_for_interintra( xd, bsize, 0, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); vp10_combine_interintra(xd, bsize, 0, ypred, ystride, @@ -2160,22 +1841,19 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, #endif // CONFIG_VP9_HIGHBITDEPTH { DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]); - vp10_build_intra_predictors_for_interintra( - xd, bsize, 0, intrapredictor, MAX_SB_SIZE); - vp10_combine_interintra(xd, bsize, 0, ypred, ystride, - intrapredictor, MAX_SB_SIZE); + vp10_build_intra_predictors_for_interintra(xd, bsize, 0, intrapredictor, + MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, 0, ypred, ystride, intrapredictor, + MAX_SB_SIZE); } } -void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, - uint8_t *upred, - int ustride, - int plane, +void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred, + int ustride, int plane, BLOCK_SIZE bsize) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, - uintrapredictor[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]); vp10_build_intra_predictors_for_interintra( xd, bsize, plane, CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE); vp10_combine_interintra(xd, bsize, plane, upred, ustride, @@ -2185,41 +1863,35 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, #endif // CONFIG_VP9_HIGHBITDEPTH { DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]); - vp10_build_intra_predictors_for_interintra( - xd, bsize, plane, uintrapredictor, MAX_SB_SIZE); - vp10_combine_interintra(xd, bsize, plane, upred, ustride, - uintrapredictor, MAX_SB_SIZE); + vp10_build_intra_predictors_for_interintra(xd, bsize, plane, + uintrapredictor, MAX_SB_SIZE); + vp10_combine_interintra(xd, bsize, plane, upred, ustride, uintrapredictor, + MAX_SB_SIZE); } } -void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int ustride, int vstride, - BLOCK_SIZE bsize) { +void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred, + uint8_t *vpred, int ustride, + int vstride, BLOCK_SIZE bsize) { vp10_build_interintra_predictors_sbc(xd, upred, ustride, 1, bsize); vp10_build_interintra_predictors_sbc(xd, vpred, vstride, 2, bsize); } -void vp10_build_interintra_predictors(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, +void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred, + uint8_t *upred, uint8_t *vpred, int ystride, int ustride, int vstride, BLOCK_SIZE bsize) { vp10_build_interintra_predictors_sby(xd, ypred, ystride, bsize); - vp10_build_interintra_predictors_sbuv(xd, upred, vpred, - ustride, vstride, bsize); + vp10_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride, + bsize); } // Builds the inter-predictor for the single ref case // for use in the encoder to search the wedges efficiently. static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane, - int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y, - int ref, + int block, int bw, int bh, int x, + int y, int w, int h, int mi_x, + int mi_y, int ref, uint8_t *const ext_dst, int ext_dst_stride) { struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -2229,22 +1901,22 @@ static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane, struct buf_2d *const pre_buf = &pd->pre[ref]; #if CONFIG_VP9_HIGHBITDEPTH uint8_t *const dst = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? - CONVERT_TO_BYTEPTR(ext_dst) : ext_dst) + ext_dst_stride * y + x; + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst) + : ext_dst) + + ext_dst_stride * y + x; #else uint8_t *const dst = ext_dst + ext_dst_stride * y + x; #endif const MV mv = mi->mbmi.sb_type < BLOCK_8X8 - ? average_split_mvs(pd, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; + ? average_split_mvs(pd, mi, ref, block) + : mi->mbmi.mv[ref].as_mv; // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling // MV. Note however that it performs the subsampling aware scaling so // that the result is always q4. // mv_precision precision is MV_PRECISION_Q4. - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, - pd->subsampling_x, + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); uint8_t *pre; @@ -2266,25 +1938,23 @@ static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane, subpel_x = scaled_mv.col & SUBPEL_MASK; subpel_y = scaled_mv.row & SUBPEL_MASK; - pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride - + (scaled_mv.col >> SUBPEL_BITS); + pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + + (scaled_mv.col >> SUBPEL_BITS); - vp10_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, - subpel_x, subpel_y, sf, w, h, 0, - mi->mbmi.interp_filter, xs, ys, xd); + vp10_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x, + subpel_y, sf, w, h, 0, mi->mbmi.interp_filter, xs, + ys, xd); } void vp10_build_inter_predictors_for_planes_single_buf( - MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane_from, int plane_to, - int mi_row, int mi_col, int ref, - uint8_t *ext_dst[3], int ext_dst_stride[3]) { + MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row, + int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; for (plane = plane_from; plane <= plane_to; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - &xd->plane[plane]); + const BLOCK_SIZE plane_bsize = + get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int bw = 4 * num_4x4_w; @@ -2295,79 +1965,62 @@ void vp10_build_inter_predictors_for_planes_single_buf( assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - build_inter_predictors_single_buf(xd, plane, - y * 2 + x, bw, bh, - 4 * x, 4 * y, 4, 4, - mi_x, mi_y, ref, - ext_dst[plane], - ext_dst_stride[plane]); + build_inter_predictors_single_buf( + xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref, + ext_dst[plane], ext_dst_stride[plane]); } else { - build_inter_predictors_single_buf(xd, plane, - 0, bw, bh, - 0, 0, bw, bh, - mi_x, mi_y, ref, - ext_dst[plane], + build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh, + mi_x, mi_y, ref, ext_dst[plane], ext_dst_stride[plane]); } } } -static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, - int x, int y, int w, int h, - uint8_t *ext_dst0, - int ext_dst_stride0, - uint8_t *ext_dst1, - int ext_dst_stride1) { +static void build_wedge_inter_predictor_from_buf( + MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0, + int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) { const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); MACROBLOCKD_PLANE *const pd = &xd->plane[plane]; struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - if (is_compound - && is_interinter_wedge_used(mbmi->sb_type) - && mbmi->use_wedge_interinter) { + if (is_compound && is_interinter_wedge_used(mbmi->sb_type) && + mbmi->use_wedge_interinter) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_wedge_highbd( - dst, dst_buf->stride, - CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, + dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, - mbmi->interinter_wedge_index, - mbmi->interinter_wedge_sign, - mbmi->sb_type, h, w, - xd->bd); + mbmi->interinter_wedge_index, mbmi->interinter_wedge_sign, + mbmi->sb_type, h, w, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH build_masked_compound_wedge( - dst, dst_buf->stride, - ext_dst0, ext_dst_stride0, - ext_dst1, ext_dst_stride1, - mbmi->interinter_wedge_index, - mbmi->interinter_wedge_sign, - mbmi->sb_type, h, w); + dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1, + ext_dst_stride1, mbmi->interinter_wedge_index, + mbmi->interinter_wedge_sign, mbmi->sb_type, h, w); } else { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - vpx_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, + vpx_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, dst, dst_buf->stride, NULL, 0, NULL, 0, w, h, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - vpx_convolve_copy(ext_dst0, ext_dst_stride0, - dst, dst_buf->stride, NULL, 0, NULL, 0, w, h); + vpx_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL, + 0, NULL, 0, w, h); } } void vp10_build_wedge_inter_predictor_from_buf( - MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane_from, int plane_to, - uint8_t *ext_dst0[3], int ext_dst_stride0[3], - uint8_t *ext_dst1[3], int ext_dst_stride1[3]) { + MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, + uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3], + int ext_dst_stride1[3]) { int plane; for (plane = plane_from; plane <= plane_to; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - &xd->plane[plane]); + const BLOCK_SIZE plane_bsize = + get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; @@ -2376,21 +2029,15 @@ void vp10_build_wedge_inter_predictor_from_buf( assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - build_wedge_inter_predictor_from_buf(xd, plane, - 4 * x, 4 * y, 4, 4, - ext_dst0[plane], - ext_dst_stride0[plane], - ext_dst1[plane], - ext_dst_stride1[plane]); + build_wedge_inter_predictor_from_buf( + xd, plane, 4 * x, 4 * y, 4, 4, ext_dst0[plane], + ext_dst_stride0[plane], ext_dst1[plane], ext_dst_stride1[plane]); } else { const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - build_wedge_inter_predictor_from_buf(xd, plane, - 0, 0, bw, bh, - ext_dst0[plane], - ext_dst_stride0[plane], - ext_dst1[plane], - ext_dst_stride1[plane]); + build_wedge_inter_predictor_from_buf( + xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane], + ext_dst1[plane], ext_dst_stride1[plane]); } } } diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 9131c1ec8305256d38a9fc2aef596a867c3b4f27..5afad4e7e0bac4a6a959f1cca6daeff9c6a7e1a0 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -22,10 +22,9 @@ extern "C" { static INLINE void inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref_idx, + const int subpel_x, const int subpel_y, + const struct scale_factors *sf, int w, int h, + int ref_idx, #if CONFIG_DUAL_FILTER const INTERP_FILTER *interp_filter, #else @@ -44,8 +43,7 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, #if CONFIG_DUAL_FILTER if (interp_filter_params_x.taps == SUBPEL_TAPS && - interp_filter_params_y.taps == SUBPEL_TAPS && - w > 2 && h > 2) { + interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) { const int16_t *kernel_x = vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x); const int16_t *kernel_y = @@ -61,17 +59,14 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, if (IsInterpolatingFilter(interp_filter)) { // Interpolating filter sf->predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel_x, xs, kernel_y, ys, w, h); + src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h); } else { sf->predict_ni[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel_x, xs, kernel_y, ys, w, h); + src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h); } #else sf->predict[subpel_x != 0][subpel_y != 0][ref_idx]( - src, src_stride, dst, dst_stride, - kernel_x, xs, kernel_y, ys, w, h); + src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h); #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } else { // ref_idx > 0 means this is the second reference frame @@ -87,8 +82,8 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int subpel_x, const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, + const struct scale_factors *sf, int w, + int h, int ref, #if CONFIG_DUAL_FILTER const INTERP_FILTER *interp_filter, #else @@ -107,8 +102,7 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, #if CONFIG_DUAL_FILTER if (interp_filter_params_x.taps == SUBPEL_TAPS && - interp_filter_params_y.taps == SUBPEL_TAPS && - w > 2 && h > 2) { + interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) { const int16_t *kernel_x = vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x); const int16_t *kernel_y = @@ -124,47 +118,45 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, if (IsInterpolatingFilter(interp_filter)) { // Interpolating filter sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel_x, xs, kernel_y, ys, w, h, bd); + src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h, + bd); } else { sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel_x, xs, kernel_y, ys, w, h, bd); + src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h, + bd); } #else sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel_x, xs, kernel_y, ys, w, h, bd); + src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h, bd); #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } else { // ref > 0 means this is the second reference frame // first reference frame's prediction result is already in dst // therefore we need to average the first and second results int avg = ref > 0; - vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h, - interp_filter, subpel_x, xs, subpel_y, ys, avg, - bd); + vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter, + subpel_x, xs, subpel_y, ys, avg, bd); } } #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_EXT_INTER // Set to one to use larger codebooks -#define USE_LARGE_WEDGE_CODEBOOK 0 +#define USE_LARGE_WEDGE_CODEBOOK 0 #if USE_LARGE_WEDGE_CODEBOOK -#define MAX_WEDGE_TYPES (1 << 5) +#define MAX_WEDGE_TYPES (1 << 5) #else -#define MAX_WEDGE_TYPES (1 << 4) +#define MAX_WEDGE_TYPES (1 << 4) #endif -#define MAX_WEDGE_SIZE_LOG2 5 // 32x32 -#define MAX_WEDGE_SIZE (1 << MAX_WEDGE_SIZE_LOG2) -#define MAX_WEDGE_SQUARE (MAX_WEDGE_SIZE * MAX_WEDGE_SIZE) +#define MAX_WEDGE_SIZE_LOG2 5 // 32x32 +#define MAX_WEDGE_SIZE (1 << MAX_WEDGE_SIZE_LOG2) +#define MAX_WEDGE_SQUARE (MAX_WEDGE_SIZE * MAX_WEDGE_SIZE) #define WEDGE_WEIGHT_BITS 6 -#define WEDGE_NONE -1 +#define WEDGE_NONE -1 // Angles are with respect to horizontal anti-clockwise typedef enum { @@ -201,7 +193,7 @@ static INLINE int get_wedge_bits_lookup(BLOCK_SIZE sb_type) { } static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) { - (void) sb_type; + (void)sb_type; return wedge_params_lookup[sb_type].bits > 0; } @@ -211,7 +203,7 @@ static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) { } static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) { - (void) sb_type; + (void)sb_type; return wedge_params_lookup[sb_type].bits > 0; } @@ -224,63 +216,50 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, #if CONFIG_OBMC int mi_col_offset, int mi_row_offset, #endif // CONFIG_OBMC - int block, - int bw, int bh, - int x, int y, int w, int h, + int block, int bw, int bh, int x, int y, int w, + int h, #if CONFIG_SUPERTX && CONFIG_EXT_INTER int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER int mi_x, int mi_y); static INLINE void vp10_make_inter_predictor( - const uint8_t *src, - int src_stride, - uint8_t *dst, - int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, + const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, + const int subpel_x, const int subpel_y, const struct scale_factors *sf, int w, int h, int ref, #if CONFIG_DUAL_FILTER const INTERP_FILTER *interp_filter, #else const INTERP_FILTER interp_filter, #endif - int xs, int ys, - const MACROBLOCKD *xd) { - (void) xd; + int xs, int ys, const MACROBLOCKD *xd) { + (void)xd; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - highbd_inter_predictor(src, src_stride, dst, dst_stride, - subpel_x, subpel_y, sf, w, h, ref, - interp_filter, xs, ys, xd->bd); + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, interp_filter, xs, ys, xd->bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - inter_predictor(src, src_stride, dst, dst_stride, - subpel_x, subpel_y, sf, w, h, ref, - interp_filter, xs, ys); + inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, + h, ref, interp_filter, xs, ys); } #if CONFIG_EXT_INTER -void vp10_make_masked_inter_predictor( - const uint8_t *pre, - int pre_stride, - uint8_t *dst, - int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, +void vp10_make_masked_inter_predictor(const uint8_t *pre, int pre_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, const int subpel_y, + const struct scale_factors *sf, int w, + int h, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - int xs, int ys, + int xs, int ys, #if CONFIG_SUPERTX - int wedge_offset_x, int wedge_offset_y, + int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX - const MACROBLOCKD *xd); + const MACROBLOCKD *xd); #endif // CONFIG_EXT_INTER static INLINE int round_mv_comp_q4(int value) { @@ -288,14 +267,14 @@ static INLINE int round_mv_comp_q4(int value) { } static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { - MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + - mi->bmi[1].as_mv[idx].as_mv.row + - mi->bmi[2].as_mv[idx].as_mv.row + - mi->bmi[3].as_mv[idx].as_mv.row), - round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + - mi->bmi[1].as_mv[idx].as_mv.col + - mi->bmi[2].as_mv[idx].as_mv.col + - mi->bmi[3].as_mv[idx].as_mv.col) }; + MV res = { + round_mv_comp_q4( + mi->bmi[0].as_mv[idx].as_mv.row + mi->bmi[1].as_mv[idx].as_mv.row + + mi->bmi[2].as_mv[idx].as_mv.row + mi->bmi[3].as_mv[idx].as_mv.row), + round_mv_comp_q4( + mi->bmi[0].as_mv[idx].as_mv.col + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + mi->bmi[3].as_mv[idx].as_mv.col) + }; return res; } @@ -313,8 +292,8 @@ static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) { // TODO(jkoleszar): yet another mv clamping function :-( static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, - const MV *src_mv, - int bw, int bh, int ss_x, int ss_y) { + const MV *src_mv, int bw, int bh, + int ss_x, int ss_y) { // If the MV points so far into the UMV border that no visible pixels // are used for reconstruction, the subpel part of the MV can be // discarded and the MV limited to 16 pixels with equivalent results. @@ -322,15 +301,12 @@ static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const int spel_right = spel_left - SUBPEL_SHIFTS; const int spel_top = (VPX_INTERP_EXTEND + bh) << SUBPEL_BITS; const int spel_bottom = spel_top - SUBPEL_SHIFTS; - MV clamped_mv = { - src_mv->row * (1 << (1 - ss_y)), - src_mv->col * (1 << (1 - ss_x)) - }; + MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)), + src_mv->col * (1 << (1 - ss_x)) }; assert(ss_x <= 1); assert(ss_y <= 1); - clamp_mv(&clamped_mv, - xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, + clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); @@ -341,29 +317,19 @@ static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, static INLINE MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, int ref, int block) { const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); - MV res = {0, 0}; + MV res = { 0, 0 }; switch (ss_idx) { - case 0: - res = mi->bmi[block].as_mv[ref].as_mv; - break; - case 1: - res = mi_mv_pred_q2(mi, ref, block, block + 2); - break; - case 2: - res = mi_mv_pred_q2(mi, ref, block, block + 1); - break; - case 3: - res = mi_mv_pred_q4(mi, ref); - break; - default: - assert(ss_idx <= 3 && ss_idx >= 0); + case 0: res = mi->bmi[block].as_mv[ref].as_mv; break; + case 1: res = mi_mv_pred_q2(mi, ref, block, block + 2); break; + case 2: res = mi_mv_pred_q2(mi, ref, block, block + 1); break; + case 3: res = mi_mv_pred_q4(mi, ref); break; + default: assert(ss_idx <= 3 && ss_idx >= 0); } return res; } -void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, - int i, int ir, int ic, - int mi_row, int mi_col); +void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i, + int ir, int ic, int mi_row, int mi_col); void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); @@ -378,57 +344,49 @@ void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); #if CONFIG_SUPERTX -void vp10_build_inter_predictors_sb_sub8x8_extend( - MACROBLOCKD *xd, +void vp10_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd, #if CONFIG_EXT_INTER - int mi_row_ori, int mi_col_ori, + int mi_row_ori, + int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row, int mi_col, - BLOCK_SIZE bsize, int block); + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block); -void vp10_build_inter_predictors_sb_extend( - MACROBLOCKD *xd, +void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd, #if CONFIG_EXT_INTER - int mi_row_ori, int mi_col_ori, + int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row, int mi_col, - BLOCK_SIZE bsize); + int mi_row, int mi_col, + BLOCK_SIZE bsize); struct macroblockd_plane; void vp10_build_masked_inter_predictor_complex( - MACROBLOCKD *xd, - uint8_t *dst, int dst_stride, - const uint8_t *pre, int pre_stride, - int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, - BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, - PARTITION_TYPE partition, int plane); + MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre, + int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition, + int plane); #endif // CONFIG_SUPERTX void vp10_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *mv_q3, - const struct scale_factors *sf, - int w, int h, int do_avg, + uint8_t *dst, int dst_stride, const MV *mv_q3, + const struct scale_factors *sf, int w, int h, + int do_avg, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - enum mv_precision precision, - int x, int y); + enum mv_precision precision, int x, int y); #if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *mv_q3, - const struct scale_factors *sf, - int w, int h, int do_avg, +void vp10_highbd_build_inter_predictor( + const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, + const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - enum mv_precision precision, - int x, int y, int bd); + enum mv_precision precision, int x, int y, int bd); #endif static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, @@ -438,10 +396,9 @@ static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, return y * stride + x; } -static INLINE void setup_pred_plane(struct buf_2d *dst, - uint8_t *src, int width, - int height, int stride, - int mi_row, int mi_col, +static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, int width, + int height, int stride, int mi_row, + int mi_col, const struct scale_factors *scale, int subsampling_x, int subsampling_y) { const int x = (MI_SIZE * mi_col) >> subsampling_x; @@ -454,12 +411,12 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, } void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col); + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col); void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *sf); + const YV12_BUFFER_CONFIG *src, int mi_row, + int mi_col, const struct scale_factors *sf); #if CONFIG_DUAL_FILTER // Detect if the block have sub-pixel level motion vectors @@ -474,11 +431,9 @@ static INLINE int has_subpel_mv_component(const MODE_INFO *const mi, if (bsize >= BLOCK_8X8) { if (dir & 0x01) { - if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) - return 1; + if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1; } else { - if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) - return 1; + if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1; } } else { for (plane = 0; plane < MAX_MB_PLANE; ++plane) { @@ -494,11 +449,9 @@ static INLINE int has_subpel_mv_component(const MODE_INFO *const mi, for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x); if (dir & 0x01) { - if (mv.col & SUBPEL_MASK) - return 1; + if (mv.col & SUBPEL_MASK) return 1; } else { - if (mv.row & SUBPEL_MASK) - return 1; + if (mv.row & SUBPEL_MASK) return 1; } } } @@ -530,10 +483,8 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) { #endif // For scaled references, interpolation filter is indicated all the time. - if (vp10_is_scaled(&xd->block_refs[0]->sf)) - return 1; - if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf)) - return 1; + if (vp10_is_scaled(&xd->block_refs[0]->sf)) return 1; + if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf)) return 1; if (bsize < BLOCK_8X8) { for (plane = 0; plane < MAX_MB_PLANE; ++plane) { @@ -549,8 +500,7 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) { for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x); - if (mv_has_subpel(&mv)) - return 1; + if (mv_has_subpel(&mv)) return 1; } } } @@ -566,22 +516,20 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) { #endif // CONFIG_EXT_INTERP #if CONFIG_OBMC -const uint8_t* vp10_get_obmc_mask(int length); -void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, - MACROBLOCKD *xd, int mi_row, int mi_col, +const uint8_t *vp10_get_obmc_mask(int length); +void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, MACROBLOCKD *xd, + int mi_row, int mi_col, uint8_t *above[MAX_MB_PLANE], int above_stride[MAX_MB_PLANE], uint8_t *left[MAX_MB_PLANE], int left_stride[MAX_MB_PLANE]); -void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, - MACROBLOCKD *xd, +void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]); -void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, - MACROBLOCKD *xd, +void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], @@ -590,7 +538,7 @@ void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, #endif // CONFIG_OBMC #if CONFIG_EXT_INTER -#define MASK_MASTER_SIZE (2 * MAX_SB_SIZE) +#define MASK_MASTER_SIZE (2 * MAX_SB_SIZE) #define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE) void vp10_init_wedge_masks(); @@ -601,65 +549,44 @@ static INLINE const uint8_t *vp10_get_contiguous_soft_mask(int wedge_index, return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index]; } -const uint8_t *vp10_get_soft_mask(int wedge_index, - int wedge_sign, - BLOCK_SIZE sb_type, - int wedge_offset_x, +const uint8_t *vp10_get_soft_mask(int wedge_index, int wedge_sign, + BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y); -void vp10_build_interintra_predictors(MACROBLOCKD *xd, - uint8_t *ypred, - uint8_t *upred, - uint8_t *vpred, - int ystride, - int ustride, - int vstride, +void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred, + uint8_t *upred, uint8_t *vpred, + int ystride, int ustride, int vstride, BLOCK_SIZE bsize); -void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride, - BLOCK_SIZE bsize); -void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, - uint8_t *upred, - int ustride, - int plane, - BLOCK_SIZE bsize); -void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int ustride, int vstride, - BLOCK_SIZE bsize); - -void vp10_build_intra_predictors_for_interintra( - MACROBLOCKD *xd, - BLOCK_SIZE bsize, int plane, - uint8_t *intra_pred, int intra_stride); -void vp10_combine_interintra( - MACROBLOCKD *xd, - BLOCK_SIZE bsize, int plane, - const uint8_t *inter_pred, int inter_stride, - const uint8_t *intra_pred, int intra_stride); -void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, - uint8_t *upred, - uint8_t *vpred, - int ustride, int vstride, - BLOCK_SIZE bsize); -void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, - uint8_t *ypred, - int ystride, +void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, + int ystride, BLOCK_SIZE bsize); +void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred, + int ustride, int plane, BLOCK_SIZE bsize); +void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred, + uint8_t *vpred, int ustride, + int vstride, BLOCK_SIZE bsize); + +void vp10_build_intra_predictors_for_interintra(MACROBLOCKD *xd, + BLOCK_SIZE bsize, int plane, + uint8_t *intra_pred, + int intra_stride); +void vp10_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, + const uint8_t *inter_pred, int inter_stride, + const uint8_t *intra_pred, int intra_stride); +void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred, + uint8_t *vpred, int ustride, + int vstride, BLOCK_SIZE bsize); +void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, + int ystride, BLOCK_SIZE bsize); // Encoder only void vp10_build_inter_predictors_for_planes_single_buf( - MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane_from, int plane_to, - int mi_row, int mi_col, int ref, - uint8_t *ext_dst[3], int ext_dst_stride[3]); + MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row, + int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]); void vp10_build_wedge_inter_predictor_from_buf( - MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane_from, int plane_to, - uint8_t *ext_dst0[3], int ext_dst_stride0[3], - uint8_t *ext_dst1[3], int ext_dst_stride1[3]); + MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, + uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3], + int ext_dst_stride1[3]); #endif // CONFIG_EXT_INTER #ifdef __cplusplus diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c index b5b0777e56fd251e286f790cc8c95dc1d0959561..c7282217dfbc1de89e2a8e1591da4da52782bc18 100644 --- a/vp10/common/reconintra.c +++ b/vp10/common/reconintra.c @@ -52,100 +52,75 @@ static const uint8_t orders_128x128[1] = { 0 }; static const uint8_t orders_128x64[2] = { 0, 1 }; static const uint8_t orders_64x128[2] = { 0, 1 }; static const uint8_t orders_64x64[4] = { - 0, 1, - 2, 3, + 0, 1, 2, 3, }; static const uint8_t orders_64x32[8] = { - 0, 2, - 1, 3, - 4, 6, - 5, 7, + 0, 2, 1, 3, 4, 6, 5, 7, }; static const uint8_t orders_32x64[8] = { - 0, 1, 2, 3, - 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, }; static const uint8_t orders_32x32[16] = { - 0, 1, 4, 5, - 2, 3, 6, 7, - 8, 9, 12, 13, - 10, 11, 14, 15, + 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, }; static const uint8_t orders_32x16[32] = { - 0, 2, 8, 10, - 1, 3, 9, 11, - 4, 6, 12, 14, - 5, 7, 13, 15, - 16, 18, 24, 26, - 17, 19, 25, 27, - 20, 22, 28, 30, - 21, 23, 29, 31, + 0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15, + 16, 18, 24, 26, 17, 19, 25, 27, 20, 22, 28, 30, 21, 23, 29, 31, }; static const uint8_t orders_16x32[32] = { - 0, 1, 2, 3, 8, 9, 10, 11, - 4, 5, 6, 7, 12, 13, 14, 15, - 16, 17, 18, 19, 24, 25, 26, 27, - 20, 21, 22, 23, 28, 29, 30, 31, + 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15, + 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31, }; static const uint8_t orders_16x16[64] = { - 0, 1, 4, 5, 16, 17, 20, 21, - 2, 3, 6, 7, 18, 19, 22, 23, - 8, 9, 12, 13, 24, 25, 28, 29, - 10, 11, 14, 15, 26, 27, 30, 31, - 32, 33, 36, 37, 48, 49, 52, 53, - 34, 35, 38, 39, 50, 51, 54, 55, - 40, 41, 44, 45, 56, 57, 60, 61, - 42, 43, 46, 47, 58, 59, 62, 63, + 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23, + 8, 9, 12, 13, 24, 25, 28, 29, 10, 11, 14, 15, 26, 27, 30, 31, + 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39, 50, 51, 54, 55, + 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63, }; #if CONFIG_EXT_PARTITION static const uint8_t orders_16x8[128] = { - 0, 2, 8, 10, 32, 34, 40, 42, - 1, 3, 9, 11, 33, 35, 41, 43, - 4, 6, 12, 14, 36, 38, 44, 46, - 5, 7, 13, 15, 37, 39, 45, 47, - 16, 18, 24, 26, 48, 50, 56, 58, - 17, 19, 25, 27, 49, 51, 57, 59, - 20, 22, 28, 30, 52, 54, 60, 62, - 21, 23, 29, 31, 53, 55, 61, 63, - 64, 66, 72, 74, 96, 98, 104, 106, - 65, 67, 73, 75, 97, 99, 105, 107, - 68, 70, 76, 78, 100, 102, 108, 110, - 69, 71, 77, 79, 101, 103, 109, 111, - 80, 82, 88, 90, 112, 114, 120, 122, - 81, 83, 89, 91, 113, 115, 121, 123, - 84, 86, 92, 94, 116, 118, 124, 126, - 85, 87, 93, 95, 117, 119, 125, 127, + 0, 2, 8, 10, 32, 34, 40, 42, 1, 3, 9, 11, 33, 35, 41, 43, + 4, 6, 12, 14, 36, 38, 44, 46, 5, 7, 13, 15, 37, 39, 45, 47, + 16, 18, 24, 26, 48, 50, 56, 58, 17, 19, 25, 27, 49, 51, 57, 59, + 20, 22, 28, 30, 52, 54, 60, 62, 21, 23, 29, 31, 53, 55, 61, 63, + 64, 66, 72, 74, 96, 98, 104, 106, 65, 67, 73, 75, 97, 99, 105, 107, + 68, 70, 76, 78, 100, 102, 108, 110, 69, 71, 77, 79, 101, 103, 109, 111, + 80, 82, 88, 90, 112, 114, 120, 122, 81, 83, 89, 91, 113, 115, 121, 123, + 84, 86, 92, 94, 116, 118, 124, 126, 85, 87, 93, 95, 117, 119, 125, 127, }; static const uint8_t orders_8x16[128] = { - 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43, - 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47, - 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59, - 20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63, - 64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107, + 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43, + 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47, + 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59, + 20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63, + 64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107, 68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111, 80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123, 84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127, }; static const uint8_t orders_8x8[256] = { -0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, -2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87, -8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, 89, 92, 93, -10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, 90, 91, 94, 95, -32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, 101, 112, 113, 116, 117, -34, 35, 38, 39, 50, 51, 54, 55, 98, 99, 102, 103, 114, 115, 118, 119, -40, 41, 44, 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125, -42, 43, 46, 47, 58, 59, 62, 63, 106, 107, 110, 111, 122, 123, 126, 127, -128, 129, 132, 133, 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213, -130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, -136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, -138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, -160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, -162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, -168, 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, -170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, 255, + 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, + 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, + 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, + 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, + 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, + 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, + 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, + 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, + 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, + 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, + 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, + 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, + 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, + 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, + 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, + 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, + 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, + 255, }; +/* clang-format off */ static const uint8_t *const orders[BLOCK_SIZES] = { // 4X4 orders_8x8, @@ -160,7 +135,9 @@ static const uint8_t *const orders[BLOCK_SIZES] = { // 64x128, 128x64, 128x128 orders_64x128, orders_128x64, orders_128x128 }; +/* clang-format on */ #else +/* clang-format off */ static const uint8_t *const orders[BLOCK_SIZES] = { // 4X4 orders_16x16, @@ -173,48 +150,45 @@ static const uint8_t *const orders[BLOCK_SIZES] = { // 32X64, 64X32, 64X64 orders_64x128, orders_128x64, orders_128x128 }; +/* clang-format on */ #endif // CONFIG_EXT_PARTITION #if CONFIG_EXT_PARTITION_TYPES static const uint8_t orders_verta_64x64[4] = { - 0, 2, - 1, 2, + 0, 2, 1, 2, }; static const uint8_t orders_verta_32x32[16] = { - 0, 2, 4, 6, - 1, 2, 5, 6, - 8, 10, 12, 14, - 9, 10, 13, 14, + 0, 2, 4, 6, 1, 2, 5, 6, 8, 10, 12, 14, 9, 10, 13, 14, }; static const uint8_t orders_verta_16x16[64] = { - 0, 2, 4, 6, 16, 18, 20, 22, - 1, 2, 5, 6, 17, 18, 21, 22, - 8, 10, 12, 14, 24, 26, 28, 30, - 9, 10, 13, 14, 25, 26, 29, 30, - 32, 34, 36, 38, 48, 50, 52, 54, - 33, 34, 37, 38, 49, 50, 53, 54, - 40, 42, 44, 46, 56, 58, 60, 62, - 41, 42, 45, 46, 57, 58, 61, 62, + 0, 2, 4, 6, 16, 18, 20, 22, 1, 2, 5, 6, 17, 18, 21, 22, + 8, 10, 12, 14, 24, 26, 28, 30, 9, 10, 13, 14, 25, 26, 29, 30, + 32, 34, 36, 38, 48, 50, 52, 54, 33, 34, 37, 38, 49, 50, 53, 54, + 40, 42, 44, 46, 56, 58, 60, 62, 41, 42, 45, 46, 57, 58, 61, 62, }; #if CONFIG_EXT_PARTITION static const uint8_t orders_verta_8x8[256] = { -0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84, 86, -1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82, 85, 86, -8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88, 90, 92, 94, -9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78, 89, 90, 93, 94, -32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100, 102, 112, 114, 116, 118, -33, 34, 37, 38, 49, 50, 53, 54, 97, 98, 101, 102, 113, 114, 117, 118, -40, 42, 44, 46, 56, 58, 60, 62, 104, 106, 108, 110, 120, 122, 124, 126, -41, 42, 45, 46, 57, 58, 61, 62, 105, 106, 109, 110, 121, 122, 125, 126, -128, 130, 132, 134, 144, 146, 148, 150, 192, 194, 196, 198, 208, 210, 212, 214, -129, 130, 133, 134, 145, 146, 149, 150, 193, 194, 197, 198, 209, 210, 213, 214, -136, 138, 140, 142, 152, 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, -137, 138, 141, 142, 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, -160, 162, 164, 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, -161, 162, 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, -168, 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254, -169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253, 254, + 0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84, + 86, 1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82, + 85, 86, 8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88, + 90, 92, 94, 9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78, + 89, 90, 93, 94, 32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100, + 102, 112, 114, 116, 118, 33, 34, 37, 38, 49, 50, 53, 54, 97, 98, + 101, 102, 113, 114, 117, 118, 40, 42, 44, 46, 56, 58, 60, 62, 104, + 106, 108, 110, 120, 122, 124, 126, 41, 42, 45, 46, 57, 58, 61, 62, + 105, 106, 109, 110, 121, 122, 125, 126, 128, 130, 132, 134, 144, 146, 148, + 150, 192, 194, 196, 198, 208, 210, 212, 214, 129, 130, 133, 134, 145, 146, + 149, 150, 193, 194, 197, 198, 209, 210, 213, 214, 136, 138, 140, 142, 152, + 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, 137, 138, 141, 142, + 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, 160, 162, 164, + 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, 161, 162, + 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, 168, + 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254, + 169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253, + 254, }; + +/* clang-format off */ static const uint8_t *const orders_verta[BLOCK_SIZES] = { // 4X4 orders_verta_8x8, @@ -229,7 +203,9 @@ static const uint8_t *const orders_verta[BLOCK_SIZES] = { // 64x128, 128x64, 128x128 orders_64x128, orders_128x64, orders_128x128 }; +/* clang-format on */ #else +/* clang-format off */ static const uint8_t *const orders_verta[BLOCK_SIZES] = { // 4X4 orders_verta_16x16, @@ -242,6 +218,7 @@ static const uint8_t *const orders_verta[BLOCK_SIZES] = { // 32X64, 64X32, 64X64 orders_64x128, orders_128x64, orders_128x128 }; +/* clang-format on */ #endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION_TYPES @@ -259,8 +236,7 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, return 0; } else { // Handle block size 4x8 and 4x4 - if (ss_x == 0 && num_4x4_blocks_wide_lookup[bsize] < 2 && x == 0) - return 1; + if (ss_x == 0 && num_4x4_blocks_wide_lookup[bsize] < 2 && x == 0) return 1; if (y == 0) { const int hl = mi_height_log2_lookup[bsize]; @@ -271,21 +247,18 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, order = orders_verta[bsize]; else #endif // CONFIG_EXT_PARTITION_TYPES - order = orders[bsize]; + order = orders[bsize]; - if (x + step < w) - return 1; + if (x + step < w) return 1; mi_row = (mi_row & MAX_MIB_MASK) >> hl; mi_col = (mi_col & MAX_MIB_MASK) >> wl; // If top row of coding unit - if (mi_row == 0) - return 1; + if (mi_row == 0) return 1; // If rightmost column of coding unit - if (((mi_col + 1) << wl) >= MAX_MIB_SIZE) - return 0; + if (((mi_col + 1) << wl) >= MAX_MIB_SIZE) return 0; my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0]; tr_order = order[((mi_row - 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 1]; @@ -298,8 +271,8 @@ static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col, } static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col, - int bottom_available, TX_SIZE txsz, - int y, int x, int ss_y) { + int bottom_available, TX_SIZE txsz, int y, int x, + int ss_y) { if (!bottom_available || x != 0) { return 0; } else { @@ -311,11 +284,9 @@ static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col, int my_order, bl_order; // Handle block size 8x4 and 4x4 - if (ss_y == 0 && num_4x4_blocks_high_lookup[bsize] < 2 && y == 0) - return 1; + if (ss_y == 0 && num_4x4_blocks_high_lookup[bsize] < 2 && y == 0) return 1; - if (y + step < h) - return 1; + if (y + step < h) return 1; mi_row = (mi_row & MAX_MIB_MASK) >> hl; mi_col = (mi_col & MAX_MIB_MASK) >> wl; @@ -323,8 +294,7 @@ static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col, if (mi_col == 0) return (mi_row << (hl + !ss_y)) + y + step < (MAX_MIB_SIZE << !ss_y); - if (((mi_row + 1) << hl) >= MAX_MIB_SIZE) - return 0; + if (((mi_row + 1) << hl) >= MAX_MIB_SIZE) return 0; my_order = order[((mi_row + 0) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col + 0]; bl_order = order[((mi_row + 1) << (MAX_MIB_SIZE_LOG2 - wl)) + mi_col - 1]; @@ -348,12 +318,12 @@ static intra_high_pred_fn dc_pred_high[2][2][4]; #endif // CONFIG_VP9_HIGHBITDEPTH static void vp10_init_intra_predictors_internal(void) { -#define INIT_NO_4X4(p, type) \ - p[TX_8X8] = vpx_##type##_predictor_8x8; \ +#define INIT_NO_4X4(p, type) \ + p[TX_8X8] = vpx_##type##_predictor_8x8; \ p[TX_16X16] = vpx_##type##_predictor_16x16; \ p[TX_32X32] = vpx_##type##_predictor_32x32 -#define INIT_ALL_SIZES(p, type) \ +#define INIT_ALL_SIZES(p, type) \ p[TX_4X4] = vpx_##type##_predictor_4x4; \ INIT_NO_4X4(p, type) @@ -395,16 +365,16 @@ static void vp10_init_intra_predictors_internal(void) { #if CONFIG_EXT_INTRA static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = { - NEED_LEFT | NEED_ABOVE, // FILTER_DC - NEED_LEFT | NEED_ABOVE, // FILTER_V - NEED_LEFT | NEED_ABOVE, // FILTER_H - NEED_LEFT | NEED_ABOVE, // FILTER_D45 - NEED_LEFT | NEED_ABOVE, // FILTER_D135 - NEED_LEFT | NEED_ABOVE, // FILTER_D117 - NEED_LEFT | NEED_ABOVE, // FILTER_D153 - NEED_LEFT | NEED_ABOVE, // FILTER_D207 - NEED_LEFT | NEED_ABOVE, // FILTER_D63 - NEED_LEFT | NEED_ABOVE, // FILTER_TM + NEED_LEFT | NEED_ABOVE, // FILTER_DC + NEED_LEFT | NEED_ABOVE, // FILTER_V + NEED_LEFT | NEED_ABOVE, // FILTER_H + NEED_LEFT | NEED_ABOVE, // FILTER_D45 + NEED_LEFT | NEED_ABOVE, // FILTER_D135 + NEED_LEFT | NEED_ABOVE, // FILTER_D117 + NEED_LEFT | NEED_ABOVE, // FILTER_D153 + NEED_LEFT | NEED_ABOVE, // FILTER_D207 + NEED_LEFT | NEED_ABOVE, // FILTER_D63 + NEED_LEFT | NEED_ABOVE, // FILTER_TM }; static int intra_subpel_interp(int base, int shift, const uint8_t *ref, @@ -438,8 +408,8 @@ static int intra_subpel_interp(int base, int shift, const uint8_t *ref, // Directional prediction, zone 1: 0 < angle < 90 static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left, - int dx, int dy, INTRA_FILTER filter_type) { + const uint8_t *above, const uint8_t *left, int dx, + int dy, INTRA_FILTER filter_type) { int r, c, x, base, shift, val; (void)left; @@ -492,12 +462,12 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs, if (!flags[shift]) { const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift]; vpx_convolve8_horiz(src + pad_size, 2 * bs, buf[shift], 2 * bs, - filter, 16, - NULL, 16, 2 * bs, 2 * bs < 16 ? 2 : 1); + filter, 16, NULL, 16, 2 * bs, + 2 * bs < 16 ? 2 : 1); flags[shift] = 1; } memcpy(dst, shift == 0 ? src + pad_size + base : &buf[shift][base], - len * sizeof(dst[0])); + len * sizeof(dst[0])); } if (len < bs) @@ -535,8 +505,8 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs, // Directional prediction, zone 2: 90 < angle < 180 static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left, - int dx, int dy, INTRA_FILTER filter_type) { + const uint8_t *above, const uint8_t *left, int dx, + int dy, INTRA_FILTER filter_type) { int r, c, x, y, shift1, shift2, val, base1, base2; assert(dx > 0); @@ -549,14 +519,14 @@ static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs, for (c = 0; c < bs; ++c, ++base1, y -= dy) { if (base1 >= -1) { shift1 = x & 0xFF; - val = intra_subpel_interp(base1, shift1, above, -1, bs - 1, - filter_type); + val = + intra_subpel_interp(base1, shift1, above, -1, bs - 1, filter_type); } else { base2 = y >> 8; if (base2 >= 0) { shift2 = y & 0xFF; - val = intra_subpel_interp(base2, shift2, left, 0, bs - 1, - filter_type); + val = + intra_subpel_interp(base2, shift2, left, 0, bs - 1, filter_type); } else { val = left[0]; } @@ -568,8 +538,8 @@ static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs, // Directional prediction, zone 3: 180 < angle < 270 static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left, - int dx, int dy, INTRA_FILTER filter_type) { + const uint8_t *above, const uint8_t *left, int dx, + int dy, INTRA_FILTER filter_type) { int r, c, y, base, shift, val; (void)above; @@ -586,10 +556,8 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, uint8_t flags[SUBPEL_SHIFTS]; memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0])); - for (i = 0; i < pad_size; ++i) - src[4 * i] = left[0]; - for (i = 0; i < 2 * bs; ++i) - src[4 * (i + pad_size)] = left[i]; + for (i = 0; i < pad_size; ++i) src[4 * i] = left[0]; + for (i = 0; i < 2 * bs; ++i) src[4 * (i + pad_size)] = left[i]; for (i = 0; i < pad_size; ++i) src[4 * (i + 2 * bs + pad_size)] = left[2 * bs - 1]; flags[0] = 1; @@ -606,7 +574,7 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, if (len <= 0) { for (r = 0; r < bs; ++r) { - dst[r * stride + c] = left[ 2 * bs - 1]; + dst[r * stride + c] = left[2 * bs - 1]; } continue; } @@ -623,9 +591,8 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, } else { if (!flags[shift]) { const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift]; - vpx_convolve8_vert(src + 4 * pad_size, 4, - buf[0] + 4 * shift, 4 * SUBPEL_SHIFTS, NULL, 16, - filter, 16, + vpx_convolve8_vert(src + 4 * pad_size, 4, buf[0] + 4 * shift, + 4 * SUBPEL_SHIFTS, NULL, 16, filter, 16, 2 * bs < 16 ? 4 : 4, 2 * bs); flags[shift] = 1; } @@ -643,7 +610,7 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, if (len < bs) { for (r = len; r < bs; ++r) { - dst[r * stride + c] = left[ 2 * bs - 1]; + dst[r * stride + c] = left[2 * bs - 1]; } } } @@ -662,8 +629,7 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs, val = ROUND_POWER_OF_TWO(val, 8); dst[r * stride + c] = clip_pixel(val); } else { - for (; r < bs; ++r) - dst[r * stride + c] = left[2 * bs - 1]; + for (; r < bs; ++r) dst[r * stride + c] = left[2 * bs - 1]; break; } } @@ -691,10 +657,10 @@ static inline int get_dx(int angle) { // If angle > 180 && angle < 270, dy = -((int)(256 * t)); static inline int get_dy(int angle) { if (angle > 90 && angle < 180) { - return dr_intra_derivative[angle - 90]; - } else if (angle > 180 && angle < 270) { - return -dr_intra_derivative[270 - angle]; - } else { + return dr_intra_derivative[angle - 90]; + } else if (angle > 180 && angle < 270) { + return -dr_intra_derivative[270 - angle]; + } else { // In this case, we are not really going to use dy. We may return any value. return 1; } @@ -723,13 +689,13 @@ static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, - const uint8_t *left, - int mode) { + const uint8_t *left, int mode) { int k, r, c; int pred[33][65]; int mean, ipred; - const TX_SIZE tx_size = (bs == 32) ? TX_32X32 : - ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); + const TX_SIZE tx_size = + (bs == 32) ? TX_32X32 + : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); const int c0 = filter_intra_taps_4[tx_size][mode][0]; const int c1 = filter_intra_taps_4[tx_size][mode][1]; const int c2 = filter_intra_taps_4[tx_size][mode][2]; @@ -744,16 +710,14 @@ static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs, } mean = (mean + bs) / (2 * bs); - for (r = 0; r < bs; ++r) - pred[r + 1][0] = (int)left[r] - mean; + for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean; - for (c = 0; c < 2 * bs + 1; ++c) - pred[0][c] = (int)above[c - 1] - mean; + for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean; for (r = 1; r < bs + 1; ++r) for (c = 1; c < 2 * bs + 1 - r; ++c) { ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] + - c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1]; + c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1]; pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS); } @@ -816,21 +780,15 @@ void vp10_tm_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs, filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED); } -static void filter_intra_predictors(int mode, uint8_t *dst, - ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { +static void filter_intra_predictors(int mode, uint8_t *dst, ptrdiff_t stride, + int bs, const uint8_t *above, + const uint8_t *left) { switch (mode) { - case DC_PRED: - vp10_dc_filter_predictor(dst, stride, bs, above, left); - break; - case V_PRED: - vp10_v_filter_predictor(dst, stride, bs, above, left); - break; - case H_PRED: - vp10_h_filter_predictor(dst, stride, bs, above, left); - break; + case DC_PRED: vp10_dc_filter_predictor(dst, stride, bs, above, left); break; + case V_PRED: vp10_v_filter_predictor(dst, stride, bs, above, left); break; + case H_PRED: vp10_h_filter_predictor(dst, stride, bs, above, left); break; case D45_PRED: - vp10_d45_filter_predictor(dst, stride, bs, above, left); + vp10_d45_filter_predictor(dst, stride, bs, above, left); break; case D135_PRED: vp10_d135_filter_predictor(dst, stride, bs, above, left); @@ -839,7 +797,7 @@ static void filter_intra_predictors(int mode, uint8_t *dst, vp10_d117_filter_predictor(dst, stride, bs, above, left); break; case D153_PRED: - vp10_d153_filter_predictor(dst, stride, bs, above, left); + vp10_d153_filter_predictor(dst, stride, bs, above, left); break; case D207_PRED: vp10_d207_filter_predictor(dst, stride, bs, above, left); @@ -847,11 +805,8 @@ static void filter_intra_predictors(int mode, uint8_t *dst, case D63_PRED: vp10_d63_filter_predictor(dst, stride, bs, above, left); break; - case TM_PRED: - vp10_tm_filter_predictor(dst, stride, bs, above, left); - break; - default: - assert(0); + case TM_PRED: vp10_tm_filter_predictor(dst, stride, bs, above, left); break; + default: assert(0); } } @@ -939,7 +894,7 @@ static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs, y = (r << 8) - x * dy; base = y >> 8; if (base >= 0) { - shift = y - (base << 8); + shift = y - (base << 8); val = highbd_intra_subpel_interp(base, shift, left, 0, bs - 1, filter_type); } else { @@ -982,24 +937,24 @@ static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs, } } -static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { int r; - (void) left; - (void) bd; + (void)left; + (void)bd; for (r = 0; r < bs; r++) { memcpy(dst, above, bs * sizeof(uint16_t)); dst += stride; } } -static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { int r; - (void) above; - (void) bd; + (void)above; + (void)bd; for (r = 0; r < bs; r++) { vpx_memset16(dst, left[r], bs); dst += stride; @@ -1033,8 +988,9 @@ static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride, int k, r, c; int pred[33][65]; int mean, ipred; - const TX_SIZE tx_size = (bs == 32) ? TX_32X32 : - ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); + const TX_SIZE tx_size = + (bs == 32) ? TX_32X32 + : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); const int c0 = filter_intra_taps_4[tx_size][mode][0]; const int c1 = filter_intra_taps_4[tx_size][mode][1]; const int c2 = filter_intra_taps_4[tx_size][mode][2]; @@ -1049,16 +1005,14 @@ static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride, } mean = (mean + bs) / (2 * bs); - for (r = 0; r < bs; ++r) - pred[r + 1][0] = (int)left[r] - mean; + for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean; - for (c = 0; c < 2 * bs + 1; ++c) - pred[0][c] = (int)above[c - 1] - mean; + for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean; for (r = 1; r < bs + 1; ++r) for (c = 1; c < 2 * bs + 1 - r; ++c) { ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] + - c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1]; + c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1]; pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS); } @@ -1071,29 +1025,27 @@ static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride, } } -void vp10_highbd_dc_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +void vp10_highbd_dc_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED, bd); } -void vp10_highbd_v_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +void vp10_highbd_v_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { - highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED, - bd); + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED, bd); } -void vp10_highbd_h_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +void vp10_highbd_h_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { - highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED, - bd); + highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED, bd); } -void vp10_highbd_d45_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +void vp10_highbd_d45_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED, bd); @@ -1127,15 +1079,15 @@ void vp10_highbd_d207_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, bd); } -void vp10_highbd_d63_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +void vp10_highbd_d63_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED, bd); } -void vp10_highbd_tm_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, +void vp10_highbd_tm_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs, + const uint16_t *above, const uint16_t *left, int bd) { highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED, bd); @@ -1156,7 +1108,7 @@ static void highbd_filter_intra_predictors(int mode, uint16_t *dst, vp10_highbd_h_filter_predictor(dst, stride, bs, above, left, bd); break; case D45_PRED: - vp10_highbd_d45_filter_predictor(dst, stride, bs, above, left, bd); + vp10_highbd_d45_filter_predictor(dst, stride, bs, above, left, bd); break; case D135_PRED: vp10_highbd_d135_filter_predictor(dst, stride, bs, above, left, bd); @@ -1165,7 +1117,7 @@ static void highbd_filter_intra_predictors(int mode, uint16_t *dst, vp10_highbd_d117_filter_predictor(dst, stride, bs, above, left, bd); break; case D153_PRED: - vp10_highbd_d153_filter_predictor(dst, stride, bs, above, left, bd); + vp10_highbd_d153_filter_predictor(dst, stride, bs, above, left, bd); break; case D207_PRED: vp10_highbd_d207_filter_predictor(dst, stride, bs, above, left, bd); @@ -1176,24 +1128,17 @@ static void highbd_filter_intra_predictors(int mode, uint16_t *dst, case TM_PRED: vp10_highbd_tm_filter_predictor(dst, stride, bs, above, left, bd); break; - default: - assert(0); + default: assert(0); } } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EXT_INTRA #if CONFIG_VP9_HIGHBITDEPTH -static void build_intra_predictors_high(const MACROBLOCKD *xd, - const uint8_t *ref8, - int ref_stride, - uint8_t *dst8, - int dst_stride, - PREDICTION_MODE mode, - TX_SIZE tx_size, - int n_top_px, int n_topright_px, - int n_left_px, int n_bottomleft_px, - int plane) { +static void build_intra_predictors_high( + const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8, + int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, + int n_topright_px, int n_left_px, int n_bottomleft_px, int plane) { int i; uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); @@ -1206,11 +1151,11 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, int need_above = extend_modes[mode] & NEED_ABOVE; const uint16_t *above_ref = ref - ref_stride; int base = 128 << (xd->bd - 8); - // 127 127 127 .. 127 127 127 127 127 127 - // 129 A B .. Y Z - // 129 C D .. W X - // 129 E F .. U V - // 129 G H .. S T T T T T +// 127 127 127 .. 127 127 127 127 127 127 +// 129 A B .. Y Z +// 129 C D .. W X +// 129 E F .. U V +// 129 G H .. S T T T T T #if CONFIG_EXT_INTRA const EXT_INTRA_MODE_INFO *ext_intra_mode_info = @@ -1222,7 +1167,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, if (mode != DC_PRED && mode != TM_PRED && xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { p_angle = mode_to_angle_map[mode] + - xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP; + xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP; if (p_angle <= 90) need_above = 1, need_left = 0; else if (p_angle < 180) @@ -1239,7 +1184,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, } #endif // CONFIG_EXT_INTRA - (void) plane; + (void)plane; assert(n_top_px >= 0); assert(n_topright_px >= 0); assert(n_left_px >= 0); @@ -1260,10 +1205,10 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, #if CONFIG_EXT_INTRA int need_bottom; if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { - need_bottom = 0; + need_bottom = 0; } else if (mode != DC_PRED && mode != TM_PRED && - xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { - need_bottom = p_angle > 180; + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + need_bottom = p_angle > 180; } else { need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); } @@ -1272,8 +1217,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, #endif // CONFIG_EXT_INTRA i = 0; if (n_left_px > 0) { - for (; i < n_left_px; i++) - left_col[i] = ref[i * ref_stride - 1]; + for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1]; if (need_bottom && n_bottomleft_px > 0) { assert(i == bs); for (; i < bs + n_bottomleft_px; i++) @@ -1293,7 +1237,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { need_right = 1; } else if (mode != DC_PRED && mode != TM_PRED && - xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { need_right = p_angle < 90; } else { need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); @@ -1320,21 +1264,21 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] || (extend_modes[mode] & NEED_ABOVELEFT) || (mode != DC_PRED && mode != TM_PRED && - xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) { - above_row[-1] = n_top_px > 0 ? - (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1; + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) { + above_row[-1] = + n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1; } #else if ((extend_modes[mode] & NEED_ABOVELEFT)) { - above_row[-1] = n_top_px > 0 ? - (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1; + above_row[-1] = + n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1; } #endif // CONFIG_EXT_INTRA #if CONFIG_EXT_INTRA if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { highbd_filter_intra_predictors(ext_intra_mode, dst, dst_stride, bs, - const_above_row, left_col, xd->bd); + const_above_row, left_col, xd->bd); return; } @@ -1343,17 +1287,16 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, INTRA_FILTER filter = INTRA_FILTER_LINEAR; if (plane == 0 && vp10_is_intra_filter_switchable(p_angle)) filter = xd->mi[0]->mbmi.intra_filter; - highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col, - p_angle, xd->bd, filter); + highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col, p_angle, + xd->bd, filter); return; } #endif // CONFIG_EXT_INTRA // predict if (mode == DC_PRED) { - dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, - const_above_row, - left_col, xd->bd); + dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size]( + dst, dst_stride, const_above_row, left_col, xd->bd); } else { pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, xd->bd); @@ -1386,7 +1329,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, if (mode != DC_PRED && mode != TM_PRED && xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { p_angle = mode_to_angle_map[mode] + - xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP; + xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP; if (p_angle <= 90) need_above = 1, need_left = 0; else if (p_angle < 180) @@ -1410,8 +1353,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, // 129 G H .. S T T T T T // .. - (void) xd; - (void) plane; + (void)xd; + (void)plane; assert(n_top_px >= 0); assert(n_topright_px >= 0); assert(n_left_px >= 0); @@ -1434,7 +1377,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { need_bottom = 0; } else if (mode != DC_PRED && mode != TM_PRED && - xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { need_bottom = p_angle > 180; } else { need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT); @@ -1444,8 +1387,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, #endif // CONFIG_EXT_INTRA i = 0; if (n_left_px > 0) { - for (; i < n_left_px; i++) - left_col[i] = ref[i * ref_stride - 1]; + for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1]; if (need_bottom && n_bottomleft_px > 0) { assert(i == bs); for (; i < bs + n_bottomleft_px; i++) @@ -1465,7 +1407,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) { need_right = 1; } else if (mode != DC_PRED && mode != TM_PRED && - xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) { need_right = p_angle < 90; } else { need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT); @@ -1492,7 +1434,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] || (extend_modes[mode] & NEED_ABOVELEFT) || (mode != DC_PRED && mode != TM_PRED && - xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) { + xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) { above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127; } #else @@ -1530,9 +1472,9 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int col_off, int row_off, int plane) { + const uint8_t *ref, int ref_stride, uint8_t *dst, + int dst_stride, int col_off, int row_off, + int plane) { const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const struct macroblockd_plane *const pd = &xd->plane[plane]; const int txw = num_4x4_blocks_wide_txsize_lookup[tx_size]; @@ -1551,29 +1493,27 @@ void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, const int txhpx = 4 * txh; // Distance between the right edge of this prediction block to // the frame right edge - const int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + - (wpx - x - txwpx); + const int xr = + (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txwpx); // Distance between the bottom edge of this prediction block to // the frame bottom edge - const int yd = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + - (hpx - y - txhpx); + const int yd = + (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txhpx); const int right_available = (mi_col + ((col_off + txw) >> (1 - pd->subsampling_x))) < xd->tile.mi_col_end; #if CONFIG_EXT_PARTITION_TYPES const PARTITION_TYPE partition = xd->mi[0]->mbmi.partition; #endif - const int have_right = vp10_has_right(bsize, mi_row, mi_col, - right_available, + const int have_right = + vp10_has_right(bsize, mi_row, mi_col, right_available, #if CONFIG_EXT_PARTITION_TYPES - partition, + partition, #endif - tx_size, row_off, col_off, - pd->subsampling_x); - const int have_bottom = vp10_has_bottom(bsize, mi_row, mi_col, - yd > 0, - tx_size, row_off, col_off, - pd->subsampling_y); + tx_size, row_off, col_off, pd->subsampling_x); + const int have_bottom = + vp10_has_bottom(bsize, mi_row, mi_col, yd > 0, tx_size, row_off, col_off, + pd->subsampling_y); if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) { const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size]; @@ -1582,10 +1522,10 @@ void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, uint8_t *map = NULL; #if CONFIG_VP9_HIGHBITDEPTH uint16_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors + - plane * PALETTE_MAX_SIZE; + plane * PALETTE_MAX_SIZE; #else uint8_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors + - plane * PALETTE_MAX_SIZE; + plane * PALETTE_MAX_SIZE; #endif // CONFIG_VP9_HIGHBITDEPTH map = xd->plane[plane != 0].color_index_map; @@ -1595,8 +1535,7 @@ void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); for (r = 0; r < bs; ++r) for (c = 0; c < bs; ++c) - dst16[r * dst_stride + c] = - palette[map[(r + y) * stride + c + x]]; + dst16[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]]; } else { for (r = 0; r < bs; ++r) for (c = 0; c < bs; ++c) @@ -1613,18 +1552,16 @@ void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, - have_top ? VPXMIN(txwpx, xr + txwpx) : 0, - have_top && have_right ? VPXMIN(txwpx, xr) : 0, - have_left ? VPXMIN(txhpx, yd + txhpx) : 0, - have_bottom && have_left ? - VPXMIN(txhpx, yd) : 0, plane); + build_intra_predictors_high( + xd, ref, ref_stride, dst, dst_stride, mode, tx_size, + have_top ? VPXMIN(txwpx, xr + txwpx) : 0, + have_top && have_right ? VPXMIN(txwpx, xr) : 0, + have_left ? VPXMIN(txhpx, yd + txhpx) : 0, + have_bottom && have_left ? VPXMIN(txhpx, yd) : 0, plane); return; } #endif - build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, + build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, have_top ? VPXMIN(txwpx, xr + txwpx) : 0, have_top && have_right ? VPXMIN(txwpx, xr) : 0, have_left ? VPXMIN(txhpx, yd + txhpx) : 0, diff --git a/vp10/common/reconintra.h b/vp10/common/reconintra.h index b53c2bf737aa034e24fc1c1ad2e3c0b0bbc75069..671e5c5c4f6f13bf56274d1fcceead4882f25833 100644 --- a/vp10/common/reconintra.h +++ b/vp10/common/reconintra.h @@ -21,10 +21,9 @@ extern "C" { void vp10_init_intra_predictors(void); void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in, - TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int aoff, int loff, int plane); + TX_SIZE tx_size, PREDICTION_MODE mode, + const uint8_t *ref, int ref_stride, uint8_t *dst, + int dst_stride, int aoff, int loff, int plane); #if CONFIG_EXT_INTRA int vp10_is_intra_filter_switchable(int angle); #endif // CONFIG_EXT_INTRA diff --git a/vp10/common/restoration.c b/vp10/common/restoration.c index c7dba0ae18f57c7b05a522e6a49f20833c4e9dec..1489297c1705ab5a36c36a6c2fb714586c85314e 100644 --- a/vp10/common/restoration.c +++ b/vp10/common/restoration.c @@ -18,18 +18,17 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" -#define RESTORATION_PARAM_PRECISION 16 -#define RESTORATION_RANGE 256 -#define RESTORATION_RANGE_SYM (2 * RESTORATION_RANGE + 1) - -static uint8_t restoration_filters_r_kf[RESTORATION_LEVELS_KF] - [RESTORATION_RANGE_SYM]; -static uint8_t restoration_filters_r[RESTORATION_LEVELS] - [RESTORATION_RANGE_SYM]; -static uint8_t restoration_filters_s_kf[RESTORATION_LEVELS_KF] - [RESTORATION_WIN][RESTORATION_WIN]; -static uint8_t restoration_filters_s[RESTORATION_LEVELS] - [RESTORATION_WIN][RESTORATION_WIN]; +#define RESTORATION_PARAM_PRECISION 16 +#define RESTORATION_RANGE 256 +#define RESTORATION_RANGE_SYM (2 * RESTORATION_RANGE + 1) + +static uint8_t + restoration_filters_r_kf[RESTORATION_LEVELS_KF][RESTORATION_RANGE_SYM]; +static uint8_t restoration_filters_r[RESTORATION_LEVELS][RESTORATION_RANGE_SYM]; +static uint8_t restoration_filters_s_kf[RESTORATION_LEVELS_KF][RESTORATION_WIN] + [RESTORATION_WIN]; +static uint8_t + restoration_filters_s[RESTORATION_LEVELS][RESTORATION_WIN][RESTORATION_WIN]; typedef struct restoration_params { int sigma_x; // spatial variance x @@ -39,60 +38,40 @@ typedef struct restoration_params { static RestorationParamsType restoration_level_to_params_arr[RESTORATION_LEVELS] = { - // Values are rounded to 1/16 th precision - {8, 9, 30}, - {9, 8, 30}, - {9, 11, 32}, - {11, 9, 32}, - {14, 14, 32}, - {18, 18, 36}, - {24, 24, 40}, - {32, 32, 40}, -}; + // Values are rounded to 1/16 th precision + { 8, 9, 30 }, { 9, 8, 30 }, { 9, 11, 32 }, { 11, 9, 32 }, + { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 32, 32, 40 }, + }; static RestorationParamsType restoration_level_to_params_arr_kf[RESTORATION_LEVELS_KF] = { - // Values are rounded to 1/16 th precision - {8, 8, 30}, - {9, 9, 32}, - {10, 10, 32}, - {12, 12, 32}, - {14, 14, 32}, - {18, 18, 36}, - {24, 24, 40}, - {30, 30, 44}, - {36, 36, 48}, - {42, 42, 48}, - {48, 48, 48}, - {48, 48, 56}, - {56, 56, 48}, - {56, 56, 56}, - {56, 56, 64}, - {64, 64, 48}, -}; - -typedef void (*restore_func_type)( - uint8_t *data8, int width, int height, - int stride, RestorationInternal *rst, - uint8_t *tmpdata8, int tmpstride); + // Values are rounded to 1/16 th precision + { 8, 8, 30 }, { 9, 9, 32 }, { 10, 10, 32 }, { 12, 12, 32 }, + { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 30, 30, 44 }, + { 36, 36, 48 }, { 42, 42, 48 }, { 48, 48, 48 }, { 48, 48, 56 }, + { 56, 56, 48 }, { 56, 56, 56 }, { 56, 56, 64 }, { 64, 64, 48 }, + }; + +typedef void (*restore_func_type)(uint8_t *data8, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata8, int tmpstride); #if CONFIG_VP9_HIGHBITDEPTH -typedef void (*restore_func_highbd_type)( - uint8_t *data8, int width, int height, - int stride, RestorationInternal *rst, - uint8_t *tmpdata8, int tmpstride, - int bit_depth); +typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata8, int tmpstride, + int bit_depth); #endif // CONFIG_VP9_HIGHBITDEPTH -static INLINE RestorationParamsType vp10_restoration_level_to_params( - int index, int kf) { - return kf ? restoration_level_to_params_arr_kf[index] : - restoration_level_to_params_arr[index]; +static INLINE RestorationParamsType vp10_restoration_level_to_params(int index, + int kf) { + return kf ? restoration_level_to_params_arr_kf[index] + : restoration_level_to_params_arr[index]; } void vp10_loop_restoration_precal() { int i; - for (i = 0; i < RESTORATION_LEVELS_KF; i ++) { + for (i = 0; i < RESTORATION_LEVELS_KF; i++) { const RestorationParamsType param = vp10_restoration_level_to_params(i, 1); const int sigma_x = param.sigma_x; const int sigma_y = param.sigma_y; @@ -104,21 +83,24 @@ void vp10_loop_restoration_precal() { uint8_t *fr = restoration_filters_r_kf[i] + RESTORATION_RANGE; int j, x, y; for (j = 0; j <= RESTORATION_RANGE; j++) { - fr[j] = (uint8_t)(0.5 + RESTORATION_FILT_STEP * - exp(-(j * j) / (2 * sigma_r_d * sigma_r_d))); + fr[j] = (uint8_t)(0.5 + + RESTORATION_FILT_STEP * + exp(-(j * j) / (2 * sigma_r_d * sigma_r_d))); fr[-j] = fr[j]; } for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) { - restoration_filters_s_kf[i][y + RESTORATION_HALFWIN] - [x + RESTORATION_HALFWIN] = - (uint8_t)(0.5 + RESTORATION_FILT_STEP * - exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) - -(y * y) / (2 * sigma_y_d * sigma_y_d))); + restoration_filters_s_kf[i][y + + RESTORATION_HALFWIN][x + + RESTORATION_HALFWIN] = + (uint8_t)(0.5 + + RESTORATION_FILT_STEP * + exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) - + (y * y) / (2 * sigma_y_d * sigma_y_d))); } } } - for (i = 0; i < RESTORATION_LEVELS; i ++) { + for (i = 0; i < RESTORATION_LEVELS; i++) { const RestorationParamsType param = vp10_restoration_level_to_params(i, 0); const int sigma_x = param.sigma_x; const int sigma_y = param.sigma_y; @@ -130,39 +112,41 @@ void vp10_loop_restoration_precal() { uint8_t *fr = restoration_filters_r[i] + RESTORATION_RANGE; int j, x, y; for (j = 0; j <= RESTORATION_RANGE; j++) { - fr[j] = (uint8_t)(0.5 + RESTORATION_FILT_STEP * - exp(-(j * j) / (2 * sigma_r_d * sigma_r_d))); + fr[j] = (uint8_t)(0.5 + + RESTORATION_FILT_STEP * + exp(-(j * j) / (2 * sigma_r_d * sigma_r_d))); fr[-j] = fr[j]; } for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) { - restoration_filters_s[i][y + RESTORATION_HALFWIN] - [x + RESTORATION_HALFWIN] = - (uint8_t)(0.5 + RESTORATION_FILT_STEP * - exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) - -(y * y) / (2 * sigma_y_d * sigma_y_d))); + restoration_filters_s[i][y + RESTORATION_HALFWIN][x + + RESTORATION_HALFWIN] = + (uint8_t)(0.5 + + RESTORATION_FILT_STEP * + exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) - + (y * y) / (2 * sigma_y_d * sigma_y_d))); } } } } int vp10_restoration_level_bits(const VP10_COMMON *const cm) { - return cm->frame_type == KEY_FRAME ? - RESTORATION_LEVEL_BITS_KF : RESTORATION_LEVEL_BITS; + return cm->frame_type == KEY_FRAME ? RESTORATION_LEVEL_BITS_KF + : RESTORATION_LEVEL_BITS; } -void vp10_loop_restoration_init(RestorationInternal *rst, - RestorationInfo *rsi, int kf) { +void vp10_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi, + int kf) { int i; rst->restoration_type = rsi->restoration_type; if (rsi->restoration_type == RESTORE_BILATERAL) { const int level = rsi->restoration_level; assert(level >= 0); - rst->wr_lut = kf ? restoration_filters_r_kf[level] : - restoration_filters_r[level]; + rst->wr_lut = + kf ? restoration_filters_r_kf[level] : restoration_filters_r[level]; for (i = 0; i < RESTORATION_WIN; i++) - rst->wx_lut[i] = kf ? restoration_filters_s_kf[level][i] : - restoration_filters_s[level][i]; + rst->wx_lut[i] = kf ? restoration_filters_s_kf[level][i] + : restoration_filters_s[level][i]; } else if (rsi->restoration_type == RESTORE_WIENER) { rst->vfilter[RESTORATION_HALFWIN] = rst->hfilter[RESTORATION_HALFWIN] = RESTORATION_FILT_STEP; @@ -190,8 +174,8 @@ static void loop_bilateral_filter(uint8_t *data, int width, int height, uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride; for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) { - wt = (int)rst->wx_lut[y + RESTORATION_HALFWIN] - [x + RESTORATION_HALFWIN] * + wt = (int)rst + ->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] * (int)wr_lut_[data_p2[x] - data_p[j]]; wtsum += wt; flsum += wt * data_p2[x]; @@ -215,8 +199,8 @@ static void loop_bilateral_filter(uint8_t *data, int width, int height, } uint8_t hor_sym_filter(uint8_t *d, int *hfilter) { - int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + - d[0] * hfilter[RESTORATION_HALFWIN]; + int32_t s = + (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN]; int i; for (i = 1; i <= RESTORATION_HALFWIN; ++i) s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i]; @@ -224,17 +208,17 @@ uint8_t hor_sym_filter(uint8_t *d, int *hfilter) { } uint8_t ver_sym_filter(uint8_t *d, int stride, int *vfilter) { - int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + - d[0] * vfilter[RESTORATION_HALFWIN]; + int32_t s = + (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * vfilter[RESTORATION_HALFWIN]; int i; for (i = 1; i <= RESTORATION_HALFWIN; ++i) s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i]; return clip_pixel(s >> RESTORATION_FILT_BITS); } -static void loop_wiener_filter(uint8_t *data, int width, int height, - int stride, RestorationInternal *rst, - uint8_t *tmpdata, int tmpstride) { +static void loop_wiener_filter(uint8_t *data, int width, int height, int stride, + RestorationInternal *rst, uint8_t *tmpdata, + int tmpstride) { uint8_t *data_p = data; uint8_t *tmpdata_p = tmpdata; int i, j; @@ -271,10 +255,10 @@ static void loop_wiener_filter(uint8_t *data, int width, int height, } #if CONFIG_VP9_HIGHBITDEPTH -static void loop_bilateral_filter_highbd( - uint8_t *data8, int width, int height, - int stride, RestorationInternal *rst, - uint8_t *tmpdata8, int tmpstride, int bit_depth) { +static void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata8, int tmpstride, + int bit_depth) { int i, j; const uint8_t *wr_lut_ = rst->wr_lut + RESTORATION_RANGE; @@ -291,8 +275,8 @@ static void loop_bilateral_filter_highbd( for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) { diff_r = (data_p2[x] - data_p[j]) >> (bit_depth - 8); assert(diff_r >= -RESTORATION_RANGE && diff_r <= RESTORATION_RANGE); - wt = (int)rst->wx_lut[y + RESTORATION_HALFWIN] - [x + RESTORATION_HALFWIN] * + wt = (int)rst + ->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] * (int)wr_lut_[diff_r]; wtsum += wt; flsum += wt * data_p2[x]; @@ -300,8 +284,8 @@ static void loop_bilateral_filter_highbd( data_p2 += stride; } if (wtsum > 0) - tmpdata_p[j] = clip_pixel_highbd((int)((flsum + wtsum / 2) / wtsum), - bit_depth); + tmpdata_p[j] = + clip_pixel_highbd((int)((flsum + wtsum / 2) / wtsum), bit_depth); else tmpdata_p[j] = data_p[j]; } @@ -317,8 +301,8 @@ static void loop_bilateral_filter_highbd( } uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) { - int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + - d[0] * hfilter[RESTORATION_HALFWIN]; + int32_t s = + (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN]; int i; for (i = 1; i <= RESTORATION_HALFWIN; ++i) s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i]; @@ -326,8 +310,8 @@ uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) { } uint16_t ver_sym_filter_highbd(uint16_t *d, int stride, int *vfilter, int bd) { - int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + - d[0] * vfilter[RESTORATION_HALFWIN]; + int32_t s = + (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * vfilter[RESTORATION_HALFWIN]; int i; for (i = 1; i <= RESTORATION_HALFWIN; ++i) s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i]; @@ -363,8 +347,8 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height, } for (; i < height - RESTORATION_HALFWIN; ++i) { for (j = 0; j < width; ++j) - *data_p++ = ver_sym_filter_highbd( - tmpdata_p++, tmpstride, rst->vfilter, bit_depth); + *data_p++ = ver_sym_filter_highbd(tmpdata_p++, tmpstride, rst->vfilter, + bit_depth); data_p += stride - width; tmpdata_p += tmpstride - width; } @@ -376,10 +360,8 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height, } #endif // CONFIG_VP9_HIGHBITDEPTH -void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, - VP10_COMMON *cm, - int start_mi_row, int end_mi_row, - int y_only) { +void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, + int start_mi_row, int end_mi_row, int y_only) { const int ywidth = frame->y_crop_width; const int ystride = frame->y_stride; const int uvwidth = frame->uv_crop_width; @@ -389,12 +371,14 @@ void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, int yend = end_mi_row << MI_SIZE_LOG2; int uvend = yend >> cm->subsampling_y; restore_func_type restore_func = - cm->rst_internal.restoration_type == RESTORE_BILATERAL ? - loop_bilateral_filter : loop_wiener_filter; + cm->rst_internal.restoration_type == RESTORE_BILATERAL + ? loop_bilateral_filter + : loop_wiener_filter; #if CONFIG_VP9_HIGHBITDEPTH restore_func_highbd_type restore_func_highbd = - cm->rst_internal.restoration_type == RESTORE_BILATERAL ? - loop_bilateral_filter_highbd : loop_wiener_filter_highbd; + cm->rst_internal.restoration_type == RESTORE_BILATERAL + ? loop_bilateral_filter_highbd + : loop_wiener_filter_highbd; #endif // CONFIG_VP9_HIGHBITDEPTH YV12_BUFFER_CONFIG *tmp_buf; @@ -415,53 +399,46 @@ void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) - restore_func_highbd( - frame->y_buffer + ystart * ystride, - ywidth, yend - ystart, ystride, &cm->rst_internal, - tmp_buf->y_buffer + ystart * tmp_buf->y_stride, - tmp_buf->y_stride, cm->bit_depth); + restore_func_highbd(frame->y_buffer + ystart * ystride, ywidth, + yend - ystart, ystride, &cm->rst_internal, + tmp_buf->y_buffer + ystart * tmp_buf->y_stride, + tmp_buf->y_stride, cm->bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH - restore_func( - frame->y_buffer + ystart * ystride, - ywidth, yend - ystart, ystride, &cm->rst_internal, - tmp_buf->y_buffer + ystart * tmp_buf->y_stride, - tmp_buf->y_stride); + restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart, + ystride, &cm->rst_internal, + tmp_buf->y_buffer + ystart * tmp_buf->y_stride, + tmp_buf->y_stride); if (!y_only) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - restore_func_highbd( - frame->u_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, - tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, - tmp_buf->uv_stride, cm->bit_depth); - restore_func_highbd( - frame->v_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, - tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, - tmp_buf->uv_stride, cm->bit_depth); + restore_func_highbd(frame->u_buffer + uvstart * uvstride, uvwidth, + uvend - uvstart, uvstride, &cm->rst_internal, + tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride, cm->bit_depth); + restore_func_highbd(frame->v_buffer + uvstart * uvstride, uvwidth, + uvend - uvstart, uvstride, &cm->rst_internal, + tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride, cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH - restore_func( - frame->u_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, - tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, - tmp_buf->uv_stride); - restore_func( - frame->v_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, - tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, - tmp_buf->uv_stride); + restore_func(frame->u_buffer + uvstart * uvstride, uvwidth, + uvend - uvstart, uvstride, &cm->rst_internal, + tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride); + restore_func(frame->v_buffer + uvstart * uvstride, uvwidth, + uvend - uvstart, uvstride, &cm->rst_internal, + tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH } } -void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, - VP10_COMMON *cm, - RestorationInfo *rsi, - int y_only, int partial_frame) { +void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, + RestorationInfo *rsi, int y_only, + int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; if (rsi->restoration_type != RESTORE_NONE) { start_mi_row = 0; diff --git a/vp10/common/restoration.h b/vp10/common/restoration.h index 8c0f14399e841b05089b7e8a719836f5d9a8f9e8..8d1cbd1a676143eec3b60b4e91001d45a37f3599 100644 --- a/vp10/common/restoration.h +++ b/vp10/common/restoration.h @@ -21,36 +21,36 @@ extern "C" { #endif #define RESTORATION_LEVEL_BITS_KF 4 -#define RESTORATION_LEVELS_KF (1 << RESTORATION_LEVEL_BITS_KF) -#define RESTORATION_LEVEL_BITS 3 -#define RESTORATION_LEVELS (1 << RESTORATION_LEVEL_BITS) -#define DEF_RESTORATION_LEVEL 2 +#define RESTORATION_LEVELS_KF (1 << RESTORATION_LEVEL_BITS_KF) +#define RESTORATION_LEVEL_BITS 3 +#define RESTORATION_LEVELS (1 << RESTORATION_LEVEL_BITS) +#define DEF_RESTORATION_LEVEL 2 -#define RESTORATION_HALFWIN 3 -#define RESTORATION_HALFWIN1 (RESTORATION_HALFWIN + 1) -#define RESTORATION_WIN (2 * RESTORATION_HALFWIN + 1) -#define RESTORATION_WIN2 ((RESTORATION_WIN) * (RESTORATION_WIN)) +#define RESTORATION_HALFWIN 3 +#define RESTORATION_HALFWIN1 (RESTORATION_HALFWIN + 1) +#define RESTORATION_WIN (2 * RESTORATION_HALFWIN + 1) +#define RESTORATION_WIN2 ((RESTORATION_WIN) * (RESTORATION_WIN)) #define RESTORATION_FILT_BITS 7 #define RESTORATION_FILT_STEP (1 << RESTORATION_FILT_BITS) -#define WIENER_FILT_TAP0_MINV -5 -#define WIENER_FILT_TAP1_MINV (-23) -#define WIENER_FILT_TAP2_MINV -20 +#define WIENER_FILT_TAP0_MINV -5 +#define WIENER_FILT_TAP1_MINV (-23) +#define WIENER_FILT_TAP2_MINV -20 -#define WIENER_FILT_TAP0_BITS 4 -#define WIENER_FILT_TAP1_BITS 5 -#define WIENER_FILT_TAP2_BITS 6 +#define WIENER_FILT_TAP0_BITS 4 +#define WIENER_FILT_TAP1_BITS 5 +#define WIENER_FILT_TAP2_BITS 6 #define WIENER_FILT_BITS \ ((WIENER_FILT_TAP0_BITS + WIENER_FILT_TAP1_BITS + WIENER_FILT_TAP2_BITS) * 2) #define WIENER_FILT_TAP0_MAXV \ - (WIENER_FILT_TAP0_MINV -1 + (1 << WIENER_FILT_TAP0_BITS)) + (WIENER_FILT_TAP0_MINV - 1 + (1 << WIENER_FILT_TAP0_BITS)) #define WIENER_FILT_TAP1_MAXV \ - (WIENER_FILT_TAP1_MINV -1 + (1 << WIENER_FILT_TAP1_BITS)) + (WIENER_FILT_TAP1_MINV - 1 + (1 << WIENER_FILT_TAP1_BITS)) #define WIENER_FILT_TAP2_MAXV \ - (WIENER_FILT_TAP2_MINV -1 + (1 << WIENER_FILT_TAP2_BITS)) + (WIENER_FILT_TAP2_MINV - 1 + (1 << WIENER_FILT_TAP2_BITS)) typedef enum { RESTORE_NONE, @@ -71,17 +71,15 @@ typedef struct { int vfilter[RESTORATION_WIN], hfilter[RESTORATION_WIN]; } RestorationInternal; -int vp10_restoration_level_bits(const struct VP10Common *const cm); -void vp10_loop_restoration_init(RestorationInternal *rst, - RestorationInfo *rsi, int kf); +int vp10_restoration_level_bits(const struct VP10Common *const cm); +void vp10_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi, + int kf); void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, - struct VP10Common *cm, - RestorationInfo *rsi, + struct VP10Common *cm, RestorationInfo *rsi, int y_only, int partial_frame); void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, - struct VP10Common *cm, - int start_mi_row, int end_mi_row, - int y_only); + struct VP10Common *cm, int start_mi_row, + int end_mi_row, int y_only); void vp10_loop_restoration_precal(); #ifdef __cplusplus } // extern "C" diff --git a/vp10/common/scale.c b/vp10/common/scale.c index 65e14a99f57d887951c4dfb8ae0c1df3b10b0329..fc2235d02561955f40580bdfb887fe7a9963ee26 100644 --- a/vp10/common/scale.c +++ b/vp10/common/scale.c @@ -22,7 +22,7 @@ static INLINE int scaled_y(int val, const struct scale_factors *sf) { } static int unscaled_value(int val, const struct scale_factors *sf) { - (void) sf; + (void)sf; return val; } @@ -37,24 +37,20 @@ static int get_fixed_point_scale_factor(int other_size, int this_size) { MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; - const MV32 res = { - scaled_y(mv->row, sf) + y_off_q4, - scaled_x(mv->col, sf) + x_off_q4 - }; + const MV32 res = { scaled_y(mv->row, sf) + y_off_q4, + scaled_x(mv->col, sf) + x_off_q4 }; return res; } #if CONFIG_VP9_HIGHBITDEPTH -void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, +void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h, int use_highbd) { #else -void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h) { +void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h) { #endif - if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { + if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { sf->x_scale_fp = REF_INVALID_SCALE; sf->y_scale_fp = REF_INVALID_SCALE; return; @@ -73,12 +69,12 @@ void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, sf->scale_value_y = unscaled_value; } - // TODO(agrange): Investigate the best choice of functions to use here - // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what - // to do at full-pel offsets. The current selection, where the filter is - // applied in one direction only, and not at all for 0,0, seems to give the - // best quality, but it may be worth trying an additional mode that does - // do the filtering on full-pel. +// TODO(agrange): Investigate the best choice of functions to use here +// for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what +// to do at full-pel offsets. The current selection, where the filter is +// applied in one direction only, and not at all for 0,0, seems to give the +// best quality, but it may be worth trying an additional mode that does +// do the filtering on full-pel. #if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS sf->predict_ni[0][0][0] = vpx_convolve8_c; sf->predict_ni[0][0][1] = vpx_convolve8_avg_c; diff --git a/vp10/common/scale.h b/vp10/common/scale.h index 604b9d2d50cd4d4b043b764f82ac0f687252f774..a06dd4d2c00eb67617805302b553323ec9c3b893 100644 --- a/vp10/common/scale.h +++ b/vp10/common/scale.h @@ -23,8 +23,8 @@ extern "C" { #define REF_INVALID_SCALE -1 struct scale_factors { - int x_scale_fp; // horizontal fixed point scale factor - int y_scale_fp; // vertical fixed point scale factor + int x_scale_fp; // horizontal fixed point scale factor + int y_scale_fp; // vertical fixed point scale factor int x_step_q4; int y_step_q4; @@ -34,28 +34,26 @@ struct scale_factors { convolve_fn_t predict[2][2][2]; // horiz, vert, avg #if CONFIG_VP9_HIGHBITDEPTH highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg -#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_VP9_HIGHBITDEPTH // Functions for non-interpolating filters (those that filter zero offsets) #if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS convolve_fn_t predict_ni[2][2][2]; // horiz, vert, avg #if CONFIG_VP9_HIGHBITDEPTH highbd_convolve_fn_t highbd_predict_ni[2][2][2]; // horiz, vert, avg -#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS }; MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); #if CONFIG_VP9_HIGHBITDEPTH -void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, - int use_high); +void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h, + int use_high); #else -void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h); +void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, + int other_h, int this_w, int this_h); #endif // CONFIG_VP9_HIGHBITDEPTH static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) { @@ -69,11 +67,9 @@ static INLINE int vp10_is_scaled(const struct scale_factors *sf) { } static INLINE int valid_ref_frame_size(int ref_width, int ref_height, - int this_width, int this_height) { - return 2 * this_width >= ref_width && - 2 * this_height >= ref_height && - this_width <= 16 * ref_width && - this_height <= 16 * ref_height; + int this_width, int this_height) { + return 2 * this_width >= ref_width && 2 * this_height >= ref_height && + this_width <= 16 * ref_width && this_height <= 16 * ref_height; } #ifdef __cplusplus diff --git a/vp10/common/scan.c b/vp10/common/scan.c index fadd34ce914def43ba78b233e39aa5f380952de1..fd611b95902b9d81cbaf369adb1d91dd9bab4269 100644 --- a/vp10/common/scan.c +++ b/vp10/common/scan.c @@ -13,176 +13,129 @@ #include "vp10/common/scan.h" DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { - 0, 4, 1, 5, - 8, 2, 12, 9, - 3, 6, 13, 10, - 7, 14, 11, 15, + 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = { - 0, 4, 8, 12, - 1, 5, 9, 13, - 2, 6, 10, 14, - 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4[16]) = { - 0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { - 0, 4, 8, 1, - 12, 5, 9, 2, - 13, 6, 10, 3, - 7, 14, 11, 15, + 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { - 0, 1, 4, 2, - 5, 3, 6, 8, - 9, 7, 12, 10, - 13, 11, 14, 15, + 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = { - 0, 1, 4, 5, 2, 8, 6, 9, - 10, 3, 12, 7, 13, 11, 14, 16, - 17, 15, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, + 0, 1, 4, 5, 2, 8, 6, 9, 10, 3, 12, 7, 13, 11, 14, 16, + 17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = { - 0, 4, 8, 12, 16, 20, 24, 28, - 1, 5, 9, 13, 17, 21, 25, 29, - 2, 6, 10, 14, 18, 22, 26, 30, - 3, 7, 11, 15, 19, 23, 27, 31, + 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29, + 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = { - 0, 1, 8, 9, 2, 16, 10, 17, - 18, 3, 24, 11, 25, 19, 26, 4, - 12, 27, 20, 5, 28, 13, 21, 29, - 6, 14, 22, 30, 7, 15, 23, 31, + 0, 1, 8, 9, 2, 16, 10, 17, 18, 3, 24, 11, 25, 19, 26, 4, + 12, 27, 20, 5, 28, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, }; DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = { - 0, 8, 16, 24, 1, 9, 17, 25, - 2, 10, 18, 26, 3, 11, 19, 27, - 4, 12, 20, 28, 5, 13, 21, 29, - 6, 14, 22, 30, 7, 15, 23, 31, + 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, + 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { - 0, 8, 1, 16, 9, 2, 17, 24, - 10, 3, 18, 25, 32, 11, 4, 26, - 33, 19, 40, 12, 34, 27, 5, 41, - 20, 48, 13, 35, 42, 28, 21, 6, - 49, 56, 36, 43, 29, 7, 14, 50, - 57, 44, 22, 37, 15, 51, 58, 30, - 45, 23, 52, 59, 38, 31, 60, 53, - 46, 39, 61, 54, 47, 62, 55, 63, + 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26, + 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6, + 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30, + 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = { - 0, 8, 16, 24, 32, 40, 48, 56, - 1, 9, 17, 25, 33, 41, 49, 57, - 2, 10, 18, 26, 34, 42, 50, 58, - 3, 11, 19, 27, 35, 43, 51, 59, - 4, 12, 20, 28, 36, 44, 52, 60, - 5, 13, 21, 29, 37, 45, 53, 61, - 6, 14, 22, 30, 38, 46, 54, 62, - 7, 15, 23, 31, 39, 47, 55, 63, + 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, + 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, + 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61, + 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { - 0, 8, 16, 1, 24, 9, 32, 17, - 2, 40, 25, 10, 33, 18, 48, 3, - 26, 41, 11, 56, 19, 34, 4, 49, - 27, 42, 12, 35, 20, 57, 50, 28, - 5, 43, 13, 36, 58, 51, 21, 44, - 6, 29, 59, 37, 14, 52, 22, 7, - 45, 60, 30, 15, 38, 53, 23, 46, - 31, 61, 39, 54, 47, 62, 55, 63, + 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3, + 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28, + 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7, + 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { - 0, 1, 2, 8, 9, 3, 16, 10, - 4, 17, 11, 24, 5, 18, 25, 12, - 19, 26, 32, 6, 13, 20, 33, 27, - 7, 34, 40, 21, 28, 41, 14, 35, - 48, 42, 29, 36, 49, 22, 43, 15, - 56, 37, 50, 44, 30, 57, 23, 51, - 58, 45, 38, 52, 31, 59, 53, 46, - 60, 39, 61, 47, 54, 55, 62, 63, + 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12, + 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35, + 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51, + 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { - 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, - 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, - 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, - 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, - 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, - 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, - 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, - 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, - 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, - 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, - 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, - 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, - 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, - 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, - 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, - 251, + 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, + 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, + 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, + 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, + 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, + 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, + 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11, + 135, 89, 165, 120, 150, 58, 194, 180, 27, 74, 209, 105, 151, 136, 43, + 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137, + 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212, + 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45, + 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15, + 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216, + 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232, + 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219, + 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251, 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, - 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, - 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, - 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, - 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, - 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, - 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, - 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, - 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, + 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, + 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, + 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, + 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, + 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, + 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, + 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, + 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, + 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250, 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251, 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, @@ -192,642 +145,560 @@ DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = { }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, - 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, - 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, - 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, - 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, - 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, - 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, - 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, - 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, - 253, 254, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, + 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, + 255, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { - 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, - 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, - 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, - 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, - 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, - 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, - 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, - 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, - 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, - 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, - 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, - 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, - 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, - 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, - 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, - 236, + 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, + 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, + 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, + 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, + 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, + 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, + 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72, + 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57, + 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198, + 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184, + 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75, + 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76, + 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61, + 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172, + 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126, + 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236, 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { - 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, - 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, - 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, - 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, - 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, - 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, - 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, - 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, - 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, - 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, - 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, - 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, - 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, - 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, - 158, - 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, - 175, + 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, + 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, + 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, + 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, + 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, + 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, + 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161, + 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45, + 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122, + 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209, + 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, 197, 62, 154, 225, 183, + 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78, + 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243, + 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245, + 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188, + 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175, 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x32[1024]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, - 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, - 992, - 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481, - 513, 545, 577, 609, 641, 673, 705, 737, 769, 801, 833, 865, 897, 929, 961, - 993, - 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, - 514, 546, 578, 610, 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, - 994, - 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, - 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931, 963, - 995, - 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484, - 516, 548, 580, 612, 644, 676, 708, 740, 772, 804, 836, 868, 900, 932, 964, - 996, - 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485, - 517, 549, 581, 613, 645, 677, 709, 741, 773, 805, 837, 869, 901, 933, 965, - 997, - 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, - 518, 550, 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, - 998, - 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487, - 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, 903, 935, 967, - 999, - 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488, - 520, 552, 584, 616, 648, 680, 712, 744, 776, 808, 840, 872, 904, 936, 968, - 1000, - 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, - 521, 553, 585, 617, 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, - 1001, - 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, - 490, 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, - 938, 970, 1002, - 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, - 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811, 843, 875, 907, - 939, 971, 1003, - 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, - 492, 524, 556, 588, 620, 652, 684, 716, 748, 780, 812, 844, 876, 908, - 940, 972, 1004, - 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, - 493, 525, 557, 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, - 941, 973, 1005, - 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, - 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878, 910, - 942, 974, 1006, - 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, - 495, 527, 559, 591, 623, 655, 687, 719, 751, 783, 815, 847, 879, 911, - 943, 975, 1007, - 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, - 496, 528, 560, 592, 624, 656, 688, 720, 752, 784, 816, 848, 880, 912, - 944, 976, 1008, - 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, - 497, 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, - 945, 977, 1009, - 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, - 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, 850, 882, 914, - 946, 978, 1010, - 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, - 499, 531, 563, 595, 627, 659, 691, 723, 755, 787, 819, 851, 883, 915, - 947, 979, 1011, - 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, - 500, 532, 564, 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, - 948, 980, 1012, - 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, - 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885, 917, - 949, 981, 1013, - 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, - 502, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822, 854, 886, 918, - 950, 982, 1014, - 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, - 503, 535, 567, 599, 631, 663, 695, 727, 759, 791, 823, 855, 887, 919, - 951, 983, 1015, - 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, - 504, 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, - 952, 984, 1016, - 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, - 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, 857, 889, 921, - 953, 985, 1017, - 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, - 506, 538, 570, 602, 634, 666, 698, 730, 762, 794, 826, 858, 890, 922, - 954, 986, 1018, - 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, - 507, 539, 571, 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, - 955, 987, 1019, - 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, - 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892, 924, - 956, 988, 1020, - 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, - 509, 541, 573, 605, 637, 669, 701, 733, 765, 797, 829, 861, 893, 925, - 957, 989, 1021, - 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, - 510, 542, 574, 606, 638, 670, 702, 734, 766, 798, 830, 862, 894, 926, - 958, 990, 1022, - 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, - 511, 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, - 959, 991, 1023, + 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, + 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, + 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, + 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737, + 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162, + 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610, + 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35, + 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, + 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931, + 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, + 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804, + 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229, + 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677, + 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102, + 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550, + 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998, + 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, + 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, + 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, + 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744, + 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169, + 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617, + 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42, + 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490, + 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938, + 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, + 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811, + 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236, + 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684, + 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109, + 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557, + 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005, + 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, + 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878, + 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, + 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751, + 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176, + 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624, + 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49, + 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497, + 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945, + 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, + 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, + 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243, + 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691, + 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116, + 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564, + 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012, + 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, + 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885, + 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, + 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758, + 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183, + 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631, + 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56, + 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504, + 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952, + 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, + 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, + 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250, + 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698, + 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123, + 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571, + 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019, + 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, + 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892, + 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, + 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765, + 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190, + 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638, + 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63, + 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511, + 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959, + 991, 1023, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32[1024]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, - 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, - 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, - 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, - 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, - 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, - 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, - 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, - 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, - 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, - 282, 283, 284, 285, 286, 287, - 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, - 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, - 314, 315, 316, 317, 318, 319, - 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, - 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, - 346, 347, 348, 349, 350, 351, - 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, - 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, - 378, 379, 380, 381, 382, 383, - 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, - 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, - 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, - 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, - 442, 443, 444, 445, 446, 447, - 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, - 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, - 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, - 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, - 506, 507, 508, 509, 510, 511, - 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, - 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, - 538, 539, 540, 541, 542, 543, - 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, - 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, - 570, 571, 572, 573, 574, 575, - 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, - 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, - 602, 603, 604, 605, 606, 607, - 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, - 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, - 634, 635, 636, 637, 638, 639, - 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, - 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, - 666, 667, 668, 669, 670, 671, - 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, - 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, - 698, 699, 700, 701, 702, 703, - 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, - 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, - 730, 731, 732, 733, 734, 735, - 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, - 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, - 762, 763, 764, 765, 766, 767, - 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, - 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, - 794, 795, 796, 797, 798, 799, - 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, - 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, - 826, 827, 828, 829, 830, 831, - 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, - 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, - 858, 859, 860, 861, 862, 863, - 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, - 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, - 890, 891, 892, 893, 894, 895, - 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, - 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, - 922, 923, 924, 925, 926, 927, - 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, - 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, - 954, 955, 956, 957, 958, 959, - 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, - 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, - 986, 987, 988, 989, 990, 991, - 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, - 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, - 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, + 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, + 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, + 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, + 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, + 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, + 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, + 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, + 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, + 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, + 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, + 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, + 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, + 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, + 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, + 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, + 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, + 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, + 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, + 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, + 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, + 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, + 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, + 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, + 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, + 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, + 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, + 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, + 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, + 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, + 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, + 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, + 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, + 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, + 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, + 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, + 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, + 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, + 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, + 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, + 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, + 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, + 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, + 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, + 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, + 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, + 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, + 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, + 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, + 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, + 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, + 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, + 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, + 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, + 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { - 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, - 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, - 68, 131, 37, 100, - 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, - 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, - 102, 352, 8, 197, - 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, - 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, - 41, 417, 199, 136, - 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, - 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, - 295, 420, 106, 451, - 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, - 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, - 453, 139, 44, 234, - 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, - 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, - 486, 77, 204, 362, - 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, - 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, - 111, 238, 48, 143, - 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, - 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, - 393, 300, 269, 176, 145, - 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, - 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, - 550, 519, 488, 457, 426, 395, - 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, - 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, - 210, 179, 117, 86, 55, 738, 707, - 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, - 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, - 645, 552, 521, 428, 397, 304, - 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, - 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, - 864, 833, 802, 771, 740, 709, - 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, - 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, - 710, 679, 617, 586, 555, 493, - 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, - 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, - 743, 619, 495, 371, 247, 123, - 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, - 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, - 898, 836, 805, 774, 712, 681, - 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, - 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, - 651, 620, 589, 558, 527, - 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, - 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, - 559, 497, 466, 435, 373, - 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, - 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, - 499, 375, 251, 127, - 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, - 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, - 685, 654, 592, 561, - 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, - 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, - 438, 407, 376, 345, - 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, - 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, - 967, 874, 843, 750, - 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, - 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, - 564, 533, 440, 409, - 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, - 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, - 752, 721, 690, 659, - 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, - 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, - 350, 319, 1002, 971, - 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, - 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, - 537, 444, 413, 972, - 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, - 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, - 570, 539, 508, 477, - 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, - 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, - 1007, 883, 759, 635, 511, - 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, - 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, - 884, 853, 822, 791, - 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, - 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, - 1011, 887, 763, 639, - 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, - 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, - 702, 671, 1013, 982, - 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, - 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, - 1016, 985, 954, 923, - 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, - 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, - 990, 959, 1022, 991, 1023, + 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, + 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, + 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6, + 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, + 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71, + 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, + 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, + 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418, + 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, + 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358, + 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, + 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, + 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329, + 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, + 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299, + 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, + 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, + 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, + 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, + 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673, + 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, + 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, + 426, 395, 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, + 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, + 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366, + 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, + 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, + 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, + 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, + 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523, + 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, + 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493, + 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, + 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, + 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152, + 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, + 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650, + 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, + 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, + 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279, + 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, + 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311, + 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, + 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, + 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901, + 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, + 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437, + 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, + 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, + 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, + 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, + 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502, + 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, + 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, + 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, + 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, + 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504, + 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, + 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, + 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, + 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, + 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724, + 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, + 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477, + 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, + 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, + 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820, + 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, + 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760, + 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, + 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, + 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700, + 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, + 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889, + 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, + 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, + 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862, + 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, + 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023, }; #if CONFIG_EXT_TX // Scan over two rectangular vertical partitions one after the other DECLARE_ALIGNED(16, static const int16_t, v2_scan_32x32[1024]) = { - 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, - 97, 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, - 131, 160, 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, - 193, 70, 194, 133, 164, 102, 195, 7, 224, 39, 165, 225, - 134, 196, 71, 226, 103, 227, 166, 197, 8, 256, 40, 135, - 228, 257, 72, 258, 198, 104, 259, 167, 229, 136, 260, 9, - 288, 41, 289, 73, 199, 230, 290, 168, 261, 105, 291, 137, - 292, 231, 10, 200, 262, 320, 42, 321, 74, 322, 169, 293, - 106, 323, 232, 263, 138, 324, 201, 294, 11, 352, 43, 353, - 75, 170, 325, 354, 264, 107, 233, 295, 355, 202, 326, 139, - 356, 12, 384, 44, 265, 296, 385, 171, 357, 76, 386, 234, - 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13, 172, - 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390, - 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, - 46, 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, - 392, 142, 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, - 480, 47, 481, 79, 482, 206, 454, 269, 424, 111, 483, 143, - 484, 363, 332, 394, 238, 455, 175, 301, 425, 485, 512, 513, - 270, 456, 514, 207, 486, 364, 395, 515, 333, 426, 516, 239, - 487, 302, 457, 517, 396, 271, 488, 544, 365, 427, 545, 518, - 546, 334, 458, 547, 519, 548, 303, 489, 397, 428, 549, 366, - 459, 520, 576, 335, 490, 550, 577, 578, 579, 521, 429, 551, - 398, 460, 580, 367, 491, 581, 552, 522, 582, 608, 609, 430, - 461, 610, 399, 492, 553, 611, 583, 523, 612, 613, 584, 554, - 462, 431, 493, 614, 524, 640, 641, 642, 585, 643, 555, 615, - 644, 463, 494, 586, 525, 616, 645, 556, 646, 672, 617, 673, - 587, 674, 647, 495, 675, 526, 676, 557, 618, 648, 677, 588, - 678, 527, 649, 619, 704, 558, 705, 706, 679, 589, 707, 650, - 708, 620, 680, 709, 559, 590, 710, 651, 681, 736, 621, 737, - 711, 738, 739, 682, 652, 740, 712, 591, 741, 622, 683, 713, - 742, 653, 768, 769, 743, 770, 714, 684, 771, 623, 772, 744, - 654, 773, 715, 685, 745, 774, 655, 775, 800, 801, 716, 746, - 802, 803, 686, 776, 804, 747, 805, 717, 777, 806, 687, 748, - 807, 778, 832, 833, 718, 834, 835, 808, 836, 779, 749, 837, - 809, 719, 838, 780, 750, 810, 839, 864, 865, 866, 867, 840, - 781, 868, 811, 751, 869, 841, 870, 812, 782, 842, 871, 896, - 897, 898, 872, 899, 813, 843, 900, 783, 901, 873, 844, 902, - 814, 874, 903, 928, 929, 845, 930, 904, 815, 875, 931, 932, - 905, 933, 846, 876, 934, 906, 935, 877, 960, 847, 961, 962, - 907, 936, 963, 964, 937, 878, 965, 908, 966, 938, 967, 909, - 879, 992, 939, 993, 968, 994, 995, 996, 910, 969, 940, 997, - 998, 970, 911, 941, 999, 971, 1000, 942, 1001, 972, 1002, 943, - 973, 1003, 974, 1004, 975, 1005, 1006, 1007, 16, 48, 80, 112, - 144, 176, 17, 49, 208, 81, 113, 145, 240, 177, 272, 18, - 50, 209, 82, 114, 304, 241, 146, 178, 273, 336, 210, 19, - 51, 83, 115, 305, 242, 147, 368, 179, 274, 337, 211, 20, - 400, 52, 84, 306, 116, 243, 369, 148, 338, 180, 275, 432, - 401, 212, 21, 53, 307, 85, 370, 244, 117, 464, 149, 433, - 339, 276, 181, 402, 213, 308, 496, 371, 22, 54, 465, 86, - 245, 118, 434, 150, 340, 277, 403, 182, 528, 497, 214, 466, - 372, 309, 23, 55, 435, 87, 246, 119, 341, 404, 151, 529, - 560, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56, 247, - 561, 88, 530, 592, 342, 120, 405, 499, 152, 279, 468, 184, - 374, 311, 437, 216, 562, 593, 531, 624, 25, 248, 500, 57, - 406, 89, 343, 121, 469, 280, 153, 594, 185, 375, 563, 625, - 438, 532, 656, 312, 217, 501, 407, 249, 26, 344, 58, 90, - 470, 122, 595, 626, 281, 564, 657, 154, 376, 533, 688, 439, - 186, 313, 502, 218, 408, 627, 596, 658, 250, 345, 471, 27, - 59, 565, 689, 91, 123, 282, 534, 720, 155, 440, 377, 187, - 503, 314, 628, 659, 219, 597, 690, 409, 472, 566, 721, 346, - 251, 28, 60, 535, 752, 92, 124, 283, 441, 378, 156, 660, - 504, 629, 691, 598, 722, 188, 315, 567, 753, 220, 410, 473, - 347, 536, 784, 252, 29, 661, 692, 61, 93, 442, 630, 723, - 284, 125, 379, 505, 599, 754, 157, 316, 568, 785, 189, 474, - 411, 221, 537, 816, 693, 348, 662, 724, 253, 631, 755, 443, - 30, 600, 786, 62, 506, 94, 285, 380, 126, 569, 817, 158, - 317, 190, 475, 694, 725, 412, 663, 756, 538, 848, 222, 632, - 787, 349, 254, 601, 818, 444, 507, 31, 63, 381, 286, 95, - 570, 849, 726, 127, 695, 757, 664, 788, 159, 476, 318, 413, - 539, 880, 191, 633, 819, 223, 350, 602, 850, 508, 255, 445, - 727, 758, 696, 789, 571, 881, 382, 287, 665, 820, 477, 634, - 851, 540, 912, 319, 414, 603, 882, 759, 728, 790, 351, 509, - 697, 821, 446, 572, 913, 666, 852, 383, 635, 883, 478, 541, - 944, 415, 760, 791, 604, 914, 729, 822, 698, 853, 510, 667, - 884, 447, 573, 945, 636, 915, 792, 761, 823, 542, 976, 479, - 730, 854, 605, 946, 699, 885, 668, 916, 511, 574, 977, 793, - 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978, 700, - 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979, - 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, - 764, 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, - 920, 671, 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, - 921, 797, 952, 766, 983, 735, 1014, 891, 860, 922, 829, 953, - 798, 984, 767, 1015, 892, 923, 861, 954, 830, 985, 799, 1016, - 924, 893, 955, 862, 986, 831, 1017, 925, 956, 894, 987, 863, - 1018, 957, 926, 988, 895, 1019, 958, 989, 927, 1020, 990, 959, - 1021, 991, 1022, 1023, + 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97, + 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160, + 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194, + 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226, + 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198, + 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230, + 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42, + 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294, + 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355, + 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76, + 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13, + 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390, + 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46, + 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142, + 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481, + 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394, + 238, 455, 175, 301, 425, 485, 512, 513, 270, 456, 514, 207, 486, + 364, 395, 515, 333, 426, 516, 239, 487, 302, 457, 517, 396, 271, + 488, 544, 365, 427, 545, 518, 546, 334, 458, 547, 519, 548, 303, + 489, 397, 428, 549, 366, 459, 520, 576, 335, 490, 550, 577, 578, + 579, 521, 429, 551, 398, 460, 580, 367, 491, 581, 552, 522, 582, + 608, 609, 430, 461, 610, 399, 492, 553, 611, 583, 523, 612, 613, + 584, 554, 462, 431, 493, 614, 524, 640, 641, 642, 585, 643, 555, + 615, 644, 463, 494, 586, 525, 616, 645, 556, 646, 672, 617, 673, + 587, 674, 647, 495, 675, 526, 676, 557, 618, 648, 677, 588, 678, + 527, 649, 619, 704, 558, 705, 706, 679, 589, 707, 650, 708, 620, + 680, 709, 559, 590, 710, 651, 681, 736, 621, 737, 711, 738, 739, + 682, 652, 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769, + 743, 770, 714, 684, 771, 623, 772, 744, 654, 773, 715, 685, 745, + 774, 655, 775, 800, 801, 716, 746, 802, 803, 686, 776, 804, 747, + 805, 717, 777, 806, 687, 748, 807, 778, 832, 833, 718, 834, 835, + 808, 836, 779, 749, 837, 809, 719, 838, 780, 750, 810, 839, 864, + 865, 866, 867, 840, 781, 868, 811, 751, 869, 841, 870, 812, 782, + 842, 871, 896, 897, 898, 872, 899, 813, 843, 900, 783, 901, 873, + 844, 902, 814, 874, 903, 928, 929, 845, 930, 904, 815, 875, 931, + 932, 905, 933, 846, 876, 934, 906, 935, 877, 960, 847, 961, 962, + 907, 936, 963, 964, 937, 878, 965, 908, 966, 938, 967, 909, 879, + 992, 939, 993, 968, 994, 995, 996, 910, 969, 940, 997, 998, 970, + 911, 941, 999, 971, 1000, 942, 1001, 972, 1002, 943, 973, 1003, 974, + 1004, 975, 1005, 1006, 1007, 16, 48, 80, 112, 144, 176, 17, 49, + 208, 81, 113, 145, 240, 177, 272, 18, 50, 209, 82, 114, 304, + 241, 146, 178, 273, 336, 210, 19, 51, 83, 115, 305, 242, 147, + 368, 179, 274, 337, 211, 20, 400, 52, 84, 306, 116, 243, 369, + 148, 338, 180, 275, 432, 401, 212, 21, 53, 307, 85, 370, 244, + 117, 464, 149, 433, 339, 276, 181, 402, 213, 308, 496, 371, 22, + 54, 465, 86, 245, 118, 434, 150, 340, 277, 403, 182, 528, 497, + 214, 466, 372, 309, 23, 55, 435, 87, 246, 119, 341, 404, 151, + 529, 560, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56, 247, + 561, 88, 530, 592, 342, 120, 405, 499, 152, 279, 468, 184, 374, + 311, 437, 216, 562, 593, 531, 624, 25, 248, 500, 57, 406, 89, + 343, 121, 469, 280, 153, 594, 185, 375, 563, 625, 438, 532, 656, + 312, 217, 501, 407, 249, 26, 344, 58, 90, 470, 122, 595, 626, + 281, 564, 657, 154, 376, 533, 688, 439, 186, 313, 502, 218, 408, + 627, 596, 658, 250, 345, 471, 27, 59, 565, 689, 91, 123, 282, + 534, 720, 155, 440, 377, 187, 503, 314, 628, 659, 219, 597, 690, + 409, 472, 566, 721, 346, 251, 28, 60, 535, 752, 92, 124, 283, + 441, 378, 156, 660, 504, 629, 691, 598, 722, 188, 315, 567, 753, + 220, 410, 473, 347, 536, 784, 252, 29, 661, 692, 61, 93, 442, + 630, 723, 284, 125, 379, 505, 599, 754, 157, 316, 568, 785, 189, + 474, 411, 221, 537, 816, 693, 348, 662, 724, 253, 631, 755, 443, + 30, 600, 786, 62, 506, 94, 285, 380, 126, 569, 817, 158, 317, + 190, 475, 694, 725, 412, 663, 756, 538, 848, 222, 632, 787, 349, + 254, 601, 818, 444, 507, 31, 63, 381, 286, 95, 570, 849, 726, + 127, 695, 757, 664, 788, 159, 476, 318, 413, 539, 880, 191, 633, + 819, 223, 350, 602, 850, 508, 255, 445, 727, 758, 696, 789, 571, + 881, 382, 287, 665, 820, 477, 634, 851, 540, 912, 319, 414, 603, + 882, 759, 728, 790, 351, 509, 697, 821, 446, 572, 913, 666, 852, + 383, 635, 883, 478, 541, 944, 415, 760, 791, 604, 914, 729, 822, + 698, 853, 510, 667, 884, 447, 573, 945, 636, 915, 792, 761, 823, + 542, 976, 479, 730, 854, 605, 946, 699, 885, 668, 916, 511, 574, + 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978, + 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979, + 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764, + 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671, + 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952, + 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015, + 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986, + 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019, + 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023, }; // Scan over two rectangular horizontal partitions one after the other DECLARE_ALIGNED(16, static const int16_t, h2_scan_32x32[1024]) = { - 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, - 97, 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, - 131, 160, 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, - 193, 70, 194, 133, 164, 102, 195, 7, 224, 39, 165, 225, - 134, 196, 71, 226, 103, 227, 166, 197, 8, 256, 40, 135, - 228, 257, 72, 258, 198, 104, 259, 167, 229, 136, 260, 9, - 288, 41, 289, 73, 199, 230, 290, 168, 261, 105, 291, 137, - 292, 231, 10, 200, 262, 320, 42, 321, 74, 322, 169, 293, - 106, 323, 232, 263, 138, 324, 201, 294, 11, 352, 43, 353, - 75, 170, 325, 354, 264, 107, 233, 295, 355, 202, 326, 139, - 356, 12, 384, 44, 265, 296, 385, 171, 357, 76, 386, 234, - 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13, 172, - 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390, - 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, - 46, 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, - 392, 142, 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, - 480, 47, 481, 79, 482, 206, 454, 269, 424, 111, 483, 143, - 484, 363, 332, 394, 238, 455, 175, 301, 425, 485, 16, 48, - 80, 270, 456, 207, 486, 112, 364, 395, 333, 426, 144, 239, - 487, 302, 457, 176, 396, 17, 271, 488, 49, 365, 427, 208, - 81, 334, 458, 113, 145, 240, 303, 489, 397, 428, 177, 366, - 459, 272, 18, 50, 209, 335, 490, 82, 114, 304, 241, 429, - 146, 398, 460, 367, 491, 178, 273, 336, 210, 19, 51, 83, - 430, 461, 399, 492, 115, 305, 242, 147, 368, 179, 274, 337, - 462, 431, 493, 211, 20, 400, 52, 84, 306, 116, 243, 369, - 148, 463, 494, 338, 180, 275, 432, 401, 212, 21, 53, 307, - 85, 370, 244, 117, 495, 464, 149, 433, 339, 276, 181, 402, - 213, 308, 496, 371, 22, 54, 465, 86, 245, 118, 434, 150, - 340, 277, 403, 182, 497, 214, 466, 372, 309, 23, 55, 435, - 87, 246, 119, 341, 404, 151, 278, 498, 183, 467, 373, 215, - 310, 436, 24, 56, 247, 88, 342, 120, 405, 499, 152, 279, - 468, 184, 374, 311, 437, 216, 25, 248, 500, 57, 406, 89, - 343, 121, 469, 280, 153, 185, 375, 438, 312, 217, 501, 407, - 249, 26, 344, 58, 90, 470, 122, 281, 154, 376, 439, 186, - 313, 502, 218, 408, 250, 345, 471, 27, 59, 91, 123, 282, - 155, 440, 377, 187, 503, 314, 219, 409, 472, 346, 251, 28, - 60, 92, 124, 283, 441, 378, 156, 504, 188, 315, 220, 410, - 473, 347, 252, 29, 61, 93, 442, 284, 125, 379, 505, 157, - 316, 189, 474, 411, 221, 348, 253, 443, 30, 62, 506, 94, - 285, 380, 126, 158, 317, 190, 475, 412, 222, 349, 254, 444, - 507, 31, 63, 381, 286, 95, 127, 159, 476, 318, 413, 191, - 223, 350, 508, 255, 445, 382, 287, 477, 319, 414, 351, 509, - 446, 383, 478, 415, 510, 447, 479, 511, 512, 513, 514, 515, - 516, 517, 544, 545, 518, 546, 547, 519, 548, 549, 520, 576, - 550, 577, 578, 579, 521, 551, 580, 581, 552, 522, 582, 608, - 609, 610, 553, 611, 583, 523, 612, 613, 584, 554, 614, 524, - 640, 641, 642, 585, 643, 555, 615, 644, 586, 525, 616, 645, - 556, 646, 672, 617, 673, 587, 674, 647, 675, 526, 676, 557, - 618, 648, 677, 588, 678, 527, 649, 619, 704, 558, 705, 706, - 679, 589, 707, 650, 708, 620, 680, 709, 528, 559, 590, 710, - 651, 681, 736, 621, 737, 711, 738, 739, 682, 652, 529, 560, - 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769, 561, - 743, 530, 592, 770, 714, 684, 771, 623, 772, 744, 654, 773, - 715, 685, 745, 774, 562, 593, 531, 624, 655, 775, 800, 801, - 716, 746, 802, 803, 686, 776, 804, 594, 563, 625, 747, 805, - 717, 532, 656, 777, 806, 687, 748, 807, 778, 832, 833, 718, - 834, 595, 626, 835, 564, 657, 808, 836, 533, 688, 779, 749, - 837, 809, 719, 838, 780, 627, 596, 658, 750, 810, 839, 864, - 565, 689, 865, 866, 867, 534, 720, 840, 781, 868, 811, 751, - 869, 841, 628, 659, 597, 690, 870, 812, 782, 566, 721, 842, - 871, 896, 535, 752, 897, 898, 872, 899, 813, 843, 660, 900, - 783, 629, 691, 598, 722, 901, 873, 567, 753, 844, 902, 814, - 874, 536, 784, 903, 661, 692, 928, 929, 630, 723, 845, 930, - 904, 815, 875, 931, 599, 754, 932, 568, 785, 905, 933, 846, - 876, 934, 537, 816, 693, 662, 724, 906, 631, 755, 935, 877, - 600, 786, 960, 847, 961, 962, 907, 936, 963, 569, 817, 964, - 937, 694, 725, 878, 965, 908, 663, 756, 538, 848, 966, 632, - 787, 938, 601, 818, 967, 909, 879, 992, 939, 993, 968, 570, - 849, 994, 726, 695, 757, 995, 664, 788, 996, 910, 969, 539, - 880, 940, 633, 819, 997, 998, 602, 850, 970, 911, 941, 999, - 727, 758, 696, 789, 571, 881, 971, 665, 820, 1000, 634, 851, - 942, 540, 912, 1001, 972, 603, 882, 759, 728, 790, 1002, 697, - 821, 943, 973, 572, 913, 666, 852, 1003, 635, 883, 974, 541, - 944, 760, 791, 1004, 604, 914, 729, 822, 698, 853, 975, 667, - 884, 573, 945, 1005, 636, 915, 792, 761, 823, 542, 976, 1006, - 730, 854, 605, 946, 699, 885, 668, 916, 1007, 574, 977, 793, - 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978, 700, - 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979, - 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, - 764, 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, - 920, 671, 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, - 921, 797, 952, 766, 983, 735, 1014, 891, 860, 922, 829, 953, - 798, 984, 767, 1015, 892, 923, 861, 954, 830, 985, 799, 1016, - 924, 893, 955, 862, 986, 831, 1017, 925, 956, 894, 987, 863, - 1018, 957, 926, 988, 895, 1019, 958, 989, 927, 1020, 990, 959, - 1021, 991, 1022, 1023, + 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97, + 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160, + 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194, + 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226, + 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198, + 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230, + 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42, + 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294, + 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355, + 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76, + 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13, + 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390, + 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46, + 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142, + 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481, + 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394, + 238, 455, 175, 301, 425, 485, 16, 48, 80, 270, 456, 207, 486, + 112, 364, 395, 333, 426, 144, 239, 487, 302, 457, 176, 396, 17, + 271, 488, 49, 365, 427, 208, 81, 334, 458, 113, 145, 240, 303, + 489, 397, 428, 177, 366, 459, 272, 18, 50, 209, 335, 490, 82, + 114, 304, 241, 429, 146, 398, 460, 367, 491, 178, 273, 336, 210, + 19, 51, 83, 430, 461, 399, 492, 115, 305, 242, 147, 368, 179, + 274, 337, 462, 431, 493, 211, 20, 400, 52, 84, 306, 116, 243, + 369, 148, 463, 494, 338, 180, 275, 432, 401, 212, 21, 53, 307, + 85, 370, 244, 117, 495, 464, 149, 433, 339, 276, 181, 402, 213, + 308, 496, 371, 22, 54, 465, 86, 245, 118, 434, 150, 340, 277, + 403, 182, 497, 214, 466, 372, 309, 23, 55, 435, 87, 246, 119, + 341, 404, 151, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56, + 247, 88, 342, 120, 405, 499, 152, 279, 468, 184, 374, 311, 437, + 216, 25, 248, 500, 57, 406, 89, 343, 121, 469, 280, 153, 185, + 375, 438, 312, 217, 501, 407, 249, 26, 344, 58, 90, 470, 122, + 281, 154, 376, 439, 186, 313, 502, 218, 408, 250, 345, 471, 27, + 59, 91, 123, 282, 155, 440, 377, 187, 503, 314, 219, 409, 472, + 346, 251, 28, 60, 92, 124, 283, 441, 378, 156, 504, 188, 315, + 220, 410, 473, 347, 252, 29, 61, 93, 442, 284, 125, 379, 505, + 157, 316, 189, 474, 411, 221, 348, 253, 443, 30, 62, 506, 94, + 285, 380, 126, 158, 317, 190, 475, 412, 222, 349, 254, 444, 507, + 31, 63, 381, 286, 95, 127, 159, 476, 318, 413, 191, 223, 350, + 508, 255, 445, 382, 287, 477, 319, 414, 351, 509, 446, 383, 478, + 415, 510, 447, 479, 511, 512, 513, 514, 515, 516, 517, 544, 545, + 518, 546, 547, 519, 548, 549, 520, 576, 550, 577, 578, 579, 521, + 551, 580, 581, 552, 522, 582, 608, 609, 610, 553, 611, 583, 523, + 612, 613, 584, 554, 614, 524, 640, 641, 642, 585, 643, 555, 615, + 644, 586, 525, 616, 645, 556, 646, 672, 617, 673, 587, 674, 647, + 675, 526, 676, 557, 618, 648, 677, 588, 678, 527, 649, 619, 704, + 558, 705, 706, 679, 589, 707, 650, 708, 620, 680, 709, 528, 559, + 590, 710, 651, 681, 736, 621, 737, 711, 738, 739, 682, 652, 529, + 560, 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769, 561, + 743, 530, 592, 770, 714, 684, 771, 623, 772, 744, 654, 773, 715, + 685, 745, 774, 562, 593, 531, 624, 655, 775, 800, 801, 716, 746, + 802, 803, 686, 776, 804, 594, 563, 625, 747, 805, 717, 532, 656, + 777, 806, 687, 748, 807, 778, 832, 833, 718, 834, 595, 626, 835, + 564, 657, 808, 836, 533, 688, 779, 749, 837, 809, 719, 838, 780, + 627, 596, 658, 750, 810, 839, 864, 565, 689, 865, 866, 867, 534, + 720, 840, 781, 868, 811, 751, 869, 841, 628, 659, 597, 690, 870, + 812, 782, 566, 721, 842, 871, 896, 535, 752, 897, 898, 872, 899, + 813, 843, 660, 900, 783, 629, 691, 598, 722, 901, 873, 567, 753, + 844, 902, 814, 874, 536, 784, 903, 661, 692, 928, 929, 630, 723, + 845, 930, 904, 815, 875, 931, 599, 754, 932, 568, 785, 905, 933, + 846, 876, 934, 537, 816, 693, 662, 724, 906, 631, 755, 935, 877, + 600, 786, 960, 847, 961, 962, 907, 936, 963, 569, 817, 964, 937, + 694, 725, 878, 965, 908, 663, 756, 538, 848, 966, 632, 787, 938, + 601, 818, 967, 909, 879, 992, 939, 993, 968, 570, 849, 994, 726, + 695, 757, 995, 664, 788, 996, 910, 969, 539, 880, 940, 633, 819, + 997, 998, 602, 850, 970, 911, 941, 999, 727, 758, 696, 789, 571, + 881, 971, 665, 820, 1000, 634, 851, 942, 540, 912, 1001, 972, 603, + 882, 759, 728, 790, 1002, 697, 821, 943, 973, 572, 913, 666, 852, + 1003, 635, 883, 974, 541, 944, 760, 791, 1004, 604, 914, 729, 822, + 698, 853, 975, 667, 884, 573, 945, 1005, 636, 915, 792, 761, 823, + 542, 976, 1006, 730, 854, 605, 946, 699, 885, 668, 916, 1007, 574, + 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978, + 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979, + 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764, + 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671, + 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952, + 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015, + 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986, + 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019, + 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023, }; // Scan where the top left quarter is scanned first DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32[1024]) = { - 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, - 97, 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, - 131, 160, 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, - 193, 70, 194, 133, 164, 102, 195, 7, 224, 39, 165, 225, - 134, 196, 71, 226, 103, 227, 166, 197, 8, 256, 40, 135, - 228, 257, 72, 258, 198, 104, 259, 167, 229, 136, 260, 9, - 288, 41, 289, 73, 199, 230, 290, 168, 261, 105, 291, 137, - 292, 231, 10, 200, 262, 320, 42, 321, 74, 322, 169, 293, - 106, 323, 232, 263, 138, 324, 201, 294, 11, 352, 43, 353, - 75, 170, 325, 354, 264, 107, 233, 295, 355, 202, 326, 139, - 356, 12, 384, 44, 265, 296, 385, 171, 357, 76, 386, 234, - 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13, 172, - 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390, - 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, - 46, 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, - 392, 142, 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, - 480, 47, 481, 79, 482, 206, 454, 269, 424, 111, 483, 143, - 484, 363, 332, 394, 238, 455, 175, 301, 425, 485, 270, 456, - 207, 486, 364, 395, 333, 426, 239, 487, 302, 457, 396, 271, - 488, 365, 427, 334, 458, 303, 489, 397, 428, 366, 459, 335, - 490, 429, 398, 460, 367, 491, 430, 461, 399, 492, 462, 431, - 493, 463, 494, 495, 16, 512, 48, 513, 80, 514, 112, 515, - 144, 516, 176, 517, 17, 544, 49, 545, 208, 518, 81, 546, - 113, 547, 145, 240, 519, 548, 177, 549, 272, 520, 18, 576, - 50, 209, 550, 577, 82, 578, 114, 579, 304, 521, 241, 551, - 146, 580, 178, 581, 273, 552, 336, 522, 210, 582, 19, 608, - 51, 609, 83, 610, 115, 305, 553, 611, 242, 583, 147, 368, - 523, 612, 179, 613, 274, 584, 337, 554, 211, 614, 20, 400, - 524, 640, 52, 641, 84, 642, 306, 585, 116, 643, 243, 369, - 555, 615, 148, 644, 338, 586, 180, 275, 432, 525, 616, 645, - 401, 556, 212, 646, 21, 672, 53, 307, 617, 673, 85, 370, - 587, 674, 244, 647, 117, 675, 464, 526, 149, 676, 433, 557, - 339, 618, 276, 648, 181, 677, 402, 588, 213, 678, 308, 496, - 527, 649, 371, 619, 22, 704, 54, 465, 558, 705, 86, 706, - 245, 679, 118, 434, 589, 707, 150, 340, 650, 708, 277, 403, - 620, 680, 182, 709, 528, 497, 559, 214, 466, 590, 710, 372, - 651, 309, 681, 23, 736, 55, 435, 621, 737, 87, 246, 711, - 738, 119, 739, 341, 682, 404, 652, 151, 529, 560, 740, 278, - 712, 498, 591, 183, 741, 467, 622, 373, 683, 215, 310, 713, - 742, 436, 653, 24, 768, 56, 769, 247, 561, 743, 88, 530, - 592, 770, 342, 714, 120, 405, 684, 771, 499, 623, 152, 772, - 279, 744, 468, 654, 184, 773, 374, 715, 311, 437, 685, 745, - 216, 774, 562, 593, 531, 624, 25, 248, 500, 655, 775, 800, - 57, 801, 406, 716, 89, 343, 746, 802, 121, 803, 469, 686, - 280, 776, 153, 804, 594, 185, 375, 563, 625, 747, 805, 438, - 717, 532, 656, 312, 777, 217, 806, 501, 687, 407, 748, 249, - 807, 26, 344, 778, 832, 58, 833, 90, 470, 718, 834, 122, - 595, 626, 835, 281, 564, 657, 808, 154, 836, 376, 533, 688, - 779, 439, 749, 186, 837, 313, 809, 502, 719, 218, 838, 408, - 780, 627, 596, 658, 250, 345, 471, 750, 810, 839, 27, 864, - 59, 565, 689, 865, 91, 866, 123, 867, 282, 534, 720, 840, - 155, 440, 781, 868, 377, 811, 187, 503, 751, 869, 314, 841, - 628, 659, 219, 597, 690, 870, 409, 812, 472, 782, 566, 721, - 346, 842, 251, 871, 28, 896, 60, 535, 752, 897, 92, 898, - 124, 283, 872, 899, 441, 813, 378, 843, 156, 660, 900, 504, - 783, 629, 691, 598, 722, 188, 901, 315, 873, 567, 753, 220, - 410, 844, 902, 473, 814, 347, 874, 536, 784, 252, 903, 29, - 661, 692, 928, 61, 929, 93, 442, 630, 723, 845, 930, 284, - 904, 125, 379, 505, 815, 875, 931, 599, 754, 157, 932, 316, - 568, 785, 905, 189, 933, 474, 846, 411, 876, 221, 934, 537, - 816, 693, 348, 662, 724, 906, 253, 631, 755, 935, 443, 877, - 30, 600, 786, 960, 62, 506, 847, 961, 94, 962, 285, 380, - 907, 936, 126, 963, 569, 817, 158, 964, 317, 937, 190, 475, - 694, 725, 878, 965, 412, 908, 663, 756, 538, 848, 222, 966, - 632, 787, 349, 938, 254, 601, 818, 967, 444, 909, 507, 879, - 31, 992, 63, 381, 939, 993, 286, 968, 95, 570, 849, 994, - 726, 127, 695, 757, 995, 664, 788, 159, 996, 476, 910, 318, - 969, 413, 539, 880, 940, 191, 633, 819, 997, 223, 998, 350, - 602, 850, 970, 508, 911, 255, 445, 941, 999, 727, 758, 696, - 789, 571, 881, 382, 971, 287, 665, 820, 1000, 477, 634, 851, - 942, 540, 912, 319, 1001, 414, 972, 603, 882, 759, 728, 790, - 351, 1002, 509, 697, 821, 943, 446, 973, 572, 913, 666, 852, - 383, 1003, 635, 883, 478, 974, 541, 944, 415, 760, 791, 1004, - 604, 914, 729, 822, 698, 853, 510, 975, 667, 884, 447, 573, - 945, 1005, 636, 915, 792, 761, 823, 542, 976, 479, 1006, 730, - 854, 605, 946, 699, 885, 668, 916, 511, 1007, 574, 977, 793, - 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978, 700, - 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979, - 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, - 764, 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, - 920, 671, 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, - 921, 797, 952, 766, 983, 735, 1014, 891, 860, 922, 829, 953, - 798, 984, 767, 1015, 892, 923, 861, 954, 830, 985, 799, 1016, - 924, 893, 955, 862, 986, 831, 1017, 925, 956, 894, 987, 863, - 1018, 957, 926, 988, 895, 1019, 958, 989, 927, 1020, 990, 959, - 1021, 991, 1022, 1023, + 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97, + 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160, + 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194, + 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226, + 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198, + 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230, + 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42, + 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294, + 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355, + 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76, + 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13, + 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390, + 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46, + 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142, + 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481, + 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394, + 238, 455, 175, 301, 425, 485, 270, 456, 207, 486, 364, 395, 333, + 426, 239, 487, 302, 457, 396, 271, 488, 365, 427, 334, 458, 303, + 489, 397, 428, 366, 459, 335, 490, 429, 398, 460, 367, 491, 430, + 461, 399, 492, 462, 431, 493, 463, 494, 495, 16, 512, 48, 513, + 80, 514, 112, 515, 144, 516, 176, 517, 17, 544, 49, 545, 208, + 518, 81, 546, 113, 547, 145, 240, 519, 548, 177, 549, 272, 520, + 18, 576, 50, 209, 550, 577, 82, 578, 114, 579, 304, 521, 241, + 551, 146, 580, 178, 581, 273, 552, 336, 522, 210, 582, 19, 608, + 51, 609, 83, 610, 115, 305, 553, 611, 242, 583, 147, 368, 523, + 612, 179, 613, 274, 584, 337, 554, 211, 614, 20, 400, 524, 640, + 52, 641, 84, 642, 306, 585, 116, 643, 243, 369, 555, 615, 148, + 644, 338, 586, 180, 275, 432, 525, 616, 645, 401, 556, 212, 646, + 21, 672, 53, 307, 617, 673, 85, 370, 587, 674, 244, 647, 117, + 675, 464, 526, 149, 676, 433, 557, 339, 618, 276, 648, 181, 677, + 402, 588, 213, 678, 308, 496, 527, 649, 371, 619, 22, 704, 54, + 465, 558, 705, 86, 706, 245, 679, 118, 434, 589, 707, 150, 340, + 650, 708, 277, 403, 620, 680, 182, 709, 528, 497, 559, 214, 466, + 590, 710, 372, 651, 309, 681, 23, 736, 55, 435, 621, 737, 87, + 246, 711, 738, 119, 739, 341, 682, 404, 652, 151, 529, 560, 740, + 278, 712, 498, 591, 183, 741, 467, 622, 373, 683, 215, 310, 713, + 742, 436, 653, 24, 768, 56, 769, 247, 561, 743, 88, 530, 592, + 770, 342, 714, 120, 405, 684, 771, 499, 623, 152, 772, 279, 744, + 468, 654, 184, 773, 374, 715, 311, 437, 685, 745, 216, 774, 562, + 593, 531, 624, 25, 248, 500, 655, 775, 800, 57, 801, 406, 716, + 89, 343, 746, 802, 121, 803, 469, 686, 280, 776, 153, 804, 594, + 185, 375, 563, 625, 747, 805, 438, 717, 532, 656, 312, 777, 217, + 806, 501, 687, 407, 748, 249, 807, 26, 344, 778, 832, 58, 833, + 90, 470, 718, 834, 122, 595, 626, 835, 281, 564, 657, 808, 154, + 836, 376, 533, 688, 779, 439, 749, 186, 837, 313, 809, 502, 719, + 218, 838, 408, 780, 627, 596, 658, 250, 345, 471, 750, 810, 839, + 27, 864, 59, 565, 689, 865, 91, 866, 123, 867, 282, 534, 720, + 840, 155, 440, 781, 868, 377, 811, 187, 503, 751, 869, 314, 841, + 628, 659, 219, 597, 690, 870, 409, 812, 472, 782, 566, 721, 346, + 842, 251, 871, 28, 896, 60, 535, 752, 897, 92, 898, 124, 283, + 872, 899, 441, 813, 378, 843, 156, 660, 900, 504, 783, 629, 691, + 598, 722, 188, 901, 315, 873, 567, 753, 220, 410, 844, 902, 473, + 814, 347, 874, 536, 784, 252, 903, 29, 661, 692, 928, 61, 929, + 93, 442, 630, 723, 845, 930, 284, 904, 125, 379, 505, 815, 875, + 931, 599, 754, 157, 932, 316, 568, 785, 905, 189, 933, 474, 846, + 411, 876, 221, 934, 537, 816, 693, 348, 662, 724, 906, 253, 631, + 755, 935, 443, 877, 30, 600, 786, 960, 62, 506, 847, 961, 94, + 962, 285, 380, 907, 936, 126, 963, 569, 817, 158, 964, 317, 937, + 190, 475, 694, 725, 878, 965, 412, 908, 663, 756, 538, 848, 222, + 966, 632, 787, 349, 938, 254, 601, 818, 967, 444, 909, 507, 879, + 31, 992, 63, 381, 939, 993, 286, 968, 95, 570, 849, 994, 726, + 127, 695, 757, 995, 664, 788, 159, 996, 476, 910, 318, 969, 413, + 539, 880, 940, 191, 633, 819, 997, 223, 998, 350, 602, 850, 970, + 508, 911, 255, 445, 941, 999, 727, 758, 696, 789, 571, 881, 382, + 971, 287, 665, 820, 1000, 477, 634, 851, 942, 540, 912, 319, 1001, + 414, 972, 603, 882, 759, 728, 790, 351, 1002, 509, 697, 821, 943, + 446, 973, 572, 913, 666, 852, 383, 1003, 635, 883, 478, 974, 541, + 944, 415, 760, 791, 1004, 604, 914, 729, 822, 698, 853, 510, 975, + 667, 884, 447, 573, 945, 1005, 636, 915, 792, 761, 823, 542, 976, + 479, 1006, 730, 854, 605, 946, 699, 885, 668, 916, 511, 1007, 574, + 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978, + 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979, + 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764, + 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671, + 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952, + 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015, + 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986, + 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019, + 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023, }; #endif // CONFIG_EXT_TX @@ -835,1528 +706,1247 @@ DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32[1024]) = { // in {top, left} order for each position in corresponding scan order. DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 0, 1, 4, 4, 5, 5, 1, - 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, - 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, + 0, 0, 0, 0, 4, 0, 1, 4, 4, 5, 5, 1, 8, 8, 5, 8, 2, + 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 8, 8, 0, 0, 1, 4, 5, 8, 9, 12, 1, 1, 2, 5, 6, 9, 10, 13, - 2, 2, 3, 6, 7, 10, 11, 14, 0, 0, + 0, 0, 0, 0, 4, 4, 8, 8, 0, 0, 1, 4, 5, 8, 9, 12, 1, + 1, 2, 5, 6, 9, 10, 13, 2, 2, 3, 6, 7, 10, 11, 14, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, 4, 5, 8, 6, 9, 7, 10, 8, - 8, 9, 12, 10, 13, 11, 14, 0, 0, + 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, + 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 0, 0, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 4, 0, 8, 8, 1, 4, - 5, 8, 5, 1, 9, 12, 2, 5, 6, 9, 6, 2, - 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, + 0, 0, 0, 0, 4, 4, 4, 0, 8, 8, 1, 4, 5, 8, 5, 1, 9, + 12, 2, 5, 6, 9, 6, 2, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 1, 1, 1, 1, 4, 2, 2, - 2, 5, 4, 5, 5, 8, 3, 6, 8, 9, 6, 9, - 9, 12, 7, 10, 10, 13, 11, 14, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 4, 2, 2, 2, 5, 4, 5, 5, + 8, 3, 6, 8, 9, 6, 9, 9, 12, 7, 10, 10, 13, 11, 14, 0, 0, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 4, - 1, 1, 4, 4, 2, 5, 5, 8, - 6, 9, 2, 2, 8, 8, 3, 6, - 9, 12, 7, 10, 10, 13, 12, 12, - 13, 16, 11, 14, 14, 17, 15, 18, - 16, 16, 17, 20, 18, 21, 19, 22, - 20, 20, 21, 24, 22, 25, 23, 26, - 24, 24, 25, 28, 26, 29, 27, 30, - 0, 0 + 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6, + 9, 2, 2, 8, 8, 3, 6, 9, 12, 7, 10, 10, 13, 12, 12, 13, 16, + 11, 14, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, + 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0 }; DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 8, 8, - 12, 12, 16, 16, 20, 20, 24, 24, - 0, 0, 1, 4, 5, 8, 9, 12, - 13, 16, 17, 20, 21, 24, 25, 28, - 1, 1, 2, 5, 6, 9, 10, 13, - 14, 17, 18, 21, 22, 25, 26, 29, - 2, 2, 3, 6, 7, 10, 11, 14, - 15, 18, 19, 22, 23, 26, 27, 30, - 0, 0 + 0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 0, + 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 1, 1, + 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 2, 2, 3, + 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 0, 0 }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, - 0, 0, 1, 4, 2, 5, 3, 6, - 4, 4, 5, 8, 6, 9, 7, 10, - 8, 8, 9, 12, 10, 13, 11, 14, - 12, 12, 13, 16, 14, 17, 15, 18, - 16, 16, 17, 20, 18, 21, 19, 22, - 20, 20, 21, 24, 22, 25, 23, 26, - 24, 24, 25, 28, 26, 29, 27, 30, - 0, 0 + 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, + 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 12, 12, + 13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, + 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0 }; DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 8, - 1, 1, 8, 8, 2, 9, 9, 16, - 10, 17, 2, 2, 16, 16, 3, 10, - 17, 24, 11, 18, 18, 25, 3, 3, - 4, 11, 19, 26, 12, 19, 4, 4, - 20, 27, 5, 12, 13, 20, 21, 28, - 5, 5, 6, 13, 14, 21, 22, 29, - 6, 6, 7, 14, 15, 22, 23, 30, - 0, 0 + 0, 0, 0, 0, 0, 0, 1, 8, 1, 1, 8, 8, 2, 9, 9, 16, 10, + 17, 2, 2, 16, 16, 3, 10, 17, 24, 11, 18, 18, 25, 3, 3, 4, 11, + 19, 26, 12, 19, 4, 4, 20, 27, 5, 12, 13, 20, 21, 28, 5, 5, 6, + 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0 }; DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 16, 16, - 0, 0, 1, 8, 9, 16, 17, 24, - 1, 1, 2, 9, 10, 17, 18, 25, - 2, 2, 3, 10, 11, 18, 19, 26, - 3, 3, 4, 11, 12, 19, 20, 27, - 4, 4, 5, 12, 13, 20, 21, 28, - 5, 5, 6, 13, 14, 21, 22, 29, - 6, 6, 7, 14, 15, 22, 23, 30, - 0, 0 + 0, 0, 0, 0, 8, 8, 16, 16, 0, 0, 1, 8, 9, 16, 17, 24, 1, + 1, 2, 9, 10, 17, 18, 25, 2, 2, 3, 10, 11, 18, 19, 26, 3, 3, + 4, 11, 12, 19, 20, 27, 4, 4, 5, 12, 13, 20, 21, 28, 5, 5, 6, + 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0 }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, - 3, 3, 4, 4, 5, 5, 6, 6, - 0, 0, 1, 8, 2, 9, 3, 10, - 4, 11, 5, 12, 6, 13, 7, 14, - 8, 8, 9, 16, 10, 17, 11, 18, - 12, 19, 13, 20, 14, 21, 15, 22, - 16, 16, 17, 24, 18, 25, 19, 26, - 20, 27, 21, 28, 22, 29, 23, 30, - 0, 0 + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, + 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, + 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, + 24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, 23, 30, 0, 0 }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 8, 0, 16, 16, 1, 8, - 24, 24, 9, 16, 9, 1, 32, 32, 17, 24, 2, 9, - 25, 32, 10, 17, 40, 40, 10, 2, 18, 25, 33, 40, - 3, 10, 48, 48, 11, 18, 26, 33, 11, 3, 41, 48, - 19, 26, 34, 41, 4, 11, 27, 34, 12, 19, 49, 56, - 42, 49, 20, 27, 12, 4, 35, 42, 5, 12, 28, 35, - 50, 57, 43, 50, 13, 20, 36, 43, 13, 5, 21, 28, - 51, 58, 29, 36, 6, 13, 44, 51, 14, 21, 14, 6, - 37, 44, 52, 59, 22, 29, 7, 14, 30, 37, 45, 52, - 15, 22, 38, 45, 23, 30, 53, 60, 31, 38, 46, 53, - 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, + 0, 0, 0, 0, 8, 8, 8, 0, 16, 16, 1, 8, 24, 24, 9, 16, 9, 1, 32, + 32, 17, 24, 2, 9, 25, 32, 10, 17, 40, 40, 10, 2, 18, 25, 33, 40, 3, 10, + 48, 48, 11, 18, 26, 33, 11, 3, 41, 48, 19, 26, 34, 41, 4, 11, 27, 34, 12, + 19, 49, 56, 42, 49, 20, 27, 12, 4, 35, 42, 5, 12, 28, 35, 50, 57, 43, 50, + 13, 20, 36, 43, 13, 5, 21, 28, 51, 58, 29, 36, 6, 13, 44, 51, 14, 21, 14, + 6, 37, 44, 52, 59, 22, 29, 7, 14, 30, 37, 45, 52, 15, 22, 38, 45, 23, 30, + 53, 60, 31, 38, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0, 0, 1, 8, 9, - 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 1, 1, 2, 9, 10, 17, 18, 25, - 26, 33, 34, 41, 42, 49, 50, 57, 2, 2, 3, 10, 11, 18, 19, 26, 27, 34, 35, - 42, 43, 50, 51, 58, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, 44, 51, - 52, 59, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, 60, 5, - 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 6, 6, 7, 14, - 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 0, 0, + 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0, 0, 1, + 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 1, 1, 2, 9, 10, 17, + 18, 25, 26, 33, 34, 41, 42, 49, 50, 57, 2, 2, 3, 10, 11, 18, 19, 26, 27, + 34, 35, 42, 43, 50, 51, 58, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, + 44, 51, 52, 59, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, + 60, 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 6, 6, + 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, 0, 1, 8, 2, 9, 3, 10, - 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, - 14, 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, - 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, 36, 30, 37, 31, 38, - 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, 37, 44, 38, 45, 39, 46, 40, 40, - 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48, 49, 56, - 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0, 0, + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, 0, 1, + 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, 9, 16, 10, 17, + 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, + 27, 21, 28, 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, 36, + 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, 37, 44, 38, 45, 39, + 46, 40, 40, 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48, + 49, 56, 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0, 0, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 1, 1, 8, 2, 2, - 8, 9, 2, 9, 3, 3, 9, 16, 3, 10, 16, 17, - 4, 4, 10, 17, 17, 24, 4, 11, 11, 18, 18, 25, - 24, 25, 5, 5, 5, 12, 12, 19, 25, 32, 19, 26, - 6, 6, 26, 33, 32, 33, 13, 20, 20, 27, 33, 40, - 6, 13, 27, 34, 40, 41, 34, 41, 21, 28, 28, 35, - 41, 48, 14, 21, 35, 42, 7, 14, 48, 49, 29, 36, - 42, 49, 36, 43, 22, 29, 49, 56, 15, 22, 43, 50, - 50, 57, 37, 44, 30, 37, 44, 51, 23, 30, 51, 58, - 45, 52, 38, 45, 52, 59, 31, 38, 53, 60, 39, 46, - 46, 53, 47, 54, 54, 61, 55, 62, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 1, 1, 8, 2, 2, 8, 9, 2, 9, 3, 3, 9, + 16, 3, 10, 16, 17, 4, 4, 10, 17, 17, 24, 4, 11, 11, 18, 18, 25, 24, 25, + 5, 5, 5, 12, 12, 19, 25, 32, 19, 26, 6, 6, 26, 33, 32, 33, 13, 20, 20, + 27, 33, 40, 6, 13, 27, 34, 40, 41, 34, 41, 21, 28, 28, 35, 41, 48, 14, 21, + 35, 42, 7, 14, 48, 49, 29, 36, 42, 49, 36, 43, 22, 29, 49, 56, 15, 22, 43, + 50, 50, 57, 37, 44, 30, 37, 44, 51, 23, 30, 51, 58, 45, 52, 38, 45, 52, 59, + 31, 38, 53, 60, 39, 46, 46, 53, 47, 54, 54, 61, 55, 62, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 0, 8, 8, 1, 8, 9, 1, - 9, 16, 16, 17, 2, 9, 10, 2, 10, 17, 17, 24, - 24, 25, 3, 10, 11, 3, 18, 25, 25, 32, 11, 18, - 32, 33, 4, 11, 26, 33, 19, 26, 12, 4, 33, 40, - 12, 19, 40, 41, 5, 12, 27, 34, 34, 41, 20, 27, - 13, 20, 13, 5, 41, 48, 48, 49, 28, 35, 35, 42, - 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, 43, - 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, - 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, - 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, - 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, + 0, 0, 0, 0, 8, 0, 8, 8, 1, 8, 9, 1, 9, 16, 16, 17, 2, 9, 10, + 2, 10, 17, 17, 24, 24, 25, 3, 10, 11, 3, 18, 25, 25, 32, 11, 18, 32, 33, + 4, 11, 26, 33, 19, 26, 12, 4, 33, 40, 12, 19, 40, 41, 5, 12, 27, 34, 34, + 41, 20, 27, 13, 20, 13, 5, 41, 48, 48, 49, 28, 35, 35, 42, 21, 28, 6, 6, + 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, + 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, + 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, 96, - 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, - 208, 224, 224, - 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81, 96, 97, 112, - 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, 193, 208, 209, - 224, 225, 240, - 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66, 81, 82, 97, 98, 113, - 114, 129, 130, 145, 146, 161, 162, 177, 178, 193, 194, 209, 210, - 225, 226, 241, - 2, 2, 3, 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114, - 115, 130, 131, 146, 147, 162, 163, 178, 179, 194, 195, 210, 211, - 226, 227, 242, - 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99, 100, 115, - 116, 131, 132, 147, 148, 163, 164, 179, 180, 195, 196, 211, 212, - 227, 228, 243, - 4, 4, 5, 20, 21, 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116, - 117, 132, 133, 148, 149, 164, 165, 180, 181, 196, 197, 212, 213, - 228, 229, 244, - 5, 5, 6, 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, - 118, 133, 134, 149, 150, 165, 166, 181, 182, 197, 198, 213, 214, - 229, 230, 245, - 6, 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, - 119, 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, - 230, 231, 246, - 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, 104, 119, - 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, 216, - 231, 232, 247, - 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88, 89, 104, 105, 120, - 121, 136, 137, 152, 153, 168, 169, 184, 185, 200, 201, 216, 217, - 232, 233, 248, - 9, 9, 10, 25, 26, 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121, - 122, 137, 138, 153, 154, 169, 170, 185, 186, 201, 202, 217, 218, - 233, 234, 249, - 10, 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122, - 123, 138, 139, 154, 155, 170, 171, 186, 187, 202, 203, 218, 219, - 234, 235, 250, - 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91, 92, 107, 108, 123, - 124, 139, 140, 155, 156, 171, 172, 187, 188, 203, 204, 219, 220, - 235, 236, 251, - 12, 12, 13, 28, 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124, - 125, 140, 141, 156, 157, 172, 173, 188, 189, 204, 205, 220, 221, - 236, 237, 252, - 13, 13, 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, - 126, 141, 142, 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, - 237, 238, 253, - 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, 126, - 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, - 238, 239, 254, - 0, 0, + 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, + 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208, + 224, 224, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81, + 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, 193, 208, + 209, 224, 225, 240, 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66, + 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161, 162, 177, 178, 193, + 194, 209, 210, 225, 226, 241, 2, 2, 3, 18, 19, 34, 35, 50, 51, + 66, 67, 82, 83, 98, 99, 114, 115, 130, 131, 146, 147, 162, 163, 178, + 179, 194, 195, 210, 211, 226, 227, 242, 3, 3, 4, 19, 20, 35, 36, + 51, 52, 67, 68, 83, 84, 99, 100, 115, 116, 131, 132, 147, 148, 163, + 164, 179, 180, 195, 196, 211, 212, 227, 228, 243, 4, 4, 5, 20, 21, + 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116, 117, 132, 133, 148, + 149, 164, 165, 180, 181, 196, 197, 212, 213, 228, 229, 244, 5, 5, 6, + 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133, + 134, 149, 150, 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 6, + 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, + 119, 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231, + 246, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, + 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, 216, + 231, 232, 247, 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88, + 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184, 185, 200, 201, + 216, 217, 232, 233, 248, 9, 9, 10, 25, 26, 41, 42, 57, 58, 73, + 74, 89, 90, 105, 106, 121, 122, 137, 138, 153, 154, 169, 170, 185, 186, + 201, 202, 217, 218, 233, 234, 249, 10, 10, 11, 26, 27, 42, 43, 58, + 59, 74, 75, 90, 91, 106, 107, 122, 123, 138, 139, 154, 155, 170, 171, + 186, 187, 202, 203, 218, 219, 234, 235, 250, 11, 11, 12, 27, 28, 43, + 44, 59, 60, 75, 76, 91, 92, 107, 108, 123, 124, 139, 140, 155, 156, + 171, 172, 187, 188, 203, 204, 219, 220, 235, 236, 251, 12, 12, 13, 28, + 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141, + 156, 157, 172, 173, 188, 189, 204, 205, 220, 221, 236, 237, 252, 13, 13, + 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126, + 141, 142, 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253, + 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, + 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238, + 239, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, - 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, - 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6, 21, 7, 22, - 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28, 14, 29, 15, 30, - 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, 36, 22, 37, 23, 38, - 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, 29, 44, 30, 45, 31, 46, - 32, 32, 33, 48, 34, 49, 35, 50, 36, 51, 37, 52, 38, 53, 39, 54, - 40, 55, 41, 56, 42, 57, 43, 58, 44, 59, 45, 60, 46, 61, 47, 62, - 48, 48, 49, 64, 50, 65, 51, 66, 52, 67, 53, 68, 54, 69, 55, 70, - 56, 71, 57, 72, 58, 73, 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, - 64, 64, 65, 80, 66, 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, - 72, 87, 73, 88, 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, - 80, 80, 81, 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, - 88, 103, 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, - 96, 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, - 103, 118, - 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, - 125, 111, 126, - 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, - 133, 119, 134, - 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126, - 141, 127, 142, - 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148, 134, - 149, 135, 150, - 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141, 156, 142, - 157, 143, 158, - 144, 144, 145, 160, 146, 161, 147, 162, 148, 163, 149, 164, 150, - 165, 151, 166, - 152, 167, 153, 168, 154, 169, 155, 170, 156, 171, 157, 172, 158, - 173, 159, 174, - 160, 160, 161, 176, 162, 177, 163, 178, 164, 179, 165, 180, 166, - 181, 167, 182, - 168, 183, 169, 184, 170, 185, 171, 186, 172, 187, 173, 188, 174, - 189, 175, 190, - 176, 176, 177, 192, 178, 193, 179, 194, 180, 195, 181, 196, 182, - 197, 183, 198, - 184, 199, 185, 200, 186, 201, 187, 202, 188, 203, 189, 204, 190, - 205, 191, 206, - 192, 192, 193, 208, 194, 209, 195, 210, 196, 211, 197, 212, 198, - 213, 199, 214, - 200, 215, 201, 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, - 221, 207, 222, - 208, 208, 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, - 229, 215, 230, - 216, 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, - 237, 223, 238, - 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, - 245, 231, 246, - 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, - 253, 239, 254, - 0, 0, + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6, + 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28, + 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, + 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, + 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36, + 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58, + 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51, + 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73, + 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66, + 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88, + 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81, + 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103, + 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96, + 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118, + 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111, + 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133, + 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126, + 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148, + 134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141, + 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163, + 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156, + 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178, + 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171, + 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193, + 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186, + 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208, + 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201, + 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208, + 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216, + 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238, + 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231, + 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253, + 239, 254, 0, 0, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 16, 0, 48, 48, - 1, 16, 64, 64, 17, 32, 80, 80, 33, 48, 17, 1, - 49, 64, 96, 96, 2, 17, 65, 80, 18, 33, 112, 112, - 34, 49, 81, 96, 18, 2, 50, 65, 128, 128, 3, 18, - 97, 112, 19, 34, 66, 81, 144, 144, 82, 97, 35, 50, - 113, 128, 19, 3, 51, 66, 160, 160, 4, 19, 98, 113, - 129, 144, 67, 82, 20, 35, 83, 98, 114, 129, 36, 51, - 176, 176, 20, 4, 145, 160, 52, 67, 99, 114, 5, 20, - 130, 145, 68, 83, 192, 192, 161, 176, 21, 36, 115, 130, - 84, 99, 37, 52, 146, 161, 208, 208, 53, 68, 21, 5, - 100, 115, 177, 192, 131, 146, 69, 84, 6, 21, 224, 224, - 116, 131, 22, 37, 162, 177, 85, 100, 147, 162, 38, 53, - 193, 208, 101, 116, 54, 69, 22, 6, 132, 147, 178, 193, - 70, 85, 163, 178, 209, 224, 7, 22, 117, 132, 23, 38, - 148, 163, 23, 7, 86, 101, 194, 209, 225, 240, 39, 54, - 179, 194, 102, 117, 133, 148, 55, 70, 164, 179, 8, 23, - 71, 86, 210, 225, 118, 133, 149, 164, 195, 210, 24, 39, - 87, 102, 40, 55, 56, 71, 134, 149, 180, 195, 226, 241, - 103, 118, 24, 8, 165, 180, 211, 226, 72, 87, 150, 165, - 9, 24, 119, 134, 25, 40, 88, 103, 196, 211, 41, 56, - 135, 150, 181, 196, 104, 119, 57, 72, 227, 242, 166, 181, - 120, 135, 151, 166, 197, 212, 73, 88, 25, 9, 212, 227, - 89, 104, 136, 151, 182, 197, 10, 25, 26, 41, 105, 120, - 167, 182, 228, 243, 152, 167, 42, 57, 121, 136, 213, 228, - 58, 73, 198, 213, 74, 89, 137, 152, 183, 198, 168, 183, - 26, 10, 90, 105, 229, 244, 11, 26, 106, 121, 214, 229, - 153, 168, 27, 42, 199, 214, 43, 58, 184, 199, 122, 137, - 169, 184, 230, 245, 59, 74, 27, 11, 75, 90, 138, 153, - 200, 215, 215, 230, 91, 106, 12, 27, 28, 43, 185, 200, - 107, 122, 154, 169, 44, 59, 231, 246, 216, 231, 60, 75, - 123, 138, 28, 12, 76, 91, 201, 216, 170, 185, 232, 247, - 139, 154, 92, 107, 13, 28, 108, 123, 29, 44, 186, 201, - 217, 232, 155, 170, 45, 60, 29, 13, 61, 76, 124, 139, - 14, 14, 233, 248, 77, 92, 14, 29, 171, 186, 140, 155, - 202, 217, 30, 45, 93, 108, 109, 124, 46, 61, 156, 171, - 62, 77, 187, 202, 15, 30, 125, 140, 218, 233, 78, 93, - 31, 46, 172, 187, 47, 62, 141, 156, 94, 109, 234, 249, - 203, 218, 63, 78, 110, 125, 188, 203, 157, 172, 126, 141, - 79, 94, 173, 188, 95, 110, 219, 234, 142, 157, 204, 219, - 235, 250, 111, 126, 158, 173, 127, 142, 189, 204, 220, 235, - 143, 158, 174, 189, 205, 220, 236, 251, 159, 174, 190, 205, - 221, 236, 175, 190, 237, 252, 206, 221, 222, 237, 191, 206, - 238, 253, 207, 222, 223, 238, 239, 254, 0, 0, + 0, 0, 0, 0, 16, 16, 32, 32, 16, 0, 48, 48, 1, 16, 64, + 64, 17, 32, 80, 80, 33, 48, 17, 1, 49, 64, 96, 96, 2, 17, + 65, 80, 18, 33, 112, 112, 34, 49, 81, 96, 18, 2, 50, 65, 128, + 128, 3, 18, 97, 112, 19, 34, 66, 81, 144, 144, 82, 97, 35, 50, + 113, 128, 19, 3, 51, 66, 160, 160, 4, 19, 98, 113, 129, 144, 67, + 82, 20, 35, 83, 98, 114, 129, 36, 51, 176, 176, 20, 4, 145, 160, + 52, 67, 99, 114, 5, 20, 130, 145, 68, 83, 192, 192, 161, 176, 21, + 36, 115, 130, 84, 99, 37, 52, 146, 161, 208, 208, 53, 68, 21, 5, + 100, 115, 177, 192, 131, 146, 69, 84, 6, 21, 224, 224, 116, 131, 22, + 37, 162, 177, 85, 100, 147, 162, 38, 53, 193, 208, 101, 116, 54, 69, + 22, 6, 132, 147, 178, 193, 70, 85, 163, 178, 209, 224, 7, 22, 117, + 132, 23, 38, 148, 163, 23, 7, 86, 101, 194, 209, 225, 240, 39, 54, + 179, 194, 102, 117, 133, 148, 55, 70, 164, 179, 8, 23, 71, 86, 210, + 225, 118, 133, 149, 164, 195, 210, 24, 39, 87, 102, 40, 55, 56, 71, + 134, 149, 180, 195, 226, 241, 103, 118, 24, 8, 165, 180, 211, 226, 72, + 87, 150, 165, 9, 24, 119, 134, 25, 40, 88, 103, 196, 211, 41, 56, + 135, 150, 181, 196, 104, 119, 57, 72, 227, 242, 166, 181, 120, 135, 151, + 166, 197, 212, 73, 88, 25, 9, 212, 227, 89, 104, 136, 151, 182, 197, + 10, 25, 26, 41, 105, 120, 167, 182, 228, 243, 152, 167, 42, 57, 121, + 136, 213, 228, 58, 73, 198, 213, 74, 89, 137, 152, 183, 198, 168, 183, + 26, 10, 90, 105, 229, 244, 11, 26, 106, 121, 214, 229, 153, 168, 27, + 42, 199, 214, 43, 58, 184, 199, 122, 137, 169, 184, 230, 245, 59, 74, + 27, 11, 75, 90, 138, 153, 200, 215, 215, 230, 91, 106, 12, 27, 28, + 43, 185, 200, 107, 122, 154, 169, 44, 59, 231, 246, 216, 231, 60, 75, + 123, 138, 28, 12, 76, 91, 201, 216, 170, 185, 232, 247, 139, 154, 92, + 107, 13, 28, 108, 123, 29, 44, 186, 201, 217, 232, 155, 170, 45, 60, + 29, 13, 61, 76, 124, 139, 14, 14, 233, 248, 77, 92, 14, 29, 171, + 186, 140, 155, 202, 217, 30, 45, 93, 108, 109, 124, 46, 61, 156, 171, + 62, 77, 187, 202, 15, 30, 125, 140, 218, 233, 78, 93, 31, 46, 172, + 187, 47, 62, 141, 156, 94, 109, 234, 249, 203, 218, 63, 78, 110, 125, + 188, 203, 157, 172, 126, 141, 79, 94, 173, 188, 95, 110, 219, 234, 142, + 157, 204, 219, 235, 250, 111, 126, 158, 173, 127, 142, 189, 204, 220, 235, + 143, 158, 174, 189, 205, 220, 236, 251, 159, 174, 190, 205, 221, 236, 175, + 190, 237, 252, 206, 221, 222, 237, 191, 206, 238, 253, 207, 222, 223, 238, + 239, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 16, - 3, 3, 2, 17, 16, 17, 4, 4, 17, 32, 3, 18, - 5, 5, 18, 33, 32, 33, 4, 19, 33, 48, 6, 6, - 19, 34, 5, 20, 34, 49, 48, 49, 7, 7, 20, 35, - 49, 64, 6, 21, 35, 50, 21, 36, 64, 65, 8, 8, - 50, 65, 36, 51, 7, 22, 22, 37, 65, 80, 51, 66, - 9, 9, 37, 52, 8, 23, 66, 81, 52, 67, 80, 81, - 23, 38, 10, 10, 38, 53, 67, 82, 81, 96, 53, 68, - 9, 24, 82, 97, 68, 83, 24, 39, 96, 97, 39, 54, - 11, 11, 54, 69, 83, 98, 97, 112, 69, 84, 10, 25, - 25, 40, 40, 55, 98, 113, 84, 99, 12, 12, 55, 70, - 112, 113, 70, 85, 11, 26, 99, 114, 85, 100, 113, 128, - 26, 41, 41, 56, 56, 71, 100, 115, 13, 13, 71, 86, - 114, 129, 86, 101, 128, 129, 57, 72, 115, 130, 101, 116, - 12, 27, 42, 57, 14, 14, 72, 87, 27, 42, 129, 144, - 87, 102, 116, 131, 130, 145, 102, 117, 58, 73, 144, 145, - 73, 88, 117, 132, 88, 103, 13, 28, 43, 58, 131, 146, - 103, 118, 28, 43, 145, 160, 132, 147, 74, 89, 89, 104, - 118, 133, 146, 161, 104, 119, 160, 161, 59, 74, 119, 134, - 133, 148, 14, 29, 44, 59, 147, 162, 161, 176, 29, 44, - 105, 120, 75, 90, 90, 105, 148, 163, 162, 177, 134, 149, - 176, 177, 120, 135, 149, 164, 163, 178, 15, 30, 135, 150, - 177, 192, 60, 75, 106, 121, 45, 60, 121, 136, 178, 193, - 91, 106, 136, 151, 164, 179, 192, 193, 30, 45, 150, 165, - 151, 166, 179, 194, 76, 91, 165, 180, 122, 137, 193, 208, - 107, 122, 137, 152, 208, 209, 180, 195, 61, 76, 152, 167, - 194, 209, 166, 181, 224, 224, 92, 107, 181, 196, 46, 61, - 138, 153, 209, 224, 167, 182, 153, 168, 195, 210, 31, 46, - 123, 138, 77, 92, 168, 183, 210, 225, 196, 211, 225, 240, - 182, 197, 154, 169, 108, 123, 139, 154, 183, 198, 62, 77, - 197, 212, 169, 184, 93, 108, 211, 226, 184, 199, 47, 62, - 212, 227, 226, 241, 124, 139, 198, 213, 155, 170, 170, 185, - 140, 155, 213, 228, 227, 242, 109, 124, 78, 93, 185, 200, - 228, 243, 199, 214, 200, 215, 214, 229, 125, 140, 171, 186, - 186, 201, 63, 78, 156, 171, 94, 109, 141, 156, 229, 244, - 201, 216, 215, 230, 79, 94, 230, 245, 216, 231, 110, 125, - 187, 202, 231, 246, 217, 232, 157, 172, 202, 217, 126, 141, - 95, 110, 142, 157, 172, 187, 232, 247, 111, 126, 218, 233, - 203, 218, 233, 248, 173, 188, 188, 203, 127, 142, 158, 173, - 143, 158, 234, 249, 219, 234, 189, 204, 204, 219, 159, 174, - 174, 189, 235, 250, 205, 220, 175, 190, 190, 205, 220, 235, - 191, 206, 221, 236, 236, 251, 206, 221, 237, 252, 207, 222, - 222, 237, 223, 238, 238, 253, 239, 254, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 16, 3, 3, 2, + 17, 16, 17, 4, 4, 17, 32, 3, 18, 5, 5, 18, 33, 32, 33, + 4, 19, 33, 48, 6, 6, 19, 34, 5, 20, 34, 49, 48, 49, 7, + 7, 20, 35, 49, 64, 6, 21, 35, 50, 21, 36, 64, 65, 8, 8, + 50, 65, 36, 51, 7, 22, 22, 37, 65, 80, 51, 66, 9, 9, 37, + 52, 8, 23, 66, 81, 52, 67, 80, 81, 23, 38, 10, 10, 38, 53, + 67, 82, 81, 96, 53, 68, 9, 24, 82, 97, 68, 83, 24, 39, 96, + 97, 39, 54, 11, 11, 54, 69, 83, 98, 97, 112, 69, 84, 10, 25, + 25, 40, 40, 55, 98, 113, 84, 99, 12, 12, 55, 70, 112, 113, 70, + 85, 11, 26, 99, 114, 85, 100, 113, 128, 26, 41, 41, 56, 56, 71, + 100, 115, 13, 13, 71, 86, 114, 129, 86, 101, 128, 129, 57, 72, 115, + 130, 101, 116, 12, 27, 42, 57, 14, 14, 72, 87, 27, 42, 129, 144, + 87, 102, 116, 131, 130, 145, 102, 117, 58, 73, 144, 145, 73, 88, 117, + 132, 88, 103, 13, 28, 43, 58, 131, 146, 103, 118, 28, 43, 145, 160, + 132, 147, 74, 89, 89, 104, 118, 133, 146, 161, 104, 119, 160, 161, 59, + 74, 119, 134, 133, 148, 14, 29, 44, 59, 147, 162, 161, 176, 29, 44, + 105, 120, 75, 90, 90, 105, 148, 163, 162, 177, 134, 149, 176, 177, 120, + 135, 149, 164, 163, 178, 15, 30, 135, 150, 177, 192, 60, 75, 106, 121, + 45, 60, 121, 136, 178, 193, 91, 106, 136, 151, 164, 179, 192, 193, 30, + 45, 150, 165, 151, 166, 179, 194, 76, 91, 165, 180, 122, 137, 193, 208, + 107, 122, 137, 152, 208, 209, 180, 195, 61, 76, 152, 167, 194, 209, 166, + 181, 224, 224, 92, 107, 181, 196, 46, 61, 138, 153, 209, 224, 167, 182, + 153, 168, 195, 210, 31, 46, 123, 138, 77, 92, 168, 183, 210, 225, 196, + 211, 225, 240, 182, 197, 154, 169, 108, 123, 139, 154, 183, 198, 62, 77, + 197, 212, 169, 184, 93, 108, 211, 226, 184, 199, 47, 62, 212, 227, 226, + 241, 124, 139, 198, 213, 155, 170, 170, 185, 140, 155, 213, 228, 227, 242, + 109, 124, 78, 93, 185, 200, 228, 243, 199, 214, 200, 215, 214, 229, 125, + 140, 171, 186, 186, 201, 63, 78, 156, 171, 94, 109, 141, 156, 229, 244, + 201, 216, 215, 230, 79, 94, 230, 245, 216, 231, 110, 125, 187, 202, 231, + 246, 217, 232, 157, 172, 202, 217, 126, 141, 95, 110, 142, 157, 172, 187, + 232, 247, 111, 126, 218, 233, 203, 218, 233, 248, 173, 188, 188, 203, 127, + 142, 158, 173, 143, 158, 234, 249, 219, 234, 189, 204, 204, 219, 159, 174, + 174, 189, 235, 250, 205, 220, 175, 190, 190, 205, 220, 235, 191, 206, 221, + 236, 236, 251, 206, 221, 237, 252, 207, 222, 222, 237, 223, 238, 238, 253, + 239, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 0, 16, 16, 1, 16, 17, 1, - 32, 32, 17, 32, 2, 17, 18, 2, 48, 48, 18, 33, - 33, 48, 3, 18, 49, 64, 64, 65, 34, 49, 19, 3, - 19, 34, 50, 65, 4, 19, 65, 80, 80, 81, 35, 50, - 20, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 97, - 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, - 21, 5, 52, 67, 112, 113, 37, 52, 6, 21, 83, 98, - 98, 113, 68, 83, 22, 6, 113, 128, 22, 37, 53, 68, - 84, 99, 99, 114, 128, 129, 114, 129, 69, 84, 38, 53, - 7, 22, 23, 7, 129, 144, 23, 38, 54, 69, 100, 115, - 85, 100, 115, 130, 144, 145, 130, 145, 39, 54, 70, 85, - 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, - 24, 8, 86, 101, 131, 146, 160, 161, 146, 161, 71, 86, - 40, 55, 9, 24, 117, 132, 102, 117, 161, 176, 132, 147, - 56, 71, 87, 102, 25, 40, 147, 162, 25, 9, 176, 177, - 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, - 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, - 163, 178, 192, 193, 26, 10, 119, 134, 73, 88, 149, 164, - 104, 119, 134, 149, 42, 57, 178, 193, 164, 179, 11, 26, - 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, - 74, 89, 208, 209, 150, 165, 179, 194, 165, 180, 105, 120, - 194, 209, 43, 58, 27, 11, 136, 151, 90, 105, 151, 166, - 180, 195, 59, 74, 121, 136, 209, 224, 195, 210, 224, 225, - 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 28, 12, - 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, - 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, - 182, 197, 168, 183, 211, 226, 153, 168, 226, 241, 60, 75, - 197, 212, 138, 153, 29, 44, 76, 91, 29, 13, 183, 198, - 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, - 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 30, 14, - 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, - 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, - 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, - 62, 77, 140, 155, 215, 230, 31, 46, 171, 186, 186, 201, - 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231, - 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, - 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, - 157, 172, 126, 141, 203, 218, 95, 110, 233, 248, 218, 233, - 142, 157, 111, 126, 173, 188, 188, 203, 234, 249, 219, 234, - 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250, - 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, - 190, 205, 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, - 207, 222, 238, 253, 223, 238, 239, 254, 0, 0, + 0, 0, 0, 0, 16, 0, 16, 16, 1, 16, 17, 1, 32, 32, 17, + 32, 2, 17, 18, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, + 64, 65, 34, 49, 19, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, + 81, 35, 50, 20, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 97, + 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 21, 5, 52, + 67, 112, 113, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 22, 6, + 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 129, 114, 129, 69, + 84, 38, 53, 7, 22, 23, 7, 129, 144, 23, 38, 54, 69, 100, 115, + 85, 100, 115, 130, 144, 145, 130, 145, 39, 54, 70, 85, 8, 23, 55, + 70, 116, 131, 101, 116, 145, 160, 24, 39, 24, 8, 86, 101, 131, 146, + 160, 161, 146, 161, 71, 86, 40, 55, 9, 24, 117, 132, 102, 117, 161, + 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 25, 9, 176, 177, + 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148, + 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 193, 26, 10, + 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164, + 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, + 74, 89, 208, 209, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, + 58, 27, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, + 209, 224, 195, 210, 224, 225, 166, 181, 106, 121, 75, 90, 12, 27, 181, + 196, 28, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, + 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, + 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, + 76, 91, 29, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154, + 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 30, 14, + 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170, + 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215, + 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171, + 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231, + 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110, + 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218, + 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234, + 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250, + 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236, + 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238, + 239, 254, 0, 0, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, 192, 192, - 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 416, - 416, 448, 448, - 480, 480, 512, 512, 544, 544, 576, 576, 608, 608, 640, 640, 672, - 672, 704, 704, 736, 736, 768, 768, 800, 800, 832, 832, 864, 864, - 896, 896, 928, 928, 960, 960, - 0, 0, 1, 32, 33, 64, 65, 96, 97, 128, 129, 160, 161, 192, 193, - 224, 225, 256, 257, 288, 289, 320, 321, 352, 353, 384, 385, 416, - 417, 448, 449, 480, - 481, 512, 513, 544, 545, 576, 577, 608, 609, 640, 641, 672, 673, - 704, 705, 736, 737, 768, 769, 800, 801, 832, 833, 864, 865, 896, - 897, 928, 929, 960, 961, 992, - 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161, 162, 193, 194, - 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385, 386, 417, - 418, 449, 450, 481, - 482, 513, 514, 545, 546, 577, 578, 609, 610, 641, 642, 673, 674, - 705, 706, 737, 738, 769, 770, 801, 802, 833, 834, 865, 866, 897, - 898, 929, 930, 961, 962, 993, - 2, 2, 3, 34, 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, - 226, 227, 258, 259, 290, 291, 322, 323, 354, 355, 386, 387, 418, - 419, 450, 451, 482, - 483, 514, 515, 546, 547, 578, 579, 610, 611, 642, 643, 674, 675, - 706, 707, 738, 739, 770, 771, 802, 803, 834, 835, 866, 867, 898, - 899, 930, 931, 962, 963, 994, - 3, 3, 4, 35, 36, 67, 68, 99, 100, 131, 132, 163, 164, 195, 196, - 227, 228, 259, 260, 291, 292, 323, 324, 355, 356, 387, 388, 419, - 420, 451, 452, 483, - 484, 515, 516, 547, 548, 579, 580, 611, 612, 643, 644, 675, 676, - 707, 708, 739, 740, 771, 772, 803, 804, 835, 836, 867, 868, 899, - 900, 931, 932, 963, 964, 995, - 4, 4, 5, 36, 37, 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, - 228, 229, 260, 261, 292, 293, 324, 325, 356, 357, 388, 389, 420, - 421, 452, 453, 484, - 485, 516, 517, 548, 549, 580, 581, 612, 613, 644, 645, 676, 677, - 708, 709, 740, 741, 772, 773, 804, 805, 836, 837, 868, 869, 900, - 901, 932, 933, 964, 965, 996, - 5, 5, 6, 37, 38, 69, 70, 101, 102, 133, 134, 165, 166, 197, 198, - 229, 230, 261, 262, 293, 294, 325, 326, 357, 358, 389, 390, 421, - 422, 453, 454, 485, - 486, 517, 518, 549, 550, 581, 582, 613, 614, 645, 646, 677, 678, - 709, 710, 741, 742, 773, 774, 805, 806, 837, 838, 869, 870, 901, - 902, 933, 934, 965, 966, 997, - 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, 199, - 230, 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, - 423, 454, 455, 486, - 487, 518, 519, 550, 551, 582, 583, 614, 615, 646, 647, 678, 679, - 710, 711, 742, 743, 774, 775, 806, 807, 838, 839, 870, 871, 902, - 903, 934, 935, 966, 967, 998, - 7, 7, 8, 39, 40, 71, 72, 103, 104, 135, 136, 167, 168, 199, 200, - 231, 232, 263, 264, 295, 296, 327, 328, 359, 360, 391, 392, 423, - 424, 455, 456, 487, - 488, 519, 520, 551, 552, 583, 584, 615, 616, 647, 648, 679, 680, - 711, 712, 743, 744, 775, 776, 807, 808, 839, 840, 871, 872, 903, - 904, 935, 936, 967, 968, 999, - 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168, 169, 200, 201, - 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, 393, 424, - 425, 456, 457, 488, - 489, 520, 521, 552, 553, 584, 585, 616, 617, 648, 649, 680, 681, - 712, 713, 744, 745, 776, 777, 808, 809, 840, 841, 872, 873, 904, - 905, 936, 937, 968, 969, 1000, - 9, 9, 10, 41, 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, - 233, 234, 265, 266, 297, 298, 329, 330, 361, 362, 393, 394, 425, - 426, 457, 458, 489, - 490, 521, 522, 553, 554, 585, 586, 617, 618, 649, 650, 681, 682, - 713, 714, 745, 746, 777, 778, 809, 810, 841, 842, 873, 874, 905, - 906, 937, 938, 969, 970, 1001, - 10, 10, 11, 42, 43, 74, 75, 106, 107, 138, 139, 170, 171, 202, - 203, 234, 235, 266, 267, 298, 299, 330, 331, 362, 363, 394, 395, - 426, 427, 458, 459, 490, - 491, 522, 523, 554, 555, 586, 587, 618, 619, 650, 651, 682, 683, - 714, 715, 746, 747, 778, 779, 810, 811, 842, 843, 874, 875, 906, - 907, 938, 939, 970, 971, 1002, - 11, 11, 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, - 204, 235, 236, 267, 268, 299, 300, 331, 332, 363, 364, 395, 396, - 427, 428, 459, 460, 491, - 492, 523, 524, 555, 556, 587, 588, 619, 620, 651, 652, 683, 684, - 715, 716, 747, 748, 779, 780, 811, 812, 843, 844, 875, 876, 907, - 908, 939, 940, 971, 972, 1003, - 12, 12, 13, 44, 45, 76, 77, 108, 109, 140, 141, 172, 173, 204, - 205, 236, 237, 268, 269, 300, 301, 332, 333, 364, 365, 396, 397, - 428, 429, 460, 461, 492, - 493, 524, 525, 556, 557, 588, 589, 620, 621, 652, 653, 684, 685, - 716, 717, 748, 749, 780, 781, 812, 813, 844, 845, 876, 877, 908, - 909, 940, 941, 972, 973, 1004, - 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, - 206, 237, 238, 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, - 429, 430, 461, 462, 493, - 494, 525, 526, 557, 558, 589, 590, 621, 622, 653, 654, 685, 686, - 717, 718, 749, 750, 781, 782, 813, 814, 845, 846, 877, 878, 909, - 910, 941, 942, 973, 974, 1005, - 14, 14, 15, 46, 47, 78, 79, 110, 111, 142, 143, 174, 175, 206, - 207, 238, 239, 270, 271, 302, 303, 334, 335, 366, 367, 398, 399, - 430, 431, 462, 463, 494, - 495, 526, 527, 558, 559, 590, 591, 622, 623, 654, 655, 686, 687, - 718, 719, 750, 751, 782, 783, 814, 815, 846, 847, 878, 879, 910, - 911, 942, 943, 974, 975, 1006, - 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, 176, 207, - 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, 400, - 431, 432, 463, 464, 495, - 496, 527, 528, 559, 560, 591, 592, 623, 624, 655, 656, 687, 688, - 719, 720, 751, 752, 783, 784, 815, 816, 847, 848, 879, 880, 911, - 912, 943, 944, 975, 976, 1007, - 16, 16, 17, 48, 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, - 209, 240, 241, 272, 273, 304, 305, 336, 337, 368, 369, 400, 401, - 432, 433, 464, 465, 496, - 497, 528, 529, 560, 561, 592, 593, 624, 625, 656, 657, 688, 689, - 720, 721, 752, 753, 784, 785, 816, 817, 848, 849, 880, 881, 912, - 913, 944, 945, 976, 977, 1008, - 17, 17, 18, 49, 50, 81, 82, 113, 114, 145, 146, 177, 178, 209, - 210, 241, 242, 273, 274, 305, 306, 337, 338, 369, 370, 401, 402, - 433, 434, 465, 466, 497, - 498, 529, 530, 561, 562, 593, 594, 625, 626, 657, 658, 689, 690, - 721, 722, 753, 754, 785, 786, 817, 818, 849, 850, 881, 882, 913, - 914, 945, 946, 977, 978, 1009, - 18, 18, 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, - 211, 242, 243, 274, 275, 306, 307, 338, 339, 370, 371, 402, 403, - 434, 435, 466, 467, 498, - 499, 530, 531, 562, 563, 594, 595, 626, 627, 658, 659, 690, 691, - 722, 723, 754, 755, 786, 787, 818, 819, 850, 851, 882, 883, 914, - 915, 946, 947, 978, 979, 1010, - 19, 19, 20, 51, 52, 83, 84, 115, 116, 147, 148, 179, 180, 211, - 212, 243, 244, 275, 276, 307, 308, 339, 340, 371, 372, 403, 404, - 435, 436, 467, 468, 499, - 500, 531, 532, 563, 564, 595, 596, 627, 628, 659, 660, 691, 692, - 723, 724, 755, 756, 787, 788, 819, 820, 851, 852, 883, 884, 915, - 916, 947, 948, 979, 980, 1011, - 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, - 213, 244, 245, 276, 277, 308, 309, 340, 341, 372, 373, 404, 405, - 436, 437, 468, 469, 500, - 501, 532, 533, 564, 565, 596, 597, 628, 629, 660, 661, 692, 693, - 724, 725, 756, 757, 788, 789, 820, 821, 852, 853, 884, 885, 916, - 917, 948, 949, 980, 981, 1012, - 21, 21, 22, 53, 54, 85, 86, 117, 118, 149, 150, 181, 182, 213, - 214, 245, 246, 277, 278, 309, 310, 341, 342, 373, 374, 405, 406, - 437, 438, 469, 470, 501, - 502, 533, 534, 565, 566, 597, 598, 629, 630, 661, 662, 693, 694, - 725, 726, 757, 758, 789, 790, 821, 822, 853, 854, 885, 886, 917, - 918, 949, 950, 981, 982, 1013, - 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, 183, 214, - 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, 407, - 438, 439, 470, 471, 502, - 503, 534, 535, 566, 567, 598, 599, 630, 631, 662, 663, 694, 695, - 726, 727, 758, 759, 790, 791, 822, 823, 854, 855, 886, 887, 918, - 919, 950, 951, 982, 983, 1014, - 23, 23, 24, 55, 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, - 216, 247, 248, 279, 280, 311, 312, 343, 344, 375, 376, 407, 408, - 439, 440, 471, 472, 503, - 504, 535, 536, 567, 568, 599, 600, 631, 632, 663, 664, 695, 696, - 727, 728, 759, 760, 791, 792, 823, 824, 855, 856, 887, 888, 919, - 920, 951, 952, 983, 984, 1015, - 24, 24, 25, 56, 57, 88, 89, 120, 121, 152, 153, 184, 185, 216, - 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, 377, 408, 409, - 440, 441, 472, 473, 504, - 505, 536, 537, 568, 569, 600, 601, 632, 633, 664, 665, 696, 697, - 728, 729, 760, 761, 792, 793, 824, 825, 856, 857, 888, 889, 920, - 921, 952, 953, 984, 985, 1016, - 25, 25, 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, - 218, 249, 250, 281, 282, 313, 314, 345, 346, 377, 378, 409, 410, - 441, 442, 473, 474, 505, - 506, 537, 538, 569, 570, 601, 602, 633, 634, 665, 666, 697, 698, - 729, 730, 761, 762, 793, 794, 825, 826, 857, 858, 889, 890, 921, - 922, 953, 954, 985, 986, 1017, - 26, 26, 27, 58, 59, 90, 91, 122, 123, 154, 155, 186, 187, 218, - 219, 250, 251, 282, 283, 314, 315, 346, 347, 378, 379, 410, 411, - 442, 443, 474, 475, 506, - 507, 538, 539, 570, 571, 602, 603, 634, 635, 666, 667, 698, 699, - 730, 731, 762, 763, 794, 795, 826, 827, 858, 859, 890, 891, 922, - 923, 954, 955, 986, 987, 1018, - 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, - 220, 251, 252, 283, 284, 315, 316, 347, 348, 379, 380, 411, 412, - 443, 444, 475, 476, 507, - 508, 539, 540, 571, 572, 603, 604, 635, 636, 667, 668, 699, 700, - 731, 732, 763, 764, 795, 796, 827, 828, 859, 860, 891, 892, 923, - 924, 955, 956, 987, 988, 1019, - 28, 28, 29, 60, 61, 92, 93, 124, 125, 156, 157, 188, 189, 220, - 221, 252, 253, 284, 285, 316, 317, 348, 349, 380, 381, 412, 413, - 444, 445, 476, 477, 508, - 509, 540, 541, 572, 573, 604, 605, 636, 637, 668, 669, 700, 701, - 732, 733, 764, 765, 796, 797, 828, 829, 860, 861, 892, 893, 924, - 925, 956, 957, 988, 989, 1020, - 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, 190, 221, - 222, 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, 414, - 445, 446, 477, 478, 509, - 510, 541, 542, 573, 574, 605, 606, 637, 638, 669, 670, 701, 702, - 733, 734, 765, 766, 797, 798, 829, 830, 861, 862, 893, 894, 925, - 926, 957, 958, 989, 990, 1021, - 30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, - 223, 254, 255, 286, 287, 318, 319, 350, 351, 382, 383, 414, 415, - 446, 447, 478, 479, 510, - 511, 542, 543, 574, 575, 606, 607, 638, 639, 670, 671, 702, 703, - 734, 735, 766, 767, 798, 799, 830, 831, 862, 863, 894, 895, 926, - 927, 958, 959, 990, 991, 1022, - 0, 0, + 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, + 192, 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, + 416, 416, 448, 448, 480, 480, 512, 512, 544, 544, 576, 576, 608, 608, + 640, 640, 672, 672, 704, 704, 736, 736, 768, 768, 800, 800, 832, 832, + 864, 864, 896, 896, 928, 928, 960, 960, 0, 0, 1, 32, 33, 64, + 65, 96, 97, 128, 129, 160, 161, 192, 193, 224, 225, 256, 257, 288, + 289, 320, 321, 352, 353, 384, 385, 416, 417, 448, 449, 480, 481, 512, + 513, 544, 545, 576, 577, 608, 609, 640, 641, 672, 673, 704, 705, 736, + 737, 768, 769, 800, 801, 832, 833, 864, 865, 896, 897, 928, 929, 960, + 961, 992, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161, + 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385, + 386, 417, 418, 449, 450, 481, 482, 513, 514, 545, 546, 577, 578, 609, + 610, 641, 642, 673, 674, 705, 706, 737, 738, 769, 770, 801, 802, 833, + 834, 865, 866, 897, 898, 929, 930, 961, 962, 993, 2, 2, 3, 34, + 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, 226, 227, 258, + 259, 290, 291, 322, 323, 354, 355, 386, 387, 418, 419, 450, 451, 482, + 483, 514, 515, 546, 547, 578, 579, 610, 611, 642, 643, 674, 675, 706, + 707, 738, 739, 770, 771, 802, 803, 834, 835, 866, 867, 898, 899, 930, + 931, 962, 963, 994, 3, 3, 4, 35, 36, 67, 68, 99, 100, 131, + 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323, 324, 355, + 356, 387, 388, 419, 420, 451, 452, 483, 484, 515, 516, 547, 548, 579, + 580, 611, 612, 643, 644, 675, 676, 707, 708, 739, 740, 771, 772, 803, + 804, 835, 836, 867, 868, 899, 900, 931, 932, 963, 964, 995, 4, 4, + 5, 36, 37, 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228, + 229, 260, 261, 292, 293, 324, 325, 356, 357, 388, 389, 420, 421, 452, + 453, 484, 485, 516, 517, 548, 549, 580, 581, 612, 613, 644, 645, 676, + 677, 708, 709, 740, 741, 772, 773, 804, 805, 836, 837, 868, 869, 900, + 901, 932, 933, 964, 965, 996, 5, 5, 6, 37, 38, 69, 70, 101, + 102, 133, 134, 165, 166, 197, 198, 229, 230, 261, 262, 293, 294, 325, + 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 486, 517, 518, 549, + 550, 581, 582, 613, 614, 645, 646, 677, 678, 709, 710, 741, 742, 773, + 774, 805, 806, 837, 838, 869, 870, 901, 902, 933, 934, 965, 966, 997, + 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, + 199, 230, 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, + 423, 454, 455, 486, 487, 518, 519, 550, 551, 582, 583, 614, 615, 646, + 647, 678, 679, 710, 711, 742, 743, 774, 775, 806, 807, 838, 839, 870, + 871, 902, 903, 934, 935, 966, 967, 998, 7, 7, 8, 39, 40, 71, + 72, 103, 104, 135, 136, 167, 168, 199, 200, 231, 232, 263, 264, 295, + 296, 327, 328, 359, 360, 391, 392, 423, 424, 455, 456, 487, 488, 519, + 520, 551, 552, 583, 584, 615, 616, 647, 648, 679, 680, 711, 712, 743, + 744, 775, 776, 807, 808, 839, 840, 871, 872, 903, 904, 935, 936, 967, + 968, 999, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168, + 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, + 393, 424, 425, 456, 457, 488, 489, 520, 521, 552, 553, 584, 585, 616, + 617, 648, 649, 680, 681, 712, 713, 744, 745, 776, 777, 808, 809, 840, + 841, 872, 873, 904, 905, 936, 937, 968, 969, 1000, 9, 9, 10, 41, + 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, 233, 234, 265, + 266, 297, 298, 329, 330, 361, 362, 393, 394, 425, 426, 457, 458, 489, + 490, 521, 522, 553, 554, 585, 586, 617, 618, 649, 650, 681, 682, 713, + 714, 745, 746, 777, 778, 809, 810, 841, 842, 873, 874, 905, 906, 937, + 938, 969, 970, 1001, 10, 10, 11, 42, 43, 74, 75, 106, 107, 138, + 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331, 362, + 363, 394, 395, 426, 427, 458, 459, 490, 491, 522, 523, 554, 555, 586, + 587, 618, 619, 650, 651, 682, 683, 714, 715, 746, 747, 778, 779, 810, + 811, 842, 843, 874, 875, 906, 907, 938, 939, 970, 971, 1002, 11, 11, + 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, 204, 235, + 236, 267, 268, 299, 300, 331, 332, 363, 364, 395, 396, 427, 428, 459, + 460, 491, 492, 523, 524, 555, 556, 587, 588, 619, 620, 651, 652, 683, + 684, 715, 716, 747, 748, 779, 780, 811, 812, 843, 844, 875, 876, 907, + 908, 939, 940, 971, 972, 1003, 12, 12, 13, 44, 45, 76, 77, 108, + 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269, 300, 301, 332, + 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 493, 524, 525, 556, + 557, 588, 589, 620, 621, 652, 653, 684, 685, 716, 717, 748, 749, 780, + 781, 812, 813, 844, 845, 876, 877, 908, 909, 940, 941, 972, 973, 1004, + 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, + 206, 237, 238, 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429, + 430, 461, 462, 493, 494, 525, 526, 557, 558, 589, 590, 621, 622, 653, + 654, 685, 686, 717, 718, 749, 750, 781, 782, 813, 814, 845, 846, 877, + 878, 909, 910, 941, 942, 973, 974, 1005, 14, 14, 15, 46, 47, 78, + 79, 110, 111, 142, 143, 174, 175, 206, 207, 238, 239, 270, 271, 302, + 303, 334, 335, 366, 367, 398, 399, 430, 431, 462, 463, 494, 495, 526, + 527, 558, 559, 590, 591, 622, 623, 654, 655, 686, 687, 718, 719, 750, + 751, 782, 783, 814, 815, 846, 847, 878, 879, 910, 911, 942, 943, 974, + 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, + 176, 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, + 400, 431, 432, 463, 464, 495, 496, 527, 528, 559, 560, 591, 592, 623, + 624, 655, 656, 687, 688, 719, 720, 751, 752, 783, 784, 815, 816, 847, + 848, 879, 880, 911, 912, 943, 944, 975, 976, 1007, 16, 16, 17, 48, + 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240, 241, 272, + 273, 304, 305, 336, 337, 368, 369, 400, 401, 432, 433, 464, 465, 496, + 497, 528, 529, 560, 561, 592, 593, 624, 625, 656, 657, 688, 689, 720, + 721, 752, 753, 784, 785, 816, 817, 848, 849, 880, 881, 912, 913, 944, + 945, 976, 977, 1008, 17, 17, 18, 49, 50, 81, 82, 113, 114, 145, + 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369, + 370, 401, 402, 433, 434, 465, 466, 497, 498, 529, 530, 561, 562, 593, + 594, 625, 626, 657, 658, 689, 690, 721, 722, 753, 754, 785, 786, 817, + 818, 849, 850, 881, 882, 913, 914, 945, 946, 977, 978, 1009, 18, 18, + 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, 211, 242, + 243, 274, 275, 306, 307, 338, 339, 370, 371, 402, 403, 434, 435, 466, + 467, 498, 499, 530, 531, 562, 563, 594, 595, 626, 627, 658, 659, 690, + 691, 722, 723, 754, 755, 786, 787, 818, 819, 850, 851, 882, 883, 914, + 915, 946, 947, 978, 979, 1010, 19, 19, 20, 51, 52, 83, 84, 115, + 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307, 308, 339, + 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 500, 531, 532, 563, + 564, 595, 596, 627, 628, 659, 660, 691, 692, 723, 724, 755, 756, 787, + 788, 819, 820, 851, 852, 883, 884, 915, 916, 947, 948, 979, 980, 1011, + 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, + 213, 244, 245, 276, 277, 308, 309, 340, 341, 372, 373, 404, 405, 436, + 437, 468, 469, 500, 501, 532, 533, 564, 565, 596, 597, 628, 629, 660, + 661, 692, 693, 724, 725, 756, 757, 788, 789, 820, 821, 852, 853, 884, + 885, 916, 917, 948, 949, 980, 981, 1012, 21, 21, 22, 53, 54, 85, + 86, 117, 118, 149, 150, 181, 182, 213, 214, 245, 246, 277, 278, 309, + 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470, 501, 502, 533, + 534, 565, 566, 597, 598, 629, 630, 661, 662, 693, 694, 725, 726, 757, + 758, 789, 790, 821, 822, 853, 854, 885, 886, 917, 918, 949, 950, 981, + 982, 1013, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, + 183, 214, 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, + 407, 438, 439, 470, 471, 502, 503, 534, 535, 566, 567, 598, 599, 630, + 631, 662, 663, 694, 695, 726, 727, 758, 759, 790, 791, 822, 823, 854, + 855, 886, 887, 918, 919, 950, 951, 982, 983, 1014, 23, 23, 24, 55, + 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, 216, 247, 248, 279, + 280, 311, 312, 343, 344, 375, 376, 407, 408, 439, 440, 471, 472, 503, + 504, 535, 536, 567, 568, 599, 600, 631, 632, 663, 664, 695, 696, 727, + 728, 759, 760, 791, 792, 823, 824, 855, 856, 887, 888, 919, 920, 951, + 952, 983, 984, 1015, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152, + 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, + 377, 408, 409, 440, 441, 472, 473, 504, 505, 536, 537, 568, 569, 600, + 601, 632, 633, 664, 665, 696, 697, 728, 729, 760, 761, 792, 793, 824, + 825, 856, 857, 888, 889, 920, 921, 952, 953, 984, 985, 1016, 25, 25, + 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, 218, 249, + 250, 281, 282, 313, 314, 345, 346, 377, 378, 409, 410, 441, 442, 473, + 474, 505, 506, 537, 538, 569, 570, 601, 602, 633, 634, 665, 666, 697, + 698, 729, 730, 761, 762, 793, 794, 825, 826, 857, 858, 889, 890, 921, + 922, 953, 954, 985, 986, 1017, 26, 26, 27, 58, 59, 90, 91, 122, + 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315, 346, + 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 507, 538, 539, 570, + 571, 602, 603, 634, 635, 666, 667, 698, 699, 730, 731, 762, 763, 794, + 795, 826, 827, 858, 859, 890, 891, 922, 923, 954, 955, 986, 987, 1018, + 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, + 220, 251, 252, 283, 284, 315, 316, 347, 348, 379, 380, 411, 412, 443, + 444, 475, 476, 507, 508, 539, 540, 571, 572, 603, 604, 635, 636, 667, + 668, 699, 700, 731, 732, 763, 764, 795, 796, 827, 828, 859, 860, 891, + 892, 923, 924, 955, 956, 987, 988, 1019, 28, 28, 29, 60, 61, 92, + 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253, 284, 285, 316, + 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508, 509, 540, + 541, 572, 573, 604, 605, 636, 637, 668, 669, 700, 701, 732, 733, 764, + 765, 796, 797, 828, 829, 860, 861, 892, 893, 924, 925, 956, 957, 988, + 989, 1020, 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, + 190, 221, 222, 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, + 414, 445, 446, 477, 478, 509, 510, 541, 542, 573, 574, 605, 606, 637, + 638, 669, 670, 701, 702, 733, 734, 765, 766, 797, 798, 829, 830, 861, + 862, 893, 894, 925, 926, 957, 958, 989, 990, 1021, 30, 30, 31, 62, + 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254, 255, 286, + 287, 318, 319, 350, 351, 382, 383, 414, 415, 446, 447, 478, 479, 510, + 511, 542, 543, 574, 575, 606, 607, 638, 639, 670, 671, 702, 703, 734, + 735, 766, 767, 798, 799, 830, 831, 862, 863, 894, 895, 926, 927, 958, + 959, 990, 991, 1022, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, - 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, - 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, - 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, - 0, 0, 1, 32, 2, 33, 3, 34, 4, 35, 5, 36, 6, 37, 7, 38, 8, 39, 9, - 40, 10, 41, 11, 42, 12, 43, 13, 44, 14, 45, 15, 46, - 16, 47, 17, 48, 18, 49, 19, 50, 20, 51, 21, 52, 22, 53, 23, 54, - 24, 55, 25, 56, 26, 57, 27, 58, 28, 59, 29, 60, 30, 61, 31, 62, - 32, 32, 33, 64, 34, 65, 35, 66, 36, 67, 37, 68, 38, 69, 39, 70, - 40, 71, 41, 72, 42, 73, 43, 74, 44, 75, 45, 76, 46, 77, 47, 78, - 48, 79, 49, 80, 50, 81, 51, 82, 52, 83, 53, 84, 54, 85, 55, 86, - 56, 87, 57, 88, 58, 89, 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, - 64, 64, 65, 96, 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, - 102, 72, 103, 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, - 109, 79, 110, - 80, 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, - 87, 118, 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, - 94, 125, 95, 126, - 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, 101, 132, 102, - 133, 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, - 139, 109, 140, 110, 141, 111, 142, - 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148, - 118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, - 124, 155, 125, 156, 126, 157, 127, 158, - 128, 128, 129, 160, 130, 161, 131, 162, 132, 163, 133, 164, - 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, - 140, 171, 141, 172, 142, 173, 143, 174, - 144, 175, 145, 176, 146, 177, 147, 178, 148, 179, 149, 180, - 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, 186, - 156, 187, 157, 188, 158, 189, 159, 190, - 160, 160, 161, 192, 162, 193, 163, 194, 164, 195, 165, 196, - 166, 197, 167, 198, 168, 199, 169, 200, 170, 201, 171, 202, - 172, 203, 173, 204, 174, 205, 175, 206, - 176, 207, 177, 208, 178, 209, 179, 210, 180, 211, 181, 212, - 182, 213, 183, 214, 184, 215, 185, 216, 186, 217, 187, 218, - 188, 219, 189, 220, 190, 221, 191, 222, - 192, 192, 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, - 198, 229, 199, 230, 200, 231, 201, 232, 202, 233, 203, 234, - 204, 235, 205, 236, 206, 237, 207, 238, - 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, 213, 244, - 214, 245, 215, 246, 216, 247, 217, 248, 218, 249, 219, 250, - 220, 251, 221, 252, 222, 253, 223, 254, - 224, 224, 225, 256, 226, 257, 227, 258, 228, 259, 229, 260, - 230, 261, 231, 262, 232, 263, 233, 264, 234, 265, 235, 266, - 236, 267, 237, 268, 238, 269, 239, 270, - 240, 271, 241, 272, 242, 273, 243, 274, 244, 275, 245, 276, - 246, 277, 247, 278, 248, 279, 249, 280, 250, 281, 251, 282, - 252, 283, 253, 284, 254, 285, 255, 286, - 256, 256, 257, 288, 258, 289, 259, 290, 260, 291, 261, 292, - 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298, - 268, 299, 269, 300, 270, 301, 271, 302, - 272, 303, 273, 304, 274, 305, 275, 306, 276, 307, 277, 308, - 278, 309, 279, 310, 280, 311, 281, 312, 282, 313, 283, 314, - 284, 315, 285, 316, 286, 317, 287, 318, - 288, 288, 289, 320, 290, 321, 291, 322, 292, 323, 293, 324, - 294, 325, 295, 326, 296, 327, 297, 328, 298, 329, 299, 330, - 300, 331, 301, 332, 302, 333, 303, 334, - 304, 335, 305, 336, 306, 337, 307, 338, 308, 339, 309, 340, - 310, 341, 311, 342, 312, 343, 313, 344, 314, 345, 315, 346, - 316, 347, 317, 348, 318, 349, 319, 350, - 320, 320, 321, 352, 322, 353, 323, 354, 324, 355, 325, 356, - 326, 357, 327, 358, 328, 359, 329, 360, 330, 361, 331, 362, - 332, 363, 333, 364, 334, 365, 335, 366, - 336, 367, 337, 368, 338, 369, 339, 370, 340, 371, 341, 372, - 342, 373, 343, 374, 344, 375, 345, 376, 346, 377, 347, 378, - 348, 379, 349, 380, 350, 381, 351, 382, - 352, 352, 353, 384, 354, 385, 355, 386, 356, 387, 357, 388, - 358, 389, 359, 390, 360, 391, 361, 392, 362, 393, 363, 394, - 364, 395, 365, 396, 366, 397, 367, 398, - 368, 399, 369, 400, 370, 401, 371, 402, 372, 403, 373, 404, - 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, - 380, 411, 381, 412, 382, 413, 383, 414, - 384, 384, 385, 416, 386, 417, 387, 418, 388, 419, 389, 420, - 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, 395, 426, - 396, 427, 397, 428, 398, 429, 399, 430, - 400, 431, 401, 432, 402, 433, 403, 434, 404, 435, 405, 436, - 406, 437, 407, 438, 408, 439, 409, 440, 410, 441, 411, 442, - 412, 443, 413, 444, 414, 445, 415, 446, - 416, 416, 417, 448, 418, 449, 419, 450, 420, 451, 421, 452, - 422, 453, 423, 454, 424, 455, 425, 456, 426, 457, 427, 458, - 428, 459, 429, 460, 430, 461, 431, 462, - 432, 463, 433, 464, 434, 465, 435, 466, 436, 467, 437, 468, - 438, 469, 439, 470, 440, 471, 441, 472, 442, 473, 443, 474, - 444, 475, 445, 476, 446, 477, 447, 478, - 448, 448, 449, 480, 450, 481, 451, 482, 452, 483, 453, 484, - 454, 485, 455, 486, 456, 487, 457, 488, 458, 489, 459, 490, - 460, 491, 461, 492, 462, 493, 463, 494, - 464, 495, 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, - 470, 501, 471, 502, 472, 503, 473, 504, 474, 505, 475, 506, - 476, 507, 477, 508, 478, 509, 479, 510, - 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516, - 486, 517, 487, 518, 488, 519, 489, 520, 490, 521, 491, 522, - 492, 523, 493, 524, 494, 525, 495, 526, - 496, 527, 497, 528, 498, 529, 499, 530, 500, 531, 501, 532, - 502, 533, 503, 534, 504, 535, 505, 536, 506, 537, 507, 538, - 508, 539, 509, 540, 510, 541, 511, 542, - 512, 512, 513, 544, 514, 545, 515, 546, 516, 547, 517, 548, - 518, 549, 519, 550, 520, 551, 521, 552, 522, 553, 523, 554, - 524, 555, 525, 556, 526, 557, 527, 558, - 528, 559, 529, 560, 530, 561, 531, 562, 532, 563, 533, 564, - 534, 565, 535, 566, 536, 567, 537, 568, 538, 569, 539, 570, - 540, 571, 541, 572, 542, 573, 543, 574, - 544, 544, 545, 576, 546, 577, 547, 578, 548, 579, 549, 580, - 550, 581, 551, 582, 552, 583, 553, 584, 554, 585, 555, 586, - 556, 587, 557, 588, 558, 589, 559, 590, - 560, 591, 561, 592, 562, 593, 563, 594, 564, 595, 565, 596, - 566, 597, 567, 598, 568, 599, 569, 600, 570, 601, 571, 602, - 572, 603, 573, 604, 574, 605, 575, 606, - 576, 576, 577, 608, 578, 609, 579, 610, 580, 611, 581, 612, - 582, 613, 583, 614, 584, 615, 585, 616, 586, 617, 587, 618, - 588, 619, 589, 620, 590, 621, 591, 622, - 592, 623, 593, 624, 594, 625, 595, 626, 596, 627, 597, 628, - 598, 629, 599, 630, 600, 631, 601, 632, 602, 633, 603, 634, - 604, 635, 605, 636, 606, 637, 607, 638, - 608, 608, 609, 640, 610, 641, 611, 642, 612, 643, 613, 644, - 614, 645, 615, 646, 616, 647, 617, 648, 618, 649, 619, 650, - 620, 651, 621, 652, 622, 653, 623, 654, - 624, 655, 625, 656, 626, 657, 627, 658, 628, 659, 629, 660, - 630, 661, 631, 662, 632, 663, 633, 664, 634, 665, 635, 666, - 636, 667, 637, 668, 638, 669, 639, 670, - 640, 640, 641, 672, 642, 673, 643, 674, 644, 675, 645, 676, - 646, 677, 647, 678, 648, 679, 649, 680, 650, 681, 651, 682, - 652, 683, 653, 684, 654, 685, 655, 686, - 656, 687, 657, 688, 658, 689, 659, 690, 660, 691, 661, 692, - 662, 693, 663, 694, 664, 695, 665, 696, 666, 697, 667, 698, - 668, 699, 669, 700, 670, 701, 671, 702, - 672, 672, 673, 704, 674, 705, 675, 706, 676, 707, 677, 708, - 678, 709, 679, 710, 680, 711, 681, 712, 682, 713, 683, 714, - 684, 715, 685, 716, 686, 717, 687, 718, - 688, 719, 689, 720, 690, 721, 691, 722, 692, 723, 693, 724, - 694, 725, 695, 726, 696, 727, 697, 728, 698, 729, 699, 730, - 700, 731, 701, 732, 702, 733, 703, 734, - 704, 704, 705, 736, 706, 737, 707, 738, 708, 739, 709, 740, - 710, 741, 711, 742, 712, 743, 713, 744, 714, 745, 715, 746, - 716, 747, 717, 748, 718, 749, 719, 750, - 720, 751, 721, 752, 722, 753, 723, 754, 724, 755, 725, 756, - 726, 757, 727, 758, 728, 759, 729, 760, 730, 761, 731, 762, - 732, 763, 733, 764, 734, 765, 735, 766, - 736, 736, 737, 768, 738, 769, 739, 770, 740, 771, 741, 772, - 742, 773, 743, 774, 744, 775, 745, 776, 746, 777, 747, 778, - 748, 779, 749, 780, 750, 781, 751, 782, - 752, 783, 753, 784, 754, 785, 755, 786, 756, 787, 757, 788, - 758, 789, 759, 790, 760, 791, 761, 792, 762, 793, 763, 794, - 764, 795, 765, 796, 766, 797, 767, 798, - 768, 768, 769, 800, 770, 801, 771, 802, 772, 803, 773, 804, - 774, 805, 775, 806, 776, 807, 777, 808, 778, 809, 779, 810, - 780, 811, 781, 812, 782, 813, 783, 814, - 784, 815, 785, 816, 786, 817, 787, 818, 788, 819, 789, 820, - 790, 821, 791, 822, 792, 823, 793, 824, 794, 825, 795, 826, - 796, 827, 797, 828, 798, 829, 799, 830, - 800, 800, 801, 832, 802, 833, 803, 834, 804, 835, 805, 836, - 806, 837, 807, 838, 808, 839, 809, 840, 810, 841, 811, 842, - 812, 843, 813, 844, 814, 845, 815, 846, - 816, 847, 817, 848, 818, 849, 819, 850, 820, 851, 821, 852, - 822, 853, 823, 854, 824, 855, 825, 856, 826, 857, 827, 858, - 828, 859, 829, 860, 830, 861, 831, 862, - 832, 832, 833, 864, 834, 865, 835, 866, 836, 867, 837, 868, - 838, 869, 839, 870, 840, 871, 841, 872, 842, 873, 843, 874, - 844, 875, 845, 876, 846, 877, 847, 878, - 848, 879, 849, 880, 850, 881, 851, 882, 852, 883, 853, 884, - 854, 885, 855, 886, 856, 887, 857, 888, 858, 889, 859, 890, - 860, 891, 861, 892, 862, 893, 863, 894, - 864, 864, 865, 896, 866, 897, 867, 898, 868, 899, 869, 900, - 870, 901, 871, 902, 872, 903, 873, 904, 874, 905, 875, 906, - 876, 907, 877, 908, 878, 909, 879, 910, - 880, 911, 881, 912, 882, 913, 883, 914, 884, 915, 885, 916, - 886, 917, 887, 918, 888, 919, 889, 920, 890, 921, 891, 922, - 892, 923, 893, 924, 894, 925, 895, 926, - 896, 896, 897, 928, 898, 929, 899, 930, 900, 931, 901, 932, - 902, 933, 903, 934, 904, 935, 905, 936, 906, 937, 907, 938, - 908, 939, 909, 940, 910, 941, 911, 942, - 912, 943, 913, 944, 914, 945, 915, 946, 916, 947, 917, 948, - 918, 949, 919, 950, 920, 951, 921, 952, 922, 953, 923, 954, - 924, 955, 925, 956, 926, 957, 927, 958, - 928, 928, 929, 960, 930, 961, 931, 962, 932, 963, 933, 964, - 934, 965, 935, 966, 936, 967, 937, 968, 938, 969, 939, 970, - 940, 971, 941, 972, 942, 973, 943, 974, - 944, 975, 945, 976, 946, 977, 947, 978, 948, 979, 949, 980, - 950, 981, 951, 982, 952, 983, 953, 984, 954, 985, 955, 986, - 956, 987, 957, 988, 958, 989, 959, 990, - 960, 960, 961, 992, 962, 993, 963, 994, 964, 995, 965, 996, - 966, 997, 967, 998, 968, 999, 969, 1000, 970, 1001, 971, 1002, - 972, 1003, 973, 1004, 974, 1005, 975, 1006, - 976, 1007, 977, 1008, 978, 1009, 979, 1010, 980, 1011, 981, - 1012, 982, 1013, 983, 1014, 984, 1015, 985, 1016, 986, 1017, - 987, 1018, 988, 1019, 989, 1020, 990, 1021, 991, 1022, - 0, 0, + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, + 27, 27, 28, 28, 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, + 3, 34, 4, 35, 5, 36, 6, 37, 7, 38, 8, 39, 9, 40, + 10, 41, 11, 42, 12, 43, 13, 44, 14, 45, 15, 46, 16, 47, + 17, 48, 18, 49, 19, 50, 20, 51, 21, 52, 22, 53, 23, 54, + 24, 55, 25, 56, 26, 57, 27, 58, 28, 59, 29, 60, 30, 61, + 31, 62, 32, 32, 33, 64, 34, 65, 35, 66, 36, 67, 37, 68, + 38, 69, 39, 70, 40, 71, 41, 72, 42, 73, 43, 74, 44, 75, + 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50, 81, 51, 82, + 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88, 58, 89, + 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65, 96, + 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103, + 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, + 80, 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, + 87, 118, 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, + 94, 125, 95, 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, + 101, 132, 102, 133, 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, + 108, 139, 109, 140, 110, 141, 111, 142, 112, 143, 113, 144, 114, 145, + 115, 146, 116, 147, 117, 148, 118, 149, 119, 150, 120, 151, 121, 152, + 122, 153, 123, 154, 124, 155, 125, 156, 126, 157, 127, 158, 128, 128, + 129, 160, 130, 161, 131, 162, 132, 163, 133, 164, 134, 165, 135, 166, + 136, 167, 137, 168, 138, 169, 139, 170, 140, 171, 141, 172, 142, 173, + 143, 174, 144, 175, 145, 176, 146, 177, 147, 178, 148, 179, 149, 180, + 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, 186, 156, 187, + 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193, 163, 194, + 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170, 201, + 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208, + 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, + 185, 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, + 192, 192, 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, + 199, 230, 200, 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, + 206, 237, 207, 238, 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, + 213, 244, 214, 245, 215, 246, 216, 247, 217, 248, 218, 249, 219, 250, + 220, 251, 221, 252, 222, 253, 223, 254, 224, 224, 225, 256, 226, 257, + 227, 258, 228, 259, 229, 260, 230, 261, 231, 262, 232, 263, 233, 264, + 234, 265, 235, 266, 236, 267, 237, 268, 238, 269, 239, 270, 240, 271, + 241, 272, 242, 273, 243, 274, 244, 275, 245, 276, 246, 277, 247, 278, + 248, 279, 249, 280, 250, 281, 251, 282, 252, 283, 253, 284, 254, 285, + 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260, 291, 261, 292, + 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298, 268, 299, + 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275, 306, + 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313, + 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320, + 290, 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327, + 297, 328, 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334, + 304, 335, 305, 336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341, + 311, 342, 312, 343, 313, 344, 314, 345, 315, 346, 316, 347, 317, 348, + 318, 349, 319, 350, 320, 320, 321, 352, 322, 353, 323, 354, 324, 355, + 325, 356, 326, 357, 327, 358, 328, 359, 329, 360, 330, 361, 331, 362, + 332, 363, 333, 364, 334, 365, 335, 366, 336, 367, 337, 368, 338, 369, + 339, 370, 340, 371, 341, 372, 342, 373, 343, 374, 344, 375, 345, 376, + 346, 377, 347, 378, 348, 379, 349, 380, 350, 381, 351, 382, 352, 352, + 353, 384, 354, 385, 355, 386, 356, 387, 357, 388, 358, 389, 359, 390, + 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365, 396, 366, 397, + 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403, 373, 404, + 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380, 411, + 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418, + 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, + 395, 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432, + 402, 433, 403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439, + 409, 440, 410, 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446, + 416, 416, 417, 448, 418, 449, 419, 450, 420, 451, 421, 452, 422, 453, + 423, 454, 424, 455, 425, 456, 426, 457, 427, 458, 428, 459, 429, 460, + 430, 461, 431, 462, 432, 463, 433, 464, 434, 465, 435, 466, 436, 467, + 437, 468, 438, 469, 439, 470, 440, 471, 441, 472, 442, 473, 443, 474, + 444, 475, 445, 476, 446, 477, 447, 478, 448, 448, 449, 480, 450, 481, + 451, 482, 452, 483, 453, 484, 454, 485, 455, 486, 456, 487, 457, 488, + 458, 489, 459, 490, 460, 491, 461, 492, 462, 493, 463, 494, 464, 495, + 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470, 501, 471, 502, + 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508, 478, 509, + 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516, + 486, 517, 487, 518, 488, 519, 489, 520, 490, 521, 491, 522, 492, 523, + 493, 524, 494, 525, 495, 526, 496, 527, 497, 528, 498, 529, 499, 530, + 500, 531, 501, 532, 502, 533, 503, 534, 504, 535, 505, 536, 506, 537, + 507, 538, 508, 539, 509, 540, 510, 541, 511, 542, 512, 512, 513, 544, + 514, 545, 515, 546, 516, 547, 517, 548, 518, 549, 519, 550, 520, 551, + 521, 552, 522, 553, 523, 554, 524, 555, 525, 556, 526, 557, 527, 558, + 528, 559, 529, 560, 530, 561, 531, 562, 532, 563, 533, 564, 534, 565, + 535, 566, 536, 567, 537, 568, 538, 569, 539, 570, 540, 571, 541, 572, + 542, 573, 543, 574, 544, 544, 545, 576, 546, 577, 547, 578, 548, 579, + 549, 580, 550, 581, 551, 582, 552, 583, 553, 584, 554, 585, 555, 586, + 556, 587, 557, 588, 558, 589, 559, 590, 560, 591, 561, 592, 562, 593, + 563, 594, 564, 595, 565, 596, 566, 597, 567, 598, 568, 599, 569, 600, + 570, 601, 571, 602, 572, 603, 573, 604, 574, 605, 575, 606, 576, 576, + 577, 608, 578, 609, 579, 610, 580, 611, 581, 612, 582, 613, 583, 614, + 584, 615, 585, 616, 586, 617, 587, 618, 588, 619, 589, 620, 590, 621, + 591, 622, 592, 623, 593, 624, 594, 625, 595, 626, 596, 627, 597, 628, + 598, 629, 599, 630, 600, 631, 601, 632, 602, 633, 603, 634, 604, 635, + 605, 636, 606, 637, 607, 638, 608, 608, 609, 640, 610, 641, 611, 642, + 612, 643, 613, 644, 614, 645, 615, 646, 616, 647, 617, 648, 618, 649, + 619, 650, 620, 651, 621, 652, 622, 653, 623, 654, 624, 655, 625, 656, + 626, 657, 627, 658, 628, 659, 629, 660, 630, 661, 631, 662, 632, 663, + 633, 664, 634, 665, 635, 666, 636, 667, 637, 668, 638, 669, 639, 670, + 640, 640, 641, 672, 642, 673, 643, 674, 644, 675, 645, 676, 646, 677, + 647, 678, 648, 679, 649, 680, 650, 681, 651, 682, 652, 683, 653, 684, + 654, 685, 655, 686, 656, 687, 657, 688, 658, 689, 659, 690, 660, 691, + 661, 692, 662, 693, 663, 694, 664, 695, 665, 696, 666, 697, 667, 698, + 668, 699, 669, 700, 670, 701, 671, 702, 672, 672, 673, 704, 674, 705, + 675, 706, 676, 707, 677, 708, 678, 709, 679, 710, 680, 711, 681, 712, + 682, 713, 683, 714, 684, 715, 685, 716, 686, 717, 687, 718, 688, 719, + 689, 720, 690, 721, 691, 722, 692, 723, 693, 724, 694, 725, 695, 726, + 696, 727, 697, 728, 698, 729, 699, 730, 700, 731, 701, 732, 702, 733, + 703, 734, 704, 704, 705, 736, 706, 737, 707, 738, 708, 739, 709, 740, + 710, 741, 711, 742, 712, 743, 713, 744, 714, 745, 715, 746, 716, 747, + 717, 748, 718, 749, 719, 750, 720, 751, 721, 752, 722, 753, 723, 754, + 724, 755, 725, 756, 726, 757, 727, 758, 728, 759, 729, 760, 730, 761, + 731, 762, 732, 763, 733, 764, 734, 765, 735, 766, 736, 736, 737, 768, + 738, 769, 739, 770, 740, 771, 741, 772, 742, 773, 743, 774, 744, 775, + 745, 776, 746, 777, 747, 778, 748, 779, 749, 780, 750, 781, 751, 782, + 752, 783, 753, 784, 754, 785, 755, 786, 756, 787, 757, 788, 758, 789, + 759, 790, 760, 791, 761, 792, 762, 793, 763, 794, 764, 795, 765, 796, + 766, 797, 767, 798, 768, 768, 769, 800, 770, 801, 771, 802, 772, 803, + 773, 804, 774, 805, 775, 806, 776, 807, 777, 808, 778, 809, 779, 810, + 780, 811, 781, 812, 782, 813, 783, 814, 784, 815, 785, 816, 786, 817, + 787, 818, 788, 819, 789, 820, 790, 821, 791, 822, 792, 823, 793, 824, + 794, 825, 795, 826, 796, 827, 797, 828, 798, 829, 799, 830, 800, 800, + 801, 832, 802, 833, 803, 834, 804, 835, 805, 836, 806, 837, 807, 838, + 808, 839, 809, 840, 810, 841, 811, 842, 812, 843, 813, 844, 814, 845, + 815, 846, 816, 847, 817, 848, 818, 849, 819, 850, 820, 851, 821, 852, + 822, 853, 823, 854, 824, 855, 825, 856, 826, 857, 827, 858, 828, 859, + 829, 860, 830, 861, 831, 862, 832, 832, 833, 864, 834, 865, 835, 866, + 836, 867, 837, 868, 838, 869, 839, 870, 840, 871, 841, 872, 842, 873, + 843, 874, 844, 875, 845, 876, 846, 877, 847, 878, 848, 879, 849, 880, + 850, 881, 851, 882, 852, 883, 853, 884, 854, 885, 855, 886, 856, 887, + 857, 888, 858, 889, 859, 890, 860, 891, 861, 892, 862, 893, 863, 894, + 864, 864, 865, 896, 866, 897, 867, 898, 868, 899, 869, 900, 870, 901, + 871, 902, 872, 903, 873, 904, 874, 905, 875, 906, 876, 907, 877, 908, + 878, 909, 879, 910, 880, 911, 881, 912, 882, 913, 883, 914, 884, 915, + 885, 916, 886, 917, 887, 918, 888, 919, 889, 920, 890, 921, 891, 922, + 892, 923, 893, 924, 894, 925, 895, 926, 896, 896, 897, 928, 898, 929, + 899, 930, 900, 931, 901, 932, 902, 933, 903, 934, 904, 935, 905, 936, + 906, 937, 907, 938, 908, 939, 909, 940, 910, 941, 911, 942, 912, 943, + 913, 944, 914, 945, 915, 946, 916, 947, 917, 948, 918, 949, 919, 950, + 920, 951, 921, 952, 922, 953, 923, 954, 924, 955, 925, 956, 926, 957, + 927, 958, 928, 928, 929, 960, 930, 961, 931, 962, 932, 963, 933, 964, + 934, 965, 935, 966, 936, 967, 937, 968, 938, 969, 939, 970, 940, 971, + 941, 972, 942, 973, 943, 974, 944, 975, 945, 976, 946, 977, 947, 978, + 948, 979, 949, 980, 950, 981, 951, 982, 952, 983, 953, 984, 954, 985, + 955, 986, 956, 987, 957, 988, 958, 989, 959, 990, 960, 960, 961, 992, + 962, 993, 963, 994, 964, 995, 965, 996, 966, 997, 967, 998, 968, 999, + 969, 1000, 970, 1001, 971, 1002, 972, 1003, 973, 1004, 974, 1005, 975, 1006, + 976, 1007, 977, 1008, 978, 1009, 979, 1010, 980, 1011, 981, 1012, 982, 1013, + 983, 1014, 984, 1015, 985, 1016, 986, 1017, 987, 1018, 988, 1019, 989, 1020, + 990, 1021, 991, 1022, 0, 0, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 32, 0, 32, 32, 1, 32, 33, 1, - 64, 64, 33, 64, 2, 33, 96, 96, 34, 2, 65, 96, - 34, 65, 128, 128, 97, 128, 3, 34, 66, 97, 35, 3, - 35, 66, 98, 129, 129, 160, 160, 161, 4, 35, 67, 98, - 192, 192, 36, 4, 130, 161, 161, 192, 36, 67, 99, 130, - 5, 36, 68, 99, 193, 224, 162, 193, 224, 225, 131, 162, - 37, 68, 100, 131, 37, 5, 194, 225, 225, 256, 256, 257, - 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 38, 6, - 195, 226, 257, 288, 101, 132, 288, 289, 38, 69, 164, 195, - 133, 164, 258, 289, 227, 258, 196, 227, 7, 38, 289, 320, - 70, 101, 320, 321, 39, 7, 165, 196, 39, 70, 102, 133, - 290, 321, 259, 290, 228, 259, 321, 352, 352, 353, 197, 228, - 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, - 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 40, 8, - 384, 385, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, - 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, - 230, 261, 355, 386, 416, 417, 293, 324, 324, 355, 41, 9, - 41, 72, 386, 417, 199, 230, 136, 167, 417, 448, 262, 293, - 356, 387, 73, 104, 387, 418, 231, 262, 10, 41, 168, 199, - 325, 356, 418, 449, 105, 136, 448, 449, 42, 73, 294, 325, - 200, 231, 42, 10, 357, 388, 137, 168, 263, 294, 388, 419, - 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, - 169, 200, 11, 42, 106, 137, 480, 481, 450, 481, 358, 389, - 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, - 327, 358, 43, 11, 481, 512, 233, 264, 451, 482, 296, 327, - 75, 106, 170, 201, 482, 513, 512, 513, 390, 421, 359, 390, - 421, 452, 107, 138, 12, 43, 202, 233, 452, 483, 265, 296, - 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, - 297, 328, 422, 453, 44, 12, 391, 422, 171, 202, 76, 107, - 514, 545, 453, 484, 544, 545, 266, 297, 203, 234, 108, 139, - 329, 360, 298, 329, 140, 171, 515, 546, 13, 44, 423, 454, - 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, 361, - 576, 577, 45, 13, 267, 298, 546, 577, 77, 108, 204, 235, - 455, 486, 577, 608, 299, 330, 109, 140, 547, 578, 14, 45, - 46, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204, - 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, - 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, - 16, 47, 111, 142, 48, 79, 143, 174, 80, 111, 175, 206, - 17, 48, 49, 17, 207, 238, 49, 80, 81, 112, 18, 18, - 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, - 608, 609, 484, 515, 360, 391, 236, 267, 112, 143, 51, 19, - 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392, - 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 52, 20, - 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, - 424, 455, 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, - 176, 207, 145, 176, 114, 145, 52, 83, 21, 52, 53, 21, - 704, 704, 673, 704, 642, 673, 611, 642, 580, 611, 549, 580, - 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, - 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, - 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 54, 22, - 705, 736, 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, - 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, 302, - 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, - 706, 737, 675, 706, 582, 613, 551, 582, 458, 489, 427, 458, - 334, 365, 303, 334, 210, 241, 179, 210, 86, 117, 55, 86, - 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118, - 736, 737, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, - 55, 23, 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, - 489, 520, 396, 427, 365, 396, 272, 303, 241, 272, 148, 179, - 117, 148, 24, 55, 56, 24, 800, 800, 769, 800, 738, 769, - 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, - 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, - 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 57, 25, - 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, - 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, - 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, - 274, 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, - 88, 119, 57, 88, 26, 57, 58, 26, 833, 864, 802, 833, - 771, 802, 709, 740, 678, 709, 647, 678, 585, 616, 554, 585, - 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, - 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, - 27, 58, 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, - 555, 586, 462, 493, 431, 462, 338, 369, 307, 338, 214, 245, - 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, 618, - 463, 494, 339, 370, 215, 246, 91, 122, 864, 865, 740, 771, - 616, 647, 492, 523, 368, 399, 244, 275, 120, 151, 59, 27, - 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648, - 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, - 152, 183, 121, 152, 28, 59, 60, 28, 928, 928, 897, 928, - 866, 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, - 618, 649, 556, 587, 525, 556, 494, 525, 432, 463, 401, 432, - 370, 401, 308, 339, 277, 308, 246, 277, 184, 215, 153, 184, - 122, 153, 60, 91, 29, 60, 61, 29, 960, 960, 929, 960, - 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, - 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, - 526, 557, 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, - 340, 371, 309, 340, 278, 309, 247, 278, 216, 247, 185, 216, - 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 62, 30, - 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, - 713, 744, 682, 713, 651, 682, 589, 620, 558, 589, 527, 558, - 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310, - 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, - 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, - 590, 621, 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, - 218, 249, 187, 218, 94, 125, 63, 94, 963, 994, 839, 870, - 715, 746, 591, 622, 467, 498, 343, 374, 219, 250, 95, 126, - 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, - 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, - 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, - 249, 280, 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, - 808, 839, 777, 808, 746, 777, 684, 715, 653, 684, 622, 653, - 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, - 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, - 964, 995, 933, 964, 902, 933, 871, 902, 840, 871, 809, 840, - 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654, - 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, - 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, - 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, - 903, 934, 841, 872, 810, 841, 779, 810, 717, 748, 686, 717, - 655, 686, 593, 624, 562, 593, 531, 562, 469, 500, 438, 469, - 407, 438, 345, 376, 314, 345, 283, 314, 221, 252, 190, 221, - 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, - 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, - 315, 346, 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, - 595, 626, 471, 502, 347, 378, 223, 254, 872, 903, 748, 779, - 624, 655, 500, 531, 376, 407, 252, 283, 904, 935, 873, 904, - 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, - 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, - 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688, - 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, - 378, 409, 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, - 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, - 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, - 534, 565, 503, 534, 472, 503, 441, 472, 410, 441, 379, 410, - 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000, 938, 969, - 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721, - 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, - 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, - 846, 877, 815, 846, 722, 753, 691, 722, 598, 629, 567, 598, - 474, 505, 443, 474, 350, 381, 319, 350, 971, 1002, 847, 878, - 723, 754, 599, 630, 475, 506, 351, 382, 876, 907, 752, 783, - 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, - 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, - 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, - 754, 785, 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, - 506, 537, 444, 475, 413, 444, 382, 413, 972, 1003, 941, 972, - 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, - 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, - 538, 569, 507, 538, 476, 507, 445, 476, 414, 445, 383, 414, - 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818, - 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, - 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, - 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, - 447, 478, 975, 1006, 851, 882, 727, 758, 603, 634, 479, 510, - 880, 911, 756, 787, 632, 663, 508, 539, 912, 943, 881, 912, - 788, 819, 757, 788, 664, 695, 633, 664, 540, 571, 509, 540, - 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, - 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, - 976, 1007, 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, - 790, 821, 759, 790, 728, 759, 697, 728, 666, 697, 635, 666, - 604, 635, 573, 604, 542, 573, 511, 542, 977, 1008, 946, 977, - 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, - 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, - 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606, - 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, - 636, 667, 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, - 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, - 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, - 918, 949, 887, 918, 856, 887, 825, 856, 794, 825, 763, 794, - 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012, 950, 981, - 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733, - 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, - 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, - 920, 951, 889, 920, 796, 827, 765, 796, 952, 983, 921, 952, - 890, 921, 828, 859, 797, 828, 766, 797, 984, 1015, 953, 984, - 922, 953, 891, 922, 860, 891, 829, 860, 798, 829, 767, 798, - 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, - 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, - 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, - 988, 1019, 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, - 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, + 0, 0, 0, 0, 32, 0, 32, 32, 1, 32, 33, 1, 64, 64, + 33, 64, 2, 33, 96, 96, 34, 2, 65, 96, 34, 65, 128, 128, + 97, 128, 3, 34, 66, 97, 35, 3, 35, 66, 98, 129, 129, 160, + 160, 161, 4, 35, 67, 98, 192, 192, 36, 4, 130, 161, 161, 192, + 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 225, + 131, 162, 37, 68, 100, 131, 37, 5, 194, 225, 225, 256, 256, 257, + 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 38, 6, 195, 226, + 257, 288, 101, 132, 288, 289, 38, 69, 164, 195, 133, 164, 258, 289, + 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 321, 39, 7, + 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, + 352, 353, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, + 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 40, 8, + 384, 385, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103, + 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386, + 416, 417, 293, 324, 324, 355, 41, 9, 41, 72, 386, 417, 199, 230, + 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, + 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 449, 42, 73, + 294, 325, 200, 231, 42, 10, 357, 388, 137, 168, 263, 294, 388, 419, + 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, + 11, 42, 106, 137, 480, 481, 450, 481, 358, 389, 264, 295, 201, 232, + 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 43, 11, 481, 512, + 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 513, + 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483, + 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, + 297, 328, 422, 453, 44, 12, 391, 422, 171, 202, 76, 107, 514, 545, + 453, 484, 544, 545, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, + 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, + 45, 76, 172, 203, 330, 361, 576, 577, 45, 13, 267, 298, 546, 577, + 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578, + 14, 45, 46, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204, + 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173, + 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, + 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 49, 17, 207, 238, + 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, + 51, 82, 83, 114, 608, 609, 484, 515, 360, 391, 236, 267, 112, 143, + 51, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392, + 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 52, 20, 672, 672, + 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424, + 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145, + 52, 83, 21, 52, 53, 21, 704, 704, 673, 704, 642, 673, 611, 642, + 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, + 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, + 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 54, 22, 705, 736, + 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457, + 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178, + 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582, + 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117, + 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118, + 736, 737, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 55, 23, + 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, + 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 56, 24, + 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, + 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, + 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 57, 25, + 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677, + 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460, + 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243, + 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 58, 26, + 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616, + 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, + 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, + 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, + 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, + 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122, + 864, 865, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151, + 59, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648, + 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183, + 121, 152, 28, 59, 60, 28, 928, 928, 897, 928, 866, 897, 804, 835, + 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556, + 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277, + 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 61, 29, 960, 960, + 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, + 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, + 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, + 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, + 61, 92, 30, 61, 62, 30, 961, 992, 930, 961, 899, 930, 837, 868, + 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 589, + 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310, + 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993, + 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590, + 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125, + 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, + 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, + 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, + 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, + 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, + 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529, + 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219, + 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871, + 809, 840, 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654, + 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437, + 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220, + 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841, + 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562, + 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252, + 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, + 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, + 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, + 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, + 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, + 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, + 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688, + 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409, + 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906, + 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 720, 658, 689, + 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472, + 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000, + 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721, + 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442, + 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846, + 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, + 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, + 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, + 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, + 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, + 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, + 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879, + 817, 848, 786, 817, 755, 786, 724, 755, 693, 724, 662, 693, 631, 662, + 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445, + 383, 414, 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818, + 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508, + 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757, + 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882, + 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539, + 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571, + 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, + 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, + 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, + 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, + 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, + 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, + 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606, + 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, 636, 667, + 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979, + 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700, + 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856, + 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012, + 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733, + 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734, + 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920, + 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, + 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, + 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, + 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, + 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, + 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, + 959, 990, 991, 1022, 0, 0, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, v2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, - 2, 33, 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, - 65, 96, 35, 66, 66, 97, 3, 3, 96, 96, 4, 35, - 97, 128, 67, 98, 36, 67, 98, 129, 4, 4, 68, 99, - 99, 130, 128, 128, 5, 36, 129, 160, 37, 68, 130, 161, - 100, 131, 69, 100, 131, 162, 5, 5, 160, 160, 6, 37, - 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101, - 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, - 102, 133, 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, - 134, 165, 165, 196, 7, 7, 224, 224, 8, 39, 103, 134, - 196, 227, 225, 256, 40, 71, 226, 257, 166, 197, 72, 103, - 227, 258, 135, 166, 197, 228, 104, 135, 228, 259, 8, 8, - 256, 256, 9, 40, 257, 288, 41, 72, 167, 198, 198, 229, - 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136, - 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, - 10, 41, 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, - 74, 105, 291, 322, 200, 231, 231, 262, 106, 137, 292, 323, - 169, 200, 262, 293, 10, 10, 320, 320, 11, 42, 321, 352, - 43, 74, 138, 169, 293, 324, 322, 353, 232, 263, 75, 106, - 201, 232, 263, 294, 323, 354, 170, 201, 294, 325, 107, 138, - 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295, - 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, - 295, 326, 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, - 356, 387, 265, 296, 234, 265, 296, 327, 12, 12, 140, 171, - 357, 388, 384, 384, 13, 44, 203, 234, 327, 358, 385, 416, - 45, 76, 386, 417, 77, 108, 387, 418, 172, 203, 358, 389, - 266, 297, 297, 328, 109, 140, 235, 266, 328, 359, 388, 419, - 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416, - 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, - 329, 360, 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, - 360, 391, 110, 141, 420, 451, 205, 236, 391, 422, 142, 173, - 299, 330, 330, 361, 421, 452, 14, 14, 268, 299, 361, 392, - 448, 448, 15, 46, 449, 480, 47, 78, 450, 481, 174, 205, - 422, 453, 237, 268, 392, 423, 79, 110, 451, 482, 111, 142, - 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454, - 143, 174, 269, 300, 393, 424, 453, 484, 480, 480, 481, 512, - 238, 269, 424, 455, 482, 513, 175, 206, 454, 485, 332, 363, - 363, 394, 483, 514, 301, 332, 394, 425, 484, 515, 207, 238, - 455, 486, 270, 301, 425, 456, 485, 516, 364, 395, 239, 270, - 456, 487, 512, 512, 333, 364, 395, 426, 513, 544, 486, 517, - 514, 545, 302, 333, 426, 457, 515, 546, 487, 518, 516, 547, - 271, 302, 457, 488, 365, 396, 396, 427, 517, 548, 334, 365, - 427, 458, 488, 519, 544, 544, 303, 334, 458, 489, 518, 549, - 545, 576, 546, 577, 547, 578, 489, 520, 397, 428, 519, 550, - 366, 397, 428, 459, 548, 579, 335, 366, 459, 490, 549, 580, - 520, 551, 490, 521, 550, 581, 576, 576, 577, 608, 398, 429, - 429, 460, 578, 609, 367, 398, 460, 491, 521, 552, 579, 610, - 551, 582, 491, 522, 580, 611, 581, 612, 552, 583, 522, 553, - 430, 461, 399, 430, 461, 492, 582, 613, 492, 523, 608, 608, - 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, - 612, 643, 431, 462, 462, 493, 554, 585, 493, 524, 584, 615, - 613, 644, 524, 555, 614, 645, 640, 640, 585, 616, 641, 672, - 555, 586, 642, 673, 615, 646, 463, 494, 643, 674, 494, 525, - 644, 675, 525, 556, 586, 617, 616, 647, 645, 676, 556, 587, - 646, 677, 495, 526, 617, 648, 587, 618, 672, 672, 526, 557, - 673, 704, 674, 705, 647, 678, 557, 588, 675, 706, 618, 649, - 676, 707, 588, 619, 648, 679, 677, 708, 527, 558, 558, 589, - 678, 709, 619, 650, 649, 680, 704, 704, 589, 620, 705, 736, - 679, 710, 706, 737, 707, 738, 650, 681, 620, 651, 708, 739, - 680, 711, 559, 590, 709, 740, 590, 621, 651, 682, 681, 712, - 710, 741, 621, 652, 736, 736, 737, 768, 711, 742, 738, 769, - 682, 713, 652, 683, 739, 770, 591, 622, 740, 771, 712, 743, - 622, 653, 741, 772, 683, 714, 653, 684, 713, 744, 742, 773, - 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, - 770, 801, 771, 802, 654, 685, 744, 775, 772, 803, 715, 746, - 773, 804, 685, 716, 745, 776, 774, 805, 655, 686, 716, 747, - 775, 806, 746, 777, 800, 800, 801, 832, 686, 717, 802, 833, - 803, 834, 776, 807, 804, 835, 747, 778, 717, 748, 805, 836, - 777, 808, 687, 718, 806, 837, 748, 779, 718, 749, 778, 809, - 807, 838, 832, 832, 833, 864, 834, 865, 835, 866, 808, 839, - 749, 780, 836, 867, 779, 810, 719, 750, 837, 868, 809, 840, - 838, 869, 780, 811, 750, 781, 810, 841, 839, 870, 864, 864, - 865, 896, 866, 897, 840, 871, 867, 898, 781, 812, 811, 842, - 868, 899, 751, 782, 869, 900, 841, 872, 812, 843, 870, 901, - 782, 813, 842, 873, 871, 902, 896, 896, 897, 928, 813, 844, - 898, 929, 872, 903, 783, 814, 843, 874, 899, 930, 900, 931, - 873, 904, 901, 932, 814, 845, 844, 875, 902, 933, 874, 905, - 903, 934, 845, 876, 928, 928, 815, 846, 929, 960, 930, 961, - 875, 906, 904, 935, 931, 962, 932, 963, 905, 936, 846, 877, - 933, 964, 876, 907, 934, 965, 906, 937, 935, 966, 877, 908, - 847, 878, 960, 960, 907, 938, 961, 992, 936, 967, 962, 993, - 963, 994, 964, 995, 878, 909, 937, 968, 908, 939, 965, 996, - 966, 997, 938, 969, 879, 910, 909, 940, 967, 998, 939, 970, - 968, 999, 910, 941, 969, 1000, 940, 971, 970, 1001, 911, 942, - 941, 972, 971, 1002, 942, 973, 972, 1003, 943, 974, 973, 1004, - 974, 1005, 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, - 112, 143, 144, 175, 16, 16, 17, 48, 176, 207, 49, 80, - 81, 112, 113, 144, 208, 239, 145, 176, 240, 271, 17, 17, - 18, 49, 177, 208, 50, 81, 82, 113, 272, 303, 209, 240, - 114, 145, 146, 177, 241, 272, 304, 335, 178, 209, 18, 18, - 19, 50, 51, 82, 83, 114, 273, 304, 210, 241, 115, 146, - 336, 367, 147, 178, 242, 273, 305, 336, 179, 210, 19, 19, - 368, 399, 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, - 337, 368, 116, 147, 306, 337, 148, 179, 243, 274, 400, 431, - 369, 400, 180, 211, 20, 20, 21, 52, 275, 306, 53, 84, - 338, 369, 212, 243, 85, 116, 432, 463, 117, 148, 401, 432, - 307, 338, 244, 275, 149, 180, 370, 401, 181, 212, 276, 307, - 464, 495, 339, 370, 21, 21, 22, 53, 433, 464, 54, 85, - 213, 244, 86, 117, 402, 433, 118, 149, 308, 339, 245, 276, - 371, 402, 150, 181, 496, 527, 465, 496, 182, 213, 434, 465, - 340, 371, 277, 308, 22, 22, 23, 54, 403, 434, 55, 86, - 214, 245, 87, 118, 309, 340, 372, 403, 119, 150, 497, 528, - 528, 559, 246, 277, 466, 497, 151, 182, 435, 466, 341, 372, - 183, 214, 278, 309, 404, 435, 23, 23, 24, 55, 215, 246, - 529, 560, 56, 87, 498, 529, 560, 591, 310, 341, 88, 119, - 373, 404, 467, 498, 120, 151, 247, 278, 436, 467, 152, 183, - 342, 373, 279, 310, 405, 436, 184, 215, 530, 561, 561, 592, - 499, 530, 592, 623, 24, 24, 216, 247, 468, 499, 25, 56, - 374, 405, 57, 88, 311, 342, 89, 120, 437, 468, 248, 279, - 121, 152, 562, 593, 153, 184, 343, 374, 531, 562, 593, 624, - 406, 437, 500, 531, 624, 655, 280, 311, 185, 216, 469, 500, - 375, 406, 217, 248, 25, 25, 312, 343, 26, 57, 58, 89, - 438, 469, 90, 121, 563, 594, 594, 625, 249, 280, 532, 563, - 625, 656, 122, 153, 344, 375, 501, 532, 656, 687, 407, 438, - 154, 185, 281, 312, 470, 501, 186, 217, 376, 407, 595, 626, - 564, 595, 626, 657, 218, 249, 313, 344, 439, 470, 26, 26, - 27, 58, 533, 564, 657, 688, 59, 90, 91, 122, 250, 281, - 502, 533, 688, 719, 123, 154, 408, 439, 345, 376, 155, 186, - 471, 502, 282, 313, 596, 627, 627, 658, 187, 218, 565, 596, - 658, 689, 377, 408, 440, 471, 534, 565, 689, 720, 314, 345, - 219, 250, 27, 27, 28, 59, 503, 534, 720, 751, 60, 91, - 92, 123, 251, 282, 409, 440, 346, 377, 124, 155, 628, 659, - 472, 503, 597, 628, 659, 690, 566, 597, 690, 721, 156, 187, - 283, 314, 535, 566, 721, 752, 188, 219, 378, 409, 441, 472, - 315, 346, 504, 535, 752, 783, 220, 251, 28, 28, 629, 660, - 660, 691, 29, 60, 61, 92, 410, 441, 598, 629, 691, 722, - 252, 283, 93, 124, 347, 378, 473, 504, 567, 598, 722, 753, - 125, 156, 284, 315, 536, 567, 753, 784, 157, 188, 442, 473, - 379, 410, 189, 220, 505, 536, 784, 815, 661, 692, 316, 347, - 630, 661, 692, 723, 221, 252, 599, 630, 723, 754, 411, 442, - 29, 29, 568, 599, 754, 785, 30, 61, 474, 505, 62, 93, - 253, 284, 348, 379, 94, 125, 537, 568, 785, 816, 126, 157, - 285, 316, 158, 189, 443, 474, 662, 693, 693, 724, 380, 411, - 631, 662, 724, 755, 506, 537, 816, 847, 190, 221, 600, 631, - 755, 786, 317, 348, 222, 253, 569, 600, 786, 817, 412, 443, - 475, 506, 30, 30, 31, 62, 349, 380, 254, 285, 63, 94, - 538, 569, 817, 848, 694, 725, 95, 126, 663, 694, 725, 756, - 632, 663, 756, 787, 127, 158, 444, 475, 286, 317, 381, 412, - 507, 538, 848, 879, 159, 190, 601, 632, 787, 818, 191, 222, - 318, 349, 570, 601, 818, 849, 476, 507, 223, 254, 413, 444, - 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, - 350, 381, 255, 286, 633, 664, 788, 819, 445, 476, 602, 633, - 819, 850, 508, 539, 880, 911, 287, 318, 382, 413, 571, 602, - 850, 881, 727, 758, 696, 727, 758, 789, 319, 350, 477, 508, - 665, 696, 789, 820, 414, 445, 540, 571, 881, 912, 634, 665, - 820, 851, 351, 382, 603, 634, 851, 882, 446, 477, 509, 540, - 912, 943, 383, 414, 728, 759, 759, 790, 572, 603, 882, 913, - 697, 728, 790, 821, 666, 697, 821, 852, 478, 509, 635, 666, - 852, 883, 415, 446, 541, 572, 913, 944, 604, 635, 883, 914, - 760, 791, 729, 760, 791, 822, 510, 541, 944, 975, 447, 478, - 698, 729, 822, 853, 573, 604, 914, 945, 667, 698, 853, 884, - 636, 667, 884, 915, 479, 510, 542, 573, 945, 976, 761, 792, - 792, 823, 605, 636, 915, 946, 730, 761, 823, 854, 699, 730, - 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699, - 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, - 762, 793, 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, - 700, 731, 886, 917, 669, 700, 917, 948, 575, 606, 978, 1009, - 638, 669, 948, 979, 794, 825, 825, 856, 763, 794, 856, 887, - 732, 763, 887, 918, 607, 638, 979, 1010, 701, 732, 918, 949, - 670, 701, 949, 980, 826, 857, 795, 826, 857, 888, 764, 795, - 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733, - 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, - 889, 920, 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, - 982, 1013, 859, 890, 828, 859, 890, 921, 797, 828, 921, 952, - 766, 797, 952, 983, 735, 766, 983, 1014, 860, 891, 891, 922, - 829, 860, 922, 953, 798, 829, 953, 984, 767, 798, 984, 1015, - 892, 923, 861, 892, 923, 954, 830, 861, 954, 985, 799, 830, - 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862, - 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, - 926, 957, 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, - 989, 1020, 959, 990, 990, 1021, 991, 1022, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33, + 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66, + 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67, + 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160, + 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160, + 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101, + 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133, + 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196, + 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71, + 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135, + 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198, + 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136, + 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41, + 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322, + 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10, + 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353, + 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325, + 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295, + 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326, + 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296, + 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44, + 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418, + 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359, + 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416, + 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360, + 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141, + 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452, + 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78, + 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482, + 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454, + 143, 174, 269, 300, 393, 424, 453, 484, 480, 480, 481, 512, 238, 269, + 424, 455, 482, 513, 175, 206, 454, 485, 332, 363, 363, 394, 483, 514, + 301, 332, 394, 425, 484, 515, 207, 238, 455, 486, 270, 301, 425, 456, + 485, 516, 364, 395, 239, 270, 456, 487, 512, 512, 333, 364, 395, 426, + 513, 544, 486, 517, 514, 545, 302, 333, 426, 457, 515, 546, 487, 518, + 516, 547, 271, 302, 457, 488, 365, 396, 396, 427, 517, 548, 334, 365, + 427, 458, 488, 519, 544, 544, 303, 334, 458, 489, 518, 549, 545, 576, + 546, 577, 547, 578, 489, 520, 397, 428, 519, 550, 366, 397, 428, 459, + 548, 579, 335, 366, 459, 490, 549, 580, 520, 551, 490, 521, 550, 581, + 576, 576, 577, 608, 398, 429, 429, 460, 578, 609, 367, 398, 460, 491, + 521, 552, 579, 610, 551, 582, 491, 522, 580, 611, 581, 612, 552, 583, + 522, 553, 430, 461, 399, 430, 461, 492, 582, 613, 492, 523, 608, 608, + 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, 612, 643, + 431, 462, 462, 493, 554, 585, 493, 524, 584, 615, 613, 644, 524, 555, + 614, 645, 640, 640, 585, 616, 641, 672, 555, 586, 642, 673, 615, 646, + 463, 494, 643, 674, 494, 525, 644, 675, 525, 556, 586, 617, 616, 647, + 645, 676, 556, 587, 646, 677, 495, 526, 617, 648, 587, 618, 672, 672, + 526, 557, 673, 704, 674, 705, 647, 678, 557, 588, 675, 706, 618, 649, + 676, 707, 588, 619, 648, 679, 677, 708, 527, 558, 558, 589, 678, 709, + 619, 650, 649, 680, 704, 704, 589, 620, 705, 736, 679, 710, 706, 737, + 707, 738, 650, 681, 620, 651, 708, 739, 680, 711, 559, 590, 709, 740, + 590, 621, 651, 682, 681, 712, 710, 741, 621, 652, 736, 736, 737, 768, + 711, 742, 738, 769, 682, 713, 652, 683, 739, 770, 591, 622, 740, 771, + 712, 743, 622, 653, 741, 772, 683, 714, 653, 684, 713, 744, 742, 773, + 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, 770, 801, + 771, 802, 654, 685, 744, 775, 772, 803, 715, 746, 773, 804, 685, 716, + 745, 776, 774, 805, 655, 686, 716, 747, 775, 806, 746, 777, 800, 800, + 801, 832, 686, 717, 802, 833, 803, 834, 776, 807, 804, 835, 747, 778, + 717, 748, 805, 836, 777, 808, 687, 718, 806, 837, 748, 779, 718, 749, + 778, 809, 807, 838, 832, 832, 833, 864, 834, 865, 835, 866, 808, 839, + 749, 780, 836, 867, 779, 810, 719, 750, 837, 868, 809, 840, 838, 869, + 780, 811, 750, 781, 810, 841, 839, 870, 864, 864, 865, 896, 866, 897, + 840, 871, 867, 898, 781, 812, 811, 842, 868, 899, 751, 782, 869, 900, + 841, 872, 812, 843, 870, 901, 782, 813, 842, 873, 871, 902, 896, 896, + 897, 928, 813, 844, 898, 929, 872, 903, 783, 814, 843, 874, 899, 930, + 900, 931, 873, 904, 901, 932, 814, 845, 844, 875, 902, 933, 874, 905, + 903, 934, 845, 876, 928, 928, 815, 846, 929, 960, 930, 961, 875, 906, + 904, 935, 931, 962, 932, 963, 905, 936, 846, 877, 933, 964, 876, 907, + 934, 965, 906, 937, 935, 966, 877, 908, 847, 878, 960, 960, 907, 938, + 961, 992, 936, 967, 962, 993, 963, 994, 964, 995, 878, 909, 937, 968, + 908, 939, 965, 996, 966, 997, 938, 969, 879, 910, 909, 940, 967, 998, + 939, 970, 968, 999, 910, 941, 969, 1000, 940, 971, 970, 1001, 911, 942, + 941, 972, 971, 1002, 942, 973, 972, 1003, 943, 974, 973, 1004, 974, 1005, + 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, + 16, 16, 17, 48, 176, 207, 49, 80, 81, 112, 113, 144, 208, 239, + 145, 176, 240, 271, 17, 17, 18, 49, 177, 208, 50, 81, 82, 113, + 272, 303, 209, 240, 114, 145, 146, 177, 241, 272, 304, 335, 178, 209, + 18, 18, 19, 50, 51, 82, 83, 114, 273, 304, 210, 241, 115, 146, + 336, 367, 147, 178, 242, 273, 305, 336, 179, 210, 19, 19, 368, 399, + 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, 116, 147, + 306, 337, 148, 179, 243, 274, 400, 431, 369, 400, 180, 211, 20, 20, + 21, 52, 275, 306, 53, 84, 338, 369, 212, 243, 85, 116, 432, 463, + 117, 148, 401, 432, 307, 338, 244, 275, 149, 180, 370, 401, 181, 212, + 276, 307, 464, 495, 339, 370, 21, 21, 22, 53, 433, 464, 54, 85, + 213, 244, 86, 117, 402, 433, 118, 149, 308, 339, 245, 276, 371, 402, + 150, 181, 496, 527, 465, 496, 182, 213, 434, 465, 340, 371, 277, 308, + 22, 22, 23, 54, 403, 434, 55, 86, 214, 245, 87, 118, 309, 340, + 372, 403, 119, 150, 497, 528, 528, 559, 246, 277, 466, 497, 151, 182, + 435, 466, 341, 372, 183, 214, 278, 309, 404, 435, 23, 23, 24, 55, + 215, 246, 529, 560, 56, 87, 498, 529, 560, 591, 310, 341, 88, 119, + 373, 404, 467, 498, 120, 151, 247, 278, 436, 467, 152, 183, 342, 373, + 279, 310, 405, 436, 184, 215, 530, 561, 561, 592, 499, 530, 592, 623, + 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, 311, 342, + 89, 120, 437, 468, 248, 279, 121, 152, 562, 593, 153, 184, 343, 374, + 531, 562, 593, 624, 406, 437, 500, 531, 624, 655, 280, 311, 185, 216, + 469, 500, 375, 406, 217, 248, 25, 25, 312, 343, 26, 57, 58, 89, + 438, 469, 90, 121, 563, 594, 594, 625, 249, 280, 532, 563, 625, 656, + 122, 153, 344, 375, 501, 532, 656, 687, 407, 438, 154, 185, 281, 312, + 470, 501, 186, 217, 376, 407, 595, 626, 564, 595, 626, 657, 218, 249, + 313, 344, 439, 470, 26, 26, 27, 58, 533, 564, 657, 688, 59, 90, + 91, 122, 250, 281, 502, 533, 688, 719, 123, 154, 408, 439, 345, 376, + 155, 186, 471, 502, 282, 313, 596, 627, 627, 658, 187, 218, 565, 596, + 658, 689, 377, 408, 440, 471, 534, 565, 689, 720, 314, 345, 219, 250, + 27, 27, 28, 59, 503, 534, 720, 751, 60, 91, 92, 123, 251, 282, + 409, 440, 346, 377, 124, 155, 628, 659, 472, 503, 597, 628, 659, 690, + 566, 597, 690, 721, 156, 187, 283, 314, 535, 566, 721, 752, 188, 219, + 378, 409, 441, 472, 315, 346, 504, 535, 752, 783, 220, 251, 28, 28, + 629, 660, 660, 691, 29, 60, 61, 92, 410, 441, 598, 629, 691, 722, + 252, 283, 93, 124, 347, 378, 473, 504, 567, 598, 722, 753, 125, 156, + 284, 315, 536, 567, 753, 784, 157, 188, 442, 473, 379, 410, 189, 220, + 505, 536, 784, 815, 661, 692, 316, 347, 630, 661, 692, 723, 221, 252, + 599, 630, 723, 754, 411, 442, 29, 29, 568, 599, 754, 785, 30, 61, + 474, 505, 62, 93, 253, 284, 348, 379, 94, 125, 537, 568, 785, 816, + 126, 157, 285, 316, 158, 189, 443, 474, 662, 693, 693, 724, 380, 411, + 631, 662, 724, 755, 506, 537, 816, 847, 190, 221, 600, 631, 755, 786, + 317, 348, 222, 253, 569, 600, 786, 817, 412, 443, 475, 506, 30, 30, + 31, 62, 349, 380, 254, 285, 63, 94, 538, 569, 817, 848, 694, 725, + 95, 126, 663, 694, 725, 756, 632, 663, 756, 787, 127, 158, 444, 475, + 286, 317, 381, 412, 507, 538, 848, 879, 159, 190, 601, 632, 787, 818, + 191, 222, 318, 349, 570, 601, 818, 849, 476, 507, 223, 254, 413, 444, + 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 350, 381, + 255, 286, 633, 664, 788, 819, 445, 476, 602, 633, 819, 850, 508, 539, + 880, 911, 287, 318, 382, 413, 571, 602, 850, 881, 727, 758, 696, 727, + 758, 789, 319, 350, 477, 508, 665, 696, 789, 820, 414, 445, 540, 571, + 881, 912, 634, 665, 820, 851, 351, 382, 603, 634, 851, 882, 446, 477, + 509, 540, 912, 943, 383, 414, 728, 759, 759, 790, 572, 603, 882, 913, + 697, 728, 790, 821, 666, 697, 821, 852, 478, 509, 635, 666, 852, 883, + 415, 446, 541, 572, 913, 944, 604, 635, 883, 914, 760, 791, 729, 760, + 791, 822, 510, 541, 944, 975, 447, 478, 698, 729, 822, 853, 573, 604, + 914, 945, 667, 698, 853, 884, 636, 667, 884, 915, 479, 510, 542, 573, + 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854, + 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699, + 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793, + 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917, + 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825, + 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010, + 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888, + 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733, + 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920, + 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890, + 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766, + 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984, + 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985, + 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862, + 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957, + 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990, + 990, 1021, 991, 1022, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, h2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, - 2, 33, 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, - 65, 96, 35, 66, 66, 97, 3, 3, 96, 96, 4, 35, - 97, 128, 67, 98, 36, 67, 98, 129, 4, 4, 68, 99, - 99, 130, 128, 128, 5, 36, 129, 160, 37, 68, 130, 161, - 100, 131, 69, 100, 131, 162, 5, 5, 160, 160, 6, 37, - 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101, - 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, - 102, 133, 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, - 134, 165, 165, 196, 7, 7, 224, 224, 8, 39, 103, 134, - 196, 227, 225, 256, 40, 71, 226, 257, 166, 197, 72, 103, - 227, 258, 135, 166, 197, 228, 104, 135, 228, 259, 8, 8, - 256, 256, 9, 40, 257, 288, 41, 72, 167, 198, 198, 229, - 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136, - 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, - 10, 41, 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, - 74, 105, 291, 322, 200, 231, 231, 262, 106, 137, 292, 323, - 169, 200, 262, 293, 10, 10, 320, 320, 11, 42, 321, 352, - 43, 74, 138, 169, 293, 324, 322, 353, 232, 263, 75, 106, - 201, 232, 263, 294, 323, 354, 170, 201, 294, 325, 107, 138, - 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295, - 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, - 295, 326, 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, - 356, 387, 265, 296, 234, 265, 296, 327, 12, 12, 140, 171, - 357, 388, 384, 384, 13, 44, 203, 234, 327, 358, 385, 416, - 45, 76, 386, 417, 77, 108, 387, 418, 172, 203, 358, 389, - 266, 297, 297, 328, 109, 140, 235, 266, 328, 359, 388, 419, - 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416, - 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, - 329, 360, 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, - 360, 391, 110, 141, 420, 451, 205, 236, 391, 422, 142, 173, - 299, 330, 330, 361, 421, 452, 14, 14, 268, 299, 361, 392, - 448, 448, 15, 46, 449, 480, 47, 78, 450, 481, 174, 205, - 422, 453, 237, 268, 392, 423, 79, 110, 451, 482, 111, 142, - 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454, - 143, 174, 269, 300, 393, 424, 453, 484, 15, 15, 16, 47, - 48, 79, 238, 269, 424, 455, 175, 206, 454, 485, 80, 111, - 332, 363, 363, 394, 301, 332, 394, 425, 112, 143, 207, 238, - 455, 486, 270, 301, 425, 456, 144, 175, 364, 395, 16, 16, - 239, 270, 456, 487, 17, 48, 333, 364, 395, 426, 176, 207, - 49, 80, 302, 333, 426, 457, 81, 112, 113, 144, 208, 239, - 271, 302, 457, 488, 365, 396, 396, 427, 145, 176, 334, 365, - 427, 458, 240, 271, 17, 17, 18, 49, 177, 208, 303, 334, - 458, 489, 50, 81, 82, 113, 272, 303, 209, 240, 397, 428, - 114, 145, 366, 397, 428, 459, 335, 366, 459, 490, 146, 177, - 241, 272, 304, 335, 178, 209, 18, 18, 19, 50, 51, 82, - 398, 429, 429, 460, 367, 398, 460, 491, 83, 114, 273, 304, - 210, 241, 115, 146, 336, 367, 147, 178, 242, 273, 305, 336, - 430, 461, 399, 430, 461, 492, 179, 210, 19, 19, 368, 399, - 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, - 116, 147, 431, 462, 462, 493, 306, 337, 148, 179, 243, 274, - 400, 431, 369, 400, 180, 211, 20, 20, 21, 52, 275, 306, - 53, 84, 338, 369, 212, 243, 85, 116, 463, 494, 432, 463, - 117, 148, 401, 432, 307, 338, 244, 275, 149, 180, 370, 401, - 181, 212, 276, 307, 464, 495, 339, 370, 21, 21, 22, 53, - 433, 464, 54, 85, 213, 244, 86, 117, 402, 433, 118, 149, - 308, 339, 245, 276, 371, 402, 150, 181, 465, 496, 182, 213, - 434, 465, 340, 371, 277, 308, 22, 22, 23, 54, 403, 434, - 55, 86, 214, 245, 87, 118, 309, 340, 372, 403, 119, 150, - 246, 277, 466, 497, 151, 182, 435, 466, 341, 372, 183, 214, - 278, 309, 404, 435, 23, 23, 24, 55, 215, 246, 56, 87, - 310, 341, 88, 119, 373, 404, 467, 498, 120, 151, 247, 278, - 436, 467, 152, 183, 342, 373, 279, 310, 405, 436, 184, 215, - 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, - 311, 342, 89, 120, 437, 468, 248, 279, 121, 152, 153, 184, - 343, 374, 406, 437, 280, 311, 185, 216, 469, 500, 375, 406, - 217, 248, 25, 25, 312, 343, 26, 57, 58, 89, 438, 469, - 90, 121, 249, 280, 122, 153, 344, 375, 407, 438, 154, 185, - 281, 312, 470, 501, 186, 217, 376, 407, 218, 249, 313, 344, - 439, 470, 26, 26, 27, 58, 59, 90, 91, 122, 250, 281, - 123, 154, 408, 439, 345, 376, 155, 186, 471, 502, 282, 313, - 187, 218, 377, 408, 440, 471, 314, 345, 219, 250, 27, 27, - 28, 59, 60, 91, 92, 123, 251, 282, 409, 440, 346, 377, - 124, 155, 472, 503, 156, 187, 283, 314, 188, 219, 378, 409, - 441, 472, 315, 346, 220, 251, 28, 28, 29, 60, 61, 92, - 410, 441, 252, 283, 93, 124, 347, 378, 473, 504, 125, 156, - 284, 315, 157, 188, 442, 473, 379, 410, 189, 220, 316, 347, - 221, 252, 411, 442, 29, 29, 30, 61, 474, 505, 62, 93, - 253, 284, 348, 379, 94, 125, 126, 157, 285, 316, 158, 189, - 443, 474, 380, 411, 190, 221, 317, 348, 222, 253, 412, 443, - 475, 506, 30, 30, 31, 62, 349, 380, 254, 285, 63, 94, - 95, 126, 127, 158, 444, 475, 286, 317, 381, 412, 159, 190, - 191, 222, 318, 349, 476, 507, 223, 254, 413, 444, 350, 381, - 255, 286, 445, 476, 287, 318, 382, 413, 319, 350, 477, 508, - 414, 445, 351, 382, 446, 477, 383, 414, 478, 509, 415, 446, - 447, 478, 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, - 484, 515, 485, 516, 512, 512, 513, 544, 486, 517, 514, 545, - 515, 546, 487, 518, 516, 547, 517, 548, 488, 519, 544, 544, - 518, 549, 545, 576, 546, 577, 547, 578, 489, 520, 519, 550, - 548, 579, 549, 580, 520, 551, 490, 521, 550, 581, 576, 576, - 577, 608, 578, 609, 521, 552, 579, 610, 551, 582, 491, 522, - 580, 611, 581, 612, 552, 583, 522, 553, 582, 613, 492, 523, - 608, 608, 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, - 583, 614, 612, 643, 554, 585, 493, 524, 584, 615, 613, 644, - 524, 555, 614, 645, 640, 640, 585, 616, 641, 672, 555, 586, - 642, 673, 615, 646, 643, 674, 494, 525, 644, 675, 525, 556, - 586, 617, 616, 647, 645, 676, 556, 587, 646, 677, 495, 526, - 617, 648, 587, 618, 672, 672, 526, 557, 673, 704, 674, 705, - 647, 678, 557, 588, 675, 706, 618, 649, 676, 707, 588, 619, - 648, 679, 677, 708, 496, 527, 527, 558, 558, 589, 678, 709, - 619, 650, 649, 680, 704, 704, 589, 620, 705, 736, 679, 710, - 706, 737, 707, 738, 650, 681, 620, 651, 497, 528, 528, 559, - 708, 739, 680, 711, 559, 590, 709, 740, 590, 621, 651, 682, - 681, 712, 710, 741, 621, 652, 736, 736, 737, 768, 529, 560, - 711, 742, 498, 529, 560, 591, 738, 769, 682, 713, 652, 683, - 739, 770, 591, 622, 740, 771, 712, 743, 622, 653, 741, 772, - 683, 714, 653, 684, 713, 744, 742, 773, 530, 561, 561, 592, - 499, 530, 592, 623, 623, 654, 743, 774, 768, 768, 769, 800, - 684, 715, 714, 745, 770, 801, 771, 802, 654, 685, 744, 775, - 772, 803, 562, 593, 531, 562, 593, 624, 715, 746, 773, 804, - 685, 716, 500, 531, 624, 655, 745, 776, 774, 805, 655, 686, - 716, 747, 775, 806, 746, 777, 800, 800, 801, 832, 686, 717, - 802, 833, 563, 594, 594, 625, 803, 834, 532, 563, 625, 656, - 776, 807, 804, 835, 501, 532, 656, 687, 747, 778, 717, 748, - 805, 836, 777, 808, 687, 718, 806, 837, 748, 779, 595, 626, - 564, 595, 626, 657, 718, 749, 778, 809, 807, 838, 832, 832, - 533, 564, 657, 688, 833, 864, 834, 865, 835, 866, 502, 533, - 688, 719, 808, 839, 749, 780, 836, 867, 779, 810, 719, 750, - 837, 868, 809, 840, 596, 627, 627, 658, 565, 596, 658, 689, - 838, 869, 780, 811, 750, 781, 534, 565, 689, 720, 810, 841, - 839, 870, 864, 864, 503, 534, 720, 751, 865, 896, 866, 897, - 840, 871, 867, 898, 781, 812, 811, 842, 628, 659, 868, 899, - 751, 782, 597, 628, 659, 690, 566, 597, 690, 721, 869, 900, - 841, 872, 535, 566, 721, 752, 812, 843, 870, 901, 782, 813, - 842, 873, 504, 535, 752, 783, 871, 902, 629, 660, 660, 691, - 896, 896, 897, 928, 598, 629, 691, 722, 813, 844, 898, 929, - 872, 903, 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, - 900, 931, 536, 567, 753, 784, 873, 904, 901, 932, 814, 845, - 844, 875, 902, 933, 505, 536, 784, 815, 661, 692, 630, 661, - 692, 723, 874, 905, 599, 630, 723, 754, 903, 934, 845, 876, - 568, 599, 754, 785, 928, 928, 815, 846, 929, 960, 930, 961, - 875, 906, 904, 935, 931, 962, 537, 568, 785, 816, 932, 963, - 905, 936, 662, 693, 693, 724, 846, 877, 933, 964, 876, 907, - 631, 662, 724, 755, 506, 537, 816, 847, 934, 965, 600, 631, - 755, 786, 906, 937, 569, 600, 786, 817, 935, 966, 877, 908, - 847, 878, 960, 960, 907, 938, 961, 992, 936, 967, 538, 569, - 817, 848, 962, 993, 694, 725, 663, 694, 725, 756, 963, 994, - 632, 663, 756, 787, 964, 995, 878, 909, 937, 968, 507, 538, - 848, 879, 908, 939, 601, 632, 787, 818, 965, 996, 966, 997, - 570, 601, 818, 849, 938, 969, 879, 910, 909, 940, 967, 998, - 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, - 939, 970, 633, 664, 788, 819, 968, 999, 602, 633, 819, 850, - 910, 941, 508, 539, 880, 911, 969, 1000, 940, 971, 571, 602, - 850, 881, 727, 758, 696, 727, 758, 789, 970, 1001, 665, 696, - 789, 820, 911, 942, 941, 972, 540, 571, 881, 912, 634, 665, - 820, 851, 971, 1002, 603, 634, 851, 882, 942, 973, 509, 540, - 912, 943, 728, 759, 759, 790, 972, 1003, 572, 603, 882, 913, - 697, 728, 790, 821, 666, 697, 821, 852, 943, 974, 635, 666, - 852, 883, 541, 572, 913, 944, 973, 1004, 604, 635, 883, 914, - 760, 791, 729, 760, 791, 822, 510, 541, 944, 975, 974, 1005, - 698, 729, 822, 853, 573, 604, 914, 945, 667, 698, 853, 884, - 636, 667, 884, 915, 975, 1006, 542, 573, 945, 976, 761, 792, - 792, 823, 605, 636, 915, 946, 730, 761, 823, 854, 699, 730, - 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699, - 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, - 762, 793, 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, - 700, 731, 886, 917, 669, 700, 917, 948, 575, 606, 978, 1009, - 638, 669, 948, 979, 794, 825, 825, 856, 763, 794, 856, 887, - 732, 763, 887, 918, 607, 638, 979, 1010, 701, 732, 918, 949, - 670, 701, 949, 980, 826, 857, 795, 826, 857, 888, 764, 795, - 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733, - 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, - 889, 920, 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, - 982, 1013, 859, 890, 828, 859, 890, 921, 797, 828, 921, 952, - 766, 797, 952, 983, 735, 766, 983, 1014, 860, 891, 891, 922, - 829, 860, 922, 953, 798, 829, 953, 984, 767, 798, 984, 1015, - 892, 923, 861, 892, 923, 954, 830, 861, 954, 985, 799, 830, - 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862, - 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, - 926, 957, 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, - 989, 1020, 959, 990, 990, 1021, 991, 1022, 0, 0 + 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33, + 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66, + 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67, + 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160, + 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160, + 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101, + 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133, + 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196, + 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71, + 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135, + 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198, + 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136, + 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41, + 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322, + 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10, + 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353, + 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325, + 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295, + 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326, + 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296, + 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44, + 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418, + 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359, + 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416, + 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360, + 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141, + 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452, + 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78, + 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482, + 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454, + 143, 174, 269, 300, 393, 424, 453, 484, 15, 15, 16, 47, 48, 79, + 238, 269, 424, 455, 175, 206, 454, 485, 80, 111, 332, 363, 363, 394, + 301, 332, 394, 425, 112, 143, 207, 238, 455, 486, 270, 301, 425, 456, + 144, 175, 364, 395, 16, 16, 239, 270, 456, 487, 17, 48, 333, 364, + 395, 426, 176, 207, 49, 80, 302, 333, 426, 457, 81, 112, 113, 144, + 208, 239, 271, 302, 457, 488, 365, 396, 396, 427, 145, 176, 334, 365, + 427, 458, 240, 271, 17, 17, 18, 49, 177, 208, 303, 334, 458, 489, + 50, 81, 82, 113, 272, 303, 209, 240, 397, 428, 114, 145, 366, 397, + 428, 459, 335, 366, 459, 490, 146, 177, 241, 272, 304, 335, 178, 209, + 18, 18, 19, 50, 51, 82, 398, 429, 429, 460, 367, 398, 460, 491, + 83, 114, 273, 304, 210, 241, 115, 146, 336, 367, 147, 178, 242, 273, + 305, 336, 430, 461, 399, 430, 461, 492, 179, 210, 19, 19, 368, 399, + 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, 116, 147, + 431, 462, 462, 493, 306, 337, 148, 179, 243, 274, 400, 431, 369, 400, + 180, 211, 20, 20, 21, 52, 275, 306, 53, 84, 338, 369, 212, 243, + 85, 116, 463, 494, 432, 463, 117, 148, 401, 432, 307, 338, 244, 275, + 149, 180, 370, 401, 181, 212, 276, 307, 464, 495, 339, 370, 21, 21, + 22, 53, 433, 464, 54, 85, 213, 244, 86, 117, 402, 433, 118, 149, + 308, 339, 245, 276, 371, 402, 150, 181, 465, 496, 182, 213, 434, 465, + 340, 371, 277, 308, 22, 22, 23, 54, 403, 434, 55, 86, 214, 245, + 87, 118, 309, 340, 372, 403, 119, 150, 246, 277, 466, 497, 151, 182, + 435, 466, 341, 372, 183, 214, 278, 309, 404, 435, 23, 23, 24, 55, + 215, 246, 56, 87, 310, 341, 88, 119, 373, 404, 467, 498, 120, 151, + 247, 278, 436, 467, 152, 183, 342, 373, 279, 310, 405, 436, 184, 215, + 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, 311, 342, + 89, 120, 437, 468, 248, 279, 121, 152, 153, 184, 343, 374, 406, 437, + 280, 311, 185, 216, 469, 500, 375, 406, 217, 248, 25, 25, 312, 343, + 26, 57, 58, 89, 438, 469, 90, 121, 249, 280, 122, 153, 344, 375, + 407, 438, 154, 185, 281, 312, 470, 501, 186, 217, 376, 407, 218, 249, + 313, 344, 439, 470, 26, 26, 27, 58, 59, 90, 91, 122, 250, 281, + 123, 154, 408, 439, 345, 376, 155, 186, 471, 502, 282, 313, 187, 218, + 377, 408, 440, 471, 314, 345, 219, 250, 27, 27, 28, 59, 60, 91, + 92, 123, 251, 282, 409, 440, 346, 377, 124, 155, 472, 503, 156, 187, + 283, 314, 188, 219, 378, 409, 441, 472, 315, 346, 220, 251, 28, 28, + 29, 60, 61, 92, 410, 441, 252, 283, 93, 124, 347, 378, 473, 504, + 125, 156, 284, 315, 157, 188, 442, 473, 379, 410, 189, 220, 316, 347, + 221, 252, 411, 442, 29, 29, 30, 61, 474, 505, 62, 93, 253, 284, + 348, 379, 94, 125, 126, 157, 285, 316, 158, 189, 443, 474, 380, 411, + 190, 221, 317, 348, 222, 253, 412, 443, 475, 506, 30, 30, 31, 62, + 349, 380, 254, 285, 63, 94, 95, 126, 127, 158, 444, 475, 286, 317, + 381, 412, 159, 190, 191, 222, 318, 349, 476, 507, 223, 254, 413, 444, + 350, 381, 255, 286, 445, 476, 287, 318, 382, 413, 319, 350, 477, 508, + 414, 445, 351, 382, 446, 477, 383, 414, 478, 509, 415, 446, 447, 478, + 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516, + 512, 512, 513, 544, 486, 517, 514, 545, 515, 546, 487, 518, 516, 547, + 517, 548, 488, 519, 544, 544, 518, 549, 545, 576, 546, 577, 547, 578, + 489, 520, 519, 550, 548, 579, 549, 580, 520, 551, 490, 521, 550, 581, + 576, 576, 577, 608, 578, 609, 521, 552, 579, 610, 551, 582, 491, 522, + 580, 611, 581, 612, 552, 583, 522, 553, 582, 613, 492, 523, 608, 608, + 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, 612, 643, + 554, 585, 493, 524, 584, 615, 613, 644, 524, 555, 614, 645, 640, 640, + 585, 616, 641, 672, 555, 586, 642, 673, 615, 646, 643, 674, 494, 525, + 644, 675, 525, 556, 586, 617, 616, 647, 645, 676, 556, 587, 646, 677, + 495, 526, 617, 648, 587, 618, 672, 672, 526, 557, 673, 704, 674, 705, + 647, 678, 557, 588, 675, 706, 618, 649, 676, 707, 588, 619, 648, 679, + 677, 708, 496, 527, 527, 558, 558, 589, 678, 709, 619, 650, 649, 680, + 704, 704, 589, 620, 705, 736, 679, 710, 706, 737, 707, 738, 650, 681, + 620, 651, 497, 528, 528, 559, 708, 739, 680, 711, 559, 590, 709, 740, + 590, 621, 651, 682, 681, 712, 710, 741, 621, 652, 736, 736, 737, 768, + 529, 560, 711, 742, 498, 529, 560, 591, 738, 769, 682, 713, 652, 683, + 739, 770, 591, 622, 740, 771, 712, 743, 622, 653, 741, 772, 683, 714, + 653, 684, 713, 744, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623, + 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, 770, 801, + 771, 802, 654, 685, 744, 775, 772, 803, 562, 593, 531, 562, 593, 624, + 715, 746, 773, 804, 685, 716, 500, 531, 624, 655, 745, 776, 774, 805, + 655, 686, 716, 747, 775, 806, 746, 777, 800, 800, 801, 832, 686, 717, + 802, 833, 563, 594, 594, 625, 803, 834, 532, 563, 625, 656, 776, 807, + 804, 835, 501, 532, 656, 687, 747, 778, 717, 748, 805, 836, 777, 808, + 687, 718, 806, 837, 748, 779, 595, 626, 564, 595, 626, 657, 718, 749, + 778, 809, 807, 838, 832, 832, 533, 564, 657, 688, 833, 864, 834, 865, + 835, 866, 502, 533, 688, 719, 808, 839, 749, 780, 836, 867, 779, 810, + 719, 750, 837, 868, 809, 840, 596, 627, 627, 658, 565, 596, 658, 689, + 838, 869, 780, 811, 750, 781, 534, 565, 689, 720, 810, 841, 839, 870, + 864, 864, 503, 534, 720, 751, 865, 896, 866, 897, 840, 871, 867, 898, + 781, 812, 811, 842, 628, 659, 868, 899, 751, 782, 597, 628, 659, 690, + 566, 597, 690, 721, 869, 900, 841, 872, 535, 566, 721, 752, 812, 843, + 870, 901, 782, 813, 842, 873, 504, 535, 752, 783, 871, 902, 629, 660, + 660, 691, 896, 896, 897, 928, 598, 629, 691, 722, 813, 844, 898, 929, + 872, 903, 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, 900, 931, + 536, 567, 753, 784, 873, 904, 901, 932, 814, 845, 844, 875, 902, 933, + 505, 536, 784, 815, 661, 692, 630, 661, 692, 723, 874, 905, 599, 630, + 723, 754, 903, 934, 845, 876, 568, 599, 754, 785, 928, 928, 815, 846, + 929, 960, 930, 961, 875, 906, 904, 935, 931, 962, 537, 568, 785, 816, + 932, 963, 905, 936, 662, 693, 693, 724, 846, 877, 933, 964, 876, 907, + 631, 662, 724, 755, 506, 537, 816, 847, 934, 965, 600, 631, 755, 786, + 906, 937, 569, 600, 786, 817, 935, 966, 877, 908, 847, 878, 960, 960, + 907, 938, 961, 992, 936, 967, 538, 569, 817, 848, 962, 993, 694, 725, + 663, 694, 725, 756, 963, 994, 632, 663, 756, 787, 964, 995, 878, 909, + 937, 968, 507, 538, 848, 879, 908, 939, 601, 632, 787, 818, 965, 996, + 966, 997, 570, 601, 818, 849, 938, 969, 879, 910, 909, 940, 967, 998, + 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 939, 970, + 633, 664, 788, 819, 968, 999, 602, 633, 819, 850, 910, 941, 508, 539, + 880, 911, 969, 1000, 940, 971, 571, 602, 850, 881, 727, 758, 696, 727, + 758, 789, 970, 1001, 665, 696, 789, 820, 911, 942, 941, 972, 540, 571, + 881, 912, 634, 665, 820, 851, 971, 1002, 603, 634, 851, 882, 942, 973, + 509, 540, 912, 943, 728, 759, 759, 790, 972, 1003, 572, 603, 882, 913, + 697, 728, 790, 821, 666, 697, 821, 852, 943, 974, 635, 666, 852, 883, + 541, 572, 913, 944, 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760, + 791, 822, 510, 541, 944, 975, 974, 1005, 698, 729, 822, 853, 573, 604, + 914, 945, 667, 698, 853, 884, 636, 667, 884, 915, 975, 1006, 542, 573, + 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854, + 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699, + 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793, + 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917, + 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825, + 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010, + 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888, + 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733, + 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920, + 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890, + 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766, + 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984, + 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985, + 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862, + 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957, + 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990, + 990, 1021, 991, 1022, 0, 0 }; DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, - 2, 33, 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, - 65, 96, 35, 66, 66, 97, 3, 3, 96, 96, 4, 35, - 97, 128, 67, 98, 36, 67, 98, 129, 4, 4, 68, 99, - 99, 130, 128, 128, 5, 36, 129, 160, 37, 68, 130, 161, - 100, 131, 69, 100, 131, 162, 5, 5, 160, 160, 6, 37, - 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101, - 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, - 102, 133, 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, - 134, 165, 165, 196, 7, 7, 224, 224, 8, 39, 103, 134, - 196, 227, 225, 256, 40, 71, 226, 257, 166, 197, 72, 103, - 227, 258, 135, 166, 197, 228, 104, 135, 228, 259, 8, 8, - 256, 256, 9, 40, 257, 288, 41, 72, 167, 198, 198, 229, - 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136, - 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, - 10, 41, 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, - 74, 105, 291, 322, 200, 231, 231, 262, 106, 137, 292, 323, - 169, 200, 262, 293, 10, 10, 320, 320, 11, 42, 321, 352, - 43, 74, 138, 169, 293, 324, 322, 353, 232, 263, 75, 106, - 201, 232, 263, 294, 323, 354, 170, 201, 294, 325, 107, 138, - 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295, - 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, - 295, 326, 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, - 356, 387, 265, 296, 234, 265, 296, 327, 12, 12, 140, 171, - 357, 388, 384, 384, 13, 44, 203, 234, 327, 358, 385, 416, - 45, 76, 386, 417, 77, 108, 387, 418, 172, 203, 358, 389, - 266, 297, 297, 328, 109, 140, 235, 266, 328, 359, 388, 419, - 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416, - 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, - 329, 360, 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, - 360, 391, 110, 141, 420, 451, 205, 236, 391, 422, 142, 173, - 299, 330, 330, 361, 421, 452, 14, 14, 268, 299, 361, 392, - 448, 448, 15, 46, 449, 480, 47, 78, 450, 481, 174, 205, - 422, 453, 237, 268, 392, 423, 79, 110, 451, 482, 111, 142, - 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454, - 143, 174, 269, 300, 393, 424, 453, 484, 238, 269, 424, 455, - 175, 206, 454, 485, 332, 363, 363, 394, 301, 332, 394, 425, - 207, 238, 455, 486, 270, 301, 425, 456, 364, 395, 239, 270, - 456, 487, 333, 364, 395, 426, 302, 333, 426, 457, 271, 302, - 457, 488, 365, 396, 396, 427, 334, 365, 427, 458, 303, 334, - 458, 489, 397, 428, 366, 397, 428, 459, 335, 366, 459, 490, - 398, 429, 429, 460, 367, 398, 460, 491, 430, 461, 399, 430, - 461, 492, 431, 462, 462, 493, 463, 494, 15, 15, 480, 480, - 16, 47, 481, 512, 48, 79, 482, 513, 80, 111, 483, 514, - 112, 143, 484, 515, 144, 175, 485, 516, 16, 16, 512, 512, - 17, 48, 513, 544, 176, 207, 486, 517, 49, 80, 514, 545, - 81, 112, 515, 546, 113, 144, 208, 239, 487, 518, 516, 547, - 145, 176, 517, 548, 240, 271, 488, 519, 17, 17, 544, 544, - 18, 49, 177, 208, 518, 549, 545, 576, 50, 81, 546, 577, - 82, 113, 547, 578, 272, 303, 489, 520, 209, 240, 519, 550, - 114, 145, 548, 579, 146, 177, 549, 580, 241, 272, 520, 551, - 304, 335, 490, 521, 178, 209, 550, 581, 18, 18, 576, 576, - 19, 50, 577, 608, 51, 82, 578, 609, 83, 114, 273, 304, - 521, 552, 579, 610, 210, 241, 551, 582, 115, 146, 336, 367, - 491, 522, 580, 611, 147, 178, 581, 612, 242, 273, 552, 583, - 305, 336, 522, 553, 179, 210, 582, 613, 19, 19, 368, 399, - 492, 523, 608, 608, 20, 51, 609, 640, 52, 83, 610, 641, - 274, 305, 553, 584, 84, 115, 611, 642, 211, 242, 337, 368, - 523, 554, 583, 614, 116, 147, 612, 643, 306, 337, 554, 585, - 148, 179, 243, 274, 400, 431, 493, 524, 584, 615, 613, 644, - 369, 400, 524, 555, 180, 211, 614, 645, 20, 20, 640, 640, - 21, 52, 275, 306, 585, 616, 641, 672, 53, 84, 338, 369, - 555, 586, 642, 673, 212, 243, 615, 646, 85, 116, 643, 674, - 432, 463, 494, 525, 117, 148, 644, 675, 401, 432, 525, 556, - 307, 338, 586, 617, 244, 275, 616, 647, 149, 180, 645, 676, - 370, 401, 556, 587, 181, 212, 646, 677, 276, 307, 464, 495, - 495, 526, 617, 648, 339, 370, 587, 618, 21, 21, 672, 672, - 22, 53, 433, 464, 526, 557, 673, 704, 54, 85, 674, 705, - 213, 244, 647, 678, 86, 117, 402, 433, 557, 588, 675, 706, - 118, 149, 308, 339, 618, 649, 676, 707, 245, 276, 371, 402, - 588, 619, 648, 679, 150, 181, 677, 708, 496, 527, 465, 496, - 527, 558, 182, 213, 434, 465, 558, 589, 678, 709, 340, 371, - 619, 650, 277, 308, 649, 680, 22, 22, 704, 704, 23, 54, - 403, 434, 589, 620, 705, 736, 55, 86, 214, 245, 679, 710, - 706, 737, 87, 118, 707, 738, 309, 340, 650, 681, 372, 403, - 620, 651, 119, 150, 497, 528, 528, 559, 708, 739, 246, 277, - 680, 711, 466, 497, 559, 590, 151, 182, 709, 740, 435, 466, - 590, 621, 341, 372, 651, 682, 183, 214, 278, 309, 681, 712, - 710, 741, 404, 435, 621, 652, 23, 23, 736, 736, 24, 55, - 737, 768, 215, 246, 529, 560, 711, 742, 56, 87, 498, 529, - 560, 591, 738, 769, 310, 341, 682, 713, 88, 119, 373, 404, - 652, 683, 739, 770, 467, 498, 591, 622, 120, 151, 740, 771, - 247, 278, 712, 743, 436, 467, 622, 653, 152, 183, 741, 772, - 342, 373, 683, 714, 279, 310, 405, 436, 653, 684, 713, 744, - 184, 215, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623, - 24, 24, 216, 247, 468, 499, 623, 654, 743, 774, 768, 768, - 25, 56, 769, 800, 374, 405, 684, 715, 57, 88, 311, 342, - 714, 745, 770, 801, 89, 120, 771, 802, 437, 468, 654, 685, - 248, 279, 744, 775, 121, 152, 772, 803, 562, 593, 153, 184, - 343, 374, 531, 562, 593, 624, 715, 746, 773, 804, 406, 437, - 685, 716, 500, 531, 624, 655, 280, 311, 745, 776, 185, 216, - 774, 805, 469, 500, 655, 686, 375, 406, 716, 747, 217, 248, - 775, 806, 25, 25, 312, 343, 746, 777, 800, 800, 26, 57, - 801, 832, 58, 89, 438, 469, 686, 717, 802, 833, 90, 121, - 563, 594, 594, 625, 803, 834, 249, 280, 532, 563, 625, 656, - 776, 807, 122, 153, 804, 835, 344, 375, 501, 532, 656, 687, - 747, 778, 407, 438, 717, 748, 154, 185, 805, 836, 281, 312, - 777, 808, 470, 501, 687, 718, 186, 217, 806, 837, 376, 407, - 748, 779, 595, 626, 564, 595, 626, 657, 218, 249, 313, 344, - 439, 470, 718, 749, 778, 809, 807, 838, 26, 26, 832, 832, - 27, 58, 533, 564, 657, 688, 833, 864, 59, 90, 834, 865, - 91, 122, 835, 866, 250, 281, 502, 533, 688, 719, 808, 839, - 123, 154, 408, 439, 749, 780, 836, 867, 345, 376, 779, 810, - 155, 186, 471, 502, 719, 750, 837, 868, 282, 313, 809, 840, - 596, 627, 627, 658, 187, 218, 565, 596, 658, 689, 838, 869, - 377, 408, 780, 811, 440, 471, 750, 781, 534, 565, 689, 720, - 314, 345, 810, 841, 219, 250, 839, 870, 27, 27, 864, 864, - 28, 59, 503, 534, 720, 751, 865, 896, 60, 91, 866, 897, - 92, 123, 251, 282, 840, 871, 867, 898, 409, 440, 781, 812, - 346, 377, 811, 842, 124, 155, 628, 659, 868, 899, 472, 503, - 751, 782, 597, 628, 659, 690, 566, 597, 690, 721, 156, 187, - 869, 900, 283, 314, 841, 872, 535, 566, 721, 752, 188, 219, - 378, 409, 812, 843, 870, 901, 441, 472, 782, 813, 315, 346, - 842, 873, 504, 535, 752, 783, 220, 251, 871, 902, 28, 28, - 629, 660, 660, 691, 896, 896, 29, 60, 897, 928, 61, 92, - 410, 441, 598, 629, 691, 722, 813, 844, 898, 929, 252, 283, - 872, 903, 93, 124, 347, 378, 473, 504, 783, 814, 843, 874, - 899, 930, 567, 598, 722, 753, 125, 156, 900, 931, 284, 315, - 536, 567, 753, 784, 873, 904, 157, 188, 901, 932, 442, 473, - 814, 845, 379, 410, 844, 875, 189, 220, 902, 933, 505, 536, - 784, 815, 661, 692, 316, 347, 630, 661, 692, 723, 874, 905, - 221, 252, 599, 630, 723, 754, 903, 934, 411, 442, 845, 876, - 29, 29, 568, 599, 754, 785, 928, 928, 30, 61, 474, 505, - 815, 846, 929, 960, 62, 93, 930, 961, 253, 284, 348, 379, - 875, 906, 904, 935, 94, 125, 931, 962, 537, 568, 785, 816, - 126, 157, 932, 963, 285, 316, 905, 936, 158, 189, 443, 474, - 662, 693, 693, 724, 846, 877, 933, 964, 380, 411, 876, 907, - 631, 662, 724, 755, 506, 537, 816, 847, 190, 221, 934, 965, - 600, 631, 755, 786, 317, 348, 906, 937, 222, 253, 569, 600, - 786, 817, 935, 966, 412, 443, 877, 908, 475, 506, 847, 878, - 30, 30, 960, 960, 31, 62, 349, 380, 907, 938, 961, 992, - 254, 285, 936, 967, 63, 94, 538, 569, 817, 848, 962, 993, - 694, 725, 95, 126, 663, 694, 725, 756, 963, 994, 632, 663, - 756, 787, 127, 158, 964, 995, 444, 475, 878, 909, 286, 317, - 937, 968, 381, 412, 507, 538, 848, 879, 908, 939, 159, 190, - 601, 632, 787, 818, 965, 996, 191, 222, 966, 997, 318, 349, - 570, 601, 818, 849, 938, 969, 476, 507, 879, 910, 223, 254, - 413, 444, 909, 940, 967, 998, 695, 726, 726, 757, 664, 695, - 757, 788, 539, 570, 849, 880, 350, 381, 939, 970, 255, 286, - 633, 664, 788, 819, 968, 999, 445, 476, 602, 633, 819, 850, - 910, 941, 508, 539, 880, 911, 287, 318, 969, 1000, 382, 413, - 940, 971, 571, 602, 850, 881, 727, 758, 696, 727, 758, 789, - 319, 350, 970, 1001, 477, 508, 665, 696, 789, 820, 911, 942, - 414, 445, 941, 972, 540, 571, 881, 912, 634, 665, 820, 851, - 351, 382, 971, 1002, 603, 634, 851, 882, 446, 477, 942, 973, - 509, 540, 912, 943, 383, 414, 728, 759, 759, 790, 972, 1003, - 572, 603, 882, 913, 697, 728, 790, 821, 666, 697, 821, 852, - 478, 509, 943, 974, 635, 666, 852, 883, 415, 446, 541, 572, - 913, 944, 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760, - 791, 822, 510, 541, 944, 975, 447, 478, 974, 1005, 698, 729, - 822, 853, 573, 604, 914, 945, 667, 698, 853, 884, 636, 667, - 884, 915, 479, 510, 975, 1006, 542, 573, 945, 976, 761, 792, - 792, 823, 605, 636, 915, 946, 730, 761, 823, 854, 699, 730, - 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699, - 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, - 762, 793, 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, - 700, 731, 886, 917, 669, 700, 917, 948, 575, 606, 978, 1009, - 638, 669, 948, 979, 794, 825, 825, 856, 763, 794, 856, 887, - 732, 763, 887, 918, 607, 638, 979, 1010, 701, 732, 918, 949, - 670, 701, 949, 980, 826, 857, 795, 826, 857, 888, 764, 795, - 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733, - 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, - 889, 920, 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, - 982, 1013, 859, 890, 828, 859, 890, 921, 797, 828, 921, 952, - 766, 797, 952, 983, 735, 766, 983, 1014, 860, 891, 891, 922, - 829, 860, 922, 953, 798, 829, 953, 984, 767, 798, 984, 1015, - 892, 923, 861, 892, 923, 954, 830, 861, 954, 985, 799, 830, - 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862, - 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, - 926, 957, 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, - 989, 1020, 959, 990, 990, 1021, 991, 1022, 0, 0 + 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33, + 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66, + 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67, + 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160, + 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160, + 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101, + 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133, + 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196, + 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71, + 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135, + 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198, + 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136, + 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41, + 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322, + 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10, + 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353, + 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325, + 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295, + 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326, + 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296, + 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44, + 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418, + 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359, + 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416, + 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360, + 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141, + 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452, + 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78, + 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482, + 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454, + 143, 174, 269, 300, 393, 424, 453, 484, 238, 269, 424, 455, 175, 206, + 454, 485, 332, 363, 363, 394, 301, 332, 394, 425, 207, 238, 455, 486, + 270, 301, 425, 456, 364, 395, 239, 270, 456, 487, 333, 364, 395, 426, + 302, 333, 426, 457, 271, 302, 457, 488, 365, 396, 396, 427, 334, 365, + 427, 458, 303, 334, 458, 489, 397, 428, 366, 397, 428, 459, 335, 366, + 459, 490, 398, 429, 429, 460, 367, 398, 460, 491, 430, 461, 399, 430, + 461, 492, 431, 462, 462, 493, 463, 494, 15, 15, 480, 480, 16, 47, + 481, 512, 48, 79, 482, 513, 80, 111, 483, 514, 112, 143, 484, 515, + 144, 175, 485, 516, 16, 16, 512, 512, 17, 48, 513, 544, 176, 207, + 486, 517, 49, 80, 514, 545, 81, 112, 515, 546, 113, 144, 208, 239, + 487, 518, 516, 547, 145, 176, 517, 548, 240, 271, 488, 519, 17, 17, + 544, 544, 18, 49, 177, 208, 518, 549, 545, 576, 50, 81, 546, 577, + 82, 113, 547, 578, 272, 303, 489, 520, 209, 240, 519, 550, 114, 145, + 548, 579, 146, 177, 549, 580, 241, 272, 520, 551, 304, 335, 490, 521, + 178, 209, 550, 581, 18, 18, 576, 576, 19, 50, 577, 608, 51, 82, + 578, 609, 83, 114, 273, 304, 521, 552, 579, 610, 210, 241, 551, 582, + 115, 146, 336, 367, 491, 522, 580, 611, 147, 178, 581, 612, 242, 273, + 552, 583, 305, 336, 522, 553, 179, 210, 582, 613, 19, 19, 368, 399, + 492, 523, 608, 608, 20, 51, 609, 640, 52, 83, 610, 641, 274, 305, + 553, 584, 84, 115, 611, 642, 211, 242, 337, 368, 523, 554, 583, 614, + 116, 147, 612, 643, 306, 337, 554, 585, 148, 179, 243, 274, 400, 431, + 493, 524, 584, 615, 613, 644, 369, 400, 524, 555, 180, 211, 614, 645, + 20, 20, 640, 640, 21, 52, 275, 306, 585, 616, 641, 672, 53, 84, + 338, 369, 555, 586, 642, 673, 212, 243, 615, 646, 85, 116, 643, 674, + 432, 463, 494, 525, 117, 148, 644, 675, 401, 432, 525, 556, 307, 338, + 586, 617, 244, 275, 616, 647, 149, 180, 645, 676, 370, 401, 556, 587, + 181, 212, 646, 677, 276, 307, 464, 495, 495, 526, 617, 648, 339, 370, + 587, 618, 21, 21, 672, 672, 22, 53, 433, 464, 526, 557, 673, 704, + 54, 85, 674, 705, 213, 244, 647, 678, 86, 117, 402, 433, 557, 588, + 675, 706, 118, 149, 308, 339, 618, 649, 676, 707, 245, 276, 371, 402, + 588, 619, 648, 679, 150, 181, 677, 708, 496, 527, 465, 496, 527, 558, + 182, 213, 434, 465, 558, 589, 678, 709, 340, 371, 619, 650, 277, 308, + 649, 680, 22, 22, 704, 704, 23, 54, 403, 434, 589, 620, 705, 736, + 55, 86, 214, 245, 679, 710, 706, 737, 87, 118, 707, 738, 309, 340, + 650, 681, 372, 403, 620, 651, 119, 150, 497, 528, 528, 559, 708, 739, + 246, 277, 680, 711, 466, 497, 559, 590, 151, 182, 709, 740, 435, 466, + 590, 621, 341, 372, 651, 682, 183, 214, 278, 309, 681, 712, 710, 741, + 404, 435, 621, 652, 23, 23, 736, 736, 24, 55, 737, 768, 215, 246, + 529, 560, 711, 742, 56, 87, 498, 529, 560, 591, 738, 769, 310, 341, + 682, 713, 88, 119, 373, 404, 652, 683, 739, 770, 467, 498, 591, 622, + 120, 151, 740, 771, 247, 278, 712, 743, 436, 467, 622, 653, 152, 183, + 741, 772, 342, 373, 683, 714, 279, 310, 405, 436, 653, 684, 713, 744, + 184, 215, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623, 24, 24, + 216, 247, 468, 499, 623, 654, 743, 774, 768, 768, 25, 56, 769, 800, + 374, 405, 684, 715, 57, 88, 311, 342, 714, 745, 770, 801, 89, 120, + 771, 802, 437, 468, 654, 685, 248, 279, 744, 775, 121, 152, 772, 803, + 562, 593, 153, 184, 343, 374, 531, 562, 593, 624, 715, 746, 773, 804, + 406, 437, 685, 716, 500, 531, 624, 655, 280, 311, 745, 776, 185, 216, + 774, 805, 469, 500, 655, 686, 375, 406, 716, 747, 217, 248, 775, 806, + 25, 25, 312, 343, 746, 777, 800, 800, 26, 57, 801, 832, 58, 89, + 438, 469, 686, 717, 802, 833, 90, 121, 563, 594, 594, 625, 803, 834, + 249, 280, 532, 563, 625, 656, 776, 807, 122, 153, 804, 835, 344, 375, + 501, 532, 656, 687, 747, 778, 407, 438, 717, 748, 154, 185, 805, 836, + 281, 312, 777, 808, 470, 501, 687, 718, 186, 217, 806, 837, 376, 407, + 748, 779, 595, 626, 564, 595, 626, 657, 218, 249, 313, 344, 439, 470, + 718, 749, 778, 809, 807, 838, 26, 26, 832, 832, 27, 58, 533, 564, + 657, 688, 833, 864, 59, 90, 834, 865, 91, 122, 835, 866, 250, 281, + 502, 533, 688, 719, 808, 839, 123, 154, 408, 439, 749, 780, 836, 867, + 345, 376, 779, 810, 155, 186, 471, 502, 719, 750, 837, 868, 282, 313, + 809, 840, 596, 627, 627, 658, 187, 218, 565, 596, 658, 689, 838, 869, + 377, 408, 780, 811, 440, 471, 750, 781, 534, 565, 689, 720, 314, 345, + 810, 841, 219, 250, 839, 870, 27, 27, 864, 864, 28, 59, 503, 534, + 720, 751, 865, 896, 60, 91, 866, 897, 92, 123, 251, 282, 840, 871, + 867, 898, 409, 440, 781, 812, 346, 377, 811, 842, 124, 155, 628, 659, + 868, 899, 472, 503, 751, 782, 597, 628, 659, 690, 566, 597, 690, 721, + 156, 187, 869, 900, 283, 314, 841, 872, 535, 566, 721, 752, 188, 219, + 378, 409, 812, 843, 870, 901, 441, 472, 782, 813, 315, 346, 842, 873, + 504, 535, 752, 783, 220, 251, 871, 902, 28, 28, 629, 660, 660, 691, + 896, 896, 29, 60, 897, 928, 61, 92, 410, 441, 598, 629, 691, 722, + 813, 844, 898, 929, 252, 283, 872, 903, 93, 124, 347, 378, 473, 504, + 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, 125, 156, 900, 931, + 284, 315, 536, 567, 753, 784, 873, 904, 157, 188, 901, 932, 442, 473, + 814, 845, 379, 410, 844, 875, 189, 220, 902, 933, 505, 536, 784, 815, + 661, 692, 316, 347, 630, 661, 692, 723, 874, 905, 221, 252, 599, 630, + 723, 754, 903, 934, 411, 442, 845, 876, 29, 29, 568, 599, 754, 785, + 928, 928, 30, 61, 474, 505, 815, 846, 929, 960, 62, 93, 930, 961, + 253, 284, 348, 379, 875, 906, 904, 935, 94, 125, 931, 962, 537, 568, + 785, 816, 126, 157, 932, 963, 285, 316, 905, 936, 158, 189, 443, 474, + 662, 693, 693, 724, 846, 877, 933, 964, 380, 411, 876, 907, 631, 662, + 724, 755, 506, 537, 816, 847, 190, 221, 934, 965, 600, 631, 755, 786, + 317, 348, 906, 937, 222, 253, 569, 600, 786, 817, 935, 966, 412, 443, + 877, 908, 475, 506, 847, 878, 30, 30, 960, 960, 31, 62, 349, 380, + 907, 938, 961, 992, 254, 285, 936, 967, 63, 94, 538, 569, 817, 848, + 962, 993, 694, 725, 95, 126, 663, 694, 725, 756, 963, 994, 632, 663, + 756, 787, 127, 158, 964, 995, 444, 475, 878, 909, 286, 317, 937, 968, + 381, 412, 507, 538, 848, 879, 908, 939, 159, 190, 601, 632, 787, 818, + 965, 996, 191, 222, 966, 997, 318, 349, 570, 601, 818, 849, 938, 969, + 476, 507, 879, 910, 223, 254, 413, 444, 909, 940, 967, 998, 695, 726, + 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 350, 381, 939, 970, + 255, 286, 633, 664, 788, 819, 968, 999, 445, 476, 602, 633, 819, 850, + 910, 941, 508, 539, 880, 911, 287, 318, 969, 1000, 382, 413, 940, 971, + 571, 602, 850, 881, 727, 758, 696, 727, 758, 789, 319, 350, 970, 1001, + 477, 508, 665, 696, 789, 820, 911, 942, 414, 445, 941, 972, 540, 571, + 881, 912, 634, 665, 820, 851, 351, 382, 971, 1002, 603, 634, 851, 882, + 446, 477, 942, 973, 509, 540, 912, 943, 383, 414, 728, 759, 759, 790, + 972, 1003, 572, 603, 882, 913, 697, 728, 790, 821, 666, 697, 821, 852, + 478, 509, 943, 974, 635, 666, 852, 883, 415, 446, 541, 572, 913, 944, + 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760, 791, 822, 510, 541, + 944, 975, 447, 478, 974, 1005, 698, 729, 822, 853, 573, 604, 914, 945, + 667, 698, 853, 884, 636, 667, 884, 915, 479, 510, 975, 1006, 542, 573, + 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854, + 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699, + 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793, + 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917, + 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825, + 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010, + 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888, + 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733, + 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920, + 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890, + 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766, + 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984, + 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985, + 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862, + 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957, + 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990, + 990, 1021, 991, 1022, 0, 0 }; #endif // CONFIG_EXT_TX @@ -2384,97 +1974,85 @@ DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_4x4[16]) = { #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_4x8[32]) = { - 0, 1, 4, 9, 2, 3, 6, 11, - 5, 7, 8, 13, 10, 12, 14, 17, - 15, 16, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, + 0, 1, 4, 9, 2, 3, 6, 11, 5, 7, 8, 13, 10, 12, 14, 17, + 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_4x8[32]) = { - 0, 8, 16, 24, 1, 9, 17, 25, - 2, 10, 18, 26, 3, 11, 19, 27, - 4, 12, 20, 28, 5, 13, 21, 29, - 6, 14, 22, 30, 7, 15, 23, 31, + 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, + 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, }; DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_4x8[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x4[32]) = { - 0, 1, 4, 9, 15, 19, 24, 28, - 2, 3, 6, 11, 16, 21, 25, 29, - 5, 7, 8, 13, 18, 22, 26, 30, - 10, 12, 14, 17, 20, 23, 27, 31, + 0, 1, 4, 9, 15, 19, 24, 28, 2, 3, 6, 11, 16, 21, 25, 29, + 5, 7, 8, 13, 18, 22, 26, 30, 10, 12, 14, 17, 20, 23, 27, 31, }; DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x4[32]) = { - 0, 4, 8, 12, 16, 20, 24, 28, - 1, 5, 9, 13, 17, 21, 25, 29, - 2, 6, 10, 14, 18, 22, 26, 30, - 3, 7, 11, 15, 19, 23, 27, 31, + 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29, + 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31, }; DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x4[32]) = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; #endif // CONFIG_EXT_TX #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x8[64]) = { - 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, 2, 10, - 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, 4, 12, 20, - 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61, 6, 14, 22, 30, - 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63, + 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, + 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, + 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61, + 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63, }; DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x8[64]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_8x8[64]) = { - 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, - 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, - 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, + 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, + 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, + 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, }; DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_8x8[64]) = { - 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, - 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, + 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, + 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, }; DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x8[64]) = { - 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, - 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, + 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, + 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x16[256]) = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, - 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, - 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, - 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, - 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, - 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, - 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, - 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, - 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, - 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, + 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241, + 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242, + 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243, + 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244, + 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, + 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, + 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247, + 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248, + 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250, 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251, 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, @@ -2484,882 +2062,833 @@ DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x16[256]) = { }; DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x16[256]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, - 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, - 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, - 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, - 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, - 200, 201, 202, 203, 204, 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, - 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, - 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, - 248, 249, 250, 251, 252, 253, 254, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, + 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, + 255, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_16x16[256]) = { - 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, - 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, - 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, - 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, - 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, - 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, - 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, - 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, - 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, - 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, - 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, - 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, - 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, - 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, - 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, + 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, + 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, + 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, + 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, + 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, + 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, + 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, + 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, + 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, + 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, + 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, + 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, + 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, + 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, + 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, }; DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_16x16[256]) = { - 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, 86, - 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, 115, 130, - 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, 119, 142, 167, - 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, 116, 135, 161, 185, - 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, 112, 133, 154, 179, 205, - 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, 106, 121, 146, 169, 196, 212, - 41, 46, 49, 56, 63, 70, 79, 90, 98, 107, 122, 138, 159, 182, 207, 222, - 52, 57, 62, 69, 75, 83, 93, 102, 110, 120, 134, 150, 176, 195, 215, 226, - 66, 71, 78, 82, 91, 97, 108, 113, 127, 136, 148, 168, 188, 202, 221, 232, - 80, 89, 92, 101, 105, 114, 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, - 95, 104, 109, 117, 123, 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, - 111, 118, 124, 129, 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, - 243, 126, 132, 137, 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, - 244, 246, 141, 149, 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, - 242, 249, 251, 152, 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, - 245, 247, 252, 253, 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, - 241, 248, 250, 254, 255, + 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, + 86, 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, + 115, 130, 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, + 119, 142, 167, 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, + 116, 135, 161, 185, 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, + 112, 133, 154, 179, 205, 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, + 106, 121, 146, 169, 196, 212, 41, 46, 49, 56, 63, 70, 79, 90, 98, + 107, 122, 138, 159, 182, 207, 222, 52, 57, 62, 69, 75, 83, 93, 102, + 110, 120, 134, 150, 176, 195, 215, 226, 66, 71, 78, 82, 91, 97, 108, + 113, 127, 136, 148, 168, 188, 202, 221, 232, 80, 89, 92, 101, 105, 114, + 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95, 104, 109, 117, 123, + 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129, + 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137, + 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149, + 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152, + 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253, + 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, 241, 248, 250, 254, + 255, }; DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x16[256]) = { - 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, 179, - 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, 178, 196, - 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, 164, 186, 201, - 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, 153, 169, 193, 208, - 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, 133, 161, 176, 198, 214, - 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, 120, 140, 165, 185, 205, 221, - 22, 27, 32, 41, 48, 60, 73, 85, 99, 116, 130, 151, 175, 190, 211, 225, - 29, 35, 42, 49, 59, 69, 81, 95, 108, 125, 139, 155, 182, 197, 217, 229, - 38, 45, 51, 61, 68, 80, 93, 105, 118, 134, 150, 168, 191, 207, 223, 234, - 50, 56, 63, 74, 83, 94, 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, - 62, 70, 76, 87, 97, 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, - 75, 82, 90, 102, 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, - 89, 100, 111, 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, - 103, 115, 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, - 252, 121, 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, - 251, 254, 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, - 249, 253, 255, + 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, + 179, 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, + 178, 196, 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, + 164, 186, 201, 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, + 153, 169, 193, 208, 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, + 133, 161, 176, 198, 214, 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, + 120, 140, 165, 185, 205, 221, 22, 27, 32, 41, 48, 60, 73, 85, 99, + 116, 130, 151, 175, 190, 211, 225, 29, 35, 42, 49, 59, 69, 81, 95, + 108, 125, 139, 155, 182, 197, 217, 229, 38, 45, 51, 61, 68, 80, 93, + 105, 118, 134, 150, 168, 191, 207, 223, 234, 50, 56, 63, 74, 83, 94, + 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62, 70, 76, 87, 97, + 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75, 82, 90, 102, + 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89, 100, 111, + 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115, + 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121, + 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254, + 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253, + 255, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_32x32[1024]) = { - 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, - 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, - 864, 896, 928, 960, 992, - 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, - 449, 481, 513, 545, 577, 609, 641, 673, 705, 737, 769, 801, 833, - 865, 897, 929, 961, 993, - 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, - 450, 482, 514, 546, 578, 610, 642, 674, 706, 738, 770, 802, 834, - 866, 898, 930, 962, 994, - 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, - 451, 483, 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, - 867, 899, 931, 963, 995, - 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, - 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804, 836, - 868, 900, 932, 964, 996, - 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, - 453, 485, 517, 549, 581, 613, 645, 677, 709, 741, 773, 805, 837, - 869, 901, 933, 965, 997, - 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, - 454, 486, 518, 550, 582, 614, 646, 678, 710, 742, 774, 806, 838, - 870, 902, 934, 966, 998, - 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, - 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, - 871, 903, 935, 967, 999, - 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, - 456, 488, 520, 552, 584, 616, 648, 680, 712, 744, 776, 808, 840, - 872, 904, 936, 968, 1000, - 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, - 457, 489, 521, 553, 585, 617, 649, 681, 713, 745, 777, 809, 841, - 873, 905, 937, 969, 1001, - 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, - 426, 458, 490, 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, - 842, 874, 906, 938, 970, 1002, - 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, - 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811, - 843, 875, 907, 939, 971, 1003, - 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, - 428, 460, 492, 524, 556, 588, 620, 652, 684, 716, 748, 780, - 812, 844, 876, 908, 940, 972, 1004, - 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, - 429, 461, 493, 525, 557, 589, 621, 653, 685, 717, 749, 781, - 813, 845, 877, 909, 941, 973, 1005, - 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, - 430, 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, - 814, 846, 878, 910, 942, 974, 1006, - 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, - 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751, 783, - 815, 847, 879, 911, 943, 975, 1007, - 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, - 432, 464, 496, 528, 560, 592, 624, 656, 688, 720, 752, 784, - 816, 848, 880, 912, 944, 976, 1008, - 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, - 433, 465, 497, 529, 561, 593, 625, 657, 689, 721, 753, 785, - 817, 849, 881, 913, 945, 977, 1009, - 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, - 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, - 818, 850, 882, 914, 946, 978, 1010, - 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, - 435, 467, 499, 531, 563, 595, 627, 659, 691, 723, 755, 787, - 819, 851, 883, 915, 947, 979, 1011, - 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, - 436, 468, 500, 532, 564, 596, 628, 660, 692, 724, 756, 788, - 820, 852, 884, 916, 948, 980, 1012, - 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, - 437, 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, - 821, 853, 885, 917, 949, 981, 1013, - 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, - 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758, 790, - 822, 854, 886, 918, 950, 982, 1014, - 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, - 439, 471, 503, 535, 567, 599, 631, 663, 695, 727, 759, 791, - 823, 855, 887, 919, 951, 983, 1015, - 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, - 440, 472, 504, 536, 568, 600, 632, 664, 696, 728, 760, 792, - 824, 856, 888, 920, 952, 984, 1016, - 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, - 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, - 825, 857, 889, 921, 953, 985, 1017, - 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, - 442, 474, 506, 538, 570, 602, 634, 666, 698, 730, 762, 794, - 826, 858, 890, 922, 954, 986, 1018, - 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, - 443, 475, 507, 539, 571, 603, 635, 667, 699, 731, 763, 795, - 827, 859, 891, 923, 955, 987, 1019, - 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, - 444, 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, - 828, 860, 892, 924, 956, 988, 1020, - 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, - 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765, 797, - 829, 861, 893, 925, 957, 989, 1021, - 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, - 446, 478, 510, 542, 574, 606, 638, 670, 702, 734, 766, 798, - 830, 862, 894, 926, 958, 990, 1022, - 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, - 447, 479, 511, 543, 575, 607, 639, 671, 703, 735, 767, 799, - 831, 863, 895, 927, 959, 991, 1023, + 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, + 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, + 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, + 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737, + 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162, + 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610, + 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35, + 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483, + 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931, + 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, + 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804, + 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229, + 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677, + 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102, + 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550, + 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998, + 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, + 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, + 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, + 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744, + 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169, + 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617, + 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42, + 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490, + 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938, + 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, + 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811, + 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236, + 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684, + 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109, + 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557, + 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005, + 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, + 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878, + 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, + 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751, + 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176, + 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624, + 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49, + 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497, + 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945, + 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, + 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, + 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243, + 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691, + 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116, + 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564, + 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012, + 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, + 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885, + 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, + 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758, + 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183, + 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631, + 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56, + 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504, + 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952, + 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, + 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, + 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250, + 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698, + 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123, + 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571, + 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019, + 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, + 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892, + 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, + 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765, + 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190, + 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638, + 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63, + 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511, + 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959, + 991, 1023, }; DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_32x32[1024]) = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, - 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, - 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, - 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, - 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, - 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, - 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, - 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, - 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, - 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, - 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, - 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, - 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, - 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, - 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, - 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, - 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, - 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, - 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, - 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, - 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, - 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, - 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, - 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, - 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, - 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, - 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, - 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, - 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, - 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, - 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, - 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, - 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, - 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, - 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, - 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, - 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, - 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, - 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, - 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, - 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, - 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, - 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, - 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, - 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, - 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, - 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, - 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, - 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, - 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, - 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, - 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, - 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, - 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, - 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, - 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, - 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, - 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, - 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, - 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, - 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, - 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, - 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, - 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, - 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, - 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, - 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, - 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, - 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, - 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, - 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, - 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, - 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, - 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, - 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, - 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, - 1020, 1021, 1022, 1023, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, + 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, + 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, + 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, + 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, + 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, + 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, + 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, + 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, + 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, + 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, + 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, + 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, + 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, + 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, + 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, + 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, + 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, + 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, + 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, + 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, + 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, + 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, + 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, + 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, + 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, + 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, + 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, + 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, + 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, + 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, + 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, + 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, + 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, + 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, + 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, + 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, + 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, + 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, + 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, + 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, + 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, + 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, + 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, + 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, + 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, + 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, + 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, + 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, + 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, + 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, + 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, + 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, + 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, + 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, + 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, + 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, + 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, + 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, + 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, + 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, + 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, }; #endif // CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x32[1024]) = { - 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, 170, 193, 204, - 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, 377, 405, 455, 471, 495, - 527, 1, 4, 8, 15, 22, 30, 45, 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, - 228, 234, 237, 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, - 3, 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, 208, 217, 224, - 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, 440, 493, 525, 550, 567, - 6, 11, 16, 23, 31, 43, 60, 73, 90, 109, 126, 150, 173, 196, 211, 220, 226, - 232, 236, 239, 296, 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, - 9, 14, 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, 223, 244, - 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, 582, 596, 617, 645, - 13, 20, 26, 35, 44, 54, 72, 85, 105, 123, 140, 163, 182, 205, 216, 225, - 254, 271, 294, 314, 353, 373, 400, 423, 468, 491, 522, 548, 595, 616, 644, - 666, 21, 27, 33, 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, - 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, 643, 665, - 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, 159, 178, 197, 212, 221, 230, - 292, 312, 326, 334, 398, 421, 437, 446, 520, 546, 564, 574, 642, 664, 679, - 687, 34, 40, 46, 56, 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, - 340, 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, 723, - 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, 194, 252, 268, 290, - 311, 351, 370, 396, 420, 466, 488, 518, 545, 593, 613, 640, 663, 704, 722, - 746, 765, 51, 59, 66, 76, 89, 99, 119, 131, 149, 168, 181, 200, 267, 289, - 310, 325, 369, 395, 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, - 745, 764, 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, 288, - 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, 661, 677, 686, - 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, 137, 155, 242, 251, 266, 287, - 339, 350, 368, 393, 452, 465, 486, 515, 580, 592, 611, 637, 692, 703, 720, - 743, 788, 798, 813, 833, 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, - 308, 349, 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, - 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, 185, 264, - 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, 609, 635, 659, 676, - 718, 741, 761, 775, 811, 831, 847, 858, 117, 128, 136, 148, 160, 175, 188, - 198, 284, 306, 322, 332, 390, 415, 433, 444, 512, 540, 560, 572, 634, 658, - 675, 685, 740, 760, 774, 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, - 249, 263, 283, 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, - 691, 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, 174, - 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, 539, 589, 607, - 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, 874, 886, 902, 915, 176, - 187, 195, 202, 261, 281, 304, 321, 363, 387, 413, 432, 481, 509, 538, 559, - 606, 631, 656, 674, 715, 737, 758, 773, 808, 827, 844, 856, 885, 901, 914, - 923, 192, 199, 206, 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, - 558, 571, 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, - 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, 480, 507, - 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, 825, 866, 873, 884, - 899, 930, 936, 945, 957, 246, 259, 278, 302, 345, 361, 384, 411, 460, 479, - 506, 536, 587, 604, 628, 654, 698, 713, 734, 756, 793, 806, 824, 842, 872, - 883, 898, 912, 935, 944, 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, - 478, 505, 535, 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, - 854, 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, 409, - 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, 770, 780, 822, - 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, 975, 336, 344, 359, 381, - 449, 459, 477, 503, 577, 586, 602, 625, 689, 697, 711, 731, 785, 792, 804, - 821, 865, 871, 881, 895, 929, 934, 942, 953, 977, 981, 987, 995, 343, 358, - 380, 408, 458, 476, 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, - 803, 820, 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, - 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, 729, 752, - 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, 962, 970, 985, 993, - 1000, 1005, 378, 406, 427, 441, 500, 531, 554, 569, 622, 649, 669, 682, 728, - 751, 768, 779, 818, 837, 851, 860, 892, 907, 918, 925, 950, 961, 969, 974, - 992, 999, 1004, 1007, 448, 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, - 727, 784, 790, 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, - 984, 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, - 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, 938, 948, - 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, 529, 553, 597, - 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, 850, 876, 889, 905, 917, - 937, 947, 959, 968, 982, 989, 997, 1003, 1011, 1015, 1019, 1022, 496, 528, - 552, 568, 618, 646, 667, 681, 724, 748, 766, 778, 814, 834, 849, 859, 888, - 904, 916, 924, 946, 958, 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, - 1023, + 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, + 170, 193, 204, 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, + 377, 405, 455, 471, 495, 527, 1, 4, 8, 15, 22, 30, 45, + 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, 228, 234, 237, + 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, 3, + 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, + 208, 217, 224, 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, + 440, 493, 525, 550, 567, 6, 11, 16, 23, 31, 43, 60, 73, + 90, 109, 126, 150, 173, 196, 211, 220, 226, 232, 236, 239, 296, + 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, 9, 14, + 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, + 223, 244, 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, + 582, 596, 617, 645, 13, 20, 26, 35, 44, 54, 72, 85, 105, + 123, 140, 163, 182, 205, 216, 225, 254, 271, 294, 314, 353, 373, + 400, 423, 468, 491, 522, 548, 595, 616, 644, 666, 21, 27, 33, + 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, + 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, + 643, 665, 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, + 159, 178, 197, 212, 221, 230, 292, 312, 326, 334, 398, 421, 437, + 446, 520, 546, 564, 574, 642, 664, 679, 687, 34, 40, 46, 56, + 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, 340, + 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, + 723, 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, + 194, 252, 268, 290, 311, 351, 370, 396, 420, 466, 488, 518, 545, + 593, 613, 640, 663, 704, 722, 746, 765, 51, 59, 66, 76, 89, + 99, 119, 131, 149, 168, 181, 200, 267, 289, 310, 325, 369, 395, + 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, 745, 764, + 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, + 288, 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, + 661, 677, 686, 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, + 137, 155, 242, 251, 266, 287, 339, 350, 368, 393, 452, 465, 486, + 515, 580, 592, 611, 637, 692, 703, 720, 743, 788, 798, 813, 833, + 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, 308, 349, + 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, + 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, + 185, 264, 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, + 609, 635, 659, 676, 718, 741, 761, 775, 811, 831, 847, 858, 117, + 128, 136, 148, 160, 175, 188, 198, 284, 306, 322, 332, 390, 415, + 433, 444, 512, 540, 560, 572, 634, 658, 675, 685, 740, 760, 774, + 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, 249, 263, 283, + 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, 691, + 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, + 174, 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, + 539, 589, 607, 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, + 874, 886, 902, 915, 176, 187, 195, 202, 261, 281, 304, 321, 363, + 387, 413, 432, 481, 509, 538, 559, 606, 631, 656, 674, 715, 737, + 758, 773, 808, 827, 844, 856, 885, 901, 914, 923, 192, 199, 206, + 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, 558, 571, + 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, + 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, + 480, 507, 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, + 825, 866, 873, 884, 899, 930, 936, 945, 957, 246, 259, 278, 302, + 345, 361, 384, 411, 460, 479, 506, 536, 587, 604, 628, 654, 698, + 713, 734, 756, 793, 806, 824, 842, 872, 883, 898, 912, 935, 944, + 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, 478, 505, 535, + 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, 854, + 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, + 409, 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, + 770, 780, 822, 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, + 975, 336, 344, 359, 381, 449, 459, 477, 503, 577, 586, 602, 625, + 689, 697, 711, 731, 785, 792, 804, 821, 865, 871, 881, 895, 929, + 934, 942, 953, 977, 981, 987, 995, 343, 358, 380, 408, 458, 476, + 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, 803, 820, + 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, + 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, + 729, 752, 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, + 962, 970, 985, 993, 1000, 1005, 378, 406, 427, 441, 500, 531, 554, + 569, 622, 649, 669, 682, 728, 751, 768, 779, 818, 837, 851, 860, + 892, 907, 918, 925, 950, 961, 969, 974, 992, 999, 1004, 1007, 448, + 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, 727, 784, 790, + 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, 984, + 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, + 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, + 938, 948, 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, + 529, 553, 597, 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, + 850, 876, 889, 905, 917, 937, 947, 959, 968, 982, 989, 997, 1003, + 1011, 1015, 1019, 1022, 496, 528, 552, 568, 618, 646, 667, 681, 724, + 748, 766, 778, 814, 834, 849, 859, 888, 904, 916, 924, 946, 958, + 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, 1023, }; #if CONFIG_EXT_TX DECLARE_ALIGNED(16, static const int16_t, vp10_v2_iscan_32x32[1024]) = { - 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, - 121, 142, 166, 189, 512, 518, 527, 539, 551, 566, 584, 602, - 621, 644, 668, 695, 721, 748, 780, 811, 2, 3, 6, 11, - 17, 26, 35, 45, 58, 73, 90, 106, 123, 146, 168, 193, - 513, 519, 528, 540, 553, 567, 585, 603, 622, 647, 670, 696, - 722, 751, 783, 812, 5, 7, 8, 13, 20, 28, 37, 50, - 62, 75, 92, 108, 129, 150, 170, 195, 514, 521, 530, 541, - 554, 569, 587, 605, 625, 649, 671, 699, 725, 752, 785, 815, - 10, 12, 14, 19, 23, 31, 41, 52, 65, 81, 96, 113, - 133, 152, 175, 201, 515, 522, 531, 542, 556, 572, 589, 607, - 629, 651, 673, 700, 726, 757, 788, 819, 16, 18, 21, 24, - 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, 203, - 516, 523, 534, 545, 559, 574, 591, 610, 632, 654, 679, 704, - 730, 762, 791, 824, 25, 27, 29, 32, 40, 46, 54, 67, - 79, 94, 109, 127, 143, 164, 185, 210, 517, 525, 535, 547, - 561, 578, 595, 615, 635, 656, 684, 707, 737, 766, 793, 830, - 34, 36, 38, 42, 49, 55, 64, 76, 87, 102, 117, 135, - 154, 176, 197, 219, 520, 529, 538, 550, 565, 580, 598, 618, - 639, 664, 687, 712, 741, 769, 802, 833, 44, 47, 51, 53, - 60, 68, 77, 85, 98, 114, 131, 147, 162, 183, 208, 227, - 524, 533, 544, 557, 571, 588, 606, 623, 645, 667, 692, 720, - 747, 776, 806, 838, 57, 61, 63, 66, 70, 80, 88, 99, - 112, 124, 140, 159, 179, 199, 216, 233, 526, 536, 548, 562, - 577, 593, 613, 633, 653, 676, 701, 727, 756, 786, 814, 847, - 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156, 173, - 190, 211, 229, 246, 532, 543, 555, 568, 581, 601, 619, 637, - 663, 685, 709, 738, 763, 792, 826, 855, 89, 91, 93, 97, - 101, 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 255, - 537, 549, 560, 576, 592, 608, 628, 650, 669, 693, 719, 744, - 773, 805, 834, 862, 105, 107, 111, 116, 120, 128, 136, 148, - 160, 174, 187, 205, 221, 236, 251, 267, 546, 558, 570, 583, - 600, 617, 636, 657, 680, 706, 729, 758, 787, 813, 846, 871, - 122, 126, 130, 134, 138, 144, 155, 163, 180, 191, 207, 222, - 232, 248, 264, 278, 552, 564, 579, 594, 609, 630, 648, 666, - 688, 715, 742, 768, 797, 827, 856, 877, 145, 149, 151, 153, - 161, 165, 177, 184, 200, 212, 225, 237, 249, 262, 275, 289, - 563, 575, 590, 604, 620, 638, 660, 683, 705, 728, 753, 779, - 809, 839, 866, 889, 167, 169, 172, 178, 182, 188, 198, 209, - 217, 230, 242, 252, 265, 276, 288, 301, 573, 586, 599, 616, - 634, 652, 672, 694, 716, 743, 767, 794, 825, 850, 874, 899, - 192, 194, 196, 202, 204, 213, 220, 228, 234, 247, 256, 268, - 279, 290, 302, 315, 582, 597, 614, 631, 646, 665, 686, 708, - 732, 759, 784, 810, 837, 863, 886, 908, 214, 215, 218, 223, - 226, 231, 239, 244, 253, 261, 271, 283, 292, 304, 317, 325, - 596, 611, 626, 642, 661, 681, 702, 723, 745, 770, 800, 828, - 853, 875, 897, 919, 235, 238, 240, 243, 245, 250, 257, 263, - 270, 280, 287, 298, 307, 319, 329, 340, 612, 624, 640, 658, - 677, 697, 717, 739, 764, 789, 816, 844, 867, 890, 909, 927, - 254, 258, 259, 260, 266, 269, 272, 282, 286, 296, 303, 312, - 323, 333, 341, 355, 627, 641, 655, 674, 690, 713, 735, 760, - 781, 807, 835, 857, 880, 902, 921, 940, 273, 274, 277, 281, - 284, 285, 291, 299, 305, 310, 320, 327, 337, 346, 357, 369, - 643, 659, 675, 689, 710, 733, 754, 777, 803, 831, 851, 872, - 892, 913, 934, 950, 293, 294, 295, 297, 300, 306, 308, 314, - 321, 326, 335, 343, 352, 361, 372, 378, 662, 678, 691, 711, - 731, 749, 774, 798, 822, 848, 869, 887, 906, 925, 942, 961, - 309, 311, 313, 316, 318, 322, 324, 332, 338, 344, 351, 358, - 367, 375, 386, 394, 682, 698, 714, 734, 750, 772, 795, 820, - 842, 864, 884, 904, 923, 938, 954, 967, 328, 330, 331, 334, - 336, 339, 342, 348, 354, 359, 366, 374, 382, 391, 400, 409, - 703, 718, 736, 755, 775, 796, 818, 840, 860, 882, 900, 917, - 936, 952, 965, 977, 345, 347, 349, 350, 353, 356, 360, 364, - 371, 376, 383, 389, 395, 406, 412, 423, 724, 740, 761, 778, - 799, 821, 841, 859, 878, 895, 915, 932, 948, 963, 975, 986, - 362, 363, 365, 368, 370, 373, 377, 379, 387, 392, 397, 405, - 411, 420, 428, 439, 746, 765, 782, 804, 823, 843, 861, 879, - 894, 911, 930, 946, 959, 973, 984, 994, 380, 381, 384, 385, - 388, 390, 393, 396, 403, 408, 413, 422, 427, 436, 444, 452, - 771, 790, 808, 832, 849, 865, 883, 896, 912, 928, 944, 957, - 971, 982, 992, 1001, 398, 399, 401, 402, 404, 407, 410, 414, - 419, 425, 429, 437, 442, 449, 458, 465, 801, 817, 836, 852, - 870, 885, 901, 916, 931, 945, 956, 969, 980, 990, 999, 1007, - 415, 416, 417, 418, 421, 424, 426, 430, 434, 441, 445, 453, - 459, 463, 473, 480, 829, 845, 858, 873, 888, 905, 918, 933, - 947, 958, 970, 979, 988, 997, 1005, 1012, 431, 432, 433, 435, - 438, 440, 443, 446, 451, 456, 461, 468, 475, 479, 488, 494, - 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981, 989, - 996, 1003, 1010, 1016, 447, 448, 450, 454, 455, 457, 460, 462, - 469, 472, 477, 482, 490, 495, 499, 503, 876, 891, 903, 914, - 926, 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, - 464, 466, 467, 470, 471, 474, 476, 478, 484, 489, 493, 497, - 501, 504, 506, 508, 898, 910, 922, 935, 943, 955, 966, 976, - 985, 993, 1000, 1006, 1011, 1015, 1018, 1021, 481, 483, 485, 486, - 487, 491, 492, 496, 498, 500, 502, 505, 507, 509, 510, 511, - 920, 929, 941, 951, 962, 968, 978, 987, 995, 1002, 1008, 1013, - 1017, 1020, 1022, 1023, + 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121, + 142, 166, 189, 512, 518, 527, 539, 551, 566, 584, 602, 621, 644, + 668, 695, 721, 748, 780, 811, 2, 3, 6, 11, 17, 26, 35, + 45, 58, 73, 90, 106, 123, 146, 168, 193, 513, 519, 528, 540, + 553, 567, 585, 603, 622, 647, 670, 696, 722, 751, 783, 812, 5, + 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150, + 170, 195, 514, 521, 530, 541, 554, 569, 587, 605, 625, 649, 671, + 699, 725, 752, 785, 815, 10, 12, 14, 19, 23, 31, 41, 52, + 65, 81, 96, 113, 133, 152, 175, 201, 515, 522, 531, 542, 556, + 572, 589, 607, 629, 651, 673, 700, 726, 757, 788, 819, 16, 18, + 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, + 203, 516, 523, 534, 545, 559, 574, 591, 610, 632, 654, 679, 704, + 730, 762, 791, 824, 25, 27, 29, 32, 40, 46, 54, 67, 79, + 94, 109, 127, 143, 164, 185, 210, 517, 525, 535, 547, 561, 578, + 595, 615, 635, 656, 684, 707, 737, 766, 793, 830, 34, 36, 38, + 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 219, + 520, 529, 538, 550, 565, 580, 598, 618, 639, 664, 687, 712, 741, + 769, 802, 833, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114, + 131, 147, 162, 183, 208, 227, 524, 533, 544, 557, 571, 588, 606, + 623, 645, 667, 692, 720, 747, 776, 806, 838, 57, 61, 63, 66, + 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 216, 233, 526, + 536, 548, 562, 577, 593, 613, 633, 653, 676, 701, 727, 756, 786, + 814, 847, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156, + 173, 190, 211, 229, 246, 532, 543, 555, 568, 581, 601, 619, 637, + 663, 685, 709, 738, 763, 792, 826, 855, 89, 91, 93, 97, 101, + 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 255, 537, 549, + 560, 576, 592, 608, 628, 650, 669, 693, 719, 744, 773, 805, 834, + 862, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205, + 221, 236, 251, 267, 546, 558, 570, 583, 600, 617, 636, 657, 680, + 706, 729, 758, 787, 813, 846, 871, 122, 126, 130, 134, 138, 144, + 155, 163, 180, 191, 207, 222, 232, 248, 264, 278, 552, 564, 579, + 594, 609, 630, 648, 666, 688, 715, 742, 768, 797, 827, 856, 877, + 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 225, 237, 249, + 262, 275, 289, 563, 575, 590, 604, 620, 638, 660, 683, 705, 728, + 753, 779, 809, 839, 866, 889, 167, 169, 172, 178, 182, 188, 198, + 209, 217, 230, 242, 252, 265, 276, 288, 301, 573, 586, 599, 616, + 634, 652, 672, 694, 716, 743, 767, 794, 825, 850, 874, 899, 192, + 194, 196, 202, 204, 213, 220, 228, 234, 247, 256, 268, 279, 290, + 302, 315, 582, 597, 614, 631, 646, 665, 686, 708, 732, 759, 784, + 810, 837, 863, 886, 908, 214, 215, 218, 223, 226, 231, 239, 244, + 253, 261, 271, 283, 292, 304, 317, 325, 596, 611, 626, 642, 661, + 681, 702, 723, 745, 770, 800, 828, 853, 875, 897, 919, 235, 238, + 240, 243, 245, 250, 257, 263, 270, 280, 287, 298, 307, 319, 329, + 340, 612, 624, 640, 658, 677, 697, 717, 739, 764, 789, 816, 844, + 867, 890, 909, 927, 254, 258, 259, 260, 266, 269, 272, 282, 286, + 296, 303, 312, 323, 333, 341, 355, 627, 641, 655, 674, 690, 713, + 735, 760, 781, 807, 835, 857, 880, 902, 921, 940, 273, 274, 277, + 281, 284, 285, 291, 299, 305, 310, 320, 327, 337, 346, 357, 369, + 643, 659, 675, 689, 710, 733, 754, 777, 803, 831, 851, 872, 892, + 913, 934, 950, 293, 294, 295, 297, 300, 306, 308, 314, 321, 326, + 335, 343, 352, 361, 372, 378, 662, 678, 691, 711, 731, 749, 774, + 798, 822, 848, 869, 887, 906, 925, 942, 961, 309, 311, 313, 316, + 318, 322, 324, 332, 338, 344, 351, 358, 367, 375, 386, 394, 682, + 698, 714, 734, 750, 772, 795, 820, 842, 864, 884, 904, 923, 938, + 954, 967, 328, 330, 331, 334, 336, 339, 342, 348, 354, 359, 366, + 374, 382, 391, 400, 409, 703, 718, 736, 755, 775, 796, 818, 840, + 860, 882, 900, 917, 936, 952, 965, 977, 345, 347, 349, 350, 353, + 356, 360, 364, 371, 376, 383, 389, 395, 406, 412, 423, 724, 740, + 761, 778, 799, 821, 841, 859, 878, 895, 915, 932, 948, 963, 975, + 986, 362, 363, 365, 368, 370, 373, 377, 379, 387, 392, 397, 405, + 411, 420, 428, 439, 746, 765, 782, 804, 823, 843, 861, 879, 894, + 911, 930, 946, 959, 973, 984, 994, 380, 381, 384, 385, 388, 390, + 393, 396, 403, 408, 413, 422, 427, 436, 444, 452, 771, 790, 808, + 832, 849, 865, 883, 896, 912, 928, 944, 957, 971, 982, 992, 1001, + 398, 399, 401, 402, 404, 407, 410, 414, 419, 425, 429, 437, 442, + 449, 458, 465, 801, 817, 836, 852, 870, 885, 901, 916, 931, 945, + 956, 969, 980, 990, 999, 1007, 415, 416, 417, 418, 421, 424, 426, + 430, 434, 441, 445, 453, 459, 463, 473, 480, 829, 845, 858, 873, + 888, 905, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 431, + 432, 433, 435, 438, 440, 443, 446, 451, 456, 461, 468, 475, 479, + 488, 494, 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981, + 989, 996, 1003, 1010, 1016, 447, 448, 450, 454, 455, 457, 460, 462, + 469, 472, 477, 482, 490, 495, 499, 503, 876, 891, 903, 914, 926, + 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 464, 466, + 467, 470, 471, 474, 476, 478, 484, 489, 493, 497, 501, 504, 506, + 508, 898, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006, + 1011, 1015, 1018, 1021, 481, 483, 485, 486, 487, 491, 492, 496, 498, + 500, 502, 505, 507, 509, 510, 511, 920, 929, 941, 951, 962, 968, + 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023, }; DECLARE_ALIGNED(16, static const int16_t, vp10_h2_iscan_32x32[1024]) = { - 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, - 121, 142, 166, 189, 214, 233, 254, 273, 292, 309, 328, 345, - 362, 378, 397, 415, 431, 447, 464, 481, 2, 3, 6, 11, - 17, 26, 35, 45, 58, 73, 90, 106, 123, 146, 168, 193, - 215, 236, 255, 274, 294, 310, 329, 346, 363, 381, 399, 416, - 432, 448, 465, 482, 5, 7, 8, 13, 20, 28, 37, 50, - 62, 75, 92, 108, 129, 150, 170, 195, 216, 240, 259, 275, - 295, 312, 331, 348, 365, 383, 400, 417, 433, 449, 467, 485, - 10, 12, 14, 19, 23, 31, 41, 52, 65, 81, 96, 113, - 133, 152, 175, 201, 221, 243, 260, 280, 297, 315, 333, 350, - 367, 385, 402, 418, 434, 452, 470, 486, 16, 18, 21, 24, - 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, 203, - 226, 244, 264, 283, 300, 318, 335, 353, 370, 388, 404, 420, - 438, 455, 471, 487, 25, 27, 29, 32, 40, 46, 54, 67, - 79, 94, 109, 127, 143, 164, 185, 210, 231, 250, 269, 285, - 304, 322, 339, 356, 373, 389, 407, 423, 440, 457, 473, 491, - 34, 36, 38, 42, 49, 55, 64, 76, 87, 102, 117, 135, - 154, 176, 197, 219, 239, 256, 272, 291, 308, 324, 341, 359, - 377, 393, 410, 426, 442, 460, 476, 492, 44, 47, 51, 53, - 60, 68, 77, 85, 98, 114, 131, 147, 162, 183, 208, 227, - 245, 262, 282, 298, 314, 332, 349, 364, 379, 396, 412, 430, - 446, 462, 478, 495, 57, 61, 63, 66, 70, 80, 88, 99, - 112, 124, 140, 159, 179, 199, 217, 234, 253, 270, 286, 305, - 321, 337, 354, 371, 387, 403, 419, 435, 451, 468, 484, 498, - 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156, 173, - 190, 211, 229, 246, 261, 281, 296, 311, 325, 344, 360, 375, - 392, 408, 425, 441, 456, 472, 489, 500, 89, 91, 93, 97, - 101, 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 257, - 271, 287, 303, 320, 336, 351, 366, 384, 398, 413, 429, 445, - 461, 477, 493, 502, 105, 107, 111, 116, 120, 128, 136, 148, - 160, 174, 187, 205, 222, 237, 251, 267, 284, 299, 313, 327, - 343, 358, 374, 390, 405, 422, 437, 453, 469, 483, 497, 505, - 122, 126, 130, 134, 138, 144, 155, 163, 180, 191, 207, 223, - 232, 248, 265, 278, 293, 307, 323, 338, 352, 368, 382, 395, - 411, 427, 443, 459, 475, 490, 501, 507, 145, 149, 151, 153, - 161, 165, 177, 184, 200, 212, 225, 238, 249, 263, 276, 289, - 306, 319, 334, 347, 361, 376, 391, 406, 421, 436, 450, 463, - 479, 496, 504, 509, 167, 169, 172, 178, 182, 188, 198, 209, - 218, 230, 242, 252, 266, 277, 288, 301, 317, 330, 342, 357, - 372, 386, 401, 414, 428, 444, 458, 474, 488, 499, 506, 510, - 192, 194, 196, 202, 204, 213, 220, 228, 235, 247, 258, 268, - 279, 290, 302, 316, 326, 340, 355, 369, 380, 394, 409, 424, - 439, 454, 466, 480, 494, 503, 508, 511, 512, 513, 514, 515, - 516, 517, 520, 523, 526, 532, 537, 545, 551, 561, 573, 581, - 596, 610, 625, 642, 661, 680, 701, 722, 745, 770, 800, 827, - 853, 875, 897, 919, 518, 519, 521, 522, 524, 525, 528, 533, - 536, 542, 549, 557, 564, 575, 585, 597, 611, 623, 640, 656, - 676, 696, 717, 739, 763, 789, 815, 844, 867, 889, 909, 927, - 527, 529, 530, 531, 534, 535, 538, 544, 548, 555, 560, 569, - 579, 589, 598, 614, 626, 641, 655, 673, 690, 712, 735, 760, - 780, 806, 834, 857, 880, 902, 921, 940, 539, 540, 541, 543, - 546, 547, 550, 558, 562, 567, 576, 583, 593, 603, 616, 631, - 643, 657, 674, 689, 710, 733, 752, 776, 803, 830, 850, 872, - 892, 913, 934, 950, 552, 553, 554, 556, 559, 563, 565, 571, - 577, 582, 591, 600, 609, 620, 634, 644, 662, 677, 691, 711, - 730, 748, 773, 798, 822, 847, 869, 887, 906, 925, 942, 961, - 566, 568, 570, 572, 574, 578, 580, 588, 594, 601, 608, 617, - 629, 637, 652, 665, 681, 697, 713, 734, 749, 772, 793, 819, - 842, 863, 884, 904, 923, 938, 954, 967, 584, 586, 587, 590, - 592, 595, 599, 605, 613, 618, 628, 636, 648, 660, 671, 686, - 702, 718, 736, 753, 774, 794, 818, 840, 860, 882, 900, 917, - 936, 952, 965, 977, 602, 604, 606, 607, 612, 615, 619, 624, - 633, 638, 649, 658, 666, 683, 692, 707, 723, 740, 761, 777, - 799, 820, 841, 859, 877, 895, 915, 932, 948, 963, 975, 986, - 621, 622, 627, 630, 632, 635, 639, 645, 653, 663, 668, 682, - 688, 704, 716, 732, 746, 764, 781, 804, 823, 843, 861, 878, - 894, 911, 930, 946, 959, 973, 984, 994, 646, 647, 650, 651, - 654, 659, 664, 667, 678, 685, 693, 706, 715, 728, 743, 757, - 771, 790, 807, 831, 848, 864, 883, 896, 912, 928, 944, 957, - 971, 982, 992, 1001, 669, 670, 672, 675, 679, 684, 687, 694, - 703, 709, 719, 729, 741, 754, 767, 783, 801, 816, 835, 851, - 870, 885, 901, 916, 931, 945, 956, 969, 980, 990, 999, 1007, - 695, 698, 699, 700, 705, 708, 714, 720, 726, 738, 744, 758, - 768, 779, 795, 810, 828, 845, 858, 873, 888, 905, 918, 933, - 947, 958, 970, 979, 988, 997, 1005, 1012, 721, 724, 725, 727, - 731, 737, 742, 747, 756, 765, 775, 786, 797, 809, 825, 837, - 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981, 989, - 996, 1003, 1010, 1016, 750, 751, 755, 759, 762, 766, 769, 778, - 787, 792, 805, 812, 829, 838, 852, 865, 876, 890, 903, 914, - 926, 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, - 782, 784, 785, 788, 791, 796, 802, 808, 814, 826, 836, 846, - 856, 866, 874, 886, 898, 910, 922, 935, 943, 955, 966, 976, - 985, 993, 1000, 1006, 1011, 1015, 1018, 1021, 811, 813, 817, 821, - 824, 832, 833, 839, 849, 855, 862, 871, 879, 891, 899, 908, - 920, 929, 941, 951, 962, 968, 978, 987, 995, 1002, 1008, 1013, - 1017, 1020, 1022, 1023, + 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121, + 142, 166, 189, 214, 233, 254, 273, 292, 309, 328, 345, 362, 378, + 397, 415, 431, 447, 464, 481, 2, 3, 6, 11, 17, 26, 35, + 45, 58, 73, 90, 106, 123, 146, 168, 193, 215, 236, 255, 274, + 294, 310, 329, 346, 363, 381, 399, 416, 432, 448, 465, 482, 5, + 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150, + 170, 195, 216, 240, 259, 275, 295, 312, 331, 348, 365, 383, 400, + 417, 433, 449, 467, 485, 10, 12, 14, 19, 23, 31, 41, 52, + 65, 81, 96, 113, 133, 152, 175, 201, 221, 243, 260, 280, 297, + 315, 333, 350, 367, 385, 402, 418, 434, 452, 470, 486, 16, 18, + 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, + 203, 226, 244, 264, 283, 300, 318, 335, 353, 370, 388, 404, 420, + 438, 455, 471, 487, 25, 27, 29, 32, 40, 46, 54, 67, 79, + 94, 109, 127, 143, 164, 185, 210, 231, 250, 269, 285, 304, 322, + 339, 356, 373, 389, 407, 423, 440, 457, 473, 491, 34, 36, 38, + 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 219, + 239, 256, 272, 291, 308, 324, 341, 359, 377, 393, 410, 426, 442, + 460, 476, 492, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114, + 131, 147, 162, 183, 208, 227, 245, 262, 282, 298, 314, 332, 349, + 364, 379, 396, 412, 430, 446, 462, 478, 495, 57, 61, 63, 66, + 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 217, 234, 253, + 270, 286, 305, 321, 337, 354, 371, 387, 403, 419, 435, 451, 468, + 484, 498, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156, + 173, 190, 211, 229, 246, 261, 281, 296, 311, 325, 344, 360, 375, + 392, 408, 425, 441, 456, 472, 489, 500, 89, 91, 93, 97, 101, + 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 257, 271, 287, + 303, 320, 336, 351, 366, 384, 398, 413, 429, 445, 461, 477, 493, + 502, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205, + 222, 237, 251, 267, 284, 299, 313, 327, 343, 358, 374, 390, 405, + 422, 437, 453, 469, 483, 497, 505, 122, 126, 130, 134, 138, 144, + 155, 163, 180, 191, 207, 223, 232, 248, 265, 278, 293, 307, 323, + 338, 352, 368, 382, 395, 411, 427, 443, 459, 475, 490, 501, 507, + 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 225, 238, 249, + 263, 276, 289, 306, 319, 334, 347, 361, 376, 391, 406, 421, 436, + 450, 463, 479, 496, 504, 509, 167, 169, 172, 178, 182, 188, 198, + 209, 218, 230, 242, 252, 266, 277, 288, 301, 317, 330, 342, 357, + 372, 386, 401, 414, 428, 444, 458, 474, 488, 499, 506, 510, 192, + 194, 196, 202, 204, 213, 220, 228, 235, 247, 258, 268, 279, 290, + 302, 316, 326, 340, 355, 369, 380, 394, 409, 424, 439, 454, 466, + 480, 494, 503, 508, 511, 512, 513, 514, 515, 516, 517, 520, 523, + 526, 532, 537, 545, 551, 561, 573, 581, 596, 610, 625, 642, 661, + 680, 701, 722, 745, 770, 800, 827, 853, 875, 897, 919, 518, 519, + 521, 522, 524, 525, 528, 533, 536, 542, 549, 557, 564, 575, 585, + 597, 611, 623, 640, 656, 676, 696, 717, 739, 763, 789, 815, 844, + 867, 889, 909, 927, 527, 529, 530, 531, 534, 535, 538, 544, 548, + 555, 560, 569, 579, 589, 598, 614, 626, 641, 655, 673, 690, 712, + 735, 760, 780, 806, 834, 857, 880, 902, 921, 940, 539, 540, 541, + 543, 546, 547, 550, 558, 562, 567, 576, 583, 593, 603, 616, 631, + 643, 657, 674, 689, 710, 733, 752, 776, 803, 830, 850, 872, 892, + 913, 934, 950, 552, 553, 554, 556, 559, 563, 565, 571, 577, 582, + 591, 600, 609, 620, 634, 644, 662, 677, 691, 711, 730, 748, 773, + 798, 822, 847, 869, 887, 906, 925, 942, 961, 566, 568, 570, 572, + 574, 578, 580, 588, 594, 601, 608, 617, 629, 637, 652, 665, 681, + 697, 713, 734, 749, 772, 793, 819, 842, 863, 884, 904, 923, 938, + 954, 967, 584, 586, 587, 590, 592, 595, 599, 605, 613, 618, 628, + 636, 648, 660, 671, 686, 702, 718, 736, 753, 774, 794, 818, 840, + 860, 882, 900, 917, 936, 952, 965, 977, 602, 604, 606, 607, 612, + 615, 619, 624, 633, 638, 649, 658, 666, 683, 692, 707, 723, 740, + 761, 777, 799, 820, 841, 859, 877, 895, 915, 932, 948, 963, 975, + 986, 621, 622, 627, 630, 632, 635, 639, 645, 653, 663, 668, 682, + 688, 704, 716, 732, 746, 764, 781, 804, 823, 843, 861, 878, 894, + 911, 930, 946, 959, 973, 984, 994, 646, 647, 650, 651, 654, 659, + 664, 667, 678, 685, 693, 706, 715, 728, 743, 757, 771, 790, 807, + 831, 848, 864, 883, 896, 912, 928, 944, 957, 971, 982, 992, 1001, + 669, 670, 672, 675, 679, 684, 687, 694, 703, 709, 719, 729, 741, + 754, 767, 783, 801, 816, 835, 851, 870, 885, 901, 916, 931, 945, + 956, 969, 980, 990, 999, 1007, 695, 698, 699, 700, 705, 708, 714, + 720, 726, 738, 744, 758, 768, 779, 795, 810, 828, 845, 858, 873, + 888, 905, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 721, + 724, 725, 727, 731, 737, 742, 747, 756, 765, 775, 786, 797, 809, + 825, 837, 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981, + 989, 996, 1003, 1010, 1016, 750, 751, 755, 759, 762, 766, 769, 778, + 787, 792, 805, 812, 829, 838, 852, 865, 876, 890, 903, 914, 926, + 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 782, 784, + 785, 788, 791, 796, 802, 808, 814, 826, 836, 846, 856, 866, 874, + 886, 898, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006, + 1011, 1015, 1018, 1021, 811, 813, 817, 821, 824, 832, 833, 839, 849, + 855, 862, 871, 879, 891, 899, 908, 920, 929, 941, 951, 962, 968, + 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023, }; DECLARE_ALIGNED(16, static const int16_t, vp10_qtr_iscan_32x32[1024]) = { - 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, - 121, 142, 166, 189, 256, 268, 286, 310, 334, 364, 400, 435, - 471, 510, 553, 598, 640, 683, 732, 780, 2, 3, 6, 11, - 17, 26, 35, 45, 58, 73, 90, 106, 123, 146, 168, 193, - 258, 270, 288, 312, 338, 366, 402, 437, 473, 516, 557, 600, - 642, 687, 736, 782, 5, 7, 8, 13, 20, 28, 37, 50, - 62, 75, 92, 108, 129, 150, 170, 195, 260, 274, 292, 314, - 340, 370, 406, 441, 478, 520, 559, 604, 646, 689, 740, 788, - 10, 12, 14, 19, 23, 31, 41, 52, 65, 81, 96, 113, - 133, 152, 175, 201, 262, 276, 294, 316, 344, 376, 410, 445, - 484, 524, 563, 606, 648, 697, 746, 793, 16, 18, 21, 24, - 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, 203, - 264, 278, 300, 322, 350, 380, 414, 451, 490, 530, 571, 612, - 656, 705, 750, 799, 25, 27, 29, 32, 40, 46, 54, 67, - 79, 94, 109, 127, 143, 164, 185, 210, 266, 282, 302, 326, - 354, 388, 422, 459, 496, 533, 579, 618, 665, 711, 754, 809, - 34, 36, 38, 42, 49, 55, 64, 76, 87, 102, 117, 135, - 154, 176, 197, 216, 272, 289, 308, 332, 362, 392, 427, 465, - 504, 545, 585, 626, 671, 717, 766, 813, 44, 47, 51, 53, - 60, 68, 77, 85, 98, 114, 131, 147, 162, 183, 208, 222, - 279, 298, 320, 346, 374, 408, 442, 475, 511, 551, 592, 638, - 681, 726, 772, 821, 57, 61, 63, 66, 70, 80, 88, 99, - 112, 124, 140, 159, 179, 199, 214, 227, 284, 304, 328, 355, - 386, 418, 455, 492, 528, 567, 608, 649, 695, 742, 786, 833, - 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156, 173, - 190, 211, 224, 233, 296, 317, 342, 367, 394, 433, 466, 500, - 543, 581, 622, 667, 707, 752, 803, 843, 89, 91, 93, 97, - 101, 110, 118, 132, 141, 157, 171, 186, 206, 220, 231, 239, - 306, 330, 352, 384, 415, 447, 482, 521, 554, 593, 636, 677, - 722, 770, 815, 852, 105, 107, 111, 116, 120, 128, 136, 148, - 160, 174, 187, 205, 218, 229, 237, 244, 323, 347, 371, 398, - 431, 463, 498, 534, 573, 616, 654, 698, 743, 783, 831, 864, - 122, 126, 130, 134, 138, 144, 155, 163, 180, 191, 207, 219, - 226, 235, 242, 248, 335, 360, 390, 419, 449, 485, 518, 549, - 587, 630, 672, 715, 760, 805, 845, 872, 145, 149, 151, 153, - 161, 165, 177, 184, 200, 212, 221, 230, 236, 241, 246, 251, - 356, 382, 411, 438, 469, 501, 539, 577, 613, 652, 690, 730, - 776, 822, 858, 886, 167, 169, 172, 178, 182, 188, 198, 209, - 215, 225, 232, 238, 243, 247, 250, 253, 378, 403, 428, 461, - 494, 526, 560, 594, 632, 675, 713, 755, 801, 837, 868, 897, - 192, 194, 196, 202, 204, 213, 217, 223, 228, 234, 240, 245, - 249, 252, 254, 255, 395, 425, 457, 488, 512, 547, 583, 619, - 659, 699, 737, 778, 819, 854, 882, 907, 257, 259, 261, 263, - 265, 267, 273, 280, 285, 297, 307, 324, 336, 357, 379, 396, - 424, 452, 479, 508, 541, 574, 609, 643, 679, 719, 764, 806, - 841, 870, 895, 919, 269, 271, 275, 277, 281, 283, 290, 299, - 305, 318, 331, 348, 361, 383, 404, 426, 453, 476, 506, 535, - 568, 601, 634, 669, 708, 748, 789, 829, 860, 887, 909, 927, - 287, 291, 293, 295, 301, 303, 309, 321, 329, 343, 353, 372, - 391, 412, 429, 458, 480, 507, 532, 564, 590, 627, 663, 703, - 733, 773, 816, 847, 876, 901, 921, 940, 311, 313, 315, 319, - 325, 327, 333, 349, 358, 368, 385, 399, 420, 439, 462, 489, - 509, 536, 565, 589, 624, 661, 691, 727, 768, 810, 838, 866, - 890, 913, 934, 950, 337, 339, 341, 345, 351, 359, 363, 375, - 387, 397, 416, 432, 450, 470, 495, 513, 542, 569, 591, 625, - 657, 684, 723, 762, 797, 834, 862, 884, 905, 925, 942, 961, - 365, 369, 373, 377, 381, 389, 393, 409, 421, 434, 448, 464, - 486, 502, 527, 548, 575, 602, 628, 662, 685, 721, 756, 794, - 827, 855, 880, 903, 923, 938, 954, 967, 401, 405, 407, 413, - 417, 423, 430, 443, 456, 467, 483, 499, 519, 540, 561, 584, - 610, 635, 664, 692, 724, 757, 792, 825, 850, 878, 899, 917, - 936, 952, 965, 977, 436, 440, 444, 446, 454, 460, 468, 477, - 493, 503, 522, 537, 550, 578, 595, 620, 644, 670, 704, 728, - 763, 795, 826, 849, 873, 893, 915, 932, 948, 963, 975, 986, - 472, 474, 481, 487, 491, 497, 505, 514, 529, 544, 555, 576, - 588, 614, 633, 660, 680, 709, 734, 769, 798, 828, 851, 874, - 892, 911, 930, 946, 959, 973, 984, 994, 515, 517, 523, 525, - 531, 538, 546, 552, 570, 582, 596, 617, 631, 653, 676, 700, - 720, 749, 774, 811, 835, 856, 879, 894, 912, 928, 944, 957, - 971, 982, 992, 1001, 556, 558, 562, 566, 572, 580, 586, 597, - 611, 623, 637, 655, 673, 693, 714, 738, 765, 790, 817, 839, - 863, 881, 900, 916, 931, 945, 956, 969, 980, 990, 999, 1007, - 599, 603, 605, 607, 615, 621, 629, 639, 650, 668, 678, 701, - 716, 731, 758, 779, 807, 830, 848, 867, 885, 904, 918, 933, - 947, 958, 970, 979, 988, 997, 1005, 1012, 641, 645, 647, 651, - 658, 666, 674, 682, 696, 710, 725, 744, 761, 777, 802, 820, - 842, 861, 877, 891, 906, 924, 937, 949, 960, 972, 981, 989, - 996, 1003, 1010, 1016, 686, 688, 694, 702, 706, 712, 718, 729, - 745, 753, 771, 784, 808, 823, 840, 857, 871, 888, 902, 914, - 926, 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, - 735, 739, 741, 747, 751, 759, 767, 775, 787, 804, 818, 832, - 846, 859, 869, 883, 896, 910, 922, 935, 943, 955, 966, 976, - 985, 993, 1000, 1006, 1011, 1015, 1018, 1021, 781, 785, 791, 796, - 800, 812, 814, 824, 836, 844, 853, 865, 875, 889, 898, 908, - 920, 929, 941, 951, 962, 968, 978, 987, 995, 1002, 1008, 1013, - 1017, 1020, 1022, 1023, + 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121, + 142, 166, 189, 256, 268, 286, 310, 334, 364, 400, 435, 471, 510, + 553, 598, 640, 683, 732, 780, 2, 3, 6, 11, 17, 26, 35, + 45, 58, 73, 90, 106, 123, 146, 168, 193, 258, 270, 288, 312, + 338, 366, 402, 437, 473, 516, 557, 600, 642, 687, 736, 782, 5, + 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150, + 170, 195, 260, 274, 292, 314, 340, 370, 406, 441, 478, 520, 559, + 604, 646, 689, 740, 788, 10, 12, 14, 19, 23, 31, 41, 52, + 65, 81, 96, 113, 133, 152, 175, 201, 262, 276, 294, 316, 344, + 376, 410, 445, 484, 524, 563, 606, 648, 697, 746, 793, 16, 18, + 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, + 203, 264, 278, 300, 322, 350, 380, 414, 451, 490, 530, 571, 612, + 656, 705, 750, 799, 25, 27, 29, 32, 40, 46, 54, 67, 79, + 94, 109, 127, 143, 164, 185, 210, 266, 282, 302, 326, 354, 388, + 422, 459, 496, 533, 579, 618, 665, 711, 754, 809, 34, 36, 38, + 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 216, + 272, 289, 308, 332, 362, 392, 427, 465, 504, 545, 585, 626, 671, + 717, 766, 813, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114, + 131, 147, 162, 183, 208, 222, 279, 298, 320, 346, 374, 408, 442, + 475, 511, 551, 592, 638, 681, 726, 772, 821, 57, 61, 63, 66, + 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 214, 227, 284, + 304, 328, 355, 386, 418, 455, 492, 528, 567, 608, 649, 695, 742, + 786, 833, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156, + 173, 190, 211, 224, 233, 296, 317, 342, 367, 394, 433, 466, 500, + 543, 581, 622, 667, 707, 752, 803, 843, 89, 91, 93, 97, 101, + 110, 118, 132, 141, 157, 171, 186, 206, 220, 231, 239, 306, 330, + 352, 384, 415, 447, 482, 521, 554, 593, 636, 677, 722, 770, 815, + 852, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205, + 218, 229, 237, 244, 323, 347, 371, 398, 431, 463, 498, 534, 573, + 616, 654, 698, 743, 783, 831, 864, 122, 126, 130, 134, 138, 144, + 155, 163, 180, 191, 207, 219, 226, 235, 242, 248, 335, 360, 390, + 419, 449, 485, 518, 549, 587, 630, 672, 715, 760, 805, 845, 872, + 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 221, 230, 236, + 241, 246, 251, 356, 382, 411, 438, 469, 501, 539, 577, 613, 652, + 690, 730, 776, 822, 858, 886, 167, 169, 172, 178, 182, 188, 198, + 209, 215, 225, 232, 238, 243, 247, 250, 253, 378, 403, 428, 461, + 494, 526, 560, 594, 632, 675, 713, 755, 801, 837, 868, 897, 192, + 194, 196, 202, 204, 213, 217, 223, 228, 234, 240, 245, 249, 252, + 254, 255, 395, 425, 457, 488, 512, 547, 583, 619, 659, 699, 737, + 778, 819, 854, 882, 907, 257, 259, 261, 263, 265, 267, 273, 280, + 285, 297, 307, 324, 336, 357, 379, 396, 424, 452, 479, 508, 541, + 574, 609, 643, 679, 719, 764, 806, 841, 870, 895, 919, 269, 271, + 275, 277, 281, 283, 290, 299, 305, 318, 331, 348, 361, 383, 404, + 426, 453, 476, 506, 535, 568, 601, 634, 669, 708, 748, 789, 829, + 860, 887, 909, 927, 287, 291, 293, 295, 301, 303, 309, 321, 329, + 343, 353, 372, 391, 412, 429, 458, 480, 507, 532, 564, 590, 627, + 663, 703, 733, 773, 816, 847, 876, 901, 921, 940, 311, 313, 315, + 319, 325, 327, 333, 349, 358, 368, 385, 399, 420, 439, 462, 489, + 509, 536, 565, 589, 624, 661, 691, 727, 768, 810, 838, 866, 890, + 913, 934, 950, 337, 339, 341, 345, 351, 359, 363, 375, 387, 397, + 416, 432, 450, 470, 495, 513, 542, 569, 591, 625, 657, 684, 723, + 762, 797, 834, 862, 884, 905, 925, 942, 961, 365, 369, 373, 377, + 381, 389, 393, 409, 421, 434, 448, 464, 486, 502, 527, 548, 575, + 602, 628, 662, 685, 721, 756, 794, 827, 855, 880, 903, 923, 938, + 954, 967, 401, 405, 407, 413, 417, 423, 430, 443, 456, 467, 483, + 499, 519, 540, 561, 584, 610, 635, 664, 692, 724, 757, 792, 825, + 850, 878, 899, 917, 936, 952, 965, 977, 436, 440, 444, 446, 454, + 460, 468, 477, 493, 503, 522, 537, 550, 578, 595, 620, 644, 670, + 704, 728, 763, 795, 826, 849, 873, 893, 915, 932, 948, 963, 975, + 986, 472, 474, 481, 487, 491, 497, 505, 514, 529, 544, 555, 576, + 588, 614, 633, 660, 680, 709, 734, 769, 798, 828, 851, 874, 892, + 911, 930, 946, 959, 973, 984, 994, 515, 517, 523, 525, 531, 538, + 546, 552, 570, 582, 596, 617, 631, 653, 676, 700, 720, 749, 774, + 811, 835, 856, 879, 894, 912, 928, 944, 957, 971, 982, 992, 1001, + 556, 558, 562, 566, 572, 580, 586, 597, 611, 623, 637, 655, 673, + 693, 714, 738, 765, 790, 817, 839, 863, 881, 900, 916, 931, 945, + 956, 969, 980, 990, 999, 1007, 599, 603, 605, 607, 615, 621, 629, + 639, 650, 668, 678, 701, 716, 731, 758, 779, 807, 830, 848, 867, + 885, 904, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 641, + 645, 647, 651, 658, 666, 674, 682, 696, 710, 725, 744, 761, 777, + 802, 820, 842, 861, 877, 891, 906, 924, 937, 949, 960, 972, 981, + 989, 996, 1003, 1010, 1016, 686, 688, 694, 702, 706, 712, 718, 729, + 745, 753, 771, 784, 808, 823, 840, 857, 871, 888, 902, 914, 926, + 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 735, 739, + 741, 747, 751, 759, 767, 775, 787, 804, 818, 832, 846, 859, 869, + 883, 896, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006, + 1011, 1015, 1018, 1021, 781, 785, 791, 796, 800, 812, 814, 824, 836, + 844, 853, 865, 875, 889, 898, 908, 920, 929, 941, 951, 962, 968, + 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023, }; #endif // CONFIG_EXT_TX const scan_order vp10_default_scan_orders[TX_SIZES] = { - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors}, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, }; #if CONFIG_EXT_TX const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { - { // TX_4X4 - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, - {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, - {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, - {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, - }, { // TX_8X8 - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, - {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, - {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, - {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, - }, { // TX_16X16 - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, - {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, - {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, - {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, - }, { // TX_32X32 - {default_scan_32x32, vp10_default_iscan_32x32, - default_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, + { + // TX_4X4 + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors }, + { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, + { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors }, + { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors }, + { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors }, + { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors }, + { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors }, + { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors }, + }, + { + // TX_8X8 + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors }, + { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, + { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors }, + { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors }, + { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors }, + { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors }, + { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors }, + { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors }, + }, + { + // TX_16X16 + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors }, + { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, + { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors }, + { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors }, + { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors }, + { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors }, + { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors }, + { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors }, + }, + { + // TX_32X32 + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, + { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors }, + { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors }, + { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, } }; const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = { - { // TX_4X4 - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, - {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, - {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors}, - {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, - {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors}, - {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors}, - {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors}, - }, { // TX_8X8 - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, - {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, - {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors}, - {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, - {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors}, - {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors}, - {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors}, - }, { // TX_16X16 - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, - {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, - {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, - {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, - {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, - {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors}, - {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors}, - }, { // TX_32X32 - {default_scan_32x32, vp10_default_iscan_32x32, - default_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {h2_scan_32x32, vp10_h2_iscan_32x32, - h2_scan_32x32_neighbors}, - {v2_scan_32x32, vp10_v2_iscan_32x32, - v2_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {qtr_scan_32x32, vp10_qtr_iscan_32x32, - qtr_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, - {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors}, - {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors}, - }, { // TX_4X8 - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors}, - {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors}, - {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors}, - {mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors}, - {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors}, - {mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors}, - {mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors}, - {mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors}, - }, { // TX_8X4 - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors}, - {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors}, - {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors}, - {mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors}, - {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors}, - {mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors}, - {mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors}, - {mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors}, + { + // TX_4X4 + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, + { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, + { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors }, + { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, + { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors }, + { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors }, + { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors }, + }, + { + // TX_8X8 + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, + { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, + { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors }, + { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, + { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors }, + { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors }, + { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors }, + }, + { + // TX_16X16 + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, + { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, + { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors }, + { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, + { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors }, + { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors }, + { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors }, + }, + { + // TX_32X32 + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, + { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors }, + { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors }, + { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, + { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors }, + { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors }, + }, + { + // TX_4X8 + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors }, + { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, + { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, + { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors }, + { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, + { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors }, + { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors }, + { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors }, + }, + { + // TX_8X4 + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors }, + { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, + { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, + { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors }, + { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, + { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors }, + { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors }, + { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors }, } }; #else // CONFIG_EXT_TX const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = { - { // TX_4X4 - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors}, - {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors}, - {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors} - }, { // TX_8X8 - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors}, - {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors}, - {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors} - }, { // TX_16X16 - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors}, - {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors}, - {default_scan_16x16, vp10_default_iscan_16x16, - default_scan_16x16_neighbors} - }, { // TX_32X32 - {default_scan_32x32, vp10_default_iscan_32x32, - default_scan_32x32_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, - default_scan_32x32_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, - default_scan_32x32_neighbors}, - {default_scan_32x32, vp10_default_iscan_32x32, - default_scan_32x32_neighbors}, + { // TX_4X4 + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors }, + { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors }, + { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors }, + { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors } }, + { // TX_8X8 + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors }, + { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors }, + { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors }, + { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors } }, + { // TX_16X16 + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors }, + { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors }, + { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors }, + { default_scan_16x16, vp10_default_iscan_16x16, + default_scan_16x16_neighbors } }, + { + // TX_32X32 + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, + { default_scan_32x32, vp10_default_iscan_32x32, + default_scan_32x32_neighbors }, } }; #endif // CONFIG_EXT_TX diff --git a/vp10/common/scan.h b/vp10/common/scan.h index bafefbcf3b716866981dbae267cc7ae49f4961c3..32329ba8d67685582520cfcb6041e8f2efbf5601 100644 --- a/vp10/common/scan.h +++ b/vp10/common/scan.h @@ -35,7 +35,8 @@ extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES]; static INLINE int get_coef_context(const int16_t *neighbors, const uint8_t *token_cache, int c) { return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + - token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; + token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> + 1; } static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size, @@ -52,15 +53,13 @@ static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size, } #endif // CONFIG_EXT_TX -static INLINE const scan_order *get_scan(TX_SIZE tx_size, - TX_TYPE tx_type, +static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type, int is_inter) { #if CONFIG_EXT_TX - return - is_inter ? &vp10_inter_scan_orders[tx_size][tx_type] : - &vp10_intra_scan_orders[tx_size][tx_type]; + return is_inter ? &vp10_inter_scan_orders[tx_size][tx_type] + : &vp10_intra_scan_orders[tx_size][tx_type]; #else - (void) is_inter; + (void)is_inter; return &vp10_intra_scan_orders[tx_size][tx_type]; #endif // CONFIG_EXT_TX } diff --git a/vp10/common/seg_common.c b/vp10/common/seg_common.c index 1bf09b9a0f97542f0c5f7ca242758dab06988f47..7e5dcc2bb81bb8ecd366d553b98fe86aaa48ab27 100644 --- a/vp10/common/seg_common.c +++ b/vp10/common/seg_common.c @@ -17,8 +17,8 @@ static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; -static const int seg_feature_data_max[SEG_LVL_MAX] = { - MAXQ, MAX_LOOP_FILTER, 3, 0 }; +static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3, + 0 }; // These functions provide access to new segment level features. // Eventually these function may be "optimized out" but for the moment, @@ -31,7 +31,7 @@ void vp10_clearall_segfeatures(struct segmentation *seg) { } void vp10_enable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { + SEG_LVL_FEATURES feature_id) { seg->feature_mask[segment_id] |= 1 << feature_id; } @@ -44,7 +44,7 @@ int vp10_is_segfeature_signed(SEG_LVL_FEATURES feature_id) { } void vp10_set_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id, int seg_data) { + SEG_LVL_FEATURES feature_id, int seg_data) { assert(seg_data <= seg_feature_data_max[feature_id]); if (seg_data < 0) { assert(seg_feature_data_signed[feature_id]); @@ -55,9 +55,7 @@ void vp10_set_segdata(struct segmentation *seg, int segment_id, } const vpx_tree_index vp10_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = { - 2, 4, 6, 8, 10, 12, - 0, -1, -2, -3, -4, -5, -6, -7 + 2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7 }; - // TBD? Functions to read and write segment data with range / validity checking diff --git a/vp10/common/seg_common.h b/vp10/common/seg_common.h index cd38e8ee0d1070c72f83c5c848a9bef3ae0e4f39..4f8b80e801430a4fa03ffd5d0a686d20165cb0a5 100644 --- a/vp10/common/seg_common.h +++ b/vp10/common/seg_common.h @@ -17,24 +17,23 @@ extern "C" { #endif -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 +#define SEGMENT_DELTADATA 0 +#define SEGMENT_ABSDATA 1 -#define MAX_SEGMENTS 8 -#define SEG_TREE_PROBS (MAX_SEGMENTS-1) +#define MAX_SEGMENTS 8 +#define SEG_TREE_PROBS (MAX_SEGMENTS - 1) #define PREDICTION_PROBS 3 // Segment level features. typedef enum { - SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... - SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... - SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame - SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode - SEG_LVL_MAX = 4 // Number of features supported + SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... + SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... + SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame + SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode + SEG_LVL_MAX = 4 // Number of features supported } SEG_LVL_FEATURES; - struct segmentation { uint8_t enabled; uint8_t update_map; @@ -54,24 +53,20 @@ struct segmentation_probs { static INLINE int segfeature_active(const struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { - return seg->enabled && - (seg->feature_mask[segment_id] & (1 << feature_id)); + return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id)); } void vp10_clearall_segfeatures(struct segmentation *seg); -void vp10_enable_segfeature(struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id); +void vp10_enable_segfeature(struct segmentation *seg, int segment_id, + SEG_LVL_FEATURES feature_id); int vp10_seg_feature_data_max(SEG_LVL_FEATURES feature_id); int vp10_is_segfeature_signed(SEG_LVL_FEATURES feature_id); -void vp10_set_segdata(struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id, - int seg_data); +void vp10_set_segdata(struct segmentation *seg, int segment_id, + SEG_LVL_FEATURES feature_id, int seg_data); static INLINE int get_segdata(const struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { @@ -85,4 +80,3 @@ extern const vpx_tree_index vp10_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; #endif #endif // VP10_COMMON_SEG_COMMON_H_ - diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index 164676c447f4e7781860aac04568dad577ac637d..947c26906f055756ff298bd31691e430d5556735 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c @@ -29,8 +29,7 @@ static INLINE void mutex_lock(pthread_mutex_t *const mutex) { } } - if (!locked) - pthread_mutex_lock(mutex); + if (!locked) pthread_mutex_lock(mutex); } #endif // CONFIG_MULTITHREAD @@ -64,8 +63,7 @@ static INLINE void sync_write(VP10LfSync *const lf_sync, int r, int c, if (c < sb_cols - 1) { cur = c; - if (c % nsync) - sig = 0; + if (c % nsync) sig = 0; } else { cur = sb_cols + nsync; } @@ -87,12 +85,10 @@ static INLINE void sync_write(VP10LfSync *const lf_sync, int r, int c, } // Implement row loopfiltering for each thread. -static INLINE -void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, - VP10_COMMON *const cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only, - VP10LfSync *const lf_sync) { +static INLINE void thread_loop_filter_rows( + const YV12_BUFFER_CONFIG *const frame_buffer, VP10_COMMON *const cm, + struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, + int y_only, VP10LfSync *const lf_sync) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2; int mi_row, mi_col; @@ -110,8 +106,9 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, #endif // !CONFIG_EXT_PARTITION_TYPES #if CONFIG_EXT_PARTITION - printf("STOPPING: This code has not been modified to work with the " - "extended coding unit size experiment"); + printf( + "STOPPING: This code has not been modified to work with the " + "extended coding unit size experiment"); exit(EXIT_FAILURE); #endif // CONFIG_EXT_PARTITION @@ -130,12 +127,11 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, #if CONFIG_EXT_PARTITION_TYPES for (plane = 0; plane < num_planes; ++plane) - vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, + mi_col); #else // TODO(JBB): Make setup_mask work for non 420. - vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - &lfm); + vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); for (plane = 1; plane < num_planes; ++plane) { @@ -148,7 +144,7 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, break; case LF_PATH_SLOW: vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); + mi_row, mi_col); break; } } @@ -167,8 +163,7 @@ static int loop_filter_row_worker(VP10LfSync *const lf_sync, return 1; } -static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, - VP10_COMMON *cm, +static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only, VPxWorker *workers, int nworkers, @@ -183,9 +178,10 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, int i; #if CONFIG_EXT_PARTITION - printf("STOPPING: This code has not been modified to work with the " - "extended coding unit size experiment"); - exit(EXIT_FAILURE); + printf( + "STOPPING: This code has not been modified to work with the " + "extended coding unit size experiment"); + exit(EXIT_FAILURE); #endif // CONFIG_EXT_PARTITION if (!lf_sync->sync_range || sb_rows != lf_sync->rows || @@ -233,13 +229,11 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, } } -void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - VP10_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int frame_filter_level, - int y_only, int partial_frame, - VPxWorker *workers, int num_workers, - VP10LfSync *lf_sync) { +void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, int y_only, + int partial_frame, VPxWorker *workers, + int num_workers, VP10LfSync *lf_sync) { int start_mi_row, end_mi_row, mi_rows_to_filter; if (!frame_filter_level) return; @@ -254,8 +248,8 @@ void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, end_mi_row = start_mi_row + mi_rows_to_filter; vp10_loop_filter_frame_init(cm, frame_filter_level); - loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, - y_only, workers, num_workers, lf_sync); + loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only, + workers, num_workers, lf_sync); } // Set up nsync by width. @@ -274,7 +268,7 @@ static INLINE int get_sync_range(int width) { // Allocate memory for lf row synchronization void vp10_loop_filter_alloc(VP10LfSync *lf_sync, VP10_COMMON *cm, int rows, - int width, int num_workers) { + int width, int num_workers) { lf_sync->rows = rows; #if CONFIG_MULTITHREAD { @@ -339,12 +333,11 @@ void vp10_loop_filter_dealloc(VP10LfSync *lf_sync) { // Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int' // members, so we treat it as an array, and sum over the whole length. void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts) { - unsigned int *const acc = (unsigned int*)&cm->counts; - const unsigned int *const cnt = (unsigned int*)counts; + unsigned int *const acc = (unsigned int *)&cm->counts; + const unsigned int *const cnt = (unsigned int *)counts; - const unsigned int n_counts = sizeof(FRAME_COUNTS)/sizeof(unsigned int); + const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int); unsigned int i; - for (i = 0; i < n_counts; i++) - acc[i] += cnt[i]; + for (i = 0; i < n_counts; i++) acc[i] += cnt[i]; } diff --git a/vp10/common/thread_common.h b/vp10/common/thread_common.h index 8c39aff43fa5f1473b37573d098a0b853a757076..9c623ef8a36b396d1c6df6a06f5ba3dcca4d2c23 100644 --- a/vp10/common/thread_common.h +++ b/vp10/common/thread_common.h @@ -40,23 +40,21 @@ typedef struct VP10LfSyncData { } VP10LfSync; // Allocate memory for loopfilter row synchronization. -void vp10_loop_filter_alloc(VP10LfSync *lf_sync, struct VP10Common *cm, int rows, - int width, int num_workers); +void vp10_loop_filter_alloc(VP10LfSync *lf_sync, struct VP10Common *cm, + int rows, int width, int num_workers); // Deallocate loopfilter synchronization related mutex and data. void vp10_loop_filter_dealloc(VP10LfSync *lf_sync); // Multi-threaded loopfilter that uses the tile threads. -void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - struct VP10Common *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int frame_filter_level, - int y_only, int partial_frame, - VPxWorker *workers, int num_workers, - VP10LfSync *lf_sync); +void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, int y_only, + int partial_frame, VPxWorker *workers, + int num_workers, VP10LfSync *lf_sync); void vp10_accumulate_frame_counts(struct VP10Common *cm, - struct FRAME_COUNTS *counts); + struct FRAME_COUNTS *counts); #ifdef __cplusplus } // extern "C" diff --git a/vp10/common/tile_common.c b/vp10/common/tile_common.c index 04b19eb87b15a4d211326b2b679cff9bddf41627..c478fd292d46864a542e0b4ab1781ca169da5711 100644 --- a/vp10/common/tile_common.c +++ b/vp10/common/tile_common.c @@ -14,14 +14,12 @@ void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) { tile->mi_row_start = row * cm->tile_height; - tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height, - cm->mi_rows); + tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height, cm->mi_rows); } void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) { tile->mi_col_start = col * cm->tile_width; - tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width, - cm->mi_cols); + tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width, cm->mi_cols); } void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) { @@ -31,30 +29,28 @@ void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) { #if !CONFIG_EXT_TILE -# if CONFIG_EXT_PARTITION -# define MIN_TILE_WIDTH_MAX_SB 2 -# define MAX_TILE_WIDTH_MAX_SB 32 -# else -# define MIN_TILE_WIDTH_MAX_SB 4 -# define MAX_TILE_WIDTH_MAX_SB 64 -# endif // CONFIG_EXT_PARTITION +#if CONFIG_EXT_PARTITION +#define MIN_TILE_WIDTH_MAX_SB 2 +#define MAX_TILE_WIDTH_MAX_SB 32 +#else +#define MIN_TILE_WIDTH_MAX_SB 4 +#define MAX_TILE_WIDTH_MAX_SB 64 +#endif // CONFIG_EXT_PARTITION static int get_min_log2_tile_cols(const int max_sb_cols) { int min_log2 = 0; - while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols) - ++min_log2; + while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols) ++min_log2; return min_log2; } static int get_max_log2_tile_cols(const int max_sb_cols) { int max_log2 = 1; - while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB) - ++max_log2; + while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB) ++max_log2; return max_log2 - 1; } -void vp10_get_tile_n_bits(const int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols) { +void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols, + int *max_log2_tile_cols) { const int max_sb_cols = ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2; *min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols); diff --git a/vp10/common/tile_common.h b/vp10/common/tile_common.h index 2babc89071c43ba8c7f796cb65223c5dde81802b..a5021736453449dbdf75a31efb924a72b28400de 100644 --- a/vp10/common/tile_common.h +++ b/vp10/common/tile_common.h @@ -24,14 +24,14 @@ typedef struct TileInfo { // initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on // 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' -void vp10_tile_init(TileInfo *tile, const struct VP10Common *cm, - int row, int col); +void vp10_tile_init(TileInfo *tile, const struct VP10Common *cm, int row, + int col); void vp10_tile_set_row(TileInfo *tile, const struct VP10Common *cm, int row); void vp10_tile_set_col(TileInfo *tile, const struct VP10Common *cm, int col); -void vp10_get_tile_n_bits(const int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols); +void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols, + int *max_log2_tile_cols); #ifdef __cplusplus } // extern "C" diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c index 32e63addd38094150cb6e01468e19ffa42f1e6a5..6b29025203988269cb1e27dc3dbf10cf17e3b903 100644 --- a/vp10/common/vp10_convolve.c +++ b/vp10/common/vp10_convolve.c @@ -23,9 +23,8 @@ void vp10_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, int x_q4 = subpel_x_q4; for (x = 0; x < w; ++x) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *x_filter = - vp10_get_interp_filter_subpel_kernel( - filter_params, x_q4 & SUBPEL_MASK); + const int16_t *x_filter = vp10_get_interp_filter_subpel_kernel( + filter_params, x_q4 & SUBPEL_MASK); int k, sum = 0; for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; if (avg) { @@ -53,9 +52,8 @@ void vp10_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, int y_q4 = subpel_y_q4; for (y = 0; y < h; ++y) { const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *y_filter = - vp10_get_interp_filter_subpel_kernel( - filter_params, y_q4 & SUBPEL_MASK); + const int16_t *y_filter = vp10_get_interp_filter_subpel_kernel( + filter_params, y_q4 & SUBPEL_MASK); int k, sum = 0; for (k = 0; k < filter_size; ++k) sum += src_y[k * src_stride] * y_filter[k]; @@ -102,8 +100,8 @@ void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst, #else const INTERP_FILTER interp_filter, #endif - const int subpel_x_q4, int x_step_q4, - const int subpel_y_q4, int y_step_q4, int ref_idx) { + const int subpel_x_q4, int x_step_q4, const int subpel_y_q4, + int y_step_q4, int ref_idx) { int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0; int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0; @@ -177,8 +175,8 @@ void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst, assert(filter_params.taps <= MAX_FILTER_TAP); vp10_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, - dst, dst_stride, w, h, filter_params, - subpel_y_q4, y_step_q4, ref_idx); + dst, dst_stride, w, h, filter_params, subpel_y_q4, + y_step_q4, ref_idx); } } @@ -195,9 +193,8 @@ void vp10_highbd_convolve_horiz_c(const uint16_t *src, int src_stride, int x_q4 = subpel_x_q4; for (x = 0; x < w; ++x) { const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *x_filter = - vp10_get_interp_filter_subpel_kernel( - filter_params, x_q4 & SUBPEL_MASK); + const int16_t *x_filter = vp10_get_interp_filter_subpel_kernel( + filter_params, x_q4 & SUBPEL_MASK); int k, sum = 0; for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; if (avg) @@ -227,9 +224,8 @@ void vp10_highbd_convolve_vert_c(const uint16_t *src, int src_stride, int y_q4 = subpel_y_q4; for (y = 0; y < h; ++y) { const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *y_filter = - vp10_get_interp_filter_subpel_kernel( - filter_params, y_q4 & SUBPEL_MASK); + const int16_t *y_filter = vp10_get_interp_filter_subpel_kernel( + filter_params, y_q4 & SUBPEL_MASK); int k, sum = 0; for (k = 0; k < filter_size; ++k) sum += src_y[k * src_stride] * y_filter[k]; @@ -339,10 +335,9 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8, int intermediate_height = (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size; - vp10_highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), - src_stride, temp, temp_stride, w, - intermediate_height, filter_params, subpel_x_q4, - x_step_q4, 0, bd); + vp10_highbd_convolve_horiz( + src - src_stride * (filter_size / 2 - 1), src_stride, temp, temp_stride, + w, intermediate_height, filter_params, subpel_x_q4, x_step_q4, 0, bd); #if CONFIG_DUAL_FILTER filter_params = filter_params_y; diff --git a/vp10/common/vp10_convolve.h b/vp10/common/vp10_convolve.h index 2cc57fe623521a0bc7d9b96ffa3a7ed39b7081f9..a15da19e8db5b9b9b4bc26659e2715de2d3aeac4 100644 --- a/vp10/common/vp10_convolve.h +++ b/vp10/common/vp10_convolve.h @@ -6,29 +6,26 @@ extern "C" { #endif -void vp10_convolve(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int w, int h, +void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int w, int h, #if CONFIG_DUAL_FILTER const INTERP_FILTER *interp_filter, #else const INTERP_FILTER interp_filter, #endif - const int subpel_x, int xstep, - const int subpel_y, int ystep, int avg); + const int subpel_x, int xstep, const int subpel_y, int ystep, + int avg); #if CONFIG_VP9_HIGHBITDEPTH -void vp10_highbd_convolve(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int w, int h, +void vp10_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int w, int h, #if CONFIG_DUAL_FILTER - const INTERP_FILTER *interp_filter, + const INTERP_FILTER *interp_filter, #else - const INTERP_FILTER interp_filter, + const INTERP_FILTER interp_filter, #endif - const int subpel_x, int xstep, - const int subpel_y, int ystep, - int avg, int bd); + const int subpel_x, int xstep, const int subpel_y, + int ystep, int avg, int bd); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus diff --git a/vp10/common/vp10_fwd_txfm.c b/vp10/common/vp10_fwd_txfm.c index 17935c5919042b292ee0f9f0a659ef363c81cf18..ddcf10ef01217cc0d3563bd379e6242dd2c164ed 100644 --- a/vp10/common/vp10_fwd_txfm.c +++ b/vp10/common/vp10_fwd_txfm.c @@ -72,8 +72,7 @@ void vp10_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) { { int i, j; for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - output[j + i * 4] = (output[j + i * 4] + 1) >> 2; + for (j = 0; j < 4; ++j) output[j + i * 4] = (output[j + i * 4] + 1) >> 2; } } } @@ -82,15 +81,14 @@ void vp10_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; tran_low_t sum = 0; for (r = 0; r < 4; ++r) - for (c = 0; c < 4; ++c) - sum += input[r * stride + c]; + for (c = 0; c < 4; ++c) sum += input[r * stride + c]; output[0] = sum << 1; output[1] = 0; } -void vp10_fdct8x8_c(const int16_t *input, - tran_low_t *final_output, int stride) { +void vp10_fdct8x8_c(const int16_t *input, tran_low_t *final_output, + int stride) { int i, j; tran_low_t intermediate[64]; int pass; @@ -135,8 +133,8 @@ void vp10_fdct8x8_c(const int16_t *input, x3 = s0 - s3; t0 = (x0 + x1) * cospi_16_64; t1 = (x0 - x1) * cospi_16_64; - t2 = x2 * cospi_24_64 + x3 * cospi_8_64; - t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; + t2 = x2 * cospi_24_64 + x3 * cospi_8_64; + t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; output[0] = (tran_low_t)fdct_round_shift(t0); output[2] = (tran_low_t)fdct_round_shift(t2); output[4] = (tran_low_t)fdct_round_shift(t1); @@ -155,24 +153,23 @@ void vp10_fdct8x8_c(const int16_t *input, x3 = s7 + t3; // Stage 4 - t0 = x0 * cospi_28_64 + x3 * cospi_4_64; - t1 = x1 * cospi_12_64 + x2 * cospi_20_64; + t0 = x0 * cospi_28_64 + x3 * cospi_4_64; + t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; - t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; + t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; output[1] = (tran_low_t)fdct_round_shift(t0); output[3] = (tran_low_t)fdct_round_shift(t2); output[5] = (tran_low_t)fdct_round_shift(t1); output[7] = (tran_low_t)fdct_round_shift(t3); output += 8; } - in = intermediate; + in = intermediate; output = final_output; } // Rows for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - final_output[j + i * 8] /= 2; + for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2; } } @@ -180,8 +177,7 @@ void vp10_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; tran_low_t sum = 0; for (r = 0; r < 8; ++r) - for (c = 0; c < 8; ++c) - sum += input[r * stride + c]; + for (c = 0; c < 8; ++c) sum += input[r * stride + c]; output[0] = sum; output[1] = 0; @@ -217,11 +213,11 @@ void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4; input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4; input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4; - input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) * 4; - input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) * 4; + input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4; + input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4; // Calculate input for the next 8 results. - step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) * 4; - step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) * 4; + step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4; + step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4; step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4; step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4; step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4; @@ -236,11 +232,11 @@ void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2); input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2); input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2); - input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2); - input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2); + input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2); + input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2); // Calculate input for the next 8 results. - step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2); - step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2); + step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2); + step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2); step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2); step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2); step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2); @@ -271,7 +267,7 @@ void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { x3 = s0 - s3; t0 = (x0 + x1) * cospi_16_64; t1 = (x0 - x1) * cospi_16_64; - t2 = x3 * cospi_8_64 + x2 * cospi_24_64; + t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; out[0] = (tran_low_t)fdct_round_shift(t0); out[4] = (tran_low_t)fdct_round_shift(t2); @@ -291,10 +287,10 @@ void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { x3 = s7 + t3; // Stage 4 - t0 = x0 * cospi_28_64 + x3 * cospi_4_64; - t1 = x1 * cospi_12_64 + x2 * cospi_20_64; + t0 = x0 * cospi_28_64 + x3 * cospi_4_64; + t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; - t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; + t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; out[2] = (tran_low_t)fdct_round_shift(t0); out[6] = (tran_low_t)fdct_round_shift(t2); out[10] = (tran_low_t)fdct_round_shift(t1); @@ -321,12 +317,12 @@ void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { step3[6] = step1[6] + step2[5]; step3[7] = step1[7] + step2[4]; // step 4 - temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; - temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; + temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; + temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; step2[1] = fdct_round_shift(temp1); step2[2] = fdct_round_shift(temp2); temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; - temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; + temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; step2[5] = fdct_round_shift(temp1); step2[6] = fdct_round_shift(temp2); // step 5 @@ -339,20 +335,20 @@ void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { step1[6] = step3[7] - step2[6]; step1[7] = step3[7] + step2[6]; // step 6 - temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; + temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; out[1] = (tran_low_t)fdct_round_shift(temp1); out[9] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; - temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; + temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; out[5] = (tran_low_t)fdct_round_shift(temp1); out[13] = (tran_low_t)fdct_round_shift(temp2); - temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; + temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; out[3] = (tran_low_t)fdct_round_shift(temp1); out[11] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; - temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; + temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; out[7] = (tran_low_t)fdct_round_shift(temp1); out[15] = (tran_low_t)fdct_round_shift(temp2); } @@ -371,8 +367,7 @@ void vp10_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; tran_low_t sum = 0; for (r = 0; r < 16; ++r) - for (c = 0; c < 16; ++c) - sum += input[r * stride + c]; + for (c = 0; c < 16; ++c) sum += input[r * stride + c]; output[0] = sum >> 1; output[1] = 0; @@ -679,36 +674,36 @@ void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round) { step[31] = output[31] + output[30]; // Final stage --- outputs indices are bit-reversed. - output[0] = step[0]; + output[0] = step[0]; output[16] = step[1]; - output[8] = step[2]; + output[8] = step[2]; output[24] = step[3]; - output[4] = step[4]; + output[4] = step[4]; output[20] = step[5]; output[12] = step[6]; output[28] = step[7]; - output[2] = step[8]; + output[2] = step[8]; output[18] = step[9]; output[10] = step[10]; output[26] = step[11]; - output[6] = step[12]; + output[6] = step[12]; output[22] = step[13]; output[14] = step[14]; output[30] = step[15]; - output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64); + output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64); output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64); - output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64); + output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64); output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64); - output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64); + output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64); output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64); output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64); output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64); - output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); + output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); - output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); + output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); @@ -721,8 +716,7 @@ void vp10_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { // Columns for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = input[j * stride + i] * 4; + for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; vp10_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; @@ -731,8 +725,7 @@ void vp10_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { // Rows for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = output[j + i * 32]; + for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; vp10_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j + i * 32] = @@ -750,8 +743,7 @@ void vp10_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { // Columns for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = input[j * stride + i] * 4; + for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; vp10_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) // TODO(cd): see quality impact of only doing @@ -763,11 +755,9 @@ void vp10_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { // Rows for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = output[j + i * 32]; + for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; vp10_fdct32(temp_in, temp_out, 1); - for (j = 0; j < 32; ++j) - out[j + i * 32] = (tran_low_t)temp_out[j]; + for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j]; } } @@ -775,8 +765,7 @@ void vp10_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; tran_low_t sum = 0; for (r = 0; r < 32; ++r) - for (c = 0; c < 32; ++c) - sum += input[r * stride + c]; + for (c = 0; c < 32; ++c) sum += input[r * stride + c]; output[0] = sum >> 3; output[1] = 0; @@ -784,42 +773,42 @@ void vp10_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) { #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, - int stride) { + int stride) { vp10_fdct4x4_c(input, output, stride); } void vp10_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output, - int stride) { + int stride) { vp10_fdct8x8_c(input, final_output, stride); } void vp10_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output, - int stride) { + int stride) { vp10_fdct8x8_1_c(input, final_output, stride); } void vp10_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, - int stride) { + int stride) { vp10_fdct16x16_c(input, output, stride); } void vp10_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output, - int stride) { + int stride) { vp10_fdct16x16_1_c(input, output, stride); } -void vp10_highbd_fdct32x32_c(const int16_t *input, - tran_low_t *out, int stride) { +void vp10_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, + int stride) { vp10_fdct32x32_c(input, out, stride); } void vp10_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, - int stride) { + int stride) { vp10_fdct32x32_rd_c(input, out, stride); } -void vp10_highbd_fdct32x32_1_c(const int16_t *input, - tran_low_t *out, int stride) { +void vp10_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out, + int stride) { vp10_fdct32x32_1_c(input, out, stride); } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp10/common/vp10_fwd_txfm1d.c b/vp10/common/vp10_fwd_txfm1d.c index ef24362377e3689ed517bc0b0cfa05df591d98d1..589347ce1dd91cd1a31a59b697a5911a66fd207c 100644 --- a/vp10/common/vp10_fwd_txfm1d.c +++ b/vp10/common/vp10_fwd_txfm1d.c @@ -15,8 +15,8 @@ { \ int i, j; \ for (i = 0; i < size; ++i) { \ - int buf_bit = get_max_bit(abs(buf[i])) + 1; \ - if (buf_bit > bit) { \ + int buf_bit = get_max_bit(abs(buf[i])) + 1; \ + if (buf_bit > bit) { \ printf("======== %s overflow ========\n", __func__); \ printf("stage: %d node: %d\n", stage, i); \ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \ @@ -32,11 +32,11 @@ #else #define range_check(stage, input, buf, size, bit) \ { \ - (void)stage; \ - (void)input; \ - (void)buf; \ - (void)size; \ - (void)bit; \ + (void) stage; \ + (void) input; \ + (void) buf; \ + (void) size; \ + (void) bit; \ } #endif diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c index e03e593b7f9994716e990b3089ce1dcda15f7db2..ec40b4ed3f7aa2954ff7fe3539fac2b43fab2993 100644 --- a/vp10/common/vp10_fwd_txfm2d.c +++ b/vp10/common/vp10_fwd_txfm2d.c @@ -18,27 +18,16 @@ static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) { switch (txfm_type) { - case TXFM_TYPE_DCT4: - return vp10_fdct4_new; - case TXFM_TYPE_DCT8: - return vp10_fdct8_new; - case TXFM_TYPE_DCT16: - return vp10_fdct16_new; - case TXFM_TYPE_DCT32: - return vp10_fdct32_new; - case TXFM_TYPE_DCT64: - return vp10_fdct64_new; - case TXFM_TYPE_ADST4: - return vp10_fadst4_new; - case TXFM_TYPE_ADST8: - return vp10_fadst8_new; - case TXFM_TYPE_ADST16: - return vp10_fadst16_new; - case TXFM_TYPE_ADST32: - return vp10_fadst32_new; - default: - assert(0); - return NULL; + case TXFM_TYPE_DCT4: return vp10_fdct4_new; + case TXFM_TYPE_DCT8: return vp10_fdct8_new; + case TXFM_TYPE_DCT16: return vp10_fdct16_new; + case TXFM_TYPE_DCT32: return vp10_fdct32_new; + case TXFM_TYPE_DCT64: return vp10_fdct64_new; + case TXFM_TYPE_ADST4: return vp10_fadst4_new; + case TXFM_TYPE_ADST8: return vp10_fadst8_new; + case TXFM_TYPE_ADST16: return vp10_fadst16_new; + case TXFM_TYPE_ADST32: return vp10_fadst32_new; + default: assert(0); return NULL; } } @@ -56,14 +45,13 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output, const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->cfg->txfm_type_row); // use output buffer as temp buffer - int32_t* temp_in = output; - int32_t* temp_out = output + txfm_size; + int32_t *temp_in = output; + int32_t *temp_out = output + txfm_size; // Columns for (c = 0; c < txfm_size; ++c) { if (cfg->ud_flip == 0) { - for (r = 0; r < txfm_size; ++r) - temp_in[r] = input[r * stride + c]; + for (r = 0; r < txfm_size; ++r) temp_in[r] = input[r * stride + c]; } else { for (r = 0; r < txfm_size; ++r) // flip upside down @@ -73,8 +61,7 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output, txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col); round_shift_array(temp_out, txfm_size, -shift[1]); if (cfg->lr_flip == 0) { - for (r = 0; r < txfm_size; ++r) - buf[r * txfm_size + c] = temp_out[r]; + for (r = 0; r < txfm_size; ++r) buf[r * txfm_size + c] = temp_out[r]; } else { for (r = 0; r < txfm_size; ++r) // flip from left to right @@ -90,40 +77,40 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output, } } -void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, - int stride, int tx_type, int bd) { +void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd) { int32_t txfm_buf[4 * 4]; TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_4X4); (void)bd; fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf); } -void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, - int stride, int tx_type, int bd) { +void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd) { int32_t txfm_buf[8 * 8]; TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_8X8); (void)bd; fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf); } -void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, - int stride, int tx_type, int bd) { +void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd) { int32_t txfm_buf[16 * 16]; TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_16X16); (void)bd; fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf); } -void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, - int stride, int tx_type, int bd) { +void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd) { int32_t txfm_buf[32 * 32]; TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32); (void)bd; fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf); } -void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, - int stride, int tx_type, int bd) { +void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, + int tx_type, int bd) { int32_t txfm_buf[64 * 64]; TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type); (void)bd; @@ -131,36 +118,36 @@ void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, } #if CONFIG_EXT_TX -static const TXFM_2D_CFG* fwd_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = { - {&fwd_txfm_2d_cfg_dct_dct_4 , &fwd_txfm_2d_cfg_dct_dct_8, - &fwd_txfm_2d_cfg_dct_dct_16 , &fwd_txfm_2d_cfg_dct_dct_32}, - {&fwd_txfm_2d_cfg_adst_dct_4 , &fwd_txfm_2d_cfg_adst_dct_8, - &fwd_txfm_2d_cfg_adst_dct_16 , &fwd_txfm_2d_cfg_adst_dct_32}, - {&fwd_txfm_2d_cfg_dct_adst_4 , &fwd_txfm_2d_cfg_dct_adst_8, - &fwd_txfm_2d_cfg_dct_adst_16 , &fwd_txfm_2d_cfg_dct_adst_32}, - {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, - &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32}, - {&fwd_txfm_2d_cfg_adst_dct_4 , &fwd_txfm_2d_cfg_adst_dct_8, - &fwd_txfm_2d_cfg_adst_dct_16 , &fwd_txfm_2d_cfg_adst_dct_32}, - {&fwd_txfm_2d_cfg_dct_adst_4 , &fwd_txfm_2d_cfg_dct_adst_8, - &fwd_txfm_2d_cfg_dct_adst_16 , &fwd_txfm_2d_cfg_dct_adst_32}, - {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, - &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32}, - {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, - &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32}, - {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, - &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32}, +static const TXFM_2D_CFG *fwd_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = { + { &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8, + &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 }, + { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8, + &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 }, + { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8, + &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 }, + { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, + &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 }, + { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8, + &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 }, + { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8, + &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 }, + { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, + &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 }, + { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, + &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 }, + { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, + &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 }, }; -#else // CONFIG_EXT_TX -static const TXFM_2D_CFG* fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = { - {&fwd_txfm_2d_cfg_dct_dct_4 , &fwd_txfm_2d_cfg_dct_dct_8, - &fwd_txfm_2d_cfg_dct_dct_16 , &fwd_txfm_2d_cfg_dct_dct_32}, - {&fwd_txfm_2d_cfg_adst_dct_4 , &fwd_txfm_2d_cfg_adst_dct_8, - &fwd_txfm_2d_cfg_adst_dct_16 , &fwd_txfm_2d_cfg_adst_dct_32}, - {&fwd_txfm_2d_cfg_dct_adst_4 , &fwd_txfm_2d_cfg_dct_adst_8, - &fwd_txfm_2d_cfg_dct_adst_16 , &fwd_txfm_2d_cfg_dct_adst_32}, - {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, - &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32}, +#else // CONFIG_EXT_TX +static const TXFM_2D_CFG *fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = { + { &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8, + &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 }, + { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8, + &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 }, + { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8, + &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 }, + { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8, + &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 }, }; #endif // CONFIG_EXT_TX diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h index e15e4baf933838678f8436ff2113fc7ab653d80e..f069adf4177586dc1276da2b0d4c294d9b3e5426 100644 --- a/vp10/common/vp10_fwd_txfm2d_cfg.h +++ b/vp10/common/vp10_fwd_txfm2d_cfg.h @@ -13,390 +13,431 @@ #include "vp10/common/enums.h" #include "vp10/common/vp10_fwd_txfm1d.h" // ---------------- config fwd_dct_dct_4 ---------------- -static const int8_t fwd_shift_dct_dct_4[3] = {2, 0, 0}; -static const int8_t fwd_stage_range_col_dct_dct_4[4] = {15, 16, 17, 17}; -static const int8_t fwd_stage_range_row_dct_dct_4[4] = {17, 18, 18, 18}; -static const int8_t fwd_cos_bit_col_dct_dct_4[4] = {13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_dct_4[4] = {13, 13, 13, 13}; +static const int8_t fwd_shift_dct_dct_4[3] = { 2, 0, 0 }; +static const int8_t fwd_stage_range_col_dct_dct_4[4] = { 15, 16, 17, 17 }; +static const int8_t fwd_stage_range_row_dct_dct_4[4] = { 17, 18, 18, 18 }; +static const int8_t fwd_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = { - 4, // .txfm_size - 4, // .stage_num_col - 4, // .stage_num_row - // 0, // .log_scale - fwd_shift_dct_dct_4, // .shift - fwd_stage_range_col_dct_dct_4, // .stage_range_col - fwd_stage_range_row_dct_dct_4, // .stage_range_row - fwd_cos_bit_col_dct_dct_4, // .cos_bit_col - fwd_cos_bit_row_dct_dct_4, // .cos_bit_row - TXFM_TYPE_DCT4, // .txfm_type_col - TXFM_TYPE_DCT4}; // .txfm_type_row + 4, // .txfm_size + 4, // .stage_num_col + 4, // .stage_num_row + // 0, // .log_scale + fwd_shift_dct_dct_4, // .shift + fwd_stage_range_col_dct_dct_4, // .stage_range_col + fwd_stage_range_row_dct_dct_4, // .stage_range_row + fwd_cos_bit_col_dct_dct_4, // .cos_bit_col + fwd_cos_bit_row_dct_dct_4, // .cos_bit_row + TXFM_TYPE_DCT4, // .txfm_type_col + TXFM_TYPE_DCT4 +}; // .txfm_type_row // ---------------- config fwd_dct_dct_8 ---------------- -static const int8_t fwd_shift_dct_dct_8[3] = {2, -1, 0}; -static const int8_t fwd_stage_range_col_dct_dct_8[6] = {15, 16, 17, 18, 18, 18}; -static const int8_t fwd_stage_range_row_dct_dct_8[6] = {17, 18, 19, 19, 19, 19}; -static const int8_t fwd_cos_bit_col_dct_dct_8[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_dct_8[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t fwd_shift_dct_dct_8[3] = { 2, -1, 0 }; +static const int8_t fwd_stage_range_col_dct_dct_8[6] = { + 15, 16, 17, 18, 18, 18 +}; +static const int8_t fwd_stage_range_row_dct_dct_8[6] = { + 17, 18, 19, 19, 19, 19 +}; +static const int8_t fwd_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = { - 8, // .txfm_size - 6, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - fwd_shift_dct_dct_8, // .shift - fwd_stage_range_col_dct_dct_8, // .stage_range_col - fwd_stage_range_row_dct_dct_8, // .stage_range_row - fwd_cos_bit_col_dct_dct_8, // .cos_bit_col - fwd_cos_bit_row_dct_dct_8, // .cos_bit_row - TXFM_TYPE_DCT8, // .txfm_type_col - TXFM_TYPE_DCT8}; // .txfm_type_row + 8, // .txfm_size + 6, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + fwd_shift_dct_dct_8, // .shift + fwd_stage_range_col_dct_dct_8, // .stage_range_col + fwd_stage_range_row_dct_dct_8, // .stage_range_row + fwd_cos_bit_col_dct_dct_8, // .cos_bit_col + fwd_cos_bit_row_dct_dct_8, // .cos_bit_row + TXFM_TYPE_DCT8, // .txfm_type_col + TXFM_TYPE_DCT8 +}; // .txfm_type_row // ---------------- config fwd_dct_dct_16 ---------------- -static const int8_t fwd_shift_dct_dct_16[3] = {2, -2, 0}; -static const int8_t fwd_stage_range_col_dct_dct_16[8] = {15, 16, 17, 18, - 19, 19, 19, 19}; -static const int8_t fwd_stage_range_row_dct_dct_16[8] = {17, 18, 19, 20, - 20, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_dct_dct_16[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_dct_16[8] = {12, 12, 12, 12, - 12, 12, 12, 12}; +static const int8_t fwd_shift_dct_dct_16[3] = { 2, -2, 0 }; +static const int8_t fwd_stage_range_col_dct_dct_16[8] = { 15, 16, 17, 18, + 19, 19, 19, 19 }; +static const int8_t fwd_stage_range_row_dct_dct_16[8] = { 17, 18, 19, 20, + 20, 20, 20, 20 }; +static const int8_t fwd_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12, + 12, 12, 12, 12 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = { - 16, // .txfm_size - 8, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - fwd_shift_dct_dct_16, // .shift - fwd_stage_range_col_dct_dct_16, // .stage_range_col - fwd_stage_range_row_dct_dct_16, // .stage_range_row - fwd_cos_bit_col_dct_dct_16, // .cos_bit_col - fwd_cos_bit_row_dct_dct_16, // .cos_bit_row - TXFM_TYPE_DCT16, // .txfm_type_col - TXFM_TYPE_DCT16}; // .txfm_type_row + 16, // .txfm_size + 8, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + fwd_shift_dct_dct_16, // .shift + fwd_stage_range_col_dct_dct_16, // .stage_range_col + fwd_stage_range_row_dct_dct_16, // .stage_range_row + fwd_cos_bit_col_dct_dct_16, // .cos_bit_col + fwd_cos_bit_row_dct_dct_16, // .cos_bit_row + TXFM_TYPE_DCT16, // .txfm_type_col + TXFM_TYPE_DCT16 +}; // .txfm_type_row // ---------------- config fwd_dct_dct_32 ---------------- -static const int8_t fwd_shift_dct_dct_32[3] = {2, -4, 0}; -static const int8_t fwd_stage_range_col_dct_dct_32[10] = {15, 16, 17, 18, 19, - 20, 20, 20, 20, 20}; -static const int8_t fwd_stage_range_row_dct_dct_32[10] = {16, 17, 18, 19, 20, - 20, 20, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_dct_dct_32[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; -static const int8_t fwd_cos_bit_row_dct_dct_32[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; +static const int8_t fwd_shift_dct_dct_32[3] = { 2, -4, 0 }; +static const int8_t fwd_stage_range_col_dct_dct_32[10] = { 15, 16, 17, 18, 19, + 20, 20, 20, 20, 20 }; +static const int8_t fwd_stage_range_row_dct_dct_32[10] = { 16, 17, 18, 19, 20, + 20, 20, 20, 20, 20 }; +static const int8_t fwd_cos_bit_col_dct_dct_32[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; +static const int8_t fwd_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = { - 32, // .txfm_size - 10, // .stage_num_col - 10, // .stage_num_row - // 1, // .log_scale - fwd_shift_dct_dct_32, // .shift - fwd_stage_range_col_dct_dct_32, // .stage_range_col - fwd_stage_range_row_dct_dct_32, // .stage_range_row - fwd_cos_bit_col_dct_dct_32, // .cos_bit_col - fwd_cos_bit_row_dct_dct_32, // .cos_bit_row - TXFM_TYPE_DCT32, // .txfm_type_col - TXFM_TYPE_DCT32}; // .txfm_type_row + 32, // .txfm_size + 10, // .stage_num_col + 10, // .stage_num_row + // 1, // .log_scale + fwd_shift_dct_dct_32, // .shift + fwd_stage_range_col_dct_dct_32, // .stage_range_col + fwd_stage_range_row_dct_dct_32, // .stage_range_row + fwd_cos_bit_col_dct_dct_32, // .cos_bit_col + fwd_cos_bit_row_dct_dct_32, // .cos_bit_row + TXFM_TYPE_DCT32, // .txfm_type_col + TXFM_TYPE_DCT32 +}; // .txfm_type_row // ---------------- config fwd_dct_dct_64 ---------------- -static const int8_t fwd_shift_dct_dct_64[3] = {2, -2, -2}; +static const int8_t fwd_shift_dct_dct_64[3] = { 2, -2, -2 }; static const int8_t fwd_stage_range_col_dct_dct_64[12] = { - 13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19}; + 13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19 +}; static const int8_t fwd_stage_range_row_dct_dct_64[12] = { - 17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22}; -static const int8_t fwd_cos_bit_col_dct_dct_64[12] = {15, 15, 15, 15, 15, 14, - 13, 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_dct_64[12] = {15, 14, 13, 12, 11, 10, - 10, 10, 10, 10, 10, 10}; + 17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22 +}; +static const int8_t fwd_cos_bit_col_dct_dct_64[12] = { 15, 15, 15, 15, 15, 14, + 13, 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_dct_64[12] = { 15, 14, 13, 12, 11, 10, + 10, 10, 10, 10, 10, 10 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_64 = { - 64, // .txfm_size - 12, // .stage_num_col - 12, // .stage_num_row - fwd_shift_dct_dct_64, // .shift - fwd_stage_range_col_dct_dct_64, // .stage_range_col - fwd_stage_range_row_dct_dct_64, // .stage_range_row - fwd_cos_bit_col_dct_dct_64, // .cos_bit_col - fwd_cos_bit_row_dct_dct_64, // .cos_bit_row - TXFM_TYPE_DCT64, // .txfm_type_col - TXFM_TYPE_DCT64}; // .txfm_type_row + 64, // .txfm_size + 12, // .stage_num_col + 12, // .stage_num_row + fwd_shift_dct_dct_64, // .shift + fwd_stage_range_col_dct_dct_64, // .stage_range_col + fwd_stage_range_row_dct_dct_64, // .stage_range_row + fwd_cos_bit_col_dct_dct_64, // .cos_bit_col + fwd_cos_bit_row_dct_dct_64, // .cos_bit_row + TXFM_TYPE_DCT64, // .txfm_type_col + TXFM_TYPE_DCT64 +}; // .txfm_type_row // ---------------- config fwd_dct_adst_4 ---------------- -static const int8_t fwd_shift_dct_adst_4[3] = {2, 0, 0}; -static const int8_t fwd_stage_range_col_dct_adst_4[4] = {15, 16, 17, 17}; -static const int8_t fwd_stage_range_row_dct_adst_4[6] = {17, 17, 17, - 18, 18, 18}; -static const int8_t fwd_cos_bit_col_dct_adst_4[4] = {13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_adst_4[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t fwd_shift_dct_adst_4[3] = { 2, 0, 0 }; +static const int8_t fwd_stage_range_col_dct_adst_4[4] = { 15, 16, 17, 17 }; +static const int8_t fwd_stage_range_row_dct_adst_4[6] = { + 17, 17, 17, 18, 18, 18 +}; +static const int8_t fwd_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = { - 4, // .txfm_size - 4, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - fwd_shift_dct_adst_4, // .shift - fwd_stage_range_col_dct_adst_4, // .stage_range_col - fwd_stage_range_row_dct_adst_4, // .stage_range_row - fwd_cos_bit_col_dct_adst_4, // .cos_bit_col - fwd_cos_bit_row_dct_adst_4, // .cos_bit_row - TXFM_TYPE_DCT4, // .txfm_type_col - TXFM_TYPE_ADST4}; // .txfm_type_row + 4, // .txfm_size + 4, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + fwd_shift_dct_adst_4, // .shift + fwd_stage_range_col_dct_adst_4, // .stage_range_col + fwd_stage_range_row_dct_adst_4, // .stage_range_row + fwd_cos_bit_col_dct_adst_4, // .cos_bit_col + fwd_cos_bit_row_dct_adst_4, // .cos_bit_row + TXFM_TYPE_DCT4, // .txfm_type_col + TXFM_TYPE_ADST4 +}; // .txfm_type_row // ---------------- config fwd_dct_adst_8 ---------------- -static const int8_t fwd_shift_dct_adst_8[3] = {2, -1, 0}; -static const int8_t fwd_stage_range_col_dct_adst_8[6] = {15, 16, 17, - 18, 18, 18}; -static const int8_t fwd_stage_range_row_dct_adst_8[8] = {17, 17, 17, 18, - 18, 19, 19, 19}; -static const int8_t fwd_cos_bit_col_dct_adst_8[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_adst_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; +static const int8_t fwd_shift_dct_adst_8[3] = { 2, -1, 0 }; +static const int8_t fwd_stage_range_col_dct_adst_8[6] = { + 15, 16, 17, 18, 18, 18 +}; +static const int8_t fwd_stage_range_row_dct_adst_8[8] = { 17, 17, 17, 18, + 18, 19, 19, 19 }; +static const int8_t fwd_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = { - 8, // .txfm_size - 6, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - fwd_shift_dct_adst_8, // .shift - fwd_stage_range_col_dct_adst_8, // .stage_range_col - fwd_stage_range_row_dct_adst_8, // .stage_range_row - fwd_cos_bit_col_dct_adst_8, // .cos_bit_col - fwd_cos_bit_row_dct_adst_8, // .cos_bit_row - TXFM_TYPE_DCT8, // .txfm_type_col - TXFM_TYPE_ADST8}; // .txfm_type_row + 8, // .txfm_size + 6, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + fwd_shift_dct_adst_8, // .shift + fwd_stage_range_col_dct_adst_8, // .stage_range_col + fwd_stage_range_row_dct_adst_8, // .stage_range_row + fwd_cos_bit_col_dct_adst_8, // .cos_bit_col + fwd_cos_bit_row_dct_adst_8, // .cos_bit_row + TXFM_TYPE_DCT8, // .txfm_type_col + TXFM_TYPE_ADST8 +}; // .txfm_type_row // ---------------- config fwd_dct_adst_16 ---------------- -static const int8_t fwd_shift_dct_adst_16[3] = {2, -2, 0}; -static const int8_t fwd_stage_range_col_dct_adst_16[8] = {15, 16, 17, 18, - 19, 19, 19, 19}; -static const int8_t fwd_stage_range_row_dct_adst_16[10] = {17, 17, 17, 18, 18, - 19, 19, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_dct_adst_16[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_dct_adst_16[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; +static const int8_t fwd_shift_dct_adst_16[3] = { 2, -2, 0 }; +static const int8_t fwd_stage_range_col_dct_adst_16[8] = { 15, 16, 17, 18, + 19, 19, 19, 19 }; +static const int8_t fwd_stage_range_row_dct_adst_16[10] = { + 17, 17, 17, 18, 18, 19, 19, 20, 20, 20 +}; +static const int8_t fwd_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = { - 16, // .txfm_size - 8, // .stage_num_col - 10, // .stage_num_row - // 0, // .log_scale - fwd_shift_dct_adst_16, // .shift - fwd_stage_range_col_dct_adst_16, // .stage_range_col - fwd_stage_range_row_dct_adst_16, // .stage_range_row - fwd_cos_bit_col_dct_adst_16, // .cos_bit_col - fwd_cos_bit_row_dct_adst_16, // .cos_bit_row - TXFM_TYPE_DCT16, // .txfm_type_col - TXFM_TYPE_ADST16}; // .txfm_type_row + 16, // .txfm_size + 8, // .stage_num_col + 10, // .stage_num_row + // 0, // .log_scale + fwd_shift_dct_adst_16, // .shift + fwd_stage_range_col_dct_adst_16, // .stage_range_col + fwd_stage_range_row_dct_adst_16, // .stage_range_row + fwd_cos_bit_col_dct_adst_16, // .cos_bit_col + fwd_cos_bit_row_dct_adst_16, // .cos_bit_row + TXFM_TYPE_DCT16, // .txfm_type_col + TXFM_TYPE_ADST16 +}; // .txfm_type_row // ---------------- config fwd_dct_adst_32 ---------------- -static const int8_t fwd_shift_dct_adst_32[3] = {2, -4, 0}; -static const int8_t fwd_stage_range_col_dct_adst_32[10] = {15, 16, 17, 18, 19, - 20, 20, 20, 20, 20}; +static const int8_t fwd_shift_dct_adst_32[3] = { 2, -4, 0 }; +static const int8_t fwd_stage_range_col_dct_adst_32[10] = { + 15, 16, 17, 18, 19, 20, 20, 20, 20, 20 +}; static const int8_t fwd_stage_range_row_dct_adst_32[12] = { - 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_dct_adst_32[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; -static const int8_t fwd_cos_bit_row_dct_adst_32[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; + 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20 +}; +static const int8_t fwd_cos_bit_col_dct_adst_32[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; +static const int8_t fwd_cos_bit_row_dct_adst_32[12] = { + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 +}; static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = { - 32, // .txfm_size - 10, // .stage_num_col - 12, // .stage_num_row - // 1, // .log_scale - fwd_shift_dct_adst_32, // .shift - fwd_stage_range_col_dct_adst_32, // .stage_range_col - fwd_stage_range_row_dct_adst_32, // .stage_range_row - fwd_cos_bit_col_dct_adst_32, // .cos_bit_col - fwd_cos_bit_row_dct_adst_32, // .cos_bit_row - TXFM_TYPE_DCT32, // .txfm_type_col - TXFM_TYPE_ADST32}; // .txfm_type_row + 32, // .txfm_size + 10, // .stage_num_col + 12, // .stage_num_row + // 1, // .log_scale + fwd_shift_dct_adst_32, // .shift + fwd_stage_range_col_dct_adst_32, // .stage_range_col + fwd_stage_range_row_dct_adst_32, // .stage_range_row + fwd_cos_bit_col_dct_adst_32, // .cos_bit_col + fwd_cos_bit_row_dct_adst_32, // .cos_bit_row + TXFM_TYPE_DCT32, // .txfm_type_col + TXFM_TYPE_ADST32 +}; // .txfm_type_row // ---------------- config fwd_adst_adst_4 ---------------- -static const int8_t fwd_shift_adst_adst_4[3] = {2, 0, 0}; -static const int8_t fwd_stage_range_col_adst_adst_4[6] = {15, 15, 16, - 17, 17, 17}; -static const int8_t fwd_stage_range_row_adst_adst_4[6] = {17, 17, 17, - 18, 18, 18}; -static const int8_t fwd_cos_bit_col_adst_adst_4[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_adst_adst_4[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t fwd_shift_adst_adst_4[3] = { 2, 0, 0 }; +static const int8_t fwd_stage_range_col_adst_adst_4[6] = { 15, 15, 16, + 17, 17, 17 }; +static const int8_t fwd_stage_range_row_adst_adst_4[6] = { 17, 17, 17, + 18, 18, 18 }; +static const int8_t fwd_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = { - 4, // .txfm_size - 6, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - fwd_shift_adst_adst_4, // .shift - fwd_stage_range_col_adst_adst_4, // .stage_range_col - fwd_stage_range_row_adst_adst_4, // .stage_range_row - fwd_cos_bit_col_adst_adst_4, // .cos_bit_col - fwd_cos_bit_row_adst_adst_4, // .cos_bit_row - TXFM_TYPE_ADST4, // .txfm_type_col - TXFM_TYPE_ADST4}; // .txfm_type_row + 4, // .txfm_size + 6, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + fwd_shift_adst_adst_4, // .shift + fwd_stage_range_col_adst_adst_4, // .stage_range_col + fwd_stage_range_row_adst_adst_4, // .stage_range_row + fwd_cos_bit_col_adst_adst_4, // .cos_bit_col + fwd_cos_bit_row_adst_adst_4, // .cos_bit_row + TXFM_TYPE_ADST4, // .txfm_type_col + TXFM_TYPE_ADST4 +}; // .txfm_type_row // ---------------- config fwd_adst_adst_8 ---------------- -static const int8_t fwd_shift_adst_adst_8[3] = {2, -1, 0}; -static const int8_t fwd_stage_range_col_adst_adst_8[8] = {15, 15, 16, 17, - 17, 18, 18, 18}; -static const int8_t fwd_stage_range_row_adst_adst_8[8] = {17, 17, 17, 18, - 18, 19, 19, 19}; -static const int8_t fwd_cos_bit_col_adst_adst_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_adst_adst_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; +static const int8_t fwd_shift_adst_adst_8[3] = { 2, -1, 0 }; +static const int8_t fwd_stage_range_col_adst_adst_8[8] = { 15, 15, 16, 17, + 17, 18, 18, 18 }; +static const int8_t fwd_stage_range_row_adst_adst_8[8] = { 17, 17, 17, 18, + 18, 19, 19, 19 }; +static const int8_t fwd_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = { - 8, // .txfm_size - 8, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - fwd_shift_adst_adst_8, // .shift - fwd_stage_range_col_adst_adst_8, // .stage_range_col - fwd_stage_range_row_adst_adst_8, // .stage_range_row - fwd_cos_bit_col_adst_adst_8, // .cos_bit_col - fwd_cos_bit_row_adst_adst_8, // .cos_bit_row - TXFM_TYPE_ADST8, // .txfm_type_col - TXFM_TYPE_ADST8}; // .txfm_type_row + 8, // .txfm_size + 8, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + fwd_shift_adst_adst_8, // .shift + fwd_stage_range_col_adst_adst_8, // .stage_range_col + fwd_stage_range_row_adst_adst_8, // .stage_range_row + fwd_cos_bit_col_adst_adst_8, // .cos_bit_col + fwd_cos_bit_row_adst_adst_8, // .cos_bit_row + TXFM_TYPE_ADST8, // .txfm_type_col + TXFM_TYPE_ADST8 +}; // .txfm_type_row // ---------------- config fwd_adst_adst_16 ---------------- -static const int8_t fwd_shift_adst_adst_16[3] = {2, -2, 0}; -static const int8_t fwd_stage_range_col_adst_adst_16[10] = {15, 15, 16, 17, 17, - 18, 18, 19, 19, 19}; -static const int8_t fwd_stage_range_row_adst_adst_16[10] = {17, 17, 17, 18, 18, - 19, 19, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_adst_adst_16[10] = {13, 13, 13, 13, 13, - 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_adst_adst_16[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; +static const int8_t fwd_shift_adst_adst_16[3] = { 2, -2, 0 }; +static const int8_t fwd_stage_range_col_adst_adst_16[10] = { + 15, 15, 16, 17, 17, 18, 18, 19, 19, 19 +}; +static const int8_t fwd_stage_range_row_adst_adst_16[10] = { + 17, 17, 17, 18, 18, 19, 19, 20, 20, 20 +}; +static const int8_t fwd_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = { - 16, // .txfm_size - 10, // .stage_num_col - 10, // .stage_num_row - // 0, // .log_scale - fwd_shift_adst_adst_16, // .shift - fwd_stage_range_col_adst_adst_16, // .stage_range_col - fwd_stage_range_row_adst_adst_16, // .stage_range_row - fwd_cos_bit_col_adst_adst_16, // .cos_bit_col - fwd_cos_bit_row_adst_adst_16, // .cos_bit_row - TXFM_TYPE_ADST16, // .txfm_type_col - TXFM_TYPE_ADST16}; // .txfm_type_row + 16, // .txfm_size + 10, // .stage_num_col + 10, // .stage_num_row + // 0, // .log_scale + fwd_shift_adst_adst_16, // .shift + fwd_stage_range_col_adst_adst_16, // .stage_range_col + fwd_stage_range_row_adst_adst_16, // .stage_range_row + fwd_cos_bit_col_adst_adst_16, // .cos_bit_col + fwd_cos_bit_row_adst_adst_16, // .cos_bit_row + TXFM_TYPE_ADST16, // .txfm_type_col + TXFM_TYPE_ADST16 +}; // .txfm_type_row // ---------------- config fwd_adst_adst_32 ---------------- -static const int8_t fwd_shift_adst_adst_32[3] = {2, -4, 0}; +static const int8_t fwd_shift_adst_adst_32[3] = { 2, -4, 0 }; static const int8_t fwd_stage_range_col_adst_adst_32[12] = { - 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20}; + 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20 +}; static const int8_t fwd_stage_range_row_adst_adst_32[12] = { - 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_adst_adst_32[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; -static const int8_t fwd_cos_bit_row_adst_adst_32[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; + 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20 +}; +static const int8_t fwd_cos_bit_col_adst_adst_32[12] = { + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 +}; +static const int8_t fwd_cos_bit_row_adst_adst_32[12] = { + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 +}; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = { - 32, // .txfm_size - 12, // .stage_num_col - 12, // .stage_num_row - // 1, // .log_scale - fwd_shift_adst_adst_32, // .shift - fwd_stage_range_col_adst_adst_32, // .stage_range_col - fwd_stage_range_row_adst_adst_32, // .stage_range_row - fwd_cos_bit_col_adst_adst_32, // .cos_bit_col - fwd_cos_bit_row_adst_adst_32, // .cos_bit_row - TXFM_TYPE_ADST32, // .txfm_type_col - TXFM_TYPE_ADST32}; // .txfm_type_row + 32, // .txfm_size + 12, // .stage_num_col + 12, // .stage_num_row + // 1, // .log_scale + fwd_shift_adst_adst_32, // .shift + fwd_stage_range_col_adst_adst_32, // .stage_range_col + fwd_stage_range_row_adst_adst_32, // .stage_range_row + fwd_cos_bit_col_adst_adst_32, // .cos_bit_col + fwd_cos_bit_row_adst_adst_32, // .cos_bit_row + TXFM_TYPE_ADST32, // .txfm_type_col + TXFM_TYPE_ADST32 +}; // .txfm_type_row // ---------------- config fwd_adst_dct_4 ---------------- -static const int8_t fwd_shift_adst_dct_4[3] = {2, 0, 0}; -static const int8_t fwd_stage_range_col_adst_dct_4[6] = {15, 15, 16, - 17, 17, 17}; -static const int8_t fwd_stage_range_row_adst_dct_4[4] = {17, 18, 18, 18}; -static const int8_t fwd_cos_bit_col_adst_dct_4[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_adst_dct_4[4] = {13, 13, 13, 13}; +static const int8_t fwd_shift_adst_dct_4[3] = { 2, 0, 0 }; +static const int8_t fwd_stage_range_col_adst_dct_4[6] = { + 15, 15, 16, 17, 17, 17 +}; +static const int8_t fwd_stage_range_row_adst_dct_4[4] = { 17, 18, 18, 18 }; +static const int8_t fwd_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = { - 4, // .txfm_size - 6, // .stage_num_col - 4, // .stage_num_row - // 0, // .log_scale - fwd_shift_adst_dct_4, // .shift - fwd_stage_range_col_adst_dct_4, // .stage_range_col - fwd_stage_range_row_adst_dct_4, // .stage_range_row - fwd_cos_bit_col_adst_dct_4, // .cos_bit_col - fwd_cos_bit_row_adst_dct_4, // .cos_bit_row - TXFM_TYPE_ADST4, // .txfm_type_col - TXFM_TYPE_DCT4}; // .txfm_type_row + 4, // .txfm_size + 6, // .stage_num_col + 4, // .stage_num_row + // 0, // .log_scale + fwd_shift_adst_dct_4, // .shift + fwd_stage_range_col_adst_dct_4, // .stage_range_col + fwd_stage_range_row_adst_dct_4, // .stage_range_row + fwd_cos_bit_col_adst_dct_4, // .cos_bit_col + fwd_cos_bit_row_adst_dct_4, // .cos_bit_row + TXFM_TYPE_ADST4, // .txfm_type_col + TXFM_TYPE_DCT4 +}; // .txfm_type_row // ---------------- config fwd_adst_dct_8 ---------------- -static const int8_t fwd_shift_adst_dct_8[3] = {2, -1, 0}; -static const int8_t fwd_stage_range_col_adst_dct_8[8] = {15, 15, 16, 17, - 17, 18, 18, 18}; -static const int8_t fwd_stage_range_row_adst_dct_8[6] = {17, 18, 19, - 19, 19, 19}; -static const int8_t fwd_cos_bit_col_adst_dct_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_adst_dct_8[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t fwd_shift_adst_dct_8[3] = { 2, -1, 0 }; +static const int8_t fwd_stage_range_col_adst_dct_8[8] = { 15, 15, 16, 17, + 17, 18, 18, 18 }; +static const int8_t fwd_stage_range_row_adst_dct_8[6] = { + 17, 18, 19, 19, 19, 19 +}; +static const int8_t fwd_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = { - 8, // .txfm_size - 8, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - fwd_shift_adst_dct_8, // .shift - fwd_stage_range_col_adst_dct_8, // .stage_range_col - fwd_stage_range_row_adst_dct_8, // .stage_range_row - fwd_cos_bit_col_adst_dct_8, // .cos_bit_col - fwd_cos_bit_row_adst_dct_8, // .cos_bit_row - TXFM_TYPE_ADST8, // .txfm_type_col - TXFM_TYPE_DCT8}; // .txfm_type_row + 8, // .txfm_size + 8, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + fwd_shift_adst_dct_8, // .shift + fwd_stage_range_col_adst_dct_8, // .stage_range_col + fwd_stage_range_row_adst_dct_8, // .stage_range_row + fwd_cos_bit_col_adst_dct_8, // .cos_bit_col + fwd_cos_bit_row_adst_dct_8, // .cos_bit_row + TXFM_TYPE_ADST8, // .txfm_type_col + TXFM_TYPE_DCT8 +}; // .txfm_type_row // ---------------- config fwd_adst_dct_16 ---------------- -static const int8_t fwd_shift_adst_dct_16[3] = {2, -2, 0}; -static const int8_t fwd_stage_range_col_adst_dct_16[10] = {15, 15, 16, 17, 17, - 18, 18, 19, 19, 19}; -static const int8_t fwd_stage_range_row_adst_dct_16[8] = {17, 18, 19, 20, - 20, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_adst_dct_16[10] = {13, 13, 13, 13, 13, - 13, 13, 13, 13, 13}; -static const int8_t fwd_cos_bit_row_adst_dct_16[8] = {12, 12, 12, 12, - 12, 12, 12, 12}; +static const int8_t fwd_shift_adst_dct_16[3] = { 2, -2, 0 }; +static const int8_t fwd_stage_range_col_adst_dct_16[10] = { + 15, 15, 16, 17, 17, 18, 18, 19, 19, 19 +}; +static const int8_t fwd_stage_range_row_adst_dct_16[8] = { 17, 18, 19, 20, + 20, 20, 20, 20 }; +static const int8_t fwd_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13 }; +static const int8_t fwd_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12, + 12, 12, 12, 12 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = { - 16, // .txfm_size - 10, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - fwd_shift_adst_dct_16, // .shift - fwd_stage_range_col_adst_dct_16, // .stage_range_col - fwd_stage_range_row_adst_dct_16, // .stage_range_row - fwd_cos_bit_col_adst_dct_16, // .cos_bit_col - fwd_cos_bit_row_adst_dct_16, // .cos_bit_row - TXFM_TYPE_ADST16, // .txfm_type_col - TXFM_TYPE_DCT16}; // .txfm_type_row + 16, // .txfm_size + 10, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + fwd_shift_adst_dct_16, // .shift + fwd_stage_range_col_adst_dct_16, // .stage_range_col + fwd_stage_range_row_adst_dct_16, // .stage_range_row + fwd_cos_bit_col_adst_dct_16, // .cos_bit_col + fwd_cos_bit_row_adst_dct_16, // .cos_bit_row + TXFM_TYPE_ADST16, // .txfm_type_col + TXFM_TYPE_DCT16 +}; // .txfm_type_row // ---------------- config fwd_adst_dct_32 ---------------- -static const int8_t fwd_shift_adst_dct_32[3] = {2, -4, 0}; +static const int8_t fwd_shift_adst_dct_32[3] = { 2, -4, 0 }; static const int8_t fwd_stage_range_col_adst_dct_32[12] = { - 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20}; -static const int8_t fwd_stage_range_row_adst_dct_32[10] = {16, 17, 18, 19, 20, - 20, 20, 20, 20, 20}; -static const int8_t fwd_cos_bit_col_adst_dct_32[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; -static const int8_t fwd_cos_bit_row_adst_dct_32[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; + 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20 +}; +static const int8_t fwd_stage_range_row_adst_dct_32[10] = { + 16, 17, 18, 19, 20, 20, 20, 20, 20, 20 +}; +static const int8_t fwd_cos_bit_col_adst_dct_32[12] = { + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 +}; +static const int8_t fwd_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = { - 32, // .txfm_size - 12, // .stage_num_col - 10, // .stage_num_row - // 1, // .log_scale - fwd_shift_adst_dct_32, // .shift - fwd_stage_range_col_adst_dct_32, // .stage_range_col - fwd_stage_range_row_adst_dct_32, // .stage_range_row - fwd_cos_bit_col_adst_dct_32, // .cos_bit_col - fwd_cos_bit_row_adst_dct_32, // .cos_bit_row - TXFM_TYPE_ADST32, // .txfm_type_col - TXFM_TYPE_DCT32}; // .txfm_type_row + 32, // .txfm_size + 12, // .stage_num_col + 10, // .stage_num_row + // 1, // .log_scale + fwd_shift_adst_dct_32, // .shift + fwd_stage_range_col_adst_dct_32, // .stage_range_col + fwd_stage_range_row_adst_dct_32, // .stage_range_row + fwd_cos_bit_col_adst_dct_32, // .cos_bit_col + fwd_cos_bit_row_adst_dct_32, // .cos_bit_row + TXFM_TYPE_ADST32, // .txfm_type_col + TXFM_TYPE_DCT32 +}; // .txfm_type_row #endif // VP10_FWD_TXFM2D_CFG_H_ diff --git a/vp10/common/vp10_inv_txfm.c b/vp10/common/vp10_inv_txfm.c index d41f389b2ee79dd9edf2894efbb8931b8aca8b2a..f3bcf8bb8a4445d9f65c694ac20c330589e9c495 100644 --- a/vp10/common/vp10_inv_txfm.c +++ b/vp10/common/vp10_inv_txfm.c @@ -16,8 +16,8 @@ #include "vp10/common/vp10_inv_txfm.h" void vp10_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { -/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, - 0.5 shifts per pixel. */ + /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, + 0.5 shifts per pixel. */ int i; tran_low_t output[16]; tran_high_t a1, b1, c1, d1, e1; @@ -67,8 +67,7 @@ void vp10_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vp10_iwht4x4_1_add_c(const tran_low_t *in, - uint8_t *dest, +void vp10_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { int i; tran_high_t a1, e1; @@ -130,8 +129,7 @@ void vp10_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { // Columns for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; + for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; vp10_idct4_c(temp_in, temp_out); for (j = 0; j < 4; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], @@ -141,7 +139,7 @@ void vp10_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } void vp10_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, - int dest_stride) { + int dest_stride) { int i; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); @@ -226,8 +224,7 @@ void vp10_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { // Then transform columns for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vp10_idct8_c(temp_in, temp_out); for (j = 0; j < 8; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], @@ -243,8 +240,7 @@ void vp10_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { - for (i = 0; i < 8; ++i) - dest[i] = clip_pixel_add(dest[i], a1); + for (i = 0; i < 8; ++i) dest[i] = clip_pixel_add(dest[i], a1); dest += stride; } } @@ -299,20 +295,20 @@ void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output) { tran_high_t x7 = input[6]; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { - output[0] = output[1] = output[2] = output[3] = output[4] - = output[5] = output[6] = output[7] = 0; + output[0] = output[1] = output[2] = output[3] = output[4] = output[5] = + output[6] = output[7] = 0; return; } // stage 1 - s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); - s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); + s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); + s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); - s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); - s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); + s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); + s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); @@ -379,8 +375,7 @@ void vp10_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { // Then transform columns for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vp10_idct8_c(temp_in, temp_out); for (j = 0; j < 8; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], @@ -394,22 +389,22 @@ void vp10_idct16_c(const tran_low_t *input, tran_low_t *output) { tran_high_t temp1, temp2; // stage 1 - step1[0] = input[0/2]; - step1[1] = input[16/2]; - step1[2] = input[8/2]; - step1[3] = input[24/2]; - step1[4] = input[4/2]; - step1[5] = input[20/2]; - step1[6] = input[12/2]; - step1[7] = input[28/2]; - step1[8] = input[2/2]; - step1[9] = input[18/2]; - step1[10] = input[10/2]; - step1[11] = input[26/2]; - step1[12] = input[6/2]; - step1[13] = input[22/2]; - step1[14] = input[14/2]; - step1[15] = input[30/2]; + step1[0] = input[0 / 2]; + step1[1] = input[16 / 2]; + step1[2] = input[8 / 2]; + step1[3] = input[24 / 2]; + step1[4] = input[4 / 2]; + step1[5] = input[20 / 2]; + step1[6] = input[12 / 2]; + step1[7] = input[28 / 2]; + step1[8] = input[2 / 2]; + step1[9] = input[18 / 2]; + step1[10] = input[10 / 2]; + step1[11] = input[26 / 2]; + step1[12] = input[6 / 2]; + step1[13] = input[22 / 2]; + step1[14] = input[14 / 2]; + step1[15] = input[30 / 2]; // stage 2 step2[0] = step1[0]; @@ -555,7 +550,7 @@ void vp10_idct16_c(const tran_low_t *input, tran_low_t *output) { } void vp10_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { + int stride) { tran_low_t out[16 * 16]; tran_low_t *outptr = out; int i, j; @@ -570,8 +565,7 @@ void vp10_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, // Then transform columns for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vp10_idct16_c(temp_in, temp_out); for (j = 0; j < 16; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], @@ -601,21 +595,20 @@ void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) { tran_high_t x14 = input[1]; tran_high_t x15 = input[14]; - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 - | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { - output[0] = output[1] = output[2] = output[3] = output[4] - = output[5] = output[6] = output[7] = output[8] - = output[9] = output[10] = output[11] = output[12] - = output[13] = output[14] = output[15] = 0; + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | + x13 | x14 | x15)) { + output[0] = output[1] = output[2] = output[3] = output[4] = output[5] = + output[6] = output[7] = output[8] = output[9] = output[10] = + output[11] = output[12] = output[13] = output[14] = output[15] = 0; return; } // stage 1 - s0 = x0 * cospi_1_64 + x1 * cospi_31_64; + s0 = x0 * cospi_1_64 + x1 * cospi_31_64; s1 = x0 * cospi_31_64 - x1 * cospi_1_64; - s2 = x2 * cospi_5_64 + x3 * cospi_27_64; + s2 = x2 * cospi_5_64 + x3 * cospi_27_64; s3 = x2 * cospi_27_64 - x3 * cospi_5_64; - s4 = x4 * cospi_9_64 + x5 * cospi_23_64; + s4 = x4 * cospi_9_64 + x5 * cospi_23_64; s5 = x4 * cospi_23_64 - x5 * cospi_9_64; s6 = x6 * cospi_13_64 + x7 * cospi_19_64; s7 = x6 * cospi_19_64 - x7 * cospi_13_64; @@ -624,9 +617,9 @@ void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) { s10 = x10 * cospi_21_64 + x11 * cospi_11_64; s11 = x10 * cospi_11_64 - x11 * cospi_21_64; s12 = x12 * cospi_25_64 + x13 * cospi_7_64; - s13 = x12 * cospi_7_64 - x13 * cospi_25_64; + s13 = x12 * cospi_7_64 - x13 * cospi_25_64; s14 = x14 * cospi_29_64 + x15 * cospi_3_64; - s15 = x14 * cospi_3_64 - x15 * cospi_29_64; + s15 = x14 * cospi_3_64 - x15 * cospi_29_64; x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); @@ -654,14 +647,14 @@ void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) { s5 = x5; s6 = x6; s7 = x7; - s8 = x8 * cospi_4_64 + x9 * cospi_28_64; - s9 = x8 * cospi_28_64 - x9 * cospi_4_64; - s10 = x10 * cospi_20_64 + x11 * cospi_12_64; - s11 = x10 * cospi_12_64 - x11 * cospi_20_64; - s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; - s13 = x12 * cospi_4_64 + x13 * cospi_28_64; - s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; - s15 = x14 * cospi_20_64 + x15 * cospi_12_64; + s8 = x8 * cospi_4_64 + x9 * cospi_28_64; + s9 = x8 * cospi_28_64 - x9 * cospi_4_64; + s10 = x10 * cospi_20_64 + x11 * cospi_12_64; + s11 = x10 * cospi_12_64 - x11 * cospi_20_64; + s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; + s13 = x12 * cospi_4_64 + x13 * cospi_28_64; + s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; + s15 = x14 * cospi_20_64 + x15 * cospi_12_64; x0 = WRAPLOW(s0 + s4); x1 = WRAPLOW(s1 + s5); @@ -685,18 +678,18 @@ void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) { s1 = x1; s2 = x2; s3 = x3; - s4 = x4 * cospi_8_64 + x5 * cospi_24_64; + s4 = x4 * cospi_8_64 + x5 * cospi_24_64; s5 = x4 * cospi_24_64 - x5 * cospi_8_64; - s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; - s7 = x6 * cospi_8_64 + x7 * cospi_24_64; + s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; + s7 = x6 * cospi_8_64 + x7 * cospi_24_64; s8 = x8; s9 = x9; s10 = x10; s11 = x11; - s12 = x12 * cospi_8_64 + x13 * cospi_24_64; + s12 = x12 * cospi_8_64 + x13 * cospi_24_64; s13 = x12 * cospi_24_64 - x13 * cospi_8_64; - s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; - s15 = x14 * cospi_8_64 + x15 * cospi_24_64; + s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; + s15 = x14 * cospi_8_64 + x15 * cospi_24_64; x0 = WRAPLOW(s0 + s2); x1 = WRAPLOW(s1 + s3); @@ -716,13 +709,13 @@ void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) { x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); // stage 4 - s2 = (- cospi_16_64) * (x2 + x3); + s2 = (-cospi_16_64) * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (- x6 + x7); + s7 = cospi_16_64 * (-x6 + x7); s10 = cospi_16_64 * (x10 + x11); - s11 = cospi_16_64 * (- x10 + x11); - s14 = (- cospi_16_64) * (x14 + x15); + s11 = cospi_16_64 * (-x10 + x11); + s14 = (-cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); x2 = WRAPLOW(dct_const_round_shift(s2)); @@ -753,7 +746,7 @@ void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) { } void vp10_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { + int stride) { tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; int i, j; @@ -769,19 +762,16 @@ void vp10_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, // Then transform columns for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vp10_idct16_c(temp_in, temp_out); for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add( - dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } -void vp10_idct16x16_1_add_c(const tran_low_t *input, - uint8_t *dest, +void vp10_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; @@ -789,8 +779,7 @@ void vp10_idct16x16_1_add_c(const tran_low_t *input, out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { - for (i = 0; i < 16; ++i) - dest[i] = clip_pixel_add(dest[i], a1); + for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1); dest += stride; } } @@ -1163,7 +1152,7 @@ void vp10_idct32_c(const tran_low_t *input, tran_low_t *output) { } void vp10_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { + int stride) { tran_low_t out[32 * 32]; tran_low_t *outptr = out; int i, j; @@ -1172,8 +1161,7 @@ void vp10_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, // Rows for (i = 0; i < 32; ++i) { int16_t zero_coeff[16]; - for (j = 0; j < 16; ++j) - zero_coeff[j] = input[2 * j] | input[2 * j + 1]; + for (j = 0; j < 16; ++j) zero_coeff[j] = input[2 * j] | input[2 * j + 1]; for (j = 0; j < 8; ++j) zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; for (j = 0; j < 4; ++j) @@ -1191,8 +1179,7 @@ void vp10_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, // Columns for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; + for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; vp10_idct32_c(temp_in, temp_out); for (j = 0; j < 32; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], @@ -1202,8 +1189,8 @@ void vp10_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, } void vp10_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32] = {0}; + int stride) { + tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; int i, j; tran_low_t temp_in[32], temp_out[32]; @@ -1218,19 +1205,16 @@ void vp10_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, // Columns for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; + for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; vp10_idct32_c(temp_in, temp_out); for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add( - dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } -void vp10_idct32x32_1_add_c(const tran_low_t *input, - uint8_t *dest, +void vp10_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; @@ -1240,15 +1224,14 @@ void vp10_idct32x32_1_add_c(const tran_low_t *input, a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { - for (i = 0; i < 32; ++i) - dest[i] = clip_pixel_add(dest[i], a1); + for (i = 0; i < 32; ++i) dest[i] = clip_pixel_add(dest[i], a1); dest += stride; } } #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ int i; @@ -1291,28 +1274,28 @@ void vp10_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, c1 = e1 - c1; a1 -= b1; d1 += c1; - dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], - HIGHBD_WRAPLOW(a1, bd), bd); - dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], - HIGHBD_WRAPLOW(b1, bd), bd); - dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], - HIGHBD_WRAPLOW(c1, bd), bd); - dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], - HIGHBD_WRAPLOW(d1, bd), bd); + dest[stride * 0] = + highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd); + dest[stride * 1] = + highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd); + dest[stride * 2] = + highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd); + dest[stride * 3] = + highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd); ip++; dest++; } } void vp10_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, - int dest_stride, int bd) { + int dest_stride, int bd) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; const tran_low_t *ip = in; tran_low_t *op = tmp; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - (void) bd; + (void)bd; a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; @@ -1324,14 +1307,14 @@ void vp10_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, for (i = 0; i < 4; i++) { e1 = ip[0] >> 1; a1 = ip[0] - e1; - dest[dest_stride * 0] = highbd_clip_pixel_add( - dest[dest_stride * 0], a1, bd); - dest[dest_stride * 1] = highbd_clip_pixel_add( - dest[dest_stride * 1], e1, bd); - dest[dest_stride * 2] = highbd_clip_pixel_add( - dest[dest_stride * 2], e1, bd); - dest[dest_stride * 3] = highbd_clip_pixel_add( - dest[dest_stride * 3], e1, bd); + dest[dest_stride * 0] = + highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd); + dest[dest_stride * 1] = + highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd); + dest[dest_stride * 2] = + highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd); + dest[dest_stride * 3] = + highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd); ip++; dest++; } @@ -1340,7 +1323,7 @@ void vp10_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step[4]; tran_high_t temp1, temp2; - (void) bd; + (void)bd; // stage 1 temp1 = (input[0] + input[2]) * cospi_16_64; temp2 = (input[0] - input[2]) * cospi_16_64; @@ -1359,7 +1342,7 @@ void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { } void vp10_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[4 * 4]; tran_low_t *outptr = out; int i, j; @@ -1375,8 +1358,7 @@ void vp10_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, // Columns for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; + for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; vp10_highbd_idct4_c(temp_in, temp_out, bd); for (j = 0; j < 4; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -1386,11 +1368,11 @@ void vp10_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, - int dest_stride, int bd) { + int dest_stride, int bd) { int i; tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + tran_low_t out = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); @@ -1451,7 +1433,7 @@ void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { } void vp10_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[8 * 8]; tran_low_t *outptr = out; int i, j; @@ -1467,8 +1449,7 @@ void vp10_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, // Then transform columns. for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vp10_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -1478,17 +1459,16 @@ void vp10_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { int i, j; tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + tran_low_t out = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { - for (i = 0; i < 8; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); + for (i = 0; i < 8; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); dest += stride; } } @@ -1500,7 +1480,7 @@ void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t x1 = input[1]; tran_low_t x2 = input[2]; tran_low_t x3 = input[3]; - (void) bd; + (void)bd; if (!(x0 | x1 | x2 | x3)) { memset(output, 0, 4 * sizeof(*output)); @@ -1542,7 +1522,7 @@ void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t x5 = input[4]; tran_low_t x6 = input[1]; tran_low_t x7 = input[6]; - (void) bd; + (void)bd; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { memset(output, 0, 8 * sizeof(*output)); @@ -1550,14 +1530,14 @@ void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { } // stage 1 - s0 = cospi_2_64 * x0 + cospi_30_64 * x1; - s1 = cospi_30_64 * x0 - cospi_2_64 * x1; + s0 = cospi_2_64 * x0 + cospi_30_64 * x1; + s1 = cospi_30_64 * x0 - cospi_2_64 * x1; s2 = cospi_10_64 * x2 + cospi_22_64 * x3; s3 = cospi_22_64 * x2 - cospi_10_64 * x3; s4 = cospi_18_64 * x4 + cospi_14_64 * x5; s5 = cospi_14_64 * x4 - cospi_18_64 * x5; - s6 = cospi_26_64 * x6 + cospi_6_64 * x7; - s7 = cospi_6_64 * x6 - cospi_26_64 * x7; + s6 = cospi_26_64 * x6 + cospi_6_64 * x7; + s7 = cospi_6_64 * x6 - cospi_26_64 * x7; x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd); x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd); @@ -1573,10 +1553,10 @@ void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { s1 = x1; s2 = x2; s3 = x3; - s4 = cospi_8_64 * x4 + cospi_24_64 * x5; - s5 = cospi_24_64 * x4 - cospi_8_64 * x5; - s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; - s7 = cospi_8_64 * x6 + cospi_24_64 * x7; + s4 = cospi_8_64 * x4 + cospi_24_64 * x5; + s5 = cospi_24_64 * x4 - cospi_8_64 * x5; + s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; + s7 = cospi_8_64 * x6 + cospi_24_64 * x7; x0 = HIGHBD_WRAPLOW(s0 + s2, bd); x1 = HIGHBD_WRAPLOW(s1 + s3, bd); @@ -1609,7 +1589,7 @@ void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { } void vp10_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; int i, j; @@ -1625,8 +1605,7 @@ void vp10_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, } // Then transform columns. for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vp10_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -1638,25 +1617,25 @@ void vp10_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[16], step2[16]; tran_high_t temp1, temp2; - (void) bd; + (void)bd; // stage 1 - step1[0] = input[0/2]; - step1[1] = input[16/2]; - step1[2] = input[8/2]; - step1[3] = input[24/2]; - step1[4] = input[4/2]; - step1[5] = input[20/2]; - step1[6] = input[12/2]; - step1[7] = input[28/2]; - step1[8] = input[2/2]; - step1[9] = input[18/2]; - step1[10] = input[10/2]; - step1[11] = input[26/2]; - step1[12] = input[6/2]; - step1[13] = input[22/2]; - step1[14] = input[14/2]; - step1[15] = input[30/2]; + step1[0] = input[0 / 2]; + step1[1] = input[16 / 2]; + step1[2] = input[8 / 2]; + step1[3] = input[24 / 2]; + step1[4] = input[4 / 2]; + step1[5] = input[20 / 2]; + step1[6] = input[12 / 2]; + step1[7] = input[28 / 2]; + step1[8] = input[2 / 2]; + step1[9] = input[18 / 2]; + step1[10] = input[10 / 2]; + step1[11] = input[26 / 2]; + step1[12] = input[6 / 2]; + step1[13] = input[22 / 2]; + step1[14] = input[14 / 2]; + step1[15] = input[30 / 2]; // stage 2 step2[0] = step1[0]; @@ -1802,7 +1781,7 @@ void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { } void vp10_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[16 * 16]; tran_low_t *outptr = out; int i, j; @@ -1818,20 +1797,16 @@ void vp10_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, // Then transform columns. for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vp10_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6), - bd); + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); } } } -void vp10_highbd_iadst16_c(const tran_low_t *input, - tran_low_t *output, +void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; @@ -1852,20 +1827,20 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t x13 = input[12]; tran_low_t x14 = input[1]; tran_low_t x15 = input[14]; - (void) bd; + (void)bd; - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 - | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | + x13 | x14 | x15)) { memset(output, 0, 16 * sizeof(*output)); return; } // stage 1 - s0 = x0 * cospi_1_64 + x1 * cospi_31_64; + s0 = x0 * cospi_1_64 + x1 * cospi_31_64; s1 = x0 * cospi_31_64 - x1 * cospi_1_64; - s2 = x2 * cospi_5_64 + x3 * cospi_27_64; + s2 = x2 * cospi_5_64 + x3 * cospi_27_64; s3 = x2 * cospi_27_64 - x3 * cospi_5_64; - s4 = x4 * cospi_9_64 + x5 * cospi_23_64; + s4 = x4 * cospi_9_64 + x5 * cospi_23_64; s5 = x4 * cospi_23_64 - x5 * cospi_9_64; s6 = x6 * cospi_13_64 + x7 * cospi_19_64; s7 = x6 * cospi_19_64 - x7 * cospi_13_64; @@ -1874,9 +1849,9 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, s10 = x10 * cospi_21_64 + x11 * cospi_11_64; s11 = x10 * cospi_11_64 - x11 * cospi_21_64; s12 = x12 * cospi_25_64 + x13 * cospi_7_64; - s13 = x12 * cospi_7_64 - x13 * cospi_25_64; + s13 = x12 * cospi_7_64 - x13 * cospi_25_64; s14 = x14 * cospi_29_64 + x15 * cospi_3_64; - s15 = x14 * cospi_3_64 - x15 * cospi_29_64; + s15 = x14 * cospi_3_64 - x15 * cospi_29_64; x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd); x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd); @@ -1886,8 +1861,8 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd); x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd); x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd); - x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); - x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); + x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); + x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd); x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd); x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd); @@ -1966,13 +1941,13 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd); // stage 4 - s2 = (- cospi_16_64) * (x2 + x3); + s2 = (-cospi_16_64) * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (-x6 + x7); s10 = cospi_16_64 * (x10 + x11); s11 = cospi_16_64 * (-x10 + x11); - s14 = (- cospi_16_64) * (x14 + x15); + s14 = (-cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); @@ -2003,7 +1978,7 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, } void vp10_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; int i, j; @@ -2020,8 +1995,7 @@ void vp10_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, // Then transform columns. for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vp10_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -2031,27 +2005,26 @@ void vp10_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { int i, j; tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + tran_low_t out = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { - for (i = 0; i < 16; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); + for (i = 0; i < 16; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); dest += stride; } } -static void highbd_idct32_c(const tran_low_t *input, - tran_low_t *output, int bd) { +static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, + int bd) { tran_low_t step1[32], step2[32]; tran_high_t temp1, temp2; - (void) bd; + (void)bd; // stage 1 step1[0] = input[0]; @@ -2417,7 +2390,7 @@ static void highbd_idct32_c(const tran_low_t *input, } void vp10_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[32 * 32]; tran_low_t *outptr = out; int i, j; @@ -2427,8 +2400,7 @@ void vp10_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, // Rows for (i = 0; i < 32; ++i) { tran_low_t zero_coeff[16]; - for (j = 0; j < 16; ++j) - zero_coeff[j] = input[2 * j] | input[2 * j + 1]; + for (j = 0; j < 16; ++j) zero_coeff[j] = input[2 * j] | input[2 * j + 1]; for (j = 0; j < 8; ++j) zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; for (j = 0; j < 4; ++j) @@ -2446,8 +2418,7 @@ void vp10_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, // Columns for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; + for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; highbd_idct32_c(temp_in, temp_out, bd); for (j = 0; j < 32; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -2457,8 +2428,8 @@ void vp10_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[32 * 32] = {0}; + int stride, int bd) { + tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; int i, j; tran_low_t temp_in[32], temp_out[32]; @@ -2473,8 +2444,7 @@ void vp10_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, } // Columns for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; + for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; highbd_idct32_c(temp_in, temp_out, bd); for (j = 0; j < 32; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -2484,19 +2454,18 @@ void vp10_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { int i, j; int a1; uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); + tran_low_t out = + HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { - for (i = 0; i < 32; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); + for (i = 0; i < 32; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); dest += stride; } } diff --git a/vp10/common/vp10_inv_txfm.h b/vp10/common/vp10_inv_txfm.h index 2c29fc0c8af9f2c7cea7cc115f3bbcfca54cbd20..f2c42ac24c1b9d548a857919dee204f11f26be80 100644 --- a/vp10/common/vp10_inv_txfm.h +++ b/vp10/common/vp10_inv_txfm.h @@ -24,9 +24,9 @@ extern "C" { static INLINE tran_high_t check_range(tran_high_t input) { #if CONFIG_COEFFICIENT_RANGE_CHECKING - // For valid VP9 input streams, intermediate stage coefficients should always + // For valid input streams, intermediate stage coefficients should always // stay within the range of a signed 16 bit integer. Coefficients can go out - // of this range for invalid/corrupt VP9 streams. However, strictly checking + // of this range for invalid/corrupt streams. However, strictly checking // this range for every intermediate coefficient can burdensome for a decoder, // therefore the following assertion is only enabled when configured with // --enable-coefficient-range-checking. @@ -42,10 +42,9 @@ static INLINE tran_high_t dct_const_round_shift(tran_high_t input) { } #if CONFIG_VP9_HIGHBITDEPTH -static INLINE tran_high_t highbd_check_range(tran_high_t input, - int bd) { +static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) { #if CONFIG_COEFFICIENT_RANGE_CHECKING - // For valid highbitdepth VP9 streams, intermediate stage coefficients will + // For valid highbitdepth streams, intermediate stage coefficients will // stay within the ranges: // - 8 bit: signed 16 bit integer // - 10 bit: signed 18 bit integer @@ -54,9 +53,9 @@ static INLINE tran_high_t highbd_check_range(tran_high_t input, const int32_t int_min = -int_max - 1; assert(int_min <= input); assert(input <= int_max); - (void) int_min; + (void)int_min; #endif // CONFIG_COEFFICIENT_RANGE_CHECKING - (void) bd; + (void)bd; return input; } @@ -87,15 +86,14 @@ static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) { #define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) #if CONFIG_VP9_HIGHBITDEPTH #define HIGHBD_WRAPLOW(x, bd) \ - ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) + ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) #endif // CONFIG_VP9_HIGHBITDEPTH -#else // CONFIG_EMULATE_HARDWARE +#else // CONFIG_EMULATE_HARDWARE #define WRAPLOW(x) ((int32_t)check_range(x)) #if CONFIG_VP9_HIGHBITDEPTH -#define HIGHBD_WRAPLOW(x, bd) \ - ((int32_t)highbd_check_range((x), bd)) +#define HIGHBD_WRAPLOW(x, bd) ((int32_t)highbd_check_range((x), bd)) #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EMULATE_HARDWARE diff --git a/vp10/common/vp10_inv_txfm1d.c b/vp10/common/vp10_inv_txfm1d.c index 494000f7f12fb078f618ce71be71434f0eaf6baa..7cc027f2e9ce8efdfbf3938489a623ed9bb59463 100644 --- a/vp10/common/vp10_inv_txfm1d.c +++ b/vp10/common/vp10_inv_txfm1d.c @@ -32,11 +32,11 @@ #else #define range_check(stage, input, buf, size, bit) \ { \ - (void)stage; \ - (void)input; \ - (void)buf; \ - (void)size; \ - (void)bit; \ + (void) stage; \ + (void) input; \ + (void) buf; \ + (void) size; \ + (void) bit; \ } #endif diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c index 071419e86ed9af936fa1f2f9390c23e3d70b5e2e..ec87be887fb555e769ef96ab13db5402c81b20ae 100644 --- a/vp10/common/vp10_inv_txfm2d.c +++ b/vp10/common/vp10_inv_txfm2d.c @@ -16,61 +16,50 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) { switch (txfm_type) { - case TXFM_TYPE_DCT4: - return vp10_idct4_new; - case TXFM_TYPE_DCT8: - return vp10_idct8_new; - case TXFM_TYPE_DCT16: - return vp10_idct16_new; - case TXFM_TYPE_DCT32: - return vp10_idct32_new; - case TXFM_TYPE_DCT64: - return vp10_idct64_new; - case TXFM_TYPE_ADST4: - return vp10_iadst4_new; - case TXFM_TYPE_ADST8: - return vp10_iadst8_new; - case TXFM_TYPE_ADST16: - return vp10_iadst16_new; - case TXFM_TYPE_ADST32: - return vp10_iadst32_new; - default: - assert(0); - return NULL; + case TXFM_TYPE_DCT4: return vp10_idct4_new; + case TXFM_TYPE_DCT8: return vp10_idct8_new; + case TXFM_TYPE_DCT16: return vp10_idct16_new; + case TXFM_TYPE_DCT32: return vp10_idct32_new; + case TXFM_TYPE_DCT64: return vp10_idct64_new; + case TXFM_TYPE_ADST4: return vp10_iadst4_new; + case TXFM_TYPE_ADST8: return vp10_iadst8_new; + case TXFM_TYPE_ADST16: return vp10_iadst16_new; + case TXFM_TYPE_ADST32: return vp10_iadst32_new; + default: assert(0); return NULL; } } #if CONFIG_EXT_TX -static const TXFM_2D_CFG* inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = { - {&inv_txfm_2d_cfg_dct_dct_4 , &inv_txfm_2d_cfg_dct_dct_8, - &inv_txfm_2d_cfg_dct_dct_16 , &inv_txfm_2d_cfg_dct_dct_32}, - {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8, - &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32}, - {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8, - &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32}, - {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, - &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32}, - {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8, - &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32}, - {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8, - &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32}, - {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, - &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32}, - {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, - &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32}, - {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, - &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32}, +static const TXFM_2D_CFG *inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = { + { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8, + &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 }, + { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8, + &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 }, + { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8, + &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 }, + { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, + &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 }, + { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8, + &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 }, + { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8, + &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 }, + { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, + &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 }, + { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, + &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 }, + { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, + &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 }, }; #else -static const TXFM_2D_CFG* inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = { - {&inv_txfm_2d_cfg_dct_dct_4 , &inv_txfm_2d_cfg_dct_dct_8, - &inv_txfm_2d_cfg_dct_dct_16 , &inv_txfm_2d_cfg_dct_dct_32}, - {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8, - &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32}, - {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8, - &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32}, - {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, - &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32}, +static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = { + { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8, + &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 }, + { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8, + &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 }, + { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8, + &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 }, + { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8, + &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 }, }; #endif @@ -82,14 +71,13 @@ TXFM_2D_FLIP_CFG vp10_get_inv_txfm_cfg(int tx_type, int tx_size) { } TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) { - TXFM_2D_FLIP_CFG cfg = {0, 0, NULL}; + TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL }; switch (tx_type) { case DCT_DCT: cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64; set_flip_cfg(tx_type, &cfg); break; - default: - assert(0); + default: assert(0); } return cfg; } @@ -125,8 +113,7 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output, // Columns for (c = 0; c < txfm_size; ++c) { if (cfg->lr_flip == 0) { - for (r = 0; r < txfm_size; ++r) - temp_in[r] = buf[r * txfm_size + c]; + for (r = 0; r < txfm_size; ++r) temp_in[r] = buf[r * txfm_size + c]; } else { // flip left right for (r = 0; r < txfm_size; ++r) @@ -135,8 +122,7 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output, txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col); round_shift_array(temp_out, txfm_size, -shift[1]); if (cfg->ud_flip == 0) { - for (r = 0; r < txfm_size; ++r) - output[r * stride + c] += temp_out[r]; + for (r = 0; r < txfm_size; ++r) output[r * stride + c] += temp_out[r]; } else { // flip upside down for (r = 0; r < txfm_size; ++r) @@ -146,8 +132,7 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output, } void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, - int stride, int tx_type, - int bd) { + int stride, int tx_type, int bd) { int txfm_buf[4 * 4 + 4 + 4]; // output contains the prediction signal which is always positive and smaller // than (1 << bd) - 1 @@ -159,8 +144,7 @@ void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, } void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, - int stride, int tx_type, - int bd) { + int stride, int tx_type, int bd) { int txfm_buf[8 * 8 + 8 + 8]; // output contains the prediction signal which is always positive and smaller // than (1 << bd) - 1 @@ -172,8 +156,7 @@ void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, } void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, - int stride, int tx_type, - int bd) { + int stride, int tx_type, int bd) { int txfm_buf[16 * 16 + 16 + 16]; // output contains the prediction signal which is always positive and smaller // than (1 << bd) - 1 @@ -185,8 +168,7 @@ void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, } void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, - int stride, int tx_type, - int bd) { + int stride, int tx_type, int bd) { int txfm_buf[32 * 32 + 32 + 32]; // output contains the prediction signal which is always positive and smaller // than (1 << bd) - 1 @@ -198,8 +180,7 @@ void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, } void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, - int stride, int tx_type, - int bd) { + int stride, int tx_type, int bd) { int txfm_buf[64 * 64 + 64 + 64]; // output contains the prediction signal which is always positive and smaller // than (1 << bd) - 1 diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h index 9199068fca334fbc1020adfc257f5badd1e8495e..195c319708531064d6226c188531f714506fb3b8 100644 --- a/vp10/common/vp10_inv_txfm2d_cfg.h +++ b/vp10/common/vp10_inv_txfm2d_cfg.h @@ -12,392 +12,433 @@ #define VP10_INV_TXFM2D_CFG_H_ #include "vp10/common/vp10_inv_txfm1d.h" // ---------------- config inv_dct_dct_4 ---------------- -static const int8_t inv_shift_dct_dct_4[2] = {0, -4}; -static const int8_t inv_stage_range_col_dct_dct_4[4] = {18, 18, 17, 17}; -static const int8_t inv_stage_range_row_dct_dct_4[4] = {18, 18, 18, 18}; -static const int8_t inv_cos_bit_col_dct_dct_4[4] = {13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_dct_4[4] = {13, 13, 13, 13}; +static const int8_t inv_shift_dct_dct_4[2] = { 0, -4 }; +static const int8_t inv_stage_range_col_dct_dct_4[4] = { 18, 18, 17, 17 }; +static const int8_t inv_stage_range_row_dct_dct_4[4] = { 18, 18, 18, 18 }; +static const int8_t inv_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = { - 4, // .txfm_size - 4, // .stage_num_col - 4, // .stage_num_row - // 0, // .log_scale - inv_shift_dct_dct_4, // .shift - inv_stage_range_col_dct_dct_4, // .stage_range_col - inv_stage_range_row_dct_dct_4, // .stage_range_row - inv_cos_bit_col_dct_dct_4, // .cos_bit_col - inv_cos_bit_row_dct_dct_4, // .cos_bit_row - TXFM_TYPE_DCT4, // .txfm_type_col - TXFM_TYPE_DCT4}; // .txfm_type_row + 4, // .txfm_size + 4, // .stage_num_col + 4, // .stage_num_row + // 0, // .log_scale + inv_shift_dct_dct_4, // .shift + inv_stage_range_col_dct_dct_4, // .stage_range_col + inv_stage_range_row_dct_dct_4, // .stage_range_row + inv_cos_bit_col_dct_dct_4, // .cos_bit_col + inv_cos_bit_row_dct_dct_4, // .cos_bit_row + TXFM_TYPE_DCT4, // .txfm_type_col + TXFM_TYPE_DCT4 +}; // .txfm_type_row // ---------------- config inv_dct_dct_8 ---------------- -static const int8_t inv_shift_dct_dct_8[2] = {0, -5}; -static const int8_t inv_stage_range_col_dct_dct_8[6] = {19, 19, 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_dct_dct_8[6] = {19, 19, 19, 19, 19, 19}; -static const int8_t inv_cos_bit_col_dct_dct_8[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_dct_8[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t inv_shift_dct_dct_8[2] = { 0, -5 }; +static const int8_t inv_stage_range_col_dct_dct_8[6] = { + 19, 19, 19, 19, 18, 18 +}; +static const int8_t inv_stage_range_row_dct_dct_8[6] = { + 19, 19, 19, 19, 19, 19 +}; +static const int8_t inv_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = { - 8, // .txfm_size - 6, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - inv_shift_dct_dct_8, // .shift - inv_stage_range_col_dct_dct_8, // .stage_range_col - inv_stage_range_row_dct_dct_8, // .stage_range_row - inv_cos_bit_col_dct_dct_8, // .cos_bit_col - inv_cos_bit_row_dct_dct_8, // .cos_bit_row - TXFM_TYPE_DCT8, // .txfm_type_col - TXFM_TYPE_DCT8}; // .txfm_type_row + 8, // .txfm_size + 6, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + inv_shift_dct_dct_8, // .shift + inv_stage_range_col_dct_dct_8, // .stage_range_col + inv_stage_range_row_dct_dct_8, // .stage_range_row + inv_cos_bit_col_dct_dct_8, // .cos_bit_col + inv_cos_bit_row_dct_dct_8, // .cos_bit_row + TXFM_TYPE_DCT8, // .txfm_type_col + TXFM_TYPE_DCT8 +}; // .txfm_type_row // ---------------- config inv_dct_dct_16 ---------------- -static const int8_t inv_shift_dct_dct_16[2] = {-1, -5}; -static const int8_t inv_stage_range_col_dct_dct_16[8] = {19, 19, 19, 19, - 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_dct_dct_16[8] = {20, 20, 20, 20, - 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_dct_dct_16[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_dct_16[8] = {12, 12, 12, 12, - 12, 12, 12, 12}; +static const int8_t inv_shift_dct_dct_16[2] = { -1, -5 }; +static const int8_t inv_stage_range_col_dct_dct_16[8] = { 19, 19, 19, 19, + 19, 19, 18, 18 }; +static const int8_t inv_stage_range_row_dct_dct_16[8] = { 20, 20, 20, 20, + 20, 20, 20, 20 }; +static const int8_t inv_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12, + 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = { - 16, // .txfm_size - 8, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - inv_shift_dct_dct_16, // .shift - inv_stage_range_col_dct_dct_16, // .stage_range_col - inv_stage_range_row_dct_dct_16, // .stage_range_row - inv_cos_bit_col_dct_dct_16, // .cos_bit_col - inv_cos_bit_row_dct_dct_16, // .cos_bit_row - TXFM_TYPE_DCT16, // .txfm_type_col - TXFM_TYPE_DCT16}; // .txfm_type_row + 16, // .txfm_size + 8, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + inv_shift_dct_dct_16, // .shift + inv_stage_range_col_dct_dct_16, // .stage_range_col + inv_stage_range_row_dct_dct_16, // .stage_range_row + inv_cos_bit_col_dct_dct_16, // .cos_bit_col + inv_cos_bit_row_dct_dct_16, // .cos_bit_row + TXFM_TYPE_DCT16, // .txfm_type_col + TXFM_TYPE_DCT16 +}; // .txfm_type_row // ---------------- config inv_dct_dct_32 ---------------- -static const int8_t inv_shift_dct_dct_32[2] = {-1, -5}; -static const int8_t inv_stage_range_col_dct_dct_32[10] = {19, 19, 19, 19, 19, - 19, 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_dct_dct_32[10] = {20, 20, 20, 20, 20, - 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_dct_dct_32[10] = {13, 13, 13, 13, 13, - 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_dct_32[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; +static const int8_t inv_shift_dct_dct_32[2] = { -1, -5 }; +static const int8_t inv_stage_range_col_dct_dct_32[10] = { 19, 19, 19, 19, 19, + 19, 19, 19, 18, 18 }; +static const int8_t inv_stage_range_row_dct_dct_32[10] = { 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20 }; +static const int8_t inv_cos_bit_col_dct_dct_32[10] = { 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = { - 32, // .txfm_size - 10, // .stage_num_col - 10, // .stage_num_row - // 1, // .log_scale - inv_shift_dct_dct_32, // .shift - inv_stage_range_col_dct_dct_32, // .stage_range_col - inv_stage_range_row_dct_dct_32, // .stage_range_row - inv_cos_bit_col_dct_dct_32, // .cos_bit_col - inv_cos_bit_row_dct_dct_32, // .cos_bit_row - TXFM_TYPE_DCT32, // .txfm_type_col - TXFM_TYPE_DCT32}; // .txfm_type_row + 32, // .txfm_size + 10, // .stage_num_col + 10, // .stage_num_row + // 1, // .log_scale + inv_shift_dct_dct_32, // .shift + inv_stage_range_col_dct_dct_32, // .stage_range_col + inv_stage_range_row_dct_dct_32, // .stage_range_row + inv_cos_bit_col_dct_dct_32, // .cos_bit_col + inv_cos_bit_row_dct_dct_32, // .cos_bit_row + TXFM_TYPE_DCT32, // .txfm_type_col + TXFM_TYPE_DCT32 +}; // .txfm_type_row // ---------------- config inv_dct_dct_64 ---------------- -static const int8_t inv_shift_dct_dct_64[2] = {-1, -7}; +static const int8_t inv_shift_dct_dct_64[2] = { -1, -7 }; static const int8_t inv_stage_range_col_dct_dct_64[12] = { - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18}; + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18 +}; static const int8_t inv_stage_range_row_dct_dct_64[12] = { - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_dct_dct_64[12] = {13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_dct_64[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; +static const int8_t inv_cos_bit_col_dct_dct_64[12] = { 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_dct_64[12] = { 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_64 = { - 64, // .txfm_size - 12, // .stage_num_col - 12, // .stage_num_row - inv_shift_dct_dct_64, // .shift - inv_stage_range_col_dct_dct_64, // .stage_range_col - inv_stage_range_row_dct_dct_64, // .stage_range_row - inv_cos_bit_col_dct_dct_64, // .cos_bit_col - inv_cos_bit_row_dct_dct_64, // .cos_bit_row - TXFM_TYPE_DCT64, // .txfm_type_col - TXFM_TYPE_DCT64}; // .txfm_type_row + 64, // .txfm_size + 12, // .stage_num_col + 12, // .stage_num_row + inv_shift_dct_dct_64, // .shift + inv_stage_range_col_dct_dct_64, // .stage_range_col + inv_stage_range_row_dct_dct_64, // .stage_range_row + inv_cos_bit_col_dct_dct_64, // .cos_bit_col + inv_cos_bit_row_dct_dct_64, // .cos_bit_row + TXFM_TYPE_DCT64, // .txfm_type_col + TXFM_TYPE_DCT64 +}; // .txfm_type_row // ---------------- config inv_dct_adst_4 ---------------- -static const int8_t inv_shift_dct_adst_4[2] = {0, -4}; -static const int8_t inv_stage_range_col_dct_adst_4[4] = {18, 18, 17, 17}; -static const int8_t inv_stage_range_row_dct_adst_4[6] = {18, 18, 18, - 18, 18, 18}; -static const int8_t inv_cos_bit_col_dct_adst_4[4] = {13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_adst_4[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t inv_shift_dct_adst_4[2] = { 0, -4 }; +static const int8_t inv_stage_range_col_dct_adst_4[4] = { 18, 18, 17, 17 }; +static const int8_t inv_stage_range_row_dct_adst_4[6] = { + 18, 18, 18, 18, 18, 18 +}; +static const int8_t inv_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = { - 4, // .txfm_size - 4, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - inv_shift_dct_adst_4, // .shift - inv_stage_range_col_dct_adst_4, // .stage_range_col - inv_stage_range_row_dct_adst_4, // .stage_range_row - inv_cos_bit_col_dct_adst_4, // .cos_bit_col - inv_cos_bit_row_dct_adst_4, // .cos_bit_row - TXFM_TYPE_DCT4, // .txfm_type_col - TXFM_TYPE_ADST4}; // .txfm_type_row + 4, // .txfm_size + 4, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + inv_shift_dct_adst_4, // .shift + inv_stage_range_col_dct_adst_4, // .stage_range_col + inv_stage_range_row_dct_adst_4, // .stage_range_row + inv_cos_bit_col_dct_adst_4, // .cos_bit_col + inv_cos_bit_row_dct_adst_4, // .cos_bit_row + TXFM_TYPE_DCT4, // .txfm_type_col + TXFM_TYPE_ADST4 +}; // .txfm_type_row // ---------------- config inv_dct_adst_8 ---------------- -static const int8_t inv_shift_dct_adst_8[2] = {0, -5}; -static const int8_t inv_stage_range_col_dct_adst_8[6] = {19, 19, 19, - 19, 18, 18}; -static const int8_t inv_stage_range_row_dct_adst_8[8] = {19, 19, 19, 19, - 19, 19, 19, 19}; -static const int8_t inv_cos_bit_col_dct_adst_8[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_adst_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; +static const int8_t inv_shift_dct_adst_8[2] = { 0, -5 }; +static const int8_t inv_stage_range_col_dct_adst_8[6] = { + 19, 19, 19, 19, 18, 18 +}; +static const int8_t inv_stage_range_row_dct_adst_8[8] = { 19, 19, 19, 19, + 19, 19, 19, 19 }; +static const int8_t inv_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = { - 8, // .txfm_size - 6, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - inv_shift_dct_adst_8, // .shift - inv_stage_range_col_dct_adst_8, // .stage_range_col - inv_stage_range_row_dct_adst_8, // .stage_range_row - inv_cos_bit_col_dct_adst_8, // .cos_bit_col - inv_cos_bit_row_dct_adst_8, // .cos_bit_row - TXFM_TYPE_DCT8, // .txfm_type_col - TXFM_TYPE_ADST8}; // .txfm_type_row + 8, // .txfm_size + 6, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + inv_shift_dct_adst_8, // .shift + inv_stage_range_col_dct_adst_8, // .stage_range_col + inv_stage_range_row_dct_adst_8, // .stage_range_row + inv_cos_bit_col_dct_adst_8, // .cos_bit_col + inv_cos_bit_row_dct_adst_8, // .cos_bit_row + TXFM_TYPE_DCT8, // .txfm_type_col + TXFM_TYPE_ADST8 +}; // .txfm_type_row // ---------------- config inv_dct_adst_16 ---------------- -static const int8_t inv_shift_dct_adst_16[2] = {-1, -5}; -static const int8_t inv_stage_range_col_dct_adst_16[8] = {19, 19, 19, 19, - 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_dct_adst_16[10] = {20, 20, 20, 20, 20, - 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_dct_adst_16[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_adst_16[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; +static const int8_t inv_shift_dct_adst_16[2] = { -1, -5 }; +static const int8_t inv_stage_range_col_dct_adst_16[8] = { 19, 19, 19, 19, + 19, 19, 18, 18 }; +static const int8_t inv_stage_range_row_dct_adst_16[10] = { + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; +static const int8_t inv_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = { - 16, // .txfm_size - 8, // .stage_num_col - 10, // .stage_num_row - // 0, // .log_scale - inv_shift_dct_adst_16, // .shift - inv_stage_range_col_dct_adst_16, // .stage_range_col - inv_stage_range_row_dct_adst_16, // .stage_range_row - inv_cos_bit_col_dct_adst_16, // .cos_bit_col - inv_cos_bit_row_dct_adst_16, // .cos_bit_row - TXFM_TYPE_DCT16, // .txfm_type_col - TXFM_TYPE_ADST16}; // .txfm_type_row + 16, // .txfm_size + 8, // .stage_num_col + 10, // .stage_num_row + // 0, // .log_scale + inv_shift_dct_adst_16, // .shift + inv_stage_range_col_dct_adst_16, // .stage_range_col + inv_stage_range_row_dct_adst_16, // .stage_range_row + inv_cos_bit_col_dct_adst_16, // .cos_bit_col + inv_cos_bit_row_dct_adst_16, // .cos_bit_row + TXFM_TYPE_DCT16, // .txfm_type_col + TXFM_TYPE_ADST16 +}; // .txfm_type_row // ---------------- config inv_dct_adst_32 ---------------- -static const int8_t inv_shift_dct_adst_32[2] = {-1, -5}; -static const int8_t inv_stage_range_col_dct_adst_32[10] = {19, 19, 19, 19, 19, - 19, 19, 19, 18, 18}; +static const int8_t inv_shift_dct_adst_32[2] = { -1, -5 }; +static const int8_t inv_stage_range_col_dct_adst_32[10] = { + 19, 19, 19, 19, 19, 19, 19, 19, 18, 18 +}; static const int8_t inv_stage_range_row_dct_adst_32[12] = { - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_dct_adst_32[10] = {13, 13, 13, 13, 13, - 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_dct_adst_32[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; +static const int8_t inv_cos_bit_col_dct_adst_32[10] = { 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_dct_adst_32[12] = { + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 +}; static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = { - 32, // .txfm_size - 10, // .stage_num_col - 12, // .stage_num_row - // 1, // .log_scale - inv_shift_dct_adst_32, // .shift - inv_stage_range_col_dct_adst_32, // .stage_range_col - inv_stage_range_row_dct_adst_32, // .stage_range_row - inv_cos_bit_col_dct_adst_32, // .cos_bit_col - inv_cos_bit_row_dct_adst_32, // .cos_bit_row - TXFM_TYPE_DCT32, // .txfm_type_col - TXFM_TYPE_ADST32}; // .txfm_type_row + 32, // .txfm_size + 10, // .stage_num_col + 12, // .stage_num_row + // 1, // .log_scale + inv_shift_dct_adst_32, // .shift + inv_stage_range_col_dct_adst_32, // .stage_range_col + inv_stage_range_row_dct_adst_32, // .stage_range_row + inv_cos_bit_col_dct_adst_32, // .cos_bit_col + inv_cos_bit_row_dct_adst_32, // .cos_bit_row + TXFM_TYPE_DCT32, // .txfm_type_col + TXFM_TYPE_ADST32 +}; // .txfm_type_row // ---------------- config inv_adst_adst_4 ---------------- -static const int8_t inv_shift_adst_adst_4[2] = {0, -4}; -static const int8_t inv_stage_range_col_adst_adst_4[6] = {18, 18, 18, - 18, 17, 17}; -static const int8_t inv_stage_range_row_adst_adst_4[6] = {18, 18, 18, - 18, 18, 18}; -static const int8_t inv_cos_bit_col_adst_adst_4[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_adst_4[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t inv_shift_adst_adst_4[2] = { 0, -4 }; +static const int8_t inv_stage_range_col_adst_adst_4[6] = { 18, 18, 18, + 18, 17, 17 }; +static const int8_t inv_stage_range_row_adst_adst_4[6] = { 18, 18, 18, + 18, 18, 18 }; +static const int8_t inv_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = { - 4, // .txfm_size - 6, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - inv_shift_adst_adst_4, // .shift - inv_stage_range_col_adst_adst_4, // .stage_range_col - inv_stage_range_row_adst_adst_4, // .stage_range_row - inv_cos_bit_col_adst_adst_4, // .cos_bit_col - inv_cos_bit_row_adst_adst_4, // .cos_bit_row - TXFM_TYPE_ADST4, // .txfm_type_col - TXFM_TYPE_ADST4}; // .txfm_type_row + 4, // .txfm_size + 6, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + inv_shift_adst_adst_4, // .shift + inv_stage_range_col_adst_adst_4, // .stage_range_col + inv_stage_range_row_adst_adst_4, // .stage_range_row + inv_cos_bit_col_adst_adst_4, // .cos_bit_col + inv_cos_bit_row_adst_adst_4, // .cos_bit_row + TXFM_TYPE_ADST4, // .txfm_type_col + TXFM_TYPE_ADST4 +}; // .txfm_type_row // ---------------- config inv_adst_adst_8 ---------------- -static const int8_t inv_shift_adst_adst_8[2] = {0, -5}; -static const int8_t inv_stage_range_col_adst_adst_8[8] = {19, 19, 19, 19, - 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_adst_adst_8[8] = {19, 19, 19, 19, - 19, 19, 19, 19}; -static const int8_t inv_cos_bit_col_adst_adst_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_adst_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; +static const int8_t inv_shift_adst_adst_8[2] = { 0, -5 }; +static const int8_t inv_stage_range_col_adst_adst_8[8] = { 19, 19, 19, 19, + 19, 19, 18, 18 }; +static const int8_t inv_stage_range_row_adst_adst_8[8] = { 19, 19, 19, 19, + 19, 19, 19, 19 }; +static const int8_t inv_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = { - 8, // .txfm_size - 8, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - inv_shift_adst_adst_8, // .shift - inv_stage_range_col_adst_adst_8, // .stage_range_col - inv_stage_range_row_adst_adst_8, // .stage_range_row - inv_cos_bit_col_adst_adst_8, // .cos_bit_col - inv_cos_bit_row_adst_adst_8, // .cos_bit_row - TXFM_TYPE_ADST8, // .txfm_type_col - TXFM_TYPE_ADST8}; // .txfm_type_row + 8, // .txfm_size + 8, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + inv_shift_adst_adst_8, // .shift + inv_stage_range_col_adst_adst_8, // .stage_range_col + inv_stage_range_row_adst_adst_8, // .stage_range_row + inv_cos_bit_col_adst_adst_8, // .cos_bit_col + inv_cos_bit_row_adst_adst_8, // .cos_bit_row + TXFM_TYPE_ADST8, // .txfm_type_col + TXFM_TYPE_ADST8 +}; // .txfm_type_row // ---------------- config inv_adst_adst_16 ---------------- -static const int8_t inv_shift_adst_adst_16[2] = {-1, -5}; -static const int8_t inv_stage_range_col_adst_adst_16[10] = {19, 19, 19, 19, 19, - 19, 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_adst_adst_16[10] = {20, 20, 20, 20, 20, - 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_adst_adst_16[10] = {13, 13, 13, 13, 13, - 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_adst_16[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; +static const int8_t inv_shift_adst_adst_16[2] = { -1, -5 }; +static const int8_t inv_stage_range_col_adst_adst_16[10] = { + 19, 19, 19, 19, 19, 19, 19, 19, 18, 18 +}; +static const int8_t inv_stage_range_row_adst_adst_16[10] = { + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; +static const int8_t inv_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = { - 16, // .txfm_size - 10, // .stage_num_col - 10, // .stage_num_row - // 0, // .log_scale - inv_shift_adst_adst_16, // .shift - inv_stage_range_col_adst_adst_16, // .stage_range_col - inv_stage_range_row_adst_adst_16, // .stage_range_row - inv_cos_bit_col_adst_adst_16, // .cos_bit_col - inv_cos_bit_row_adst_adst_16, // .cos_bit_row - TXFM_TYPE_ADST16, // .txfm_type_col - TXFM_TYPE_ADST16}; // .txfm_type_row + 16, // .txfm_size + 10, // .stage_num_col + 10, // .stage_num_row + // 0, // .log_scale + inv_shift_adst_adst_16, // .shift + inv_stage_range_col_adst_adst_16, // .stage_range_col + inv_stage_range_row_adst_adst_16, // .stage_range_row + inv_cos_bit_col_adst_adst_16, // .cos_bit_col + inv_cos_bit_row_adst_adst_16, // .cos_bit_row + TXFM_TYPE_ADST16, // .txfm_type_col + TXFM_TYPE_ADST16 +}; // .txfm_type_row // ---------------- config inv_adst_adst_32 ---------------- -static const int8_t inv_shift_adst_adst_32[2] = {-1, -5}; +static const int8_t inv_shift_adst_adst_32[2] = { -1, -5 }; static const int8_t inv_stage_range_col_adst_adst_32[12] = { - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18}; + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18 +}; static const int8_t inv_stage_range_row_adst_adst_32[12] = { - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_adst_adst_32[12] = {13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_adst_32[12] = {12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12}; + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; +static const int8_t inv_cos_bit_col_adst_adst_32[12] = { + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 +}; +static const int8_t inv_cos_bit_row_adst_adst_32[12] = { + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 +}; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = { - 32, // .txfm_size - 12, // .stage_num_col - 12, // .stage_num_row - // 1, // .log_scale - inv_shift_adst_adst_32, // .shift - inv_stage_range_col_adst_adst_32, // .stage_range_col - inv_stage_range_row_adst_adst_32, // .stage_range_row - inv_cos_bit_col_adst_adst_32, // .cos_bit_col - inv_cos_bit_row_adst_adst_32, // .cos_bit_row - TXFM_TYPE_ADST32, // .txfm_type_col - TXFM_TYPE_ADST32}; // .txfm_type_row + 32, // .txfm_size + 12, // .stage_num_col + 12, // .stage_num_row + // 1, // .log_scale + inv_shift_adst_adst_32, // .shift + inv_stage_range_col_adst_adst_32, // .stage_range_col + inv_stage_range_row_adst_adst_32, // .stage_range_row + inv_cos_bit_col_adst_adst_32, // .cos_bit_col + inv_cos_bit_row_adst_adst_32, // .cos_bit_row + TXFM_TYPE_ADST32, // .txfm_type_col + TXFM_TYPE_ADST32 +}; // .txfm_type_row // ---------------- config inv_adst_dct_4 ---------------- -static const int8_t inv_shift_adst_dct_4[2] = {0, -4}; -static const int8_t inv_stage_range_col_adst_dct_4[6] = {18, 18, 18, - 18, 17, 17}; -static const int8_t inv_stage_range_row_adst_dct_4[4] = {18, 18, 18, 18}; -static const int8_t inv_cos_bit_col_adst_dct_4[6] = {13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_dct_4[4] = {13, 13, 13, 13}; +static const int8_t inv_shift_adst_dct_4[2] = { 0, -4 }; +static const int8_t inv_stage_range_col_adst_dct_4[6] = { + 18, 18, 18, 18, 17, 17 +}; +static const int8_t inv_stage_range_row_adst_dct_4[4] = { 18, 18, 18, 18 }; +static const int8_t inv_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = { - 4, // .txfm_size - 6, // .stage_num_col - 4, // .stage_num_row - // 0, // .log_scale - inv_shift_adst_dct_4, // .shift - inv_stage_range_col_adst_dct_4, // .stage_range_col - inv_stage_range_row_adst_dct_4, // .stage_range_row - inv_cos_bit_col_adst_dct_4, // .cos_bit_col - inv_cos_bit_row_adst_dct_4, // .cos_bit_row - TXFM_TYPE_ADST4, // .txfm_type_col - TXFM_TYPE_DCT4}; // .txfm_type_row + 4, // .txfm_size + 6, // .stage_num_col + 4, // .stage_num_row + // 0, // .log_scale + inv_shift_adst_dct_4, // .shift + inv_stage_range_col_adst_dct_4, // .stage_range_col + inv_stage_range_row_adst_dct_4, // .stage_range_row + inv_cos_bit_col_adst_dct_4, // .cos_bit_col + inv_cos_bit_row_adst_dct_4, // .cos_bit_row + TXFM_TYPE_ADST4, // .txfm_type_col + TXFM_TYPE_DCT4 +}; // .txfm_type_row // ---------------- config inv_adst_dct_8 ---------------- -static const int8_t inv_shift_adst_dct_8[2] = {0, -5}; -static const int8_t inv_stage_range_col_adst_dct_8[8] = {19, 19, 19, 19, - 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_adst_dct_8[6] = {19, 19, 19, - 19, 19, 19}; -static const int8_t inv_cos_bit_col_adst_dct_8[8] = {13, 13, 13, 13, - 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_dct_8[6] = {13, 13, 13, 13, 13, 13}; +static const int8_t inv_shift_adst_dct_8[2] = { 0, -5 }; +static const int8_t inv_stage_range_col_adst_dct_8[8] = { 19, 19, 19, 19, + 19, 19, 18, 18 }; +static const int8_t inv_stage_range_row_adst_dct_8[6] = { + 19, 19, 19, 19, 19, 19 +}; +static const int8_t inv_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13, + 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = { - 8, // .txfm_size - 8, // .stage_num_col - 6, // .stage_num_row - // 0, // .log_scale - inv_shift_adst_dct_8, // .shift - inv_stage_range_col_adst_dct_8, // .stage_range_col - inv_stage_range_row_adst_dct_8, // .stage_range_row - inv_cos_bit_col_adst_dct_8, // .cos_bit_col - inv_cos_bit_row_adst_dct_8, // .cos_bit_row - TXFM_TYPE_ADST8, // .txfm_type_col - TXFM_TYPE_DCT8}; // .txfm_type_row + 8, // .txfm_size + 8, // .stage_num_col + 6, // .stage_num_row + // 0, // .log_scale + inv_shift_adst_dct_8, // .shift + inv_stage_range_col_adst_dct_8, // .stage_range_col + inv_stage_range_row_adst_dct_8, // .stage_range_row + inv_cos_bit_col_adst_dct_8, // .cos_bit_col + inv_cos_bit_row_adst_dct_8, // .cos_bit_row + TXFM_TYPE_ADST8, // .txfm_type_col + TXFM_TYPE_DCT8 +}; // .txfm_type_row // ---------------- config inv_adst_dct_16 ---------------- -static const int8_t inv_shift_adst_dct_16[2] = {-1, -5}; -static const int8_t inv_stage_range_col_adst_dct_16[10] = {19, 19, 19, 19, 19, - 19, 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_adst_dct_16[8] = {20, 20, 20, 20, - 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_adst_dct_16[10] = {13, 13, 13, 13, 13, - 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_dct_16[8] = {12, 12, 12, 12, - 12, 12, 12, 12}; +static const int8_t inv_shift_adst_dct_16[2] = { -1, -5 }; +static const int8_t inv_stage_range_col_adst_dct_16[10] = { + 19, 19, 19, 19, 19, 19, 19, 19, 18, 18 +}; +static const int8_t inv_stage_range_row_adst_dct_16[8] = { 20, 20, 20, 20, + 20, 20, 20, 20 }; +static const int8_t inv_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13 }; +static const int8_t inv_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12, + 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = { - 16, // .txfm_size - 10, // .stage_num_col - 8, // .stage_num_row - // 0, // .log_scale - inv_shift_adst_dct_16, // .shift - inv_stage_range_col_adst_dct_16, // .stage_range_col - inv_stage_range_row_adst_dct_16, // .stage_range_row - inv_cos_bit_col_adst_dct_16, // .cos_bit_col - inv_cos_bit_row_adst_dct_16, // .cos_bit_row - TXFM_TYPE_ADST16, // .txfm_type_col - TXFM_TYPE_DCT16}; // .txfm_type_row + 16, // .txfm_size + 10, // .stage_num_col + 8, // .stage_num_row + // 0, // .log_scale + inv_shift_adst_dct_16, // .shift + inv_stage_range_col_adst_dct_16, // .stage_range_col + inv_stage_range_row_adst_dct_16, // .stage_range_row + inv_cos_bit_col_adst_dct_16, // .cos_bit_col + inv_cos_bit_row_adst_dct_16, // .cos_bit_row + TXFM_TYPE_ADST16, // .txfm_type_col + TXFM_TYPE_DCT16 +}; // .txfm_type_row // ---------------- config inv_adst_dct_32 ---------------- -static const int8_t inv_shift_adst_dct_32[2] = {-1, -5}; +static const int8_t inv_shift_adst_dct_32[2] = { -1, -5 }; static const int8_t inv_stage_range_col_adst_dct_32[12] = { - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18}; -static const int8_t inv_stage_range_row_adst_dct_32[10] = {20, 20, 20, 20, 20, - 20, 20, 20, 20, 20}; -static const int8_t inv_cos_bit_col_adst_dct_32[12] = {13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13}; -static const int8_t inv_cos_bit_row_adst_dct_32[10] = {12, 12, 12, 12, 12, - 12, 12, 12, 12, 12}; + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18 +}; +static const int8_t inv_stage_range_row_adst_dct_32[10] = { + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 +}; +static const int8_t inv_cos_bit_col_adst_dct_32[12] = { + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 +}; +static const int8_t inv_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12 }; static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = { - 32, // .txfm_size - 12, // .stage_num_col - 10, // .stage_num_row - // 1, // .log_scale - inv_shift_adst_dct_32, // .shift - inv_stage_range_col_adst_dct_32, // .stage_range_col - inv_stage_range_row_adst_dct_32, // .stage_range_row - inv_cos_bit_col_adst_dct_32, // .cos_bit_col - inv_cos_bit_row_adst_dct_32, // .cos_bit_row - TXFM_TYPE_ADST32, // .txfm_type_col - TXFM_TYPE_DCT32}; // .txfm_type_row + 32, // .txfm_size + 12, // .stage_num_col + 10, // .stage_num_row + // 1, // .log_scale + inv_shift_adst_dct_32, // .shift + inv_stage_range_col_adst_dct_32, // .stage_range_col + inv_stage_range_row_adst_dct_32, // .stage_range_row + inv_cos_bit_col_adst_dct_32, // .cos_bit_col + inv_cos_bit_row_adst_dct_32, // .cos_bit_row + TXFM_TYPE_ADST32, // .txfm_type_col + TXFM_TYPE_DCT32 +}; // .txfm_type_row #endif // VP10_INV_TXFM2D_CFG_H_ diff --git a/vp10/common/vp10_rtcd.c b/vp10/common/vp10_rtcd.c index 36b294ae8a6d550f1627bb454521985ae4e4e8ff..0fd8ab0ea6276e9f7703bf3792e2d93e6226de8d 100644 --- a/vp10/common/vp10_rtcd.c +++ b/vp10/common/vp10_rtcd.c @@ -13,7 +13,7 @@ #include "vpx_ports/vpx_once.h" void vp10_rtcd() { - // TODO(JBB): Remove this once, by insuring that both the encoder and - // decoder setup functions are protected by once(); - once(setup_rtcd_internal); + // TODO(JBB): Remove this once, by insuring that both the encoder and + // decoder setup functions are protected by once(); + once(setup_rtcd_internal); } diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h index 2ac8f81b32951c9a65ba61cd26147b25300b78d7..a7673526625985ba5101906d2fcbc032e8ae8cab 100644 --- a/vp10/common/vp10_txfm.h +++ b/vp10/common/vp10_txfm.h @@ -22,63 +22,48 @@ static const int cos_bit_min = 10; static const int cos_bit_max = 16; // cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i))); -static const int32_t cospi_arr[7][64] = - {{ 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, - 1004, 999, 993, 987, 980, 972, 964, 955, - 946, 936, 926, 915, 903, 891, 878, 865, - 851, 837, 822, 807, 792, 775, 759, 742, - 724, 706, 688, 669, 650, 630, 610, 590, - 569, 548, 526, 505, 483, 460, 438, 415, - 392, 369, 345, 321, 297, 273, 249, 224, - 200, 175, 150, 125, 100, 75, 50, 25}, - { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, - 2009, 1998, 1987, 1974, 1960, 1945, 1928, 1911, - 1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730, - 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, - 1448, 1412, 1375, 1338, 1299, 1260, 1220, 1179, - 1138, 1096, 1053, 1009, 965, 921, 876, 830, - 784, 737, 690, 642, 595, 546, 498, 449, - 400, 350, 301, 251, 201, 151, 100, 50}, - { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, - 4017, 3996, 3973, 3948, 3920, 3889, 3857, 3822, - 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461, - 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, - 2896, 2824, 2751, 2675, 2598, 2520, 2440, 2359, - 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660, - 1567, 1474, 1380, 1285, 1189, 1092, 995, 897, - 799, 700, 601, 501, 401, 301, 201, 101}, - { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, - 8035, 7993, 7946, 7895, 7839, 7779, 7713, 7643, - 7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921, - 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, - 5793, 5649, 5501, 5351, 5197, 5040, 4880, 4717, - 4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320, - 3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795, - 1598, 1401, 1202, 1003, 803, 603, 402, 201}, - { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, - 16069, 15986, 15893, 15791, 15679, 15557, 15426, 15286, - 15137, 14978, 14811, 14635, 14449, 14256, 14053, 13842, - 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, - 11585, 11297, 11003, 10702, 10394, 10080, 9760, 9434, - 9102, 8765, 8423, 8076, 7723, 7366, 7005, 6639, - 6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590, - 3196, 2801, 2404, 2006, 1606, 1205, 804, 402}, - { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, - 32138, 31972, 31786, 31581, 31357, 31114, 30853, 30572, - 30274, 29957, 29622, 29269, 28899, 28511, 28106, 27684, - 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, - 23170, 22595, 22006, 21403, 20788, 20160, 19520, 18868, - 18205, 17531, 16846, 16151, 15447, 14733, 14010, 13279, - 12540, 11793, 11039, 10279, 9512, 8740, 7962, 7180, - 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804}, - { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, - 64277, 63944, 63572, 63162, 62714, 62228, 61705, 61145, - 60547, 59914, 59244, 58538, 57798, 57022, 56212, 55368, - 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, - 46341, 45190, 44011, 42806, 41576, 40320, 39040, 37736, - 36410, 35062, 33692, 32303, 30893, 29466, 28020, 26558, - 25080, 23586, 22078, 20557, 19024, 17479, 15924, 14359, - 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608}}; +static const int32_t cospi_arr[7][64] = { + { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980, + 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837, 822, 807, + 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610, 590, 569, 548, 526, + 505, 483, 460, 438, 415, 392, 369, 345, 321, 297, 273, 249, 224, 200, 175, + 150, 125, 100, 75, 50, 25 }, + { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987, 1974, + 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730, + 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448, 1412, 1375, 1338, + 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009, 965, 921, 876, 830, 784, + 737, 690, 642, 595, 546, 498, 449, 400, 350, 301, 251, 201, 151, 100, 50 }, + { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, 3948, + 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461, + 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896, 2824, 2751, 2675, + 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660, + 1567, 1474, 1380, 1285, 1189, 1092, 995, 897, 799, 700, 601, 501, 401, 301, + 201, 101 }, + { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946, 7895, + 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921, + 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793, 5649, 5501, 5351, + 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320, + 3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795, 1598, 1401, 1202, 1003, 803, + 603, 402, 201 }, + { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893, + 15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256, + 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585, + 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076, 7723, + 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590, 3196, + 2801, 2404, 2006, 1606, 1205, 804, 402 }, + { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786, + 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511, + 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170, + 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151, + 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962, + 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 }, + { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572, + 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022, + 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341, + 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303, + 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924, + 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 } +}; static INLINE int32_t round_shift(int32_t value, int bit) { return (value + (1 << (bit - 1))) >> bit; @@ -169,10 +154,10 @@ typedef struct TXFM_2D_CFG { typedef struct TXFM_2D_FLIP_CFG { int ud_flip; // flip upside down int lr_flip; // flip left to right - const TXFM_2D_CFG* cfg; + const TXFM_2D_CFG *cfg; } TXFM_2D_FLIP_CFG; -static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG* cfg) { +static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) { switch (tx_type) { case DCT_DCT: case ADST_DCT: diff --git a/vp10/common/warped_motion.c b/vp10/common/warped_motion.c index e1c1a07095aa89db95507334c8d54a0f2c1689a0..7e3aebf4f184f76a1cb7cc5f3a79676a52d4b3e9 100644 --- a/vp10/common/warped_motion.c +++ b/vp10/common/warped_motion.c @@ -16,68 +16,43 @@ #include "vp10/common/warped_motion.h" - -typedef void (*projectPointsType)(int *mat, - int *points, - int *proj, - const int n, +typedef void (*projectPointsType)(int *mat, int *points, int *proj, const int n, const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y); -static void projectPointsHomography(int *mat, - int *points, - int *proj, - const int n, - const int stride_points, +static void projectPointsHomography(int *mat, int *points, int *proj, + const int n, const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y); -static void projectPointsAffine(int *mat, - int *points, - int *proj, - const int n, - const int stride_points, - const int stride_proj, +static void projectPointsAffine(int *mat, int *points, int *proj, const int n, + const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y); -static void projectPointsRotZoom(int *mat, - int *points, - int *proj, - const int n, - const int stride_points, - const int stride_proj, +static void projectPointsRotZoom(int *mat, int *points, int *proj, const int n, + const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y); -static void projectPointsTranslation(int *mat, - int *points, - int *proj, - const int n, - const int stride_points, +static void projectPointsTranslation(int *mat, int *points, int *proj, + const int n, const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y); static projectPointsType get_projectPointsType(TransformationType type) { switch (type) { - case HOMOGRAPHY: - return projectPointsHomography; - case AFFINE: - return projectPointsAffine; - case ROTZOOM: - return projectPointsRotZoom; - case TRANSLATION: - return projectPointsTranslation; - default: - assert(0); - return NULL; + case HOMOGRAPHY: return projectPointsHomography; + case AFFINE: return projectPointsAffine; + case ROTZOOM: return projectPointsRotZoom; + case TRANSLATION: return projectPointsTranslation; + default: assert(0); return NULL; } } static void projectPointsTranslation(int *mat, int *points, int *proj, - const int n, - const int stride_points, + const int n, const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y) { @@ -90,34 +65,29 @@ static void projectPointsTranslation(int *mat, int *points, int *proj, WARPEDPIXEL_PREC_BITS + 1); else *(proj++) = ROUND_POWER_OF_TWO_SIGNED( - ((x << WARPEDMODEL_PREC_BITS)) + mat[0], - WARPEDPIXEL_PREC_BITS); + ((x << WARPEDMODEL_PREC_BITS)) + mat[0], WARPEDPIXEL_PREC_BITS); if (subsampling_y) *(proj++) = ROUND_POWER_OF_TWO_SIGNED( ((y << (WARPEDMODEL_PREC_BITS + 1)) + mat[1]), WARPEDPIXEL_PREC_BITS + 1); else *(proj++) = ROUND_POWER_OF_TWO_SIGNED( - ((y << WARPEDMODEL_PREC_BITS)) + mat[1], - WARPEDPIXEL_PREC_BITS); + ((y << WARPEDMODEL_PREC_BITS)) + mat[1], WARPEDPIXEL_PREC_BITS); points += stride_points - 2; proj += stride_proj - 2; } } -void projectPointsRotZoom(int *mat, int *points, int *proj, - const int n, - const int stride_points, - const int stride_proj, - const int subsampling_x, - const int subsampling_y) { +void projectPointsRotZoom(int *mat, int *points, int *proj, const int n, + const int stride_points, const int stride_proj, + const int subsampling_x, const int subsampling_y) { int i; for (i = 0; i < n; ++i) { const int x = *(points++), y = *(points++); if (subsampling_x) *(proj++) = ROUND_POWER_OF_TWO_SIGNED( mat[2] * 2 * x + mat[3] * 2 * y + mat[0] + - (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2, + (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2, WARPEDDIFF_PREC_BITS + 1); else *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0], @@ -125,7 +95,7 @@ void projectPointsRotZoom(int *mat, int *points, int *proj, if (subsampling_y) *(proj++) = ROUND_POWER_OF_TWO_SIGNED( -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] + - (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2, + (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2, WARPEDDIFF_PREC_BITS + 1); else *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1], @@ -135,10 +105,8 @@ void projectPointsRotZoom(int *mat, int *points, int *proj, } } -static void projectPointsAffine(int *mat, int *points, int *proj, - const int n, - const int stride_points, - const int stride_proj, +static void projectPointsAffine(int *mat, int *points, int *proj, const int n, + const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y) { int i; @@ -147,7 +115,7 @@ static void projectPointsAffine(int *mat, int *points, int *proj, if (subsampling_x) *(proj++) = ROUND_POWER_OF_TWO_SIGNED( mat[2] * 2 * x + mat[3] * 2 * y + mat[0] + - (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2, + (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2, WARPEDDIFF_PREC_BITS + 1); else *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0], @@ -155,7 +123,7 @@ static void projectPointsAffine(int *mat, int *points, int *proj, if (subsampling_y) *(proj++) = ROUND_POWER_OF_TWO_SIGNED( mat[4] * 2 * x + mat[5] * 2 * y + mat[1] + - (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2, + (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2, WARPEDDIFF_PREC_BITS + 1); else *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1], @@ -166,8 +134,7 @@ static void projectPointsAffine(int *mat, int *points, int *proj, } static void projectPointsHomography(int *mat, int *points, int *proj, - const int n, - const int stride_points, + const int n, const int stride_points, const int stride_proj, const int subsampling_x, const int subsampling_y) { @@ -181,19 +148,17 @@ static void projectPointsHomography(int *mat, int *points, int *proj, Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1))); xp = (mat[0] * x + mat[1] * y + 2 * mat[2]) - << (WARPEDPIXEL_PREC_BITS + - WARPEDMODEL_ROW3HOMO_PREC_BITS - WARPEDMODEL_PREC_BITS); + << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS - + WARPEDMODEL_PREC_BITS); yp = (mat[3] * x + mat[4] * y + 2 * mat[5]) - << (WARPEDPIXEL_PREC_BITS + - WARPEDMODEL_ROW3HOMO_PREC_BITS - WARPEDMODEL_PREC_BITS); + << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS - + WARPEDMODEL_PREC_BITS); xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z; yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z; - if (subsampling_x) - xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2; - if (subsampling_y) - yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2; + if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2; + if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2; *(proj++) = xp; *(proj++) = yp; @@ -202,141 +167,66 @@ static void projectPointsHomography(int *mat, int *points, int *proj, } } -static const int16_t -filter_4tap[WARPEDPIXEL_PREC_SHIFTS][4] = { - {0, 128, 0, 0}, - {-1, 127, 2, 0}, - {-2, 127, 4, -1}, - {-3, 126, 6, -1}, - {-3, 125, 8, -2}, - {-4, 124, 11, -3}, - {-5, 123, 13, -3}, - {-5, 121, 15, -3}, - {-6, 120, 18, -4}, - {-7, 119, 20, -4}, - {-7, 118, 22, -5}, - {-8, 116, 25, -5}, - {-8, 115, 27, -6}, - {-9, 113, 30, -6}, - {-9, 112, 32, -7}, - {-9, 110, 34, -7}, - {-10, 108, 37, -7}, - {-10, 107, 39, -8}, - {-10, 105, 41, -8}, - {-11, 103, 44, -8}, - {-11, 101, 47, -9}, - {-11, 99, 49, -9}, - {-11, 97, 51, -9}, - {-11, 95, 54, -10}, - {-11, 93, 56, -10}, - {-12, 91, 59, -10}, - {-12, 89, 61, -10}, - {-12, 87, 64, -11}, - {-12, 85, 66, -11}, - {-12, 82, 69, -11}, - {-12, 80, 71, -11}, - {-12, 78, 73, -11}, - {-11, 75, 75, -11}, - {-11, 73, 78, -12}, - {-11, 71, 80, -12}, - {-11, 69, 82, -12}, - {-11, 66, 85, -12}, - {-11, 64, 87, -12}, - {-10, 61, 89, -12}, - {-10, 59, 91, -12}, - {-10, 56, 93, -11}, - {-10, 54, 95, -11}, - {-9, 51, 97, -11}, - {-9, 49, 99, -11}, - {-9, 47, 101, -11}, - {-8, 44, 103, -11}, - {-8, 41, 105, -10}, - {-8, 39, 107, -10}, - {-7, 37, 108, -10}, - {-7, 34, 110, -9}, - {-7, 32, 112, -9}, - {-6, 30, 113, -9}, - {-6, 27, 115, -8}, - {-5, 25, 116, -8}, - {-5, 22, 118, -7}, - {-4, 20, 119, -7}, - {-4, 18, 120, -6}, - {-3, 15, 121, -5}, - {-3, 13, 123, -5}, - {-3, 11, 124, -4}, - {-2, 8, 125, -3}, - {-1, 6, 126, -3}, - {-1, 4, 127, -2}, - {0, 2, 127, -1}, +static const int16_t filter_4tap[WARPEDPIXEL_PREC_SHIFTS][4] = { + { 0, 128, 0, 0 }, { -1, 127, 2, 0 }, { -2, 127, 4, -1 }, + { -3, 126, 6, -1 }, { -3, 125, 8, -2 }, { -4, 124, 11, -3 }, + { -5, 123, 13, -3 }, { -5, 121, 15, -3 }, { -6, 120, 18, -4 }, + { -7, 119, 20, -4 }, { -7, 118, 22, -5 }, { -8, 116, 25, -5 }, + { -8, 115, 27, -6 }, { -9, 113, 30, -6 }, { -9, 112, 32, -7 }, + { -9, 110, 34, -7 }, { -10, 108, 37, -7 }, { -10, 107, 39, -8 }, + { -10, 105, 41, -8 }, { -11, 103, 44, -8 }, { -11, 101, 47, -9 }, + { -11, 99, 49, -9 }, { -11, 97, 51, -9 }, { -11, 95, 54, -10 }, + { -11, 93, 56, -10 }, { -12, 91, 59, -10 }, { -12, 89, 61, -10 }, + { -12, 87, 64, -11 }, { -12, 85, 66, -11 }, { -12, 82, 69, -11 }, + { -12, 80, 71, -11 }, { -12, 78, 73, -11 }, { -11, 75, 75, -11 }, + { -11, 73, 78, -12 }, { -11, 71, 80, -12 }, { -11, 69, 82, -12 }, + { -11, 66, 85, -12 }, { -11, 64, 87, -12 }, { -10, 61, 89, -12 }, + { -10, 59, 91, -12 }, { -10, 56, 93, -11 }, { -10, 54, 95, -11 }, + { -9, 51, 97, -11 }, { -9, 49, 99, -11 }, { -9, 47, 101, -11 }, + { -8, 44, 103, -11 }, { -8, 41, 105, -10 }, { -8, 39, 107, -10 }, + { -7, 37, 108, -10 }, { -7, 34, 110, -9 }, { -7, 32, 112, -9 }, + { -6, 30, 113, -9 }, { -6, 27, 115, -8 }, { -5, 25, 116, -8 }, + { -5, 22, 118, -7 }, { -4, 20, 119, -7 }, { -4, 18, 120, -6 }, + { -3, 15, 121, -5 }, { -3, 13, 123, -5 }, { -3, 11, 124, -4 }, + { -2, 8, 125, -3 }, { -1, 6, 126, -3 }, { -1, 4, 127, -2 }, + { 0, 2, 127, -1 }, }; static const int16_t -filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = { - {0, 0, 128, 0, 0, 0}, - {0, -1, 128, 2, -1, 0}, - {1, -3, 127, 4, -1, 0}, - {1, -4, 126, 6, -2, 1}, - {1, -5, 126, 8, -3, 1}, - {1, -6, 125, 11, -4, 1}, - {1, -7, 124, 13, -4, 1}, - {2, -8, 123, 15, -5, 1}, - {2, -9, 122, 18, -6, 1}, - {2, -10, 121, 20, -6, 1}, - {2, -11, 120, 22, -7, 2}, - {2, -12, 119, 25, -8, 2}, - {3, -13, 117, 27, -8, 2}, - {3, -13, 116, 29, -9, 2}, - {3, -14, 114, 32, -10, 3}, - {3, -15, 113, 35, -10, 2}, - {3, -15, 111, 37, -11, 3}, - {3, -16, 109, 40, -11, 3}, - {3, -16, 108, 42, -12, 3}, - {4, -17, 106, 45, -13, 3}, - {4, -17, 104, 47, -13, 3}, - {4, -17, 102, 50, -14, 3}, - {4, -17, 100, 52, -14, 3}, - {4, -18, 98, 55, -15, 4}, - {4, -18, 96, 58, -15, 3}, - {4, -18, 94, 60, -16, 4}, - {4, -18, 91, 63, -16, 4}, - {4, -18, 89, 65, -16, 4}, - {4, -18, 87, 68, -17, 4}, - {4, -18, 85, 70, -17, 4}, - {4, -18, 82, 73, -17, 4}, - {4, -18, 80, 75, -17, 4}, - {4, -18, 78, 78, -18, 4}, - {4, -17, 75, 80, -18, 4}, - {4, -17, 73, 82, -18, 4}, - {4, -17, 70, 85, -18, 4}, - {4, -17, 68, 87, -18, 4}, - {4, -16, 65, 89, -18, 4}, - {4, -16, 63, 91, -18, 4}, - {4, -16, 60, 94, -18, 4}, - {3, -15, 58, 96, -18, 4}, - {4, -15, 55, 98, -18, 4}, - {3, -14, 52, 100, -17, 4}, - {3, -14, 50, 102, -17, 4}, - {3, -13, 47, 104, -17, 4}, - {3, -13, 45, 106, -17, 4}, - {3, -12, 42, 108, -16, 3}, - {3, -11, 40, 109, -16, 3}, - {3, -11, 37, 111, -15, 3}, - {2, -10, 35, 113, -15, 3}, - {3, -10, 32, 114, -14, 3}, - {2, -9, 29, 116, -13, 3}, - {2, -8, 27, 117, -13, 3}, - {2, -8, 25, 119, -12, 2}, - {2, -7, 22, 120, -11, 2}, - {1, -6, 20, 121, -10, 2}, - {1, -6, 18, 122, -9, 2}, - {1, -5, 15, 123, -8, 2}, - {1, -4, 13, 124, -7, 1}, - {1, -4, 11, 125, -6, 1}, - {1, -3, 8, 126, -5, 1}, - {1, -2, 6, 126, -4, 1}, - {0, -1, 4, 127, -3, 1}, - {0, -1, 2, 128, -1, 0}, -}; + filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = { + { 0, 0, 128, 0, 0, 0 }, { 0, -1, 128, 2, -1, 0 }, + { 1, -3, 127, 4, -1, 0 }, { 1, -4, 126, 6, -2, 1 }, + { 1, -5, 126, 8, -3, 1 }, { 1, -6, 125, 11, -4, 1 }, + { 1, -7, 124, 13, -4, 1 }, { 2, -8, 123, 15, -5, 1 }, + { 2, -9, 122, 18, -6, 1 }, { 2, -10, 121, 20, -6, 1 }, + { 2, -11, 120, 22, -7, 2 }, { 2, -12, 119, 25, -8, 2 }, + { 3, -13, 117, 27, -8, 2 }, { 3, -13, 116, 29, -9, 2 }, + { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 }, + { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 }, + { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 }, + { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 }, + { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 }, + { 4, -18, 96, 58, -15, 3 }, { 4, -18, 94, 60, -16, 4 }, + { 4, -18, 91, 63, -16, 4 }, { 4, -18, 89, 65, -16, 4 }, + { 4, -18, 87, 68, -17, 4 }, { 4, -18, 85, 70, -17, 4 }, + { 4, -18, 82, 73, -17, 4 }, { 4, -18, 80, 75, -17, 4 }, + { 4, -18, 78, 78, -18, 4 }, { 4, -17, 75, 80, -18, 4 }, + { 4, -17, 73, 82, -18, 4 }, { 4, -17, 70, 85, -18, 4 }, + { 4, -17, 68, 87, -18, 4 }, { 4, -16, 65, 89, -18, 4 }, + { 4, -16, 63, 91, -18, 4 }, { 4, -16, 60, 94, -18, 4 }, + { 3, -15, 58, 96, -18, 4 }, { 4, -15, 55, 98, -18, 4 }, + { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 }, + { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 }, + { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 }, + { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 }, + { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 }, + { 2, -8, 27, 117, -13, 3 }, { 2, -8, 25, 119, -12, 2 }, + { 2, -7, 22, 120, -11, 2 }, { 1, -6, 20, 121, -10, 2 }, + { 1, -6, 18, 122, -9, 2 }, { 1, -5, 15, 123, -8, 2 }, + { 1, -4, 13, 124, -7, 1 }, { 1, -4, 11, 125, -6, 1 }, + { 1, -3, 8, 126, -5, 1 }, { 1, -2, 6, 126, -4, 1 }, + { 0, -1, 4, 127, -3, 1 }, { 0, -1, 2, 128, -1, 0 }, + }; static int32_t do_ntap_filter(int32_t *p, int x) { int i; @@ -348,7 +238,7 @@ static int32_t do_ntap_filter(int32_t *p, int x) { } static int32_t do_cubic_filter(int32_t *p, int x) { - if (x == 0) { + if (x == 0) { return p[0]; } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) { return p[1]; @@ -359,8 +249,8 @@ static int32_t do_cubic_filter(int32_t *p, int x) { const int64_t v4 = 2 * p[0]; return (int32_t)ROUND_POWER_OF_TWO_SIGNED( (v4 << (3 * WARPEDPIXEL_PREC_BITS)) + - (v3 << (2 * WARPEDPIXEL_PREC_BITS)) + - (v2 << WARPEDPIXEL_PREC_BITS) + v1, + (v3 << (2 * WARPEDPIXEL_PREC_BITS)) + + (v2 << WARPEDPIXEL_PREC_BITS) + v1, 3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS); } } @@ -394,8 +284,8 @@ static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col, static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) { int32_t val, arr[WARPEDPIXEL_FILTER_TAPS]; int k; - int i = (int) x >> WARPEDPIXEL_PREC_BITS; - int j = (int) y >> WARPEDPIXEL_PREC_BITS; + int i = (int)x >> WARPEDPIXEL_PREC_BITS; + int j = (int)y >> WARPEDPIXEL_PREC_BITS; for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) { int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS]; get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride, @@ -413,12 +303,11 @@ static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) { static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) { int32_t val, arr[4]; int k; - int i = (int) x >> WARPEDPIXEL_PREC_BITS; - int j = (int) y >> WARPEDPIXEL_PREC_BITS; + int i = (int)x >> WARPEDPIXEL_PREC_BITS; + int j = (int)y >> WARPEDPIXEL_PREC_BITS; for (k = 0; k < 4; ++k) { int32_t arr_temp[4]; - get_subcolumn(4, ref, arr_temp, stride, - i + k - 1, j - 1); + get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1); arr[k] = do_cubic_filter(arr_temp + 1, y - (j << WARPEDPIXEL_PREC_BITS)); } val = do_cubic_filter(arr + 1, x - (i << WARPEDPIXEL_PREC_BITS)); @@ -434,23 +323,24 @@ static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) { int32_t val; val = ROUND_POWER_OF_TWO_SIGNED( ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) * - (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx + - ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[(iy + 1) * stride + ix + 1] * sy * sx, + (WARPEDPIXEL_PREC_SHIFTS - sx) + + ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx + + ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) + + ref[(iy + 1) * stride + ix + 1] * sy * sx, WARPEDPIXEL_PREC_BITS * 2); return (uint8_t)clip_pixel(val); } -static uint8_t warp_interpolate(uint8_t *ref, int x, int y, - int width, int height, int stride) { +static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width, + int height, int stride) { int ix = x >> WARPEDPIXEL_PREC_BITS; int iy = y >> WARPEDPIXEL_PREC_BITS; int sx = x - (ix << WARPEDPIXEL_PREC_BITS); int sy = y - (iy << WARPEDPIXEL_PREC_BITS); int32_t v; - if (ix < 0 && iy < 0) return ref[0]; + if (ix < 0 && iy < 0) + return ref[0]; else if (ix < 0 && iy > height - 1) return ref[(height - 1) * stride]; else if (ix > width - 1 && iy < 0) @@ -460,25 +350,24 @@ static uint8_t warp_interpolate(uint8_t *ref, int x, int y, else if (ix < 0) { v = ROUND_POWER_OF_TWO_SIGNED( ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) + - ref[(iy + 1) * stride] * sy, + ref[(iy + 1) * stride] * sy, WARPEDPIXEL_PREC_BITS); return clip_pixel(v); } else if (iy < 0) { v = ROUND_POWER_OF_TWO_SIGNED( - ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[ix + 1] * sx, + ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx, WARPEDPIXEL_PREC_BITS); return clip_pixel(v); } else if (ix > width - 1) { v = ROUND_POWER_OF_TWO_SIGNED( ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) + - ref[(iy + 1) * stride + width - 1] * sy, + ref[(iy + 1) * stride + width - 1] * sy, WARPEDPIXEL_PREC_BITS); return clip_pixel(v); } else if (iy > height - 1) { v = ROUND_POWER_OF_TWO_SIGNED( ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[(height - 1) * stride + ix + 1] * sx, + ref[(height - 1) * stride + ix + 1] * sx, WARPEDPIXEL_PREC_BITS); return clip_pixel(v); } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 && @@ -486,26 +375,21 @@ static uint8_t warp_interpolate(uint8_t *ref, int x, int y, ix < width - WARPEDPIXEL_FILTER_TAPS / 2 && iy < height - WARPEDPIXEL_FILTER_TAPS / 2) { return bi_ntap_filter(ref, x, y, stride); - } else if (ix >= 1 && iy >= 1 && - ix < width - 2 && iy < height - 2) { + } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) { return bi_cubic_filter(ref, x, y, stride); } else { return bi_linear_filter(ref, x, y, stride); } } -static void warp_plane(WarpedMotionParams *wm, - uint8_t *ref, - int width, int height, int stride, - uint8_t *pred, - int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - int x_scale, int y_scale) { +static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width, + int height, int stride, uint8_t *pred, int p_col, + int p_row, int p_width, int p_height, int p_stride, + int subsampling_x, int subsampling_y, int x_scale, + int y_scale) { int i, j; projectPointsType projectPoints = get_projectPointsType(wm->wmtype); - if (projectPoints == NULL) - return; + if (projectPoints == NULL) return; for (i = p_row; i < p_row + p_height; ++i) { for (j = p_col; j < p_col + p_width; ++j) { int in[2], out[2]; @@ -529,13 +413,12 @@ static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col, } } -static uint16_t highbd_bi_ntap_filter(uint16_t *ref, - int x, int y, int stride, +static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride, int bd) { int32_t val, arr[WARPEDPIXEL_FILTER_TAPS]; int k; - int i = (int) x >> WARPEDPIXEL_PREC_BITS; - int j = (int) y >> WARPEDPIXEL_PREC_BITS; + int i = (int)x >> WARPEDPIXEL_PREC_BITS; + int j = (int)y >> WARPEDPIXEL_PREC_BITS; for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) { int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS]; highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride, @@ -550,17 +433,15 @@ static uint16_t highbd_bi_ntap_filter(uint16_t *ref, return (uint16_t)clip_pixel_highbd(val, bd); } -static uint16_t highbd_bi_cubic_filter(uint16_t *ref, - int x, int y, int stride, +static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride, int bd) { int32_t val, arr[4]; int k; - int i = (int) x >> WARPEDPIXEL_PREC_BITS; - int j = (int) y >> WARPEDPIXEL_PREC_BITS; + int i = (int)x >> WARPEDPIXEL_PREC_BITS; + int j = (int)y >> WARPEDPIXEL_PREC_BITS; for (k = 0; k < 4; ++k) { int32_t arr_temp[4]; - highbd_get_subcolumn(4, ref, arr_temp, stride, - i + k - 1, j - 1); + highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1); arr[k] = do_cubic_filter(arr_temp + 1, y - (j << WARPEDPIXEL_PREC_BITS)); } val = do_cubic_filter(arr + 1, x - (i << WARPEDPIXEL_PREC_BITS)); @@ -568,8 +449,7 @@ static uint16_t highbd_bi_cubic_filter(uint16_t *ref, return (uint16_t)clip_pixel_highbd(val, bd); } -static uint16_t highbd_bi_linear_filter(uint16_t *ref, - int x, int y, int stride, +static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride, int bd) { const int ix = x >> WARPEDPIXEL_PREC_BITS; const int iy = y >> WARPEDPIXEL_PREC_BITS; @@ -578,25 +458,24 @@ static uint16_t highbd_bi_linear_filter(uint16_t *ref, int32_t val; val = ROUND_POWER_OF_TWO_SIGNED( ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) * - (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx + - ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[(iy + 1) * stride + ix + 1] * sy * sx, + (WARPEDPIXEL_PREC_SHIFTS - sx) + + ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx + + ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) + + ref[(iy + 1) * stride + ix + 1] * sy * sx, WARPEDPIXEL_PREC_BITS * 2); return (uint16_t)clip_pixel_highbd(val, bd); } -static uint16_t highbd_warp_interpolate(uint16_t *ref, - int x, int y, - int width, int height, int stride, - int bd) { +static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width, + int height, int stride, int bd) { int ix = x >> WARPEDPIXEL_PREC_BITS; int iy = y >> WARPEDPIXEL_PREC_BITS; int sx = x - (ix << WARPEDPIXEL_PREC_BITS); int sy = y - (iy << WARPEDPIXEL_PREC_BITS); int32_t v; - if (ix < 0 && iy < 0) return ref[0]; + if (ix < 0 && iy < 0) + return ref[0]; else if (ix < 0 && iy > height - 1) return ref[(height - 1) * stride]; else if (ix > width - 1 && iy < 0) @@ -606,25 +485,24 @@ static uint16_t highbd_warp_interpolate(uint16_t *ref, else if (ix < 0) { v = ROUND_POWER_OF_TWO_SIGNED( ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) + - ref[(iy + 1) * stride] * sy, + ref[(iy + 1) * stride] * sy, WARPEDPIXEL_PREC_BITS); return clip_pixel_highbd(v, bd); } else if (iy < 0) { v = ROUND_POWER_OF_TWO_SIGNED( - ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[ix + 1] * sx, + ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx, WARPEDPIXEL_PREC_BITS); return clip_pixel_highbd(v, bd); } else if (ix > width - 1) { v = ROUND_POWER_OF_TWO_SIGNED( ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) + - ref[(iy + 1) * stride + width - 1] * sy, + ref[(iy + 1) * stride + width - 1] * sy, WARPEDPIXEL_PREC_BITS); return clip_pixel_highbd(v, bd); } else if (iy > height - 1) { v = ROUND_POWER_OF_TWO_SIGNED( ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + - ref[(height - 1) * stride + ix + 1] * sx, + ref[(height - 1) * stride + ix + 1] * sx, WARPEDPIXEL_PREC_BITS); return clip_pixel_highbd(v, bd); } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 && @@ -632,29 +510,24 @@ static uint16_t highbd_warp_interpolate(uint16_t *ref, ix < width - WARPEDPIXEL_FILTER_TAPS / 2 && iy < height - WARPEDPIXEL_FILTER_TAPS / 2) { return highbd_bi_ntap_filter(ref, x, y, stride, bd); - } else if (ix >= 1 && iy >= 1 && - ix < width - 2 && iy < height - 2) { + } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) { return highbd_bi_cubic_filter(ref, x, y, stride, bd); } else { return highbd_bi_linear_filter(ref, x, y, stride, bd); } } -static void highbd_warp_plane(WarpedMotionParams *wm, - uint8_t *ref8, - int width, int height, int stride, - uint8_t *pred8, - int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - int x_scale, int y_scale, +static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width, + int height, int stride, uint8_t *pred8, int p_col, + int p_row, int p_width, int p_height, + int p_stride, int subsampling_x, + int subsampling_y, int x_scale, int y_scale, int bd) { int i, j; projectPointsType projectPoints = get_projectPointsType(wm->wmtype); uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - if (projectPoints == NULL) - return; + if (projectPoints == NULL) return; for (i = p_row; i < p_row + p_height; ++i) { for (j = p_col; j < p_col + p_width; ++j) { int in[2], out[2]; @@ -663,9 +536,8 @@ static void highbd_warp_plane(WarpedMotionParams *wm, projectPoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y); out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4); out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4); - pred[(j - p_col) + (i - p_row) * p_stride] = - highbd_warp_interpolate( - ref, out[0], out[1], width, height, stride, bd); + pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate( + ref, out[0], out[1], width, height, stride, bd); } } } @@ -675,25 +547,18 @@ void vp10_warp_plane(WarpedMotionParams *wm, #if CONFIG_VP9_HIGHBITDEPTH int use_hbd, int bd, #endif // CONFIG_VP9_HIGHBITDEPTH - uint8_t *ref, - int width, int height, int stride, - uint8_t *pred, - int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - int x_scale, int y_scale) { + uint8_t *ref, int width, int height, int stride, + uint8_t *pred, int p_col, int p_row, int p_width, + int p_height, int p_stride, int subsampling_x, + int subsampling_y, int x_scale, int y_scale) { #if CONFIG_VP9_HIGHBITDEPTH if (use_hbd) - highbd_warp_plane(wm, ref, width, height, stride, - pred, p_col, p_row, - p_width, p_height, p_stride, - subsampling_x, subsampling_y, + highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, + p_width, p_height, p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd); else #endif // CONFIG_VP9_HIGHBITDEPTH - warp_plane(wm, ref, width, height, stride, - pred, p_col, p_row, - p_width, p_height, p_stride, - subsampling_x, subsampling_y, - x_scale, y_scale); + warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width, + p_height, p_stride, subsampling_x, subsampling_y, x_scale, + y_scale); } diff --git a/vp10/common/warped_motion.h b/vp10/common/warped_motion.h index 72e7c716af8f180b7a63207f9128d35721c35819..a9b53a5e16335a58599ea774004678de35229d13 100644 --- a/vp10/common/warped_motion.h +++ b/vp10/common/warped_motion.h @@ -22,32 +22,32 @@ #include "vpx_dsp/vpx_dsp_common.h" // Bits of precision used for the model -#define WARPEDMODEL_PREC_BITS 8 -#define WARPEDMODEL_ROW3HOMO_PREC_BITS 12 +#define WARPEDMODEL_PREC_BITS 8 +#define WARPEDMODEL_ROW3HOMO_PREC_BITS 12 // Bits of subpel precision for warped interpolation -#define WARPEDPIXEL_PREC_BITS 6 -#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS) +#define WARPEDPIXEL_PREC_BITS 6 +#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS) // Taps for ntap filter -#define WARPEDPIXEL_FILTER_TAPS 6 +#define WARPEDPIXEL_FILTER_TAPS 6 // Precision of filter taps -#define WARPEDPIXEL_FILTER_BITS 7 +#define WARPEDPIXEL_FILTER_BITS 7 -#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS) +#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS) typedef enum { UNKNOWN_TRANSFORM = -1, - HOMOGRAPHY, // homography, 8-parameter - AFFINE, // affine, 6-parameter - ROTZOOM, // simplified affine with rotation and zoom only, 4-parameter - TRANSLATION, // translational motion 2-parameter + HOMOGRAPHY, // homography, 8-parameter + AFFINE, // affine, 6-parameter + ROTZOOM, // simplified affine with rotation and zoom only, 4-parameter + TRANSLATION, // translational motion 2-parameter TRANS_TYPES } TransformationType; // number of parameters used by each transformation in TransformationTypes -static const int n_trans_model_params[TRANS_TYPES] = {9, 6, 4, 2}; +static const int n_trans_model_params[TRANS_TYPES] = { 9, 6, 4, 2 }; typedef struct { TransformationType wmtype; @@ -58,11 +58,8 @@ void vp10_warp_plane(WarpedMotionParams *wm, #if CONFIG_VP9_HIGHBITDEPTH int use_hbd, int bd, #endif // CONFIG_VP9_HIGHBITDEPTH - uint8_t *ref, - int width, int height, int stride, - uint8_t *pred, - int p_col, int p_row, - int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, - int x_scale, int y_scale); + uint8_t *ref, int width, int height, int stride, + uint8_t *pred, int p_col, int p_row, int p_width, + int p_height, int p_stride, int subsampling_x, + int subsampling_y, int x_scale, int y_scale); #endif // VP10_COMMON_WARPED_MOTION_H diff --git a/vp10/common/x86/highbd_inv_txfm_sse4.c b/vp10/common/x86/highbd_inv_txfm_sse4.c index 349aec5baf79fecfb75ce5c8d647a123858ac215..92db49b448cc69e88a19a84d0683697d548e5085 100644 --- a/vp10/common/x86/highbd_inv_txfm_sse4.c +++ b/vp10/common/x86/highbd_inv_txfm_sse4.c @@ -9,7 +9,7 @@ */ #include <assert.h> -#include <smmintrin.h> /* SSE4.1 */ +#include <smmintrin.h> /* SSE4.1 */ #include "./vp10_rtcd.h" #include "./vpx_config.h" @@ -300,8 +300,7 @@ void vp10_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output, write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd); break; #endif // CONFIG_EXT_TX - default: - assert(0); + default: assert(0); } } @@ -620,15 +619,15 @@ static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) { } } -static void round_shift_8x8(__m128i *in , int shift) { +static void round_shift_8x8(__m128i *in, int shift) { round_shift_4x4(&in[0], shift); round_shift_4x4(&in[4], shift); round_shift_4x4(&in[8], shift); round_shift_4x4(&in[12], shift); } -static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, - __m128i res_hi, int fliplr, int bd) { +static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, __m128i res_hi, + int fliplr, int bd) { __m128i x0, x1; const __m128i zero = _mm_setzero_si128(); @@ -785,8 +784,7 @@ void vp10_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output, write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd); break; #endif // CONFIG_EXT_TX - default: - assert(0); + default: assert(0); } } @@ -1395,7 +1393,6 @@ void vp10_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output, write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd); break; #endif - default: - assert(0); + default: assert(0); } } diff --git a/vp10/common/x86/highbd_txfm_utility_sse4.h b/vp10/common/x86/highbd_txfm_utility_sse4.h index 319b50a396881a667efec2a2b1bf6921ad1ab029..f1e298dcc98a741bf4236eb96b5a19a34262abcb 100644 --- a/vp10/common/x86/highbd_txfm_utility_sse4.h +++ b/vp10/common/x86/highbd_txfm_utility_sse4.h @@ -11,78 +11,73 @@ #ifndef _HIGHBD_TXFM_UTILITY_SSE4_H #define _HIGHBD_TXFM_UTILITY_SSE4_H -#include <smmintrin.h> /* SSE4.1 */ +#include <smmintrin.h> /* SSE4.1 */ #define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \ - do { \ - __m128i u0, u1, u2, u3; \ - u0 = _mm_unpacklo_epi32(x0, x1); \ - u1 = _mm_unpackhi_epi32(x0, x1); \ - u2 = _mm_unpacklo_epi32(x2, x3); \ - u3 = _mm_unpackhi_epi32(x2, x3); \ - y0 = _mm_unpacklo_epi64(u0, u2); \ - y1 = _mm_unpackhi_epi64(u0, u2); \ - y2 = _mm_unpacklo_epi64(u1, u3); \ - y3 = _mm_unpackhi_epi64(u1, u3); \ + do { \ + __m128i u0, u1, u2, u3; \ + u0 = _mm_unpacklo_epi32(x0, x1); \ + u1 = _mm_unpackhi_epi32(x0, x1); \ + u2 = _mm_unpacklo_epi32(x2, x3); \ + u3 = _mm_unpackhi_epi32(x2, x3); \ + y0 = _mm_unpacklo_epi64(u0, u2); \ + y1 = _mm_unpackhi_epi64(u0, u2); \ + y2 = _mm_unpacklo_epi64(u1, u3); \ + y3 = _mm_unpackhi_epi64(u1, u3); \ } while (0) static INLINE void transpose_8x8(const __m128i *in, __m128i *out) { - TRANSPOSE_4X4(in[0], in[2], in[4], in[6], - out[0], out[2], out[4], out[6]); - TRANSPOSE_4X4(in[1], in[3], in[5], in[7], - out[8], out[10], out[12], out[14]); - TRANSPOSE_4X4(in[8], in[10], in[12], in[14], - out[1], out[3], out[5], out[7]); - TRANSPOSE_4X4(in[9], in[11], in[13], in[15], - out[9], out[11], out[13], out[15]); + TRANSPOSE_4X4(in[0], in[2], in[4], in[6], out[0], out[2], out[4], out[6]); + TRANSPOSE_4X4(in[1], in[3], in[5], in[7], out[8], out[10], out[12], out[14]); + TRANSPOSE_4X4(in[8], in[10], in[12], in[14], out[1], out[3], out[5], out[7]); + TRANSPOSE_4X4(in[9], in[11], in[13], in[15], out[9], out[11], out[13], + out[15]); } static INLINE void transpose_16x16(const __m128i *in, __m128i *out) { // Upper left 8x8 - TRANSPOSE_4X4(in[0], in[4], in[8], in[12], - out[0], out[4], out[8], out[12]); - TRANSPOSE_4X4(in[1], in[5], in[9], in[13], - out[16], out[20], out[24], out[28]); - TRANSPOSE_4X4(in[16], in[20], in[24], in[28], - out[1], out[5], out[9], out[13]); - TRANSPOSE_4X4(in[17], in[21], in[25], in[29], - out[17], out[21], out[25], out[29]); + TRANSPOSE_4X4(in[0], in[4], in[8], in[12], out[0], out[4], out[8], out[12]); + TRANSPOSE_4X4(in[1], in[5], in[9], in[13], out[16], out[20], out[24], + out[28]); + TRANSPOSE_4X4(in[16], in[20], in[24], in[28], out[1], out[5], out[9], + out[13]); + TRANSPOSE_4X4(in[17], in[21], in[25], in[29], out[17], out[21], out[25], + out[29]); // Upper right 8x8 - TRANSPOSE_4X4(in[2], in[6], in[10], in[14], - out[32], out[36], out[40], out[44]); - TRANSPOSE_4X4(in[3], in[7], in[11], in[15], - out[48], out[52], out[56], out[60]); - TRANSPOSE_4X4(in[18], in[22], in[26], in[30], - out[33], out[37], out[41], out[45]); - TRANSPOSE_4X4(in[19], in[23], in[27], in[31], - out[49], out[53], out[57], out[61]); + TRANSPOSE_4X4(in[2], in[6], in[10], in[14], out[32], out[36], out[40], + out[44]); + TRANSPOSE_4X4(in[3], in[7], in[11], in[15], out[48], out[52], out[56], + out[60]); + TRANSPOSE_4X4(in[18], in[22], in[26], in[30], out[33], out[37], out[41], + out[45]); + TRANSPOSE_4X4(in[19], in[23], in[27], in[31], out[49], out[53], out[57], + out[61]); // Lower left 8x8 - TRANSPOSE_4X4(in[32], in[36], in[40], in[44], - out[2], out[6], out[10], out[14]); - TRANSPOSE_4X4(in[33], in[37], in[41], in[45], - out[18], out[22], out[26], out[30]); - TRANSPOSE_4X4(in[48], in[52], in[56], in[60], - out[3], out[7], out[11], out[15]); - TRANSPOSE_4X4(in[49], in[53], in[57], in[61], - out[19], out[23], out[27], out[31]); + TRANSPOSE_4X4(in[32], in[36], in[40], in[44], out[2], out[6], out[10], + out[14]); + TRANSPOSE_4X4(in[33], in[37], in[41], in[45], out[18], out[22], out[26], + out[30]); + TRANSPOSE_4X4(in[48], in[52], in[56], in[60], out[3], out[7], out[11], + out[15]); + TRANSPOSE_4X4(in[49], in[53], in[57], in[61], out[19], out[23], out[27], + out[31]); // Lower right 8x8 - TRANSPOSE_4X4(in[34], in[38], in[42], in[46], - out[34], out[38], out[42], out[46]); - TRANSPOSE_4X4(in[35], in[39], in[43], in[47], - out[50], out[54], out[58], out[62]); - TRANSPOSE_4X4(in[50], in[54], in[58], in[62], - out[35], out[39], out[43], out[47]); - TRANSPOSE_4X4(in[51], in[55], in[59], in[63], - out[51], out[55], out[59], out[63]); + TRANSPOSE_4X4(in[34], in[38], in[42], in[46], out[34], out[38], out[42], + out[46]); + TRANSPOSE_4X4(in[35], in[39], in[43], in[47], out[50], out[54], out[58], + out[62]); + TRANSPOSE_4X4(in[50], in[54], in[58], in[62], out[35], out[39], out[43], + out[47]); + TRANSPOSE_4X4(in[51], in[55], in[59], in[63], out[51], out[55], out[59], + out[63]); } // Note: // rounding = 1 << (bit - 1) -static INLINE __m128i half_btf_sse4_1(__m128i w0, __m128i n0, - __m128i w1, __m128i n1, - __m128i rounding, int bit) { +static INLINE __m128i half_btf_sse4_1(__m128i w0, __m128i n0, __m128i w1, + __m128i n1, __m128i rounding, int bit) { __m128i x, y; x = _mm_mullo_epi32(w0, n0); diff --git a/vp10/common/x86/idct_intrin_sse2.c b/vp10/common/x86/idct_intrin_sse2.c index da60764ec289ed1735de00373cf1adb1fe7c9c73..fd9f5cfe6f506051ea916bdcd7df40da32bf5812 100644 --- a/vp10/common/x86/idct_intrin_sse2.c +++ b/vp10/common/x86/idct_intrin_sse2.c @@ -39,19 +39,21 @@ static INLINE void fliplr_16x8(__m128i in[16]) { fliplr_8x8(&in[8]); } -#define FLIPLR_16x16(in0, in1) do { \ - __m128i *tmp; \ - fliplr_16x8(in0); \ - fliplr_16x8(in1); \ - tmp = (in0); \ - (in0) = (in1); \ - (in1) = tmp; \ -} while (0) +#define FLIPLR_16x16(in0, in1) \ + do { \ + __m128i *tmp; \ + fliplr_16x8(in0); \ + fliplr_16x8(in1); \ + tmp = (in0); \ + (in0) = (in1); \ + (in1) = tmp; \ + } while (0) -#define FLIPUD_PTR(dest, stride, size) do { \ - (dest) = (dest) + ((size) - 1) * (stride); \ - (stride) = - (stride); \ -} while (0) +#define FLIPUD_PTR(dest, stride, size) \ + do { \ + (dest) = (dest) + ((size)-1) * (stride); \ + (stride) = -(stride); \ + } while (0) #endif void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, @@ -108,9 +110,7 @@ void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, FLIPUD_PTR(dest, stride, 4); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } // Final round and shift @@ -208,9 +208,7 @@ void vp10_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, FLIPUD_PTR(dest, stride, 8); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } // Final rounding and shift @@ -297,9 +295,7 @@ void vp10_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, FLIPUD_PTR(dest, stride, 16); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: assert(0); break; } write_buffer_8x16(dest, in0, stride); diff --git a/vp10/common/x86/reconintra_sse4.c b/vp10/common/x86/reconintra_sse4.c index 7399de2b0f95dfa8270364050d03948b1f5c202f..d8ce329aab2722a3784a2a1a583ecacded1f8111 100644 --- a/vp10/common/x86/reconintra_sse4.c +++ b/vp10/common/x86/reconintra_sse4.c @@ -85,7 +85,7 @@ static INLINE void AddPixelsLarge(const uint8_t *above, const uint8_t *left, } static INLINE int GetMeanValue16x16(const uint8_t *above, const uint8_t *left, - __m128i *params) { + __m128i *params) { const __m128i zero = _mm_setzero_si128(); __m128i sum_vector, u; uint16_t sum_value; @@ -132,24 +132,15 @@ static INLINE int GetMeanValue32x32(const uint8_t *above, const uint8_t *left, // params[4] : mean value, 4 int32_t repetition // static INLINE int CalcRefPixelsMeanValue(const uint8_t *above, - const uint8_t *left, - int bs, __m128i *params) { + const uint8_t *left, int bs, + __m128i *params) { int meanValue = 0; switch (bs) { - case 4: - meanValue = GetMeanValue4x4(above, left, params); - break; - case 8: - meanValue = GetMeanValue8x8(above, left, params); - break; - case 16: - meanValue = GetMeanValue16x16(above, left, params); - break; - case 32: - meanValue = GetMeanValue32x32(above, left, params); - break; - default: - assert(0); + case 4: meanValue = GetMeanValue4x4(above, left, params); break; + case 8: meanValue = GetMeanValue8x8(above, left, params); break; + case 16: meanValue = GetMeanValue16x16(above, left, params); break; + case 32: meanValue = GetMeanValue32x32(above, left, params); break; + default: assert(0); } return meanValue; } @@ -158,8 +149,9 @@ static INLINE int CalcRefPixelsMeanValue(const uint8_t *above, // params[0-3] : 4-tap filter coefficients (int32_t per coefficient) // static INLINE void GetIntraFilterParams(int bs, int mode, __m128i *params) { - const TX_SIZE tx_size = (bs == 32) ? TX_32X32 : - ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); + const TX_SIZE tx_size = + (bs == 32) ? TX_32X32 + : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4))); // c0 params[0] = _mm_set_epi32(filter_intra_taps_4[tx_size][mode][0], filter_intra_taps_4[tx_size][mode][0], @@ -321,20 +313,11 @@ static void SavePred32x32(int *pred, const __m128i *mean, uint8_t *dst, static void SavePrediction(int *pred, const __m128i *mean, int bs, uint8_t *dst, ptrdiff_t stride) { switch (bs) { - case 4: - SavePred4x4(pred, mean, dst, stride); - break; - case 8: - SavePred8x8(pred, mean, dst, stride); - break; - case 16: - SavePred16x16(pred, mean, dst, stride); - break; - case 32: - SavePred32x32(pred, mean, dst, stride); - break; - default: - assert(0); + case 4: SavePred4x4(pred, mean, dst, stride); break; + case 8: SavePred8x8(pred, mean, dst, stride); break; + case 16: SavePred16x16(pred, mean, dst, stride); break; + case 32: SavePred32x32(pred, mean, dst, stride); break; + default: assert(0); } } @@ -452,7 +435,8 @@ static void ProduceOnePixels(__m128i *p, const __m128i *prm, int *pred, } static ProducePixelsFunc prodPixelsFuncTab[4] = { - ProduceOnePixels, ProduceTwoPixels, ProduceThreePixels, ProduceFourPixels}; + ProduceOnePixels, ProduceTwoPixels, ProduceThreePixels, ProduceFourPixels +}; static void ProducePixels(int *pred, const __m128i *prm, int remain) { __m128i p[3]; @@ -646,7 +630,7 @@ static INLINE int HighbdGetMeanValue8x8(const uint16_t *above, // Process 16 pixels above and left, 10-bit depth // Add to the last 8 pixels sum static INLINE void AddPixels10bit(const uint16_t *above, const uint16_t *left, - __m128i *sum) { + __m128i *sum) { __m128i a = _mm_loadu_si128((const __m128i *)above); __m128i l = _mm_loadu_si128((const __m128i *)left); sum[0] = _mm_add_epi16(a, l); @@ -660,7 +644,7 @@ static INLINE void AddPixels10bit(const uint16_t *above, const uint16_t *left, // Process 16 pixels above and left, 12-bit depth // Add to the last 8 pixels sum static INLINE void AddPixels12bit(const uint16_t *above, const uint16_t *left, - __m128i *sum) { + __m128i *sum) { __m128i a = _mm_loadu_si128((const __m128i *)above); __m128i l = _mm_loadu_si128((const __m128i *)left); const __m128i zero = _mm_setzero_si128(); @@ -762,20 +746,15 @@ static INLINE int HighbdCalcRefPixelsMeanValue(const uint16_t *above, const int bd, __m128i *params) { int meanValue = 0; switch (bs) { - case 4: - meanValue = HighbdGetMeanValue4x4(above, left, bd, params); - break; - case 8: - meanValue = HighbdGetMeanValue8x8(above, left, bd, params); - break; + case 4: meanValue = HighbdGetMeanValue4x4(above, left, bd, params); break; + case 8: meanValue = HighbdGetMeanValue8x8(above, left, bd, params); break; case 16: meanValue = HighbdGetMeanValue16x16(above, left, bd, params); break; case 32: meanValue = HighbdGetMeanValue32x32(above, left, bd, params); break; - default: - assert(0); + default: assert(0); } return meanValue; } @@ -784,10 +763,9 @@ static INLINE int HighbdCalcRefPixelsMeanValue(const uint16_t *above, // At column index c, the remaining pixels are R = 2 * bs + 1 - r - c // the number of pixels to produce is R - 2 = 2 * bs - r - c - 1 static void HighbdGeneratePrediction(const uint16_t *above, - const uint16_t *left, - const int bs, const int bd, - const __m128i *prm, int meanValue, - uint16_t *dst, + const uint16_t *left, const int bs, + const int bd, const __m128i *prm, + int meanValue, uint16_t *dst, ptrdiff_t stride) { int pred[33][65]; int r, c, colBound; diff --git a/vp10/common/x86/vp10_convolve_filters_ssse3.c b/vp10/common/x86/vp10_convolve_filters_ssse3.c index 2f7b3c7be3d0153f7dd10ac23e55bf3e38ea3c2e..7cda988f025b7c3d19951efe4c0fe2c546f3b975 100644 --- a/vp10/common/x86/vp10_convolve_filters_ssse3.c +++ b/vp10/common/x86/vp10_convolve_filters_ssse3.c @@ -14,94 +14,64 @@ DECLARE_ALIGNED(16, const int8_t, sub_pel_filters_10sharp_signal_dir[15][2][16]) = { { - { 0, 0, -1, 3, -6, 127, 8, -4, - 2, -1, 0, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 0, -1, 3, -6, 127, - 8, -4, 2, -1, 0, 0, 0, 0, }, + { 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0 }, }, { - { 0, 1, -2, 5, -12, 124, 18, -7, - 3, -2, 0, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -2, 5, -12, 124, - 18, -7, 3, -2, 0, 0, 0, 0, }, + { 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0 }, }, { - { 0, 1, -3, 7, -17, 119, 28, -11, - 5, -2, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -3, 7, -17, 119, - 28, -11, 5, -2, 1, 0, 0, 0, }, + { 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0 }, }, { - { 0, 1, -4, 8, -20, 114, 38, -14, - 7, -3, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -4, 8, -20, 114, - 38, -14, 7, -3, 1, 0, 0, 0, }, + { 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0 }, }, { - { 0, 1, -4, 9, -22, 107, 49, -17, - 8, -4, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -4, 9, -22, 107, - 49, -17, 8, -4, 1, 0, 0, 0, }, + { 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0 }, }, { - { 0, 2, -5, 10, -24, 99, 59, -20, - 9, -4, 2, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 2, -5, 10, -24, 99, - 59, -20, 9, -4, 2, 0, 0, 0, }, + { 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0 }, }, { - { 0, 2, -5, 10, -24, 90, 70, -22, - 10, -5, 2, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 2, -5, 10, -24, 90, - 70, -22, 10, -5, 2, 0, 0, 0, }, + { 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0 }, }, { - { 0, 2, -5, 10, -23, 80, 80, -23, - 10, -5, 2, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 2, -5, 10, -23, 80, - 80, -23, 10, -5, 2, 0, 0, 0, }, + { 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0 }, }, { - { 0, 2, -5, 10, -22, 70, 90, -24, - 10, -5, 2, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 2, -5, 10, -22, 70, - 90, -24, 10, -5, 2, 0, 0, 0, }, + { 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0 }, }, { - { 0, 2, -4, 9, -20, 59, 99, -24, - 10, -5, 2, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 2, -4, 9, -20, 59, - 99, -24, 10, -5, 2, 0, 0, 0, }, + { 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0 }, }, { - { 0, 1, -4, 8, -17, 49, 107, -22, - 9, -4, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -4, 8, -17, 49, - 107, -22, 9, -4, 1, 0, 0, 0, }, + { 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0 }, }, { - { 0, 1, -3, 7, -14, 38, 114, -20, - 8, -4, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -3, 7, -14, 38, - 114, -20, 8, -4, 1, 0, 0, 0, }, + { 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0 }, }, { - { 0, 1, -2, 5, -11, 28, 119, -17, - 7, -3, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -2, 5, -11, 28, - 119, -17, 7, -3, 1, 0, 0, 0, }, + { 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0 }, }, { - { 0, 0, -2, 3, -7, 18, 124, -12, - 5, -2, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 0, -2, 3, -7, 18, - 124, -12, 5, -2, 1, 0, 0, 0, }, + { 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0 }, }, { - { 0, 0, -1, 2, -4, 8, 127, -6, - 3, -1, 0, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 0, -1, 2, -4, 8, - 127, -6, 3, -1, 0, 0, 0, 0, }, + { 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0 }, }, }; #endif @@ -109,214 +79,150 @@ DECLARE_ALIGNED(16, const int8_t, DECLARE_ALIGNED(16, const int8_t, sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = { { - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 127, -6, 127, -6, 127, -6, 127, - -6, 127, -6, 127, -6, 127, -6, 127, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 5, -2, 5, -2, 5, -2, 5, - -2, 5, -2, 5, -2, 5, -2, 5, }, - {-12, 124, -12, 124, -12, 124, -12, 124, - -12, 124, -12, 124, -12, 124, -12, 124, }, - { 18, -7, 18, -7, 18, -7, 18, -7, - 18, -7, 18, -7, 18, -7, 18, -7, }, - { 3, -2, 3, -2, 3, -2, 3, -2, - 3, -2, 3, -2, 3, -2, 3, -2, }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -3, 7, -3, 7, -3, 7, -3, 7, - -3, 7, -3, 7, -3, 7, -3, 7, }, - {-17, 119, -17, 119, -17, 119, -17, 119, - -17, 119, -17, 119, -17, 119, -17, 119, }, - { 28, -11, 28, -11, 28, -11, 28, -11, - 28, -11, 28, -11, 28, -11, 28, -11, }, - { 5, -2, 5, -2, 5, -2, 5, -2, - 5, -2, 5, -2, 5, -2, 5, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {-20, 114, -20, 114, -20, 114, -20, 114, - -20, 114, -20, 114, -20, 114, -20, 114, }, - { 38, -14, 38, -14, 38, -14, 38, -14, - 38, -14, 38, -14, 38, -14, 38, -14, }, - { 7, -3, 7, -3, 7, -3, 7, -3, - 7, -3, 7, -3, 7, -3, 7, -3, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -4, 9, -4, 9, -4, 9, -4, 9, - -4, 9, -4, 9, -4, 9, -4, 9, }, - {-22, 107, -22, 107, -22, 107, -22, 107, - -22, 107, -22, 107, -22, 107, -22, 107, }, - { 49, -17, 49, -17, 49, -17, 49, -17, - 49, -17, 49, -17, 49, -17, 49, -17, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 2, 0, 2, 0, 2, 0, 2, - 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, - -5, 10, -5, 10, -5, 10, -5, 10, }, - {-24, 99, -24, 99, -24, 99, -24, 99, - -24, 99, -24, 99, -24, 99, -24, 99, }, - { 59, -20, 59, -20, 59, -20, 59, -20, - 59, -20, 59, -20, 59, -20, 59, -20, }, - { 9, -4, 9, -4, 9, -4, 9, -4, - 9, -4, 9, -4, 9, -4, 9, -4, }, - { 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 0, 2, 0, }, - }, - { - { 0, 2, 0, 2, 0, 2, 0, 2, - 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, - -5, 10, -5, 10, -5, 10, -5, 10, }, - {-24, 90, -24, 90, -24, 90, -24, 90, - -24, 90, -24, 90, -24, 90, -24, 90, }, - { 70, -22, 70, -22, 70, -22, 70, -22, - 70, -22, 70, -22, 70, -22, 70, -22, }, - { 10, -5, 10, -5, 10, -5, 10, -5, - 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 0, 2, 0, }, - }, - { - { 0, 2, 0, 2, 0, 2, 0, 2, - 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, - -5, 10, -5, 10, -5, 10, -5, 10, }, - {-23, 80, -23, 80, -23, 80, -23, 80, - -23, 80, -23, 80, -23, 80, -23, 80, }, - { 80, -23, 80, -23, 80, -23, 80, -23, - 80, -23, 80, -23, 80, -23, 80, -23, }, - { 10, -5, 10, -5, 10, -5, 10, -5, - 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 0, 2, 0, }, - }, - { - { 0, 2, 0, 2, 0, 2, 0, 2, - 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, - -5, 10, -5, 10, -5, 10, -5, 10, }, - {-22, 70, -22, 70, -22, 70, -22, 70, - -22, 70, -22, 70, -22, 70, -22, 70, }, - { 90, -24, 90, -24, 90, -24, 90, -24, - 90, -24, 90, -24, 90, -24, 90, -24, }, - { 10, -5, 10, -5, 10, -5, 10, -5, - 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 0, 2, 0, }, - }, - { - { 0, 2, 0, 2, 0, 2, 0, 2, - 0, 2, 0, 2, 0, 2, 0, 2, }, - { -4, 9, -4, 9, -4, 9, -4, 9, - -4, 9, -4, 9, -4, 9, -4, 9, }, - {-20, 59, -20, 59, -20, 59, -20, 59, - -20, 59, -20, 59, -20, 59, -20, 59, }, - { 99, -24, 99, -24, 99, -24, 99, -24, - 99, -24, 99, -24, 99, -24, 99, -24, }, - { 10, -5, 10, -5, 10, -5, 10, -5, - 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 0, 2, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {-17, 49, -17, 49, -17, 49, -17, 49, - -17, 49, -17, 49, -17, 49, -17, 49, }, - {107, -22, 107, -22, 107, -22, 107, -22, - 107, -22, 107, -22, 107, -22, 107, -22, }, - { 9, -4, 9, -4, 9, -4, 9, -4, - 9, -4, 9, -4, 9, -4, 9, -4, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -3, 7, -3, 7, -3, 7, -3, 7, - -3, 7, -3, 7, -3, 7, -3, 7, }, - {-14, 38, -14, 38, -14, 38, -14, 38, - -14, 38, -14, 38, -14, 38, -14, 38, }, - {114, -20, 114, -20, 114, -20, 114, -20, - 114, -20, 114, -20, 114, -20, 114, -20, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 5, -2, 5, -2, 5, -2, 5, - -2, 5, -2, 5, -2, 5, -2, 5, }, - {-11, 28, -11, 28, -11, 28, -11, 28, - -11, 28, -11, 28, -11, 28, -11, 28, }, - {119, -17, 119, -17, 119, -17, 119, -17, - 119, -17, 119, -17, 119, -17, 119, -17, }, - { 7, -3, 7, -3, 7, -3, 7, -3, - 7, -3, 7, -3, 7, -3, 7, -3, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - { -2, 3, -2, 3, -2, 3, -2, 3, - -2, 3, -2, 3, -2, 3, -2, 3, }, - { -7, 18, -7, 18, -7, 18, -7, 18, - -7, 18, -7, 18, -7, 18, -7, 18, }, - {124, -12, 124, -12, 124, -12, 124, -12, - 124, -12, 124, -12, 124, -12, 124, -12, }, - { 5, -2, 5, -2, 5, -2, 5, -2, - 5, -2, 5, -2, 5, -2, 5, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {127, -6, 127, -6, 127, -6, 127, -6, - 127, -6, 127, -6, 127, -6, 127, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, + 127 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5 }, + { -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, + -12, 124 }, + { 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7 }, + { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7 }, + { -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, + -17, 119 }, + { 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, + -11 }, + { 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, + -20, 114 }, + { 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, + -14 }, + { 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9 }, + { -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, + -22, 107 }, + { 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, + -17 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 }, + { -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, + 99 }, + { 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, + -20 }, + { 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4 }, + { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 }, + }, + { + { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 }, + { -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, + 90 }, + { 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, + -22 }, + { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 }, + }, + { + { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 }, + { -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, + 80 }, + { 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, + -23 }, + { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 }, + }, + { + { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 }, + { -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, + 70 }, + { 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, + -24 }, + { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 }, + }, + { + { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 }, + { -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9 }, + { -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, + 59 }, + { 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, + -24 }, + { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, + 49 }, + { 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, + 107, -22 }, + { 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7 }, + { -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, + 38 }, + { 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, + 114, -20 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5 }, + { -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, + 28 }, + { 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, + 119, -17 }, + { 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 }, + { -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18 }, + { 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, + 124, -12 }, + { 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, + -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }, }; #endif @@ -324,94 +230,64 @@ DECLARE_ALIGNED(16, const int8_t, DECLARE_ALIGNED(16, const int8_t, sub_pel_filters_12sharp_signal_dir[15][2][16]) = { { - { 0, 1, -2, 3, -7, 127, 8, -4, - 2, -1, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -2, 3, -7, 127, - 8, -4, 2, -1, 1, 0, 0, 0, }, + { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0 }, }, { - { -1, 2, -3, 6, -13, 124, 18, -8, - 4, -2, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -3, 6, -13, 124, - 18, -8, 4, -2, 2, -1, 0, 0, }, + { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0 }, }, { - { -1, 3, -4, 8, -18, 120, 28, -12, - 7, -4, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -4, 8, -18, 120, - 28, -12, 7, -4, 2, -1, 0, 0, }, + { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0 }, }, { - { -1, 3, -6, 10, -21, 115, 38, -15, - 8, -5, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -6, 10, -21, 115, - 38, -15, 8, -5, 3, -1, 0, 0, }, + { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0 }, }, { - { -2, 4, -6, 12, -24, 108, 49, -18, - 10, -6, 3, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 4, -6, 12, -24, 108, - 49, -18, 10, -6, 3, -2, 0, 0, }, + { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0 }, }, { - { -2, 4, -7, 13, -25, 100, 60, -21, - 11, -7, 4, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 4, -7, 13, -25, 100, - 60, -21, 11, -7, 4, -2, 0, 0, }, + { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0 }, }, { - { -2, 4, -7, 13, -26, 91, 71, -24, - 13, -7, 4, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 4, -7, 13, -26, 91, - 71, -24, 13, -7, 4, -2, 0, 0, }, + { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0 }, }, { - { -2, 4, -7, 13, -25, 81, 81, -25, - 13, -7, 4, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 4, -7, 13, -25, 81, - 81, -25, 13, -7, 4, -2, 0, 0, }, + { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0 }, }, { - { -2, 4, -7, 13, -24, 71, 91, -26, - 13, -7, 4, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 4, -7, 13, -24, 71, - 91, -26, 13, -7, 4, -2, 0, 0, }, + { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0 }, }, { - { -2, 4, -7, 11, -21, 60, 100, -25, - 13, -7, 4, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 4, -7, 11, -21, 60, - 100, -25, 13, -7, 4, -2, 0, 0, }, + { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0 }, }, { - { -2, 3, -6, 10, -18, 49, 108, -24, - 12, -6, 4, -2, 0, 0, 0, 0, }, - { 0, 0, -2, 3, -6, 10, -18, 49, - 108, -24, 12, -6, 4, -2, 0, 0, }, + { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0, 0, 0 }, + { 0, 0, -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0 }, }, { - { -1, 3, -5, 8, -15, 38, 115, -21, - 10, -6, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -5, 8, -15, 38, - 115, -21, 10, -6, 3, -1, 0, 0, }, + { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0 }, }, { - { -1, 2, -4, 7, -12, 28, 120, -18, - 8, -4, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -4, 7, -12, 28, - 120, -18, 8, -4, 3, -1, 0, 0, }, + { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0 }, }, { - { -1, 2, -2, 4, -8, 18, 124, -13, - 6, -3, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -2, 4, -8, 18, - 124, -13, 6, -3, 2, -1, 0, 0, }, + { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0 }, }, { - { 0, 1, -1, 2, -4, 8, 127, -7, - 3, -2, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -1, 2, -4, 8, - 127, -7, 3, -2, 1, 0, 0, 0, }, + { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0 }, }, }; #endif @@ -419,214 +295,150 @@ DECLARE_ALIGNED(16, const int8_t, DECLARE_ALIGNED(16, const int8_t, sub_pel_filters_12sharp_ver_signal_dir[15][6][16]) = { { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 3, -2, 3, -2, 3, -2, 3, - -2, 3, -2, 3, -2, 3, -2, 3, }, - { -7, 127, -7, 127, -7, 127, -7, 127, - -7, 127, -7, 127, -7, 127, -7, 127, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -3, 6, -3, 6, -3, 6, -3, 6, - -3, 6, -3, 6, -3, 6, -3, 6, }, - {-13, 124, -13, 124, -13, 124, -13, 124, - -13, 124, -13, 124, -13, 124, -13, 124, }, - { 18, -8, 18, -8, 18, -8, 18, -8, - 18, -8, 18, -8, 18, -8, 18, -8, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {-18, 120, -18, 120, -18, 120, -18, 120, - -18, 120, -18, 120, -18, 120, -18, 120, }, - { 28, -12, 28, -12, 28, -12, 28, -12, - 28, -12, 28, -12, 28, -12, 28, -12, }, - { 7, -4, 7, -4, 7, -4, 7, -4, - 7, -4, 7, -4, 7, -4, 7, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 10, -6, 10, -6, 10, -6, 10, - -6, 10, -6, 10, -6, 10, -6, 10, }, - {-21, 115, -21, 115, -21, 115, -21, 115, - -21, 115, -21, 115, -21, 115, -21, 115, }, - { 38, -15, 38, -15, 38, -15, 38, -15, - 38, -15, 38, -15, 38, -15, 38, -15, }, - { 8, -5, 8, -5, 8, -5, 8, -5, - 8, -5, 8, -5, 8, -5, 8, -5, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -6, 12, -6, 12, -6, 12, -6, 12, - -6, 12, -6, 12, -6, 12, -6, 12, }, - {-24, 108, -24, 108, -24, 108, -24, 108, - -24, 108, -24, 108, -24, 108, -24, 108, }, - { 49, -18, 49, -18, 49, -18, 49, -18, - 49, -18, 49, -18, 49, -18, 49, -18, }, - { 10, -6, 10, -6, 10, -6, 10, -6, - 10, -6, 10, -6, 10, -6, 10, -6, }, - { 3, -2, 3, -2, 3, -2, 3, -2, - 3, -2, 3, -2, 3, -2, 3, -2, }, - }, - { - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, - -7, 13, -7, 13, -7, 13, -7, 13, }, - {-25, 100, -25, 100, -25, 100, -25, 100, - -25, 100, -25, 100, -25, 100, -25, 100, }, - { 60, -21, 60, -21, 60, -21, 60, -21, - 60, -21, 60, -21, 60, -21, 60, -21, }, - { 11, -7, 11, -7, 11, -7, 11, -7, - 11, -7, 11, -7, 11, -7, 11, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - }, - { - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, - -7, 13, -7, 13, -7, 13, -7, 13, }, - {-26, 91, -26, 91, -26, 91, -26, 91, - -26, 91, -26, 91, -26, 91, -26, 91, }, - { 71, -24, 71, -24, 71, -24, 71, -24, - 71, -24, 71, -24, 71, -24, 71, -24, }, - { 13, -7, 13, -7, 13, -7, 13, -7, - 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - }, - { - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, - -7, 13, -7, 13, -7, 13, -7, 13, }, - {-25, 81, -25, 81, -25, 81, -25, 81, - -25, 81, -25, 81, -25, 81, -25, 81, }, - { 81, -25, 81, -25, 81, -25, 81, -25, - 81, -25, 81, -25, 81, -25, 81, -25, }, - { 13, -7, 13, -7, 13, -7, 13, -7, - 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - }, - { - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, - -7, 13, -7, 13, -7, 13, -7, 13, }, - {-24, 71, -24, 71, -24, 71, -24, 71, - -24, 71, -24, 71, -24, 71, -24, 71, }, - { 91, -26, 91, -26, 91, -26, 91, -26, - 91, -26, 91, -26, 91, -26, 91, -26, }, - { 13, -7, 13, -7, 13, -7, 13, -7, - 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - }, - { - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 11, -7, 11, -7, 11, -7, 11, - -7, 11, -7, 11, -7, 11, -7, 11, }, - {-21, 60, -21, 60, -21, 60, -21, 60, - -21, 60, -21, 60, -21, 60, -21, 60, }, - {100, -25, 100, -25, 100, -25, 100, -25, - 100, -25, 100, -25, 100, -25, 100, -25, }, - { 13, -7, 13, -7, 13, -7, 13, -7, - 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - }, - { - { -2, 3, -2, 3, -2, 3, -2, 3, - -2, 3, -2, 3, -2, 3, -2, 3, }, - { -6, 10, -6, 10, -6, 10, -6, 10, - -6, 10, -6, 10, -6, 10, -6, 10, }, - {-18, 49, -18, 49, -18, 49, -18, 49, - -18, 49, -18, 49, -18, 49, -18, 49, }, - {108, -24, 108, -24, 108, -24, 108, -24, - 108, -24, 108, -24, 108, -24, 108, -24, }, - { 12, -6, 12, -6, 12, -6, 12, -6, - 12, -6, 12, -6, 12, -6, 12, -6, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -5, 8, -5, 8, -5, 8, -5, 8, - -5, 8, -5, 8, -5, 8, -5, 8, }, - {-15, 38, -15, 38, -15, 38, -15, 38, - -15, 38, -15, 38, -15, 38, -15, 38, }, - {115, -21, 115, -21, 115, -21, 115, -21, - 115, -21, 115, -21, 115, -21, 115, -21, }, - { 10, -6, 10, -6, 10, -6, 10, -6, - 10, -6, 10, -6, 10, -6, 10, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 7, -4, 7, -4, 7, -4, 7, - -4, 7, -4, 7, -4, 7, -4, 7, }, - {-12, 28, -12, 28, -12, 28, -12, 28, - -12, 28, -12, 28, -12, 28, -12, 28, }, - {120, -18, 120, -18, 120, -18, 120, -18, - 120, -18, 120, -18, 120, -18, 120, -18, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -8, 18, -8, 18, -8, 18, -8, 18, - -8, 18, -8, 18, -8, 18, -8, 18, }, - {124, -13, 124, -13, 124, -13, 124, -13, - 124, -13, 124, -13, 124, -13, 124, -13, }, - { 6, -3, 6, -3, 6, -3, 6, -3, - 6, -3, 6, -3, 6, -3, 6, -3, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {127, -7, 127, -7, 127, -7, 127, -7, - 127, -7, 127, -7, 127, -7, 127, -7, }, - { 3, -2, 3, -2, 3, -2, 3, -2, - 3, -2, 3, -2, 3, -2, 3, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 }, + { -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, + 127 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6 }, + { -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, + -13, 124 }, + { 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, + -18, 120 }, + { 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, + -12 }, + { 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10 }, + { -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, + -21, 115 }, + { 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, + -15 }, + { 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 }, + { -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, + -24, 108 }, + { 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, + -18 }, + { 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6 }, + { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 }, + }, + { + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 }, + { -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, + -25, 100 }, + { 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, + -21 }, + { 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + }, + { + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 }, + { -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, + 91 }, + { 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, + -24 }, + { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + }, + { + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 }, + { -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, + 81 }, + { 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, + -25 }, + { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + }, + { + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 }, + { -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, + 71 }, + { 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, + -26 }, + { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + }, + { + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11 }, + { -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, + 60 }, + { 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, + 100, -25 }, + { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + }, + { + { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 }, + { -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10 }, + { -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, + 49 }, + { 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, + 108, -24 }, + { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8 }, + { -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, + 38 }, + { 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, + 115, -21 }, + { 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7 }, + { -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, + 28 }, + { 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, + 120, -18 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18 }, + { 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, + 124, -13 }, + { 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, + -7 }, + { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, }, }; #endif @@ -634,94 +446,64 @@ DECLARE_ALIGNED(16, const int8_t, DECLARE_ALIGNED(16, const int8_t, sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = { { - { 0, 1, -1, 3, -7, 127, 8, -4, - 2, -1, 0, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -1, 3, -7, 127, - 8, -4, 2, -1, 0, 0, 0, 0, }, + { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0 }, }, { - { 0, 1, -3, 5, -12, 124, 18, -8, - 4, -2, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -3, 5, -12, 124, - 18, -8, 4, -2, 1, 0, 0, 0, }, + { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0 }, }, { - { -1, 2, -4, 8, -17, 120, 28, -11, - 6, -3, 1, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -4, 8, -17, 120, - 28, -11, 6, -3, 1, -1, 0, 0, }, + { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0 }, }, { - { -1, 2, -4, 10, -21, 114, 38, -15, - 8, -4, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -4, 10, -21, 114, - 38, -15, 8, -4, 2, -1, 0, 0, }, + { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0 }, }, { - { -1, 3, -5, 11, -23, 107, 49, -18, - 9, -5, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -5, 11, -23, 107, - 49, -18, 9, -5, 2, -1, 0, 0, }, + { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0 }, }, { - { -1, 3, -6, 12, -25, 99, 60, -21, - 11, -6, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -6, 12, -25, 99, - 60, -21, 11, -6, 3, -1, 0, 0, }, + { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0 }, }, { - { -1, 3, -6, 12, -25, 90, 70, -23, - 12, -6, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -6, 12, -25, 90, - 70, -23, 12, -6, 3, -1, 0, 0, }, + { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0 }, }, { - { -1, 3, -6, 12, -24, 80, 80, -24, - 12, -6, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -6, 12, -24, 80, - 80, -24, 12, -6, 3, -1, 0, 0, }, + { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0 }, }, { - { -1, 3, -6, 12, -23, 70, 90, -25, - 12, -6, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -6, 12, -23, 70, - 90, -25, 12, -6, 3, -1, 0, 0, }, + { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0 }, }, { - { -1, 3, -6, 11, -21, 60, 99, -25, - 12, -6, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 3, -6, 11, -21, 60, - 99, -25, 12, -6, 3, -1, 0, 0, }, + { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0 }, }, { - { -1, 2, -5, 9, -18, 49, 107, -23, - 11, -5, 3, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -5, 9, -18, 49, - 107, -23, 11, -5, 3, -1, 0, 0, }, + { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0 }, }, { - { -1, 2, -4, 8, -15, 38, 114, -21, - 10, -4, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 2, -4, 8, -15, 38, - 114, -21, 10, -4, 2, -1, 0, 0, }, + { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0 }, }, { - { -1, 1, -3, 6, -11, 28, 120, -17, - 8, -4, 2, -1, 0, 0, 0, 0, }, - { 0, 0, -1, 1, -3, 6, -11, 28, - 120, -17, 8, -4, 2, -1, 0, 0, }, + { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0, 0, 0 }, + { 0, 0, -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0 }, }, { - { 0, 1, -2, 4, -8, 18, 124, -12, - 5, -3, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 1, -2, 4, -8, 18, - 124, -12, 5, -3, 1, 0, 0, 0, }, + { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0 }, }, { - { 0, 0, -1, 2, -4, 8, 127, -7, - 3, -1, 1, 0, 0, 0, 0, 0, }, - { 0, 0, 0, 0, -1, 2, -4, 8, - 127, -7, 3, -1, 1, 0, 0, 0, }, + { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0 }, }, }; #endif @@ -729,214 +511,150 @@ DECLARE_ALIGNED(16, const int8_t, DECLARE_ALIGNED(16, const int8_t, sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]) = { { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -7, 127, -7, 127, -7, 127, -7, 127, - -7, 127, -7, 127, -7, 127, -7, 127, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -3, 5, -3, 5, -3, 5, -3, 5, - -3, 5, -3, 5, -3, 5, -3, 5, }, - {-12, 124, -12, 124, -12, 124, -12, 124, - -12, 124, -12, 124, -12, 124, -12, 124, }, - { 18, -8, 18, -8, 18, -8, 18, -8, - 18, -8, 18, -8, 18, -8, 18, -8, }, - { 4, -2, 4, -2, 4, -2, 4, -2, - 4, -2, 4, -2, 4, -2, 4, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {-17, 120, -17, 120, -17, 120, -17, 120, - -17, 120, -17, 120, -17, 120, -17, 120, }, - { 28, -11, 28, -11, 28, -11, 28, -11, - 28, -11, 28, -11, 28, -11, 28, -11, }, - { 6, -3, 6, -3, 6, -3, 6, -3, - 6, -3, 6, -3, 6, -3, 6, -3, }, - { 1, -1, 1, -1, 1, -1, 1, -1, - 1, -1, 1, -1, 1, -1, 1, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 10, -4, 10, -4, 10, -4, 10, - -4, 10, -4, 10, -4, 10, -4, 10, }, - {-21, 114, -21, 114, -21, 114, -21, 114, - -21, 114, -21, 114, -21, 114, -21, 114, }, - { 38, -15, 38, -15, 38, -15, 38, -15, - 38, -15, 38, -15, 38, -15, 38, -15, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -5, 11, -5, 11, -5, 11, -5, 11, - -5, 11, -5, 11, -5, 11, -5, 11, }, - {-23, 107, -23, 107, -23, 107, -23, 107, - -23, 107, -23, 107, -23, 107, -23, 107, }, - { 49, -18, 49, -18, 49, -18, 49, -18, - 49, -18, 49, -18, 49, -18, 49, -18, }, - { 9, -5, 9, -5, 9, -5, 9, -5, - 9, -5, 9, -5, 9, -5, 9, -5, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, - -6, 12, -6, 12, -6, 12, -6, 12, }, - {-25, 99, -25, 99, -25, 99, -25, 99, - -25, 99, -25, 99, -25, 99, -25, 99, }, - { 60, -21, 60, -21, 60, -21, 60, -21, - 60, -21, 60, -21, 60, -21, 60, -21, }, - { 11, -6, 11, -6, 11, -6, 11, -6, - 11, -6, 11, -6, 11, -6, 11, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, - -6, 12, -6, 12, -6, 12, -6, 12, }, - {-25, 90, -25, 90, -25, 90, -25, 90, - -25, 90, -25, 90, -25, 90, -25, 90, }, - { 70, -23, 70, -23, 70, -23, 70, -23, - 70, -23, 70, -23, 70, -23, 70, -23, }, - { 12, -6, 12, -6, 12, -6, 12, -6, - 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, - -6, 12, -6, 12, -6, 12, -6, 12, }, - {-24, 80, -24, 80, -24, 80, -24, 80, - -24, 80, -24, 80, -24, 80, -24, 80, }, - { 80, -24, 80, -24, 80, -24, 80, -24, - 80, -24, 80, -24, 80, -24, 80, -24, }, - { 12, -6, 12, -6, 12, -6, 12, -6, - 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, - -6, 12, -6, 12, -6, 12, -6, 12, }, - {-23, 70, -23, 70, -23, 70, -23, 70, - -23, 70, -23, 70, -23, 70, -23, 70, }, - { 90, -25, 90, -25, 90, -25, 90, -25, - 90, -25, 90, -25, 90, -25, 90, -25, }, - { 12, -6, 12, -6, 12, -6, 12, -6, - 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, - -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 11, -6, 11, -6, 11, -6, 11, - -6, 11, -6, 11, -6, 11, -6, 11, }, - {-21, 60, -21, 60, -21, 60, -21, 60, - -21, 60, -21, 60, -21, 60, -21, 60, }, - { 99, -25, 99, -25, 99, -25, 99, -25, - 99, -25, 99, -25, 99, -25, 99, -25, }, - { 12, -6, 12, -6, 12, -6, 12, -6, - 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -5, 9, -5, 9, -5, 9, -5, 9, - -5, 9, -5, 9, -5, 9, -5, 9, }, - {-18, 49, -18, 49, -18, 49, -18, 49, - -18, 49, -18, 49, -18, 49, -18, 49, }, - {107, -23, 107, -23, 107, -23, 107, -23, - 107, -23, 107, -23, 107, -23, 107, -23, }, - { 11, -5, 11, -5, 11, -5, 11, -5, - 11, -5, 11, -5, 11, -5, 11, -5, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {-15, 38, -15, 38, -15, 38, -15, 38, - -15, 38, -15, 38, -15, 38, -15, 38, }, - {114, -21, 114, -21, 114, -21, 114, -21, - 114, -21, 114, -21, 114, -21, 114, -21, }, - { 10, -4, 10, -4, 10, -4, 10, -4, - 10, -4, 10, -4, 10, -4, 10, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 1, -1, 1, -1, 1, -1, 1, - -1, 1, -1, 1, -1, 1, -1, 1, }, - { -3, 6, -3, 6, -3, 6, -3, 6, - -3, 6, -3, 6, -3, 6, -3, 6, }, - {-11, 28, -11, 28, -11, 28, -11, 28, - -11, 28, -11, 28, -11, 28, -11, 28, }, - {120, -17, 120, -17, 120, -17, 120, -17, - 120, -17, 120, -17, 120, -17, 120, -17, }, - { 8, -4, 8, -4, 8, -4, 8, -4, - 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, - 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 4, -2, 4, -2, 4, -2, 4, - -2, 4, -2, 4, -2, 4, -2, 4, }, - { -8, 18, -8, 18, -8, 18, -8, 18, - -8, 18, -8, 18, -8, 18, -8, 18, }, - {124, -12, 124, -12, 124, -12, 124, -12, - 124, -12, 124, -12, 124, -12, 124, -12, }, - { 5, -3, 5, -3, 5, -3, 5, -3, - 5, -3, 5, -3, 5, -3, 5, -3, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, }, - { -1, 2, -1, 2, -1, 2, -1, 2, - -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, - -4, 8, -4, 8, -4, 8, -4, 8, }, - {127, -7, 127, -7, 127, -7, 127, -7, - 127, -7, 127, -7, 127, -7, 127, -7, }, - { 3, -1, 3, -1, 3, -1, 3, -1, - 3, -1, 3, -1, 3, -1, 3, -1, }, - { 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, + 127 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5 }, + { -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, + -12, 124 }, + { 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8 }, + { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, + -17, 120 }, + { 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, + -11 }, + { 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3 }, + { 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10 }, + { -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, + -21, 114 }, + { 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, + -15 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11 }, + { -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, + -23, 107 }, + { 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, + -18 }, + { 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 }, + { -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, + 99 }, + { 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, + -21 }, + { 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 }, + { -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, + 90 }, + { 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, + -23 }, + { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 }, + { -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, + 80 }, + { 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, + -24 }, + { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 }, + { -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, + 70 }, + { 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, + -25 }, + { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11 }, + { -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, + 60 }, + { 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, + -25 }, + { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9 }, + { -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, + 49 }, + { 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, + 107, -23 }, + { 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, + 38 }, + { 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, + 114, -21 }, + { 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1 }, + { -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6 }, + { -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, + 28 }, + { 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, + 120, -17 }, + { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 }, + { -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18 }, + { 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, + 124, -12 }, + { 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 }, + { 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, + -7 }, + { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 }, + { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }, }, }; #endif diff --git a/vp10/common/x86/vp10_convolve_ssse3.c b/vp10/common/x86/vp10_convolve_ssse3.c index 07dc11d349b96bf521f0f0c0e835a86fe526f664..68cfca7696795842cea4cb1625560b6bd3574930 100644 --- a/vp10/common/x86/vp10_convolve_ssse3.c +++ b/vp10/common/x86/vp10_convolve_ssse3.c @@ -14,7 +14,7 @@ #include "./vp10_rtcd.h" #include "vp10/common/filter.h" -#define WIDTH_BOUND (16) +#define WIDTH_BOUND (16) #define HEIGHT_BOUND (16) static INLINE void transpose_4x8(const __m128i *in, __m128i *out) { @@ -65,8 +65,8 @@ static INLINE void accumulate_store_2_pixel(const __m128i *x, uint8_t *dst) { *(uint16_t *)dst = (uint16_t)temp; } -static store_pixel_t store2pixelTab[2] = { - store_2_pixel_only, accumulate_store_2_pixel}; +static store_pixel_t store2pixelTab[2] = { store_2_pixel_only, + accumulate_store_2_pixel }; static INLINE void store_4_pixel_only(const __m128i *x, uint8_t *dst) { __m128i u = _mm_packus_epi16(*x, *x); @@ -78,12 +78,11 @@ static INLINE void accumulate_store_4_pixel(const __m128i *x, uint8_t *dst) { *(int *)dst = _mm_cvtsi128_si32(y); } -static store_pixel_t store4pixelTab[2] = { - store_4_pixel_only, accumulate_store_4_pixel}; +static store_pixel_t store4pixelTab[2] = { store_4_pixel_only, + accumulate_store_4_pixel }; -static void horiz_w4_ssse3(const uint8_t *src, const __m128i *f, - int tapsNum, store_pixel_t store_func, - uint8_t *dst) { +static void horiz_w4_ssse3(const uint8_t *src, const __m128i *f, int tapsNum, + store_pixel_t store_func, uint8_t *dst) { __m128i sumPairRow[4]; __m128i sumPairCol[8]; __m128i pixel; @@ -163,14 +162,10 @@ static void horiz_w128_ssse3(const uint8_t *src, const __m128i *f, int tapsNum, horiz_w64_ssse3(src, f, tapsNum, store, buf); } -static void (*horizTab[6])(const uint8_t *, const __m128i *, int, - store_pixel_t, uint8_t *) = { - horiz_w4_ssse3, - horiz_w8_ssse3, - horiz_w16_ssse3, - horiz_w32_ssse3, - horiz_w64_ssse3, - horiz_w128_ssse3, +static void (*horizTab[6])(const uint8_t *, const __m128i *, int, store_pixel_t, + uint8_t *) = { + horiz_w4_ssse3, horiz_w8_ssse3, horiz_w16_ssse3, + horiz_w32_ssse3, horiz_w64_ssse3, horiz_w128_ssse3, }; static void filter_horiz_ssse3(const uint8_t *src, __m128i *f, int tapsNum, @@ -179,26 +174,13 @@ static void filter_horiz_ssse3(const uint8_t *src, __m128i *f, int tapsNum, // Note: // For width=2 and 4, store function must be different case 2: - case 4: - horizTab[0](src, f, tapsNum, store, dst); - break; - case 8: - horizTab[1](src, f, tapsNum, store, dst); - break; - case 16: - horizTab[2](src, f, tapsNum, store, dst); - break; - case 32: - horizTab[3](src, f, tapsNum, store, dst); - break; - case 64: - horizTab[4](src, f, tapsNum, store, dst); - break; - case 128: - horizTab[5](src, f, tapsNum, store, dst); - break; - default: - assert(0); + case 4: horizTab[0](src, f, tapsNum, store, dst); break; + case 8: horizTab[1](src, f, tapsNum, store, dst); break; + case 16: horizTab[2](src, f, tapsNum, store, dst); break; + case 32: horizTab[3](src, f, tapsNum, store, dst); break; + case 64: horizTab[4](src, f, tapsNum, store, dst); break; + case 128: horizTab[5](src, f, tapsNum, store, dst); break; + default: assert(0); } } @@ -207,8 +189,7 @@ typedef void (*transpose_to_dst_t)(const uint16_t *src, int src_stride, uint8_t *dst, int dst_stride); static INLINE void transpose8x8_direct_to_dst(const uint16_t *src, - int src_stride, - uint8_t *dst, + int src_stride, uint8_t *dst, int dst_stride) { const __m128i k_256 = _mm_set1_epi16(1 << 8); __m128i v0, v1, v2, v3; @@ -256,19 +237,18 @@ static INLINE void transpose8x8_direct_to_dst(const uint16_t *src, u6 = _mm_srli_si128(u2, 8); u7 = _mm_srli_si128(u3, 8); - _mm_storel_epi64((__m128i*)dst, u0); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 1), u4); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 2), u1); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 3), u5); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 4), u2); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 5), u6); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 6), u3); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 7), u7); + _mm_storel_epi64((__m128i *)dst, u0); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7); } static INLINE void transpose8x8_accumu_to_dst(const uint16_t *src, - int src_stride, - uint8_t *dst, + int src_stride, uint8_t *dst, int dst_stride) { const __m128i k_256 = _mm_set1_epi16(1 << 8); const __m128i zero = _mm_setzero_si128(); @@ -382,19 +362,18 @@ static INLINE void transpose8x8_accumu_to_dst(const uint16_t *src, u6 = _mm_srli_si128(u2, 8); u7 = _mm_srli_si128(u3, 8); - _mm_storel_epi64((__m128i*)dst, u0); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 1), u4); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 2), u1); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 3), u5); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 4), u2); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 5), u6); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 6), u3); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 7), u7); + _mm_storel_epi64((__m128i *)dst, u0); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3); + _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7); } -static transpose_to_dst_t trans8x8Tab[2] = { - transpose8x8_direct_to_dst, transpose8x8_accumu_to_dst -}; +static transpose_to_dst_t trans8x8Tab[2] = { transpose8x8_direct_to_dst, + transpose8x8_accumu_to_dst }; static INLINE void transpose_8x16(const __m128i *in, __m128i *out) { __m128i t0, t1, t2, t3, u0, u1; @@ -476,8 +455,7 @@ static void filter_horiz_v8p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, // Vertical 4-pixel parallel static INLINE void transpose4x4_direct_to_dst(const uint16_t *src, - int src_stride, - uint8_t *dst, + int src_stride, uint8_t *dst, int dst_stride) { const __m128i k_256 = _mm_set1_epi16(1 << 8); __m128i v0, v1, v2, v3; @@ -509,8 +487,7 @@ static INLINE void transpose4x4_direct_to_dst(const uint16_t *src, } static INLINE void transpose4x4_accumu_to_dst(const uint16_t *src, - int src_stride, - uint8_t *dst, + int src_stride, uint8_t *dst, int dst_stride) { const __m128i k_256 = _mm_set1_epi16(1 << 8); const __m128i zero = _mm_setzero_si128(); @@ -571,9 +548,8 @@ static INLINE void transpose4x4_accumu_to_dst(const uint16_t *src, *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(u3); } -static transpose_to_dst_t trans4x4Tab[2] = { - transpose4x4_direct_to_dst, transpose4x4_accumu_to_dst -}; +static transpose_to_dst_t trans4x4Tab[2] = { transpose4x4_direct_to_dst, + transpose4x4_accumu_to_dst }; static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, __m128i *f, int tapsNum, uint16_t *buf) { @@ -597,7 +573,7 @@ static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 tr0_1 = _mm_unpacklo_epi16(C, D); // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33 - s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1); + s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1); // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1); // 02 03 12 13 22 23 32 33 @@ -607,7 +583,7 @@ static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, tr0_0 = _mm_unpackhi_epi16(A, B); tr0_1 = _mm_unpackhi_epi16(C, D); - s9s8 = _mm_unpacklo_epi32(tr0_0, tr0_1); + s9s8 = _mm_unpacklo_epi32(tr0_0, tr0_1); sbsa = _mm_srli_si128(s9s8, 8); // multiply 2 adjacent elements with the filter and add the result @@ -659,10 +635,9 @@ void vp10_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, return; } - hCoeffs = vp10_get_subpel_filter_signal_dir( - filter_params, subpel_x_q4 - 1); - vCoeffs = vp10_get_subpel_filter_ver_signal_dir( - filter_params, subpel_x_q4 - 1); + hCoeffs = vp10_get_subpel_filter_signal_dir(filter_params, subpel_x_q4 - 1); + vCoeffs = + vp10_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1); if (!hCoeffs || !vCoeffs) { vp10_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params, @@ -755,8 +730,8 @@ static INLINE void accumulate_store_8_pixel(const __m128i *x, uint8_t *dst) { _mm_storel_epi64((__m128i *)dst, y); } -static store_pixel_t store8pixelTab[2] = { - store_8_pixel_only, accumulate_store_8_pixel}; +static store_pixel_t store8pixelTab[2] = { store_8_pixel_only, + accumulate_store_8_pixel }; static __m128i filter_vert_ssse3(const uint8_t *src, int src_stride, int tapsNum, __m128i *f) { @@ -869,8 +844,8 @@ void vp10_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, return; } - vCoeffs = vp10_get_subpel_filter_ver_signal_dir( - filter_params, subpel_y_q4 - 1); + vCoeffs = + vp10_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1); if (!vCoeffs) { vp10_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params, @@ -889,14 +864,14 @@ void vp10_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, src_ptr = src; if (w > 4) { - filter_vert_compute_large(src_ptr, src_stride, verf, tapsNum, store8p, - w, h, dst_ptr, dst_stride); + filter_vert_compute_large(src_ptr, src_stride, verf, tapsNum, store8p, w, h, + dst_ptr, dst_stride); } else if (4 == w) { - filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store4p, - h, dst_ptr, dst_stride); + filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store4p, h, + dst_ptr, dst_stride); } else if (2 == w) { - filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store2p, - h, dst_ptr, dst_stride); + filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store2p, h, + dst_ptr, dst_stride); } else { assert(0); } diff --git a/vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h b/vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h index 77f633e9168f53708240ccebf4b833c9f930f54a..eea9f97090703e90d8954c7a873f82531337ea94 100644 --- a/vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h +++ b/vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h @@ -23,42 +23,37 @@ #define SUB_EPI16 _mm_subs_epi16 #if FDCT32x32_HIGH_PRECISION void vp10_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) { - int i, j; - for (i = 0; i < 32; ++i) { - tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = intermediate[j * 32 + i]; - vp10_fdct32(temp_in, temp_out, 0); - for (j = 0; j < 32; ++j) - out[j + i * 32] = - (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); - } + int i, j; + for (i = 0; i < 32; ++i) { + tran_high_t temp_in[32], temp_out[32]; + for (j = 0; j < 32; ++j) temp_in[j] = intermediate[j * 32 + i]; + vp10_fdct32(temp_in, temp_out, 0); + for (j = 0; j < 32; ++j) + out[j + i * 32] = + (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); + } } - #define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_c - #define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rows_c +#define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_c +#define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rows_c #else void vp10_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) { - int i, j; - for (i = 0; i < 32; ++i) { - tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) - temp_in[j] = intermediate[j * 32 + i]; - vp10_fdct32(temp_in, temp_out, 1); - for (j = 0; j < 32; ++j) - out[j + i * 32] = (tran_low_t)temp_out[j]; - } + int i, j; + for (i = 0; i < 32; ++i) { + tran_high_t temp_in[32], temp_out[32]; + for (j = 0; j < 32; ++j) temp_in[j] = intermediate[j * 32 + i]; + vp10_fdct32(temp_in, temp_out, 1); + for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j]; + } } - #define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_rd_c - #define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rd_rows_c +#define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_rd_c +#define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rd_rows_c #endif // FDCT32x32_HIGH_PRECISION #else #define ADD_EPI16 _mm_add_epi16 #define SUB_EPI16 _mm_sub_epi16 #endif // DCT_HIGH_BIT_DEPTH - -void FDCT32x32_2D(const int16_t *input, - tran_low_t *output_org, int stride) { +void FDCT32x32_2D(const int16_t *input, tran_low_t *output_org, int stride) { // Calculate pre-multiplied strides const int str1 = stride; const int str2 = 2 * stride; @@ -71,42 +66,42 @@ void FDCT32x32_2D(const int16_t *input, // by constructing the 32 bit constant corresponding to that pair. const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); + const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64); - const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64); - const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64); + const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64); + const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64); + const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); + const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); + const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64); const __m128i k__cospi_m28_m04 = pair_set_epi16(-cospi_28_64, -cospi_4_64); const __m128i k__cospi_m12_m20 = pair_set_epi16(-cospi_12_64, -cospi_20_64); - const __m128i k__cospi_p30_p02 = pair_set_epi16(+cospi_30_64, cospi_2_64); - const __m128i k__cospi_p14_p18 = pair_set_epi16(+cospi_14_64, cospi_18_64); - const __m128i k__cospi_p22_p10 = pair_set_epi16(+cospi_22_64, cospi_10_64); - const __m128i k__cospi_p06_p26 = pair_set_epi16(+cospi_6_64, cospi_26_64); - const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64); - const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64); - const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64); - const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64); - const __m128i k__cospi_p31_p01 = pair_set_epi16(+cospi_31_64, cospi_1_64); - const __m128i k__cospi_p15_p17 = pair_set_epi16(+cospi_15_64, cospi_17_64); - const __m128i k__cospi_p23_p09 = pair_set_epi16(+cospi_23_64, cospi_9_64); - const __m128i k__cospi_p07_p25 = pair_set_epi16(+cospi_7_64, cospi_25_64); - const __m128i k__cospi_m25_p07 = pair_set_epi16(-cospi_25_64, cospi_7_64); - const __m128i k__cospi_m09_p23 = pair_set_epi16(-cospi_9_64, cospi_23_64); - const __m128i k__cospi_m17_p15 = pair_set_epi16(-cospi_17_64, cospi_15_64); - const __m128i k__cospi_m01_p31 = pair_set_epi16(-cospi_1_64, cospi_31_64); - const __m128i k__cospi_p27_p05 = pair_set_epi16(+cospi_27_64, cospi_5_64); - const __m128i k__cospi_p11_p21 = pair_set_epi16(+cospi_11_64, cospi_21_64); - const __m128i k__cospi_p19_p13 = pair_set_epi16(+cospi_19_64, cospi_13_64); - const __m128i k__cospi_p03_p29 = pair_set_epi16(+cospi_3_64, cospi_29_64); - const __m128i k__cospi_m29_p03 = pair_set_epi16(-cospi_29_64, cospi_3_64); - const __m128i k__cospi_m13_p19 = pair_set_epi16(-cospi_13_64, cospi_19_64); - const __m128i k__cospi_m21_p11 = pair_set_epi16(-cospi_21_64, cospi_11_64); - const __m128i k__cospi_m05_p27 = pair_set_epi16(-cospi_5_64, cospi_27_64); + const __m128i k__cospi_p30_p02 = pair_set_epi16(+cospi_30_64, cospi_2_64); + const __m128i k__cospi_p14_p18 = pair_set_epi16(+cospi_14_64, cospi_18_64); + const __m128i k__cospi_p22_p10 = pair_set_epi16(+cospi_22_64, cospi_10_64); + const __m128i k__cospi_p06_p26 = pair_set_epi16(+cospi_6_64, cospi_26_64); + const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64); + const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64); + const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64); + const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64); + const __m128i k__cospi_p31_p01 = pair_set_epi16(+cospi_31_64, cospi_1_64); + const __m128i k__cospi_p15_p17 = pair_set_epi16(+cospi_15_64, cospi_17_64); + const __m128i k__cospi_p23_p09 = pair_set_epi16(+cospi_23_64, cospi_9_64); + const __m128i k__cospi_p07_p25 = pair_set_epi16(+cospi_7_64, cospi_25_64); + const __m128i k__cospi_m25_p07 = pair_set_epi16(-cospi_25_64, cospi_7_64); + const __m128i k__cospi_m09_p23 = pair_set_epi16(-cospi_9_64, cospi_23_64); + const __m128i k__cospi_m17_p15 = pair_set_epi16(-cospi_17_64, cospi_15_64); + const __m128i k__cospi_m01_p31 = pair_set_epi16(-cospi_1_64, cospi_31_64); + const __m128i k__cospi_p27_p05 = pair_set_epi16(+cospi_27_64, cospi_5_64); + const __m128i k__cospi_p11_p21 = pair_set_epi16(+cospi_11_64, cospi_21_64); + const __m128i k__cospi_p19_p13 = pair_set_epi16(+cospi_19_64, cospi_13_64); + const __m128i k__cospi_p03_p29 = pair_set_epi16(+cospi_3_64, cospi_29_64); + const __m128i k__cospi_m29_p03 = pair_set_epi16(-cospi_29_64, cospi_3_64); + const __m128i k__cospi_m13_p19 = pair_set_epi16(-cospi_13_64, cospi_19_64); + const __m128i k__cospi_m21_p11 = pair_set_epi16(-cospi_21_64, cospi_11_64); + const __m128i k__cospi_m05_p27 = pair_set_epi16(-cospi_5_64, cospi_27_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i kZero = _mm_set1_epi16(0); - const __m128i kOne = _mm_set1_epi16(1); + const __m128i kOne = _mm_set1_epi16(1); // Do the two transform/transpose passes int pass; #if DCT_HIGH_BIT_DEPTH @@ -124,125 +119,125 @@ void FDCT32x32_2D(const int16_t *input, // Note: even though all the loads below are aligned, using the aligned // intrinsic make the code slightly slower. if (0 == pass) { - const int16_t *in = &input[column_start]; + const int16_t *in = &input[column_start]; // step1[i] = (in[ 0 * stride] + in[(32 - 1) * stride]) << 2; // Note: the next four blocks could be in a loop. That would help the // instruction cache but is actually slower. { - const int16_t *ina = in + 0 * str1; - const int16_t *inb = in + 31 * str1; - __m128i *step1a = &step1[ 0]; + const int16_t *ina = in + 0 * str1; + const int16_t *inb = in + 31 * str1; + __m128i *step1a = &step1[0]; __m128i *step1b = &step1[31]; - const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); - const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); - const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); - const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); - const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); - const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); - const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); - const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); - step1a[ 0] = _mm_add_epi16(ina0, inb0); - step1a[ 1] = _mm_add_epi16(ina1, inb1); - step1a[ 2] = _mm_add_epi16(ina2, inb2); - step1a[ 3] = _mm_add_epi16(ina3, inb3); + const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); + const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); + const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); + const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); + const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); + const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); + const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); + const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); + step1a[0] = _mm_add_epi16(ina0, inb0); + step1a[1] = _mm_add_epi16(ina1, inb1); + step1a[2] = _mm_add_epi16(ina2, inb2); + step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); - step1a[ 0] = _mm_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm_slli_epi16(step1a[ 3], 2); + step1a[0] = _mm_slli_epi16(step1a[0], 2); + step1a[1] = _mm_slli_epi16(step1a[1], 2); + step1a[2] = _mm_slli_epi16(step1a[2], 2); + step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } { - const int16_t *ina = in + 4 * str1; - const int16_t *inb = in + 27 * str1; - __m128i *step1a = &step1[ 4]; + const int16_t *ina = in + 4 * str1; + const int16_t *inb = in + 27 * str1; + __m128i *step1a = &step1[4]; __m128i *step1b = &step1[27]; - const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); - const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); - const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); - const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); - const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); - const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); - const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); - const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); - step1a[ 0] = _mm_add_epi16(ina0, inb0); - step1a[ 1] = _mm_add_epi16(ina1, inb1); - step1a[ 2] = _mm_add_epi16(ina2, inb2); - step1a[ 3] = _mm_add_epi16(ina3, inb3); + const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); + const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); + const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); + const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); + const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); + const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); + const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); + const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); + step1a[0] = _mm_add_epi16(ina0, inb0); + step1a[1] = _mm_add_epi16(ina1, inb1); + step1a[2] = _mm_add_epi16(ina2, inb2); + step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); - step1a[ 0] = _mm_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm_slli_epi16(step1a[ 3], 2); + step1a[0] = _mm_slli_epi16(step1a[0], 2); + step1a[1] = _mm_slli_epi16(step1a[1], 2); + step1a[2] = _mm_slli_epi16(step1a[2], 2); + step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } { - const int16_t *ina = in + 8 * str1; - const int16_t *inb = in + 23 * str1; - __m128i *step1a = &step1[ 8]; + const int16_t *ina = in + 8 * str1; + const int16_t *inb = in + 23 * str1; + __m128i *step1a = &step1[8]; __m128i *step1b = &step1[23]; - const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); - const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); - const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); - const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); - const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); - const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); - const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); - const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); - step1a[ 0] = _mm_add_epi16(ina0, inb0); - step1a[ 1] = _mm_add_epi16(ina1, inb1); - step1a[ 2] = _mm_add_epi16(ina2, inb2); - step1a[ 3] = _mm_add_epi16(ina3, inb3); + const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); + const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); + const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); + const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); + const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); + const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); + const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); + const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); + step1a[0] = _mm_add_epi16(ina0, inb0); + step1a[1] = _mm_add_epi16(ina1, inb1); + step1a[2] = _mm_add_epi16(ina2, inb2); + step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); - step1a[ 0] = _mm_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm_slli_epi16(step1a[ 3], 2); + step1a[0] = _mm_slli_epi16(step1a[0], 2); + step1a[1] = _mm_slli_epi16(step1a[1], 2); + step1a[2] = _mm_slli_epi16(step1a[2], 2); + step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } { - const int16_t *ina = in + 12 * str1; - const int16_t *inb = in + 19 * str1; + const int16_t *ina = in + 12 * str1; + const int16_t *inb = in + 19 * str1; __m128i *step1a = &step1[12]; __m128i *step1b = &step1[19]; - const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); - const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); - const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); - const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); - const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); - const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); - const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); - const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); - step1a[ 0] = _mm_add_epi16(ina0, inb0); - step1a[ 1] = _mm_add_epi16(ina1, inb1); - step1a[ 2] = _mm_add_epi16(ina2, inb2); - step1a[ 3] = _mm_add_epi16(ina3, inb3); + const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); + const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); + const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); + const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); + const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); + const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); + const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); + const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); + step1a[0] = _mm_add_epi16(ina0, inb0); + step1a[1] = _mm_add_epi16(ina1, inb1); + step1a[2] = _mm_add_epi16(ina2, inb2); + step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); - step1a[ 0] = _mm_slli_epi16(step1a[ 0], 2); - step1a[ 1] = _mm_slli_epi16(step1a[ 1], 2); - step1a[ 2] = _mm_slli_epi16(step1a[ 2], 2); - step1a[ 3] = _mm_slli_epi16(step1a[ 3], 2); + step1a[0] = _mm_slli_epi16(step1a[0], 2); + step1a[1] = _mm_slli_epi16(step1a[1], 2); + step1a[2] = _mm_slli_epi16(step1a[2], 2); + step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); @@ -257,14 +252,14 @@ void FDCT32x32_2D(const int16_t *input, // Note: the next four blocks could be in a loop. That would help the // instruction cache but is actually slower. { - __m128i in00 = _mm_loadu_si128((const __m128i *)(in + 0 * 32)); - __m128i in01 = _mm_loadu_si128((const __m128i *)(in + 1 * 32)); - __m128i in02 = _mm_loadu_si128((const __m128i *)(in + 2 * 32)); - __m128i in03 = _mm_loadu_si128((const __m128i *)(in + 3 * 32)); - __m128i in28 = _mm_loadu_si128((const __m128i *)(in + 28 * 32)); - __m128i in29 = _mm_loadu_si128((const __m128i *)(in + 29 * 32)); - __m128i in30 = _mm_loadu_si128((const __m128i *)(in + 30 * 32)); - __m128i in31 = _mm_loadu_si128((const __m128i *)(in + 31 * 32)); + __m128i in00 = _mm_loadu_si128((const __m128i *)(in + 0 * 32)); + __m128i in01 = _mm_loadu_si128((const __m128i *)(in + 1 * 32)); + __m128i in02 = _mm_loadu_si128((const __m128i *)(in + 2 * 32)); + __m128i in03 = _mm_loadu_si128((const __m128i *)(in + 3 * 32)); + __m128i in28 = _mm_loadu_si128((const __m128i *)(in + 28 * 32)); + __m128i in29 = _mm_loadu_si128((const __m128i *)(in + 29 * 32)); + __m128i in30 = _mm_loadu_si128((const __m128i *)(in + 30 * 32)); + __m128i in31 = _mm_loadu_si128((const __m128i *)(in + 31 * 32)); step1[0] = ADD_EPI16(in00, in31); step1[1] = ADD_EPI16(in01, in30); step1[2] = ADD_EPI16(in02, in29); @@ -284,14 +279,14 @@ void FDCT32x32_2D(const int16_t *input, #endif // DCT_HIGH_BIT_DEPTH } { - __m128i in04 = _mm_loadu_si128((const __m128i *)(in + 4 * 32)); - __m128i in05 = _mm_loadu_si128((const __m128i *)(in + 5 * 32)); - __m128i in06 = _mm_loadu_si128((const __m128i *)(in + 6 * 32)); - __m128i in07 = _mm_loadu_si128((const __m128i *)(in + 7 * 32)); - __m128i in24 = _mm_loadu_si128((const __m128i *)(in + 24 * 32)); - __m128i in25 = _mm_loadu_si128((const __m128i *)(in + 25 * 32)); - __m128i in26 = _mm_loadu_si128((const __m128i *)(in + 26 * 32)); - __m128i in27 = _mm_loadu_si128((const __m128i *)(in + 27 * 32)); + __m128i in04 = _mm_loadu_si128((const __m128i *)(in + 4 * 32)); + __m128i in05 = _mm_loadu_si128((const __m128i *)(in + 5 * 32)); + __m128i in06 = _mm_loadu_si128((const __m128i *)(in + 6 * 32)); + __m128i in07 = _mm_loadu_si128((const __m128i *)(in + 7 * 32)); + __m128i in24 = _mm_loadu_si128((const __m128i *)(in + 24 * 32)); + __m128i in25 = _mm_loadu_si128((const __m128i *)(in + 25 * 32)); + __m128i in26 = _mm_loadu_si128((const __m128i *)(in + 26 * 32)); + __m128i in27 = _mm_loadu_si128((const __m128i *)(in + 27 * 32)); step1[4] = ADD_EPI16(in04, in27); step1[5] = ADD_EPI16(in05, in26); step1[6] = ADD_EPI16(in06, in25); @@ -311,14 +306,14 @@ void FDCT32x32_2D(const int16_t *input, #endif // DCT_HIGH_BIT_DEPTH } { - __m128i in08 = _mm_loadu_si128((const __m128i *)(in + 8 * 32)); - __m128i in09 = _mm_loadu_si128((const __m128i *)(in + 9 * 32)); - __m128i in10 = _mm_loadu_si128((const __m128i *)(in + 10 * 32)); - __m128i in11 = _mm_loadu_si128((const __m128i *)(in + 11 * 32)); - __m128i in20 = _mm_loadu_si128((const __m128i *)(in + 20 * 32)); - __m128i in21 = _mm_loadu_si128((const __m128i *)(in + 21 * 32)); - __m128i in22 = _mm_loadu_si128((const __m128i *)(in + 22 * 32)); - __m128i in23 = _mm_loadu_si128((const __m128i *)(in + 23 * 32)); + __m128i in08 = _mm_loadu_si128((const __m128i *)(in + 8 * 32)); + __m128i in09 = _mm_loadu_si128((const __m128i *)(in + 9 * 32)); + __m128i in10 = _mm_loadu_si128((const __m128i *)(in + 10 * 32)); + __m128i in11 = _mm_loadu_si128((const __m128i *)(in + 11 * 32)); + __m128i in20 = _mm_loadu_si128((const __m128i *)(in + 20 * 32)); + __m128i in21 = _mm_loadu_si128((const __m128i *)(in + 21 * 32)); + __m128i in22 = _mm_loadu_si128((const __m128i *)(in + 22 * 32)); + __m128i in23 = _mm_loadu_si128((const __m128i *)(in + 23 * 32)); step1[8] = ADD_EPI16(in08, in23); step1[9] = ADD_EPI16(in09, in22); step1[10] = ADD_EPI16(in10, in21); @@ -338,14 +333,14 @@ void FDCT32x32_2D(const int16_t *input, #endif // DCT_HIGH_BIT_DEPTH } { - __m128i in12 = _mm_loadu_si128((const __m128i *)(in + 12 * 32)); - __m128i in13 = _mm_loadu_si128((const __m128i *)(in + 13 * 32)); - __m128i in14 = _mm_loadu_si128((const __m128i *)(in + 14 * 32)); - __m128i in15 = _mm_loadu_si128((const __m128i *)(in + 15 * 32)); - __m128i in16 = _mm_loadu_si128((const __m128i *)(in + 16 * 32)); - __m128i in17 = _mm_loadu_si128((const __m128i *)(in + 17 * 32)); - __m128i in18 = _mm_loadu_si128((const __m128i *)(in + 18 * 32)); - __m128i in19 = _mm_loadu_si128((const __m128i *)(in + 19 * 32)); + __m128i in12 = _mm_loadu_si128((const __m128i *)(in + 12 * 32)); + __m128i in13 = _mm_loadu_si128((const __m128i *)(in + 13 * 32)); + __m128i in14 = _mm_loadu_si128((const __m128i *)(in + 14 * 32)); + __m128i in15 = _mm_loadu_si128((const __m128i *)(in + 15 * 32)); + __m128i in16 = _mm_loadu_si128((const __m128i *)(in + 16 * 32)); + __m128i in17 = _mm_loadu_si128((const __m128i *)(in + 17 * 32)); + __m128i in18 = _mm_loadu_si128((const __m128i *)(in + 18 * 32)); + __m128i in19 = _mm_loadu_si128((const __m128i *)(in + 19 * 32)); step1[12] = ADD_EPI16(in12, in19); step1[13] = ADD_EPI16(in13, in18); step1[14] = ADD_EPI16(in14, in17); @@ -373,10 +368,10 @@ void FDCT32x32_2D(const int16_t *input, step2[3] = ADD_EPI16(step1[3], step1[12]); step2[4] = ADD_EPI16(step1[4], step1[11]); step2[5] = ADD_EPI16(step1[5], step1[10]); - step2[6] = ADD_EPI16(step1[6], step1[ 9]); - step2[7] = ADD_EPI16(step1[7], step1[ 8]); - step2[8] = SUB_EPI16(step1[7], step1[ 8]); - step2[9] = SUB_EPI16(step1[6], step1[ 9]); + step2[6] = ADD_EPI16(step1[6], step1[9]); + step2[7] = ADD_EPI16(step1[7], step1[8]); + step2[8] = SUB_EPI16(step1[7], step1[8]); + step2[9] = SUB_EPI16(step1[6], step1[9]); step2[10] = SUB_EPI16(step1[5], step1[10]); step2[11] = SUB_EPI16(step1[4], step1[11]); step2[12] = SUB_EPI16(step1[3], step1[12]); @@ -385,9 +380,8 @@ void FDCT32x32_2D(const int16_t *input, step2[15] = SUB_EPI16(step1[0], step1[15]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x16( - &step2[0], &step2[1], &step2[2], &step2[3], - &step2[4], &step2[5], &step2[6], &step2[7], - &step2[8], &step2[9], &step2[10], &step2[11], + &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5], + &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11], &step2[12], &step2[13], &step2[14], &step2[15]); if (overflow) { if (pass == 0) @@ -483,16 +477,16 @@ void FDCT32x32_2D(const int16_t *input, // dump the magnitude by half, hence the intermediate values are within // the range of 16 bits. if (1 == pass) { - __m128i s3_00_0 = _mm_cmplt_epi16(step2[ 0], kZero); - __m128i s3_01_0 = _mm_cmplt_epi16(step2[ 1], kZero); - __m128i s3_02_0 = _mm_cmplt_epi16(step2[ 2], kZero); - __m128i s3_03_0 = _mm_cmplt_epi16(step2[ 3], kZero); - __m128i s3_04_0 = _mm_cmplt_epi16(step2[ 4], kZero); - __m128i s3_05_0 = _mm_cmplt_epi16(step2[ 5], kZero); - __m128i s3_06_0 = _mm_cmplt_epi16(step2[ 6], kZero); - __m128i s3_07_0 = _mm_cmplt_epi16(step2[ 7], kZero); - __m128i s2_08_0 = _mm_cmplt_epi16(step2[ 8], kZero); - __m128i s2_09_0 = _mm_cmplt_epi16(step2[ 9], kZero); + __m128i s3_00_0 = _mm_cmplt_epi16(step2[0], kZero); + __m128i s3_01_0 = _mm_cmplt_epi16(step2[1], kZero); + __m128i s3_02_0 = _mm_cmplt_epi16(step2[2], kZero); + __m128i s3_03_0 = _mm_cmplt_epi16(step2[3], kZero); + __m128i s3_04_0 = _mm_cmplt_epi16(step2[4], kZero); + __m128i s3_05_0 = _mm_cmplt_epi16(step2[5], kZero); + __m128i s3_06_0 = _mm_cmplt_epi16(step2[6], kZero); + __m128i s3_07_0 = _mm_cmplt_epi16(step2[7], kZero); + __m128i s2_08_0 = _mm_cmplt_epi16(step2[8], kZero); + __m128i s2_09_0 = _mm_cmplt_epi16(step2[9], kZero); __m128i s3_10_0 = _mm_cmplt_epi16(step2[10], kZero); __m128i s3_11_0 = _mm_cmplt_epi16(step2[11], kZero); __m128i s3_12_0 = _mm_cmplt_epi16(step2[12], kZero); @@ -516,16 +510,16 @@ void FDCT32x32_2D(const int16_t *input, __m128i s3_30_0 = _mm_cmplt_epi16(step1[30], kZero); __m128i s3_31_0 = _mm_cmplt_epi16(step1[31], kZero); - step2[0] = SUB_EPI16(step2[ 0], s3_00_0); - step2[1] = SUB_EPI16(step2[ 1], s3_01_0); - step2[2] = SUB_EPI16(step2[ 2], s3_02_0); - step2[3] = SUB_EPI16(step2[ 3], s3_03_0); - step2[4] = SUB_EPI16(step2[ 4], s3_04_0); - step2[5] = SUB_EPI16(step2[ 5], s3_05_0); - step2[6] = SUB_EPI16(step2[ 6], s3_06_0); - step2[7] = SUB_EPI16(step2[ 7], s3_07_0); - step2[8] = SUB_EPI16(step2[ 8], s2_08_0); - step2[9] = SUB_EPI16(step2[ 9], s2_09_0); + step2[0] = SUB_EPI16(step2[0], s3_00_0); + step2[1] = SUB_EPI16(step2[1], s3_01_0); + step2[2] = SUB_EPI16(step2[2], s3_02_0); + step2[3] = SUB_EPI16(step2[3], s3_03_0); + step2[4] = SUB_EPI16(step2[4], s3_04_0); + step2[5] = SUB_EPI16(step2[5], s3_05_0); + step2[6] = SUB_EPI16(step2[6], s3_06_0); + step2[7] = SUB_EPI16(step2[7], s3_07_0); + step2[8] = SUB_EPI16(step2[8], s2_08_0); + step2[9] = SUB_EPI16(step2[9], s2_09_0); step2[10] = SUB_EPI16(step2[10], s3_10_0); step2[11] = SUB_EPI16(step2[11], s3_11_0); step2[12] = SUB_EPI16(step2[12], s3_12_0); @@ -550,29 +544,27 @@ void FDCT32x32_2D(const int16_t *input, step1[31] = SUB_EPI16(step1[31], s3_31_0); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x32( - &step2[0], &step2[1], &step2[2], &step2[3], - &step2[4], &step2[5], &step2[6], &step2[7], - &step2[8], &step2[9], &step2[10], &step2[11], - &step2[12], &step2[13], &step2[14], &step2[15], - &step1[16], &step1[17], &step1[18], &step1[19], - &step2[20], &step2[21], &step2[22], &step2[23], - &step2[24], &step2[25], &step2[26], &step2[27], - &step1[28], &step1[29], &step1[30], &step1[31]); + &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5], + &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11], + &step2[12], &step2[13], &step2[14], &step2[15], &step1[16], + &step1[17], &step1[18], &step1[19], &step2[20], &step2[21], + &step2[22], &step2[23], &step2[24], &step2[25], &step2[26], + &step2[27], &step1[28], &step1[29], &step1[30], &step1[31]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH - step2[0] = _mm_add_epi16(step2[ 0], kOne); - step2[1] = _mm_add_epi16(step2[ 1], kOne); - step2[2] = _mm_add_epi16(step2[ 2], kOne); - step2[3] = _mm_add_epi16(step2[ 3], kOne); - step2[4] = _mm_add_epi16(step2[ 4], kOne); - step2[5] = _mm_add_epi16(step2[ 5], kOne); - step2[6] = _mm_add_epi16(step2[ 6], kOne); - step2[7] = _mm_add_epi16(step2[ 7], kOne); - step2[8] = _mm_add_epi16(step2[ 8], kOne); - step2[9] = _mm_add_epi16(step2[ 9], kOne); + step2[0] = _mm_add_epi16(step2[0], kOne); + step2[1] = _mm_add_epi16(step2[1], kOne); + step2[2] = _mm_add_epi16(step2[2], kOne); + step2[3] = _mm_add_epi16(step2[3], kOne); + step2[4] = _mm_add_epi16(step2[4], kOne); + step2[5] = _mm_add_epi16(step2[5], kOne); + step2[6] = _mm_add_epi16(step2[6], kOne); + step2[7] = _mm_add_epi16(step2[7], kOne); + step2[8] = _mm_add_epi16(step2[8], kOne); + step2[9] = _mm_add_epi16(step2[9], kOne); step2[10] = _mm_add_epi16(step2[10], kOne); step2[11] = _mm_add_epi16(step2[11], kOne); step2[12] = _mm_add_epi16(step2[12], kOne); @@ -596,16 +588,16 @@ void FDCT32x32_2D(const int16_t *input, step1[30] = _mm_add_epi16(step1[30], kOne); step1[31] = _mm_add_epi16(step1[31], kOne); - step2[0] = _mm_srai_epi16(step2[ 0], 2); - step2[1] = _mm_srai_epi16(step2[ 1], 2); - step2[2] = _mm_srai_epi16(step2[ 2], 2); - step2[3] = _mm_srai_epi16(step2[ 3], 2); - step2[4] = _mm_srai_epi16(step2[ 4], 2); - step2[5] = _mm_srai_epi16(step2[ 5], 2); - step2[6] = _mm_srai_epi16(step2[ 6], 2); - step2[7] = _mm_srai_epi16(step2[ 7], 2); - step2[8] = _mm_srai_epi16(step2[ 8], 2); - step2[9] = _mm_srai_epi16(step2[ 9], 2); + step2[0] = _mm_srai_epi16(step2[0], 2); + step2[1] = _mm_srai_epi16(step2[1], 2); + step2[2] = _mm_srai_epi16(step2[2], 2); + step2[3] = _mm_srai_epi16(step2[3], 2); + step2[4] = _mm_srai_epi16(step2[4], 2); + step2[5] = _mm_srai_epi16(step2[5], 2); + step2[6] = _mm_srai_epi16(step2[6], 2); + step2[7] = _mm_srai_epi16(step2[7], 2); + step2[8] = _mm_srai_epi16(step2[8], 2); + step2[9] = _mm_srai_epi16(step2[9], 2); step2[10] = _mm_srai_epi16(step2[10], 2); step2[11] = _mm_srai_epi16(step2[11], 2); step2[12] = _mm_srai_epi16(step2[12], 2); @@ -634,821 +626,884 @@ void FDCT32x32_2D(const int16_t *input, #if FDCT32x32_HIGH_PRECISION if (pass == 0) { #endif - // Stage 3 - { - step3[0] = ADD_EPI16(step2[(8 - 1)], step2[0]); - step3[1] = ADD_EPI16(step2[(8 - 2)], step2[1]); - step3[2] = ADD_EPI16(step2[(8 - 3)], step2[2]); - step3[3] = ADD_EPI16(step2[(8 - 4)], step2[3]); - step3[4] = SUB_EPI16(step2[(8 - 5)], step2[4]); - step3[5] = SUB_EPI16(step2[(8 - 6)], step2[5]); - step3[6] = SUB_EPI16(step2[(8 - 7)], step2[6]); - step3[7] = SUB_EPI16(step2[(8 - 8)], step2[7]); + // Stage 3 + { + step3[0] = ADD_EPI16(step2[(8 - 1)], step2[0]); + step3[1] = ADD_EPI16(step2[(8 - 2)], step2[1]); + step3[2] = ADD_EPI16(step2[(8 - 3)], step2[2]); + step3[3] = ADD_EPI16(step2[(8 - 4)], step2[3]); + step3[4] = SUB_EPI16(step2[(8 - 5)], step2[4]); + step3[5] = SUB_EPI16(step2[(8 - 6)], step2[5]); + step3[6] = SUB_EPI16(step2[(8 - 7)], step2[6]); + step3[7] = SUB_EPI16(step2[(8 - 8)], step2[7]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step3[0], &step3[1], &step3[2], - &step3[3], &step3[4], &step3[5], - &step3[6], &step3[7]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = check_epi16_overflow_x8(&step3[0], &step3[1], &step3[2], + &step3[3], &step3[4], &step3[5], + &step3[6], &step3[7]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]); - const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]); - const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]); - const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]); - const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16); - const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16); - const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16); - const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16); - const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16); - const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16); - const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16); - const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16); - // dct_const_round_shift - const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); - const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); - const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); - const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); - const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); - const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); - const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); - const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); - const __m128i s3_10_6 = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS); - const __m128i s3_10_7 = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS); - const __m128i s3_11_6 = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS); - const __m128i s3_11_7 = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS); - const __m128i s3_12_6 = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS); - const __m128i s3_12_7 = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS); - const __m128i s3_13_6 = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS); - const __m128i s3_13_7 = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS); - // Combine - step3[10] = _mm_packs_epi32(s3_10_6, s3_10_7); - step3[11] = _mm_packs_epi32(s3_11_6, s3_11_7); - step3[12] = _mm_packs_epi32(s3_12_6, s3_12_7); - step3[13] = _mm_packs_epi32(s3_13_6, s3_13_7); -#if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&step3[10], &step3[11], - &step3[12], &step3[13]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; } -#endif // DCT_HIGH_BIT_DEPTH - } - { - step3[16] = ADD_EPI16(step2[23], step1[16]); - step3[17] = ADD_EPI16(step2[22], step1[17]); - step3[18] = ADD_EPI16(step2[21], step1[18]); - step3[19] = ADD_EPI16(step2[20], step1[19]); - step3[20] = SUB_EPI16(step1[19], step2[20]); - step3[21] = SUB_EPI16(step1[18], step2[21]); - step3[22] = SUB_EPI16(step1[17], step2[22]); - step3[23] = SUB_EPI16(step1[16], step2[23]); - step3[24] = SUB_EPI16(step1[31], step2[24]); - step3[25] = SUB_EPI16(step1[30], step2[25]); - step3[26] = SUB_EPI16(step1[29], step2[26]); - step3[27] = SUB_EPI16(step1[28], step2[27]); - step3[28] = ADD_EPI16(step2[27], step1[28]); - step3[29] = ADD_EPI16(step2[26], step1[29]); - step3[30] = ADD_EPI16(step2[25], step1[30]); - step3[31] = ADD_EPI16(step2[24], step1[31]); + { + const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]); + const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]); + const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]); + const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]); + const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16); + const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16); + const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16); + const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16); + const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16); + const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16); + const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16); + const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16); + // dct_const_round_shift + const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); + const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); + const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); + const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); + const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); + const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); + const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); + const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); + const __m128i s3_10_6 = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS); + const __m128i s3_10_7 = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS); + const __m128i s3_11_6 = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS); + const __m128i s3_11_7 = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS); + const __m128i s3_12_6 = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS); + const __m128i s3_12_7 = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS); + const __m128i s3_13_6 = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS); + const __m128i s3_13_7 = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS); + // Combine + step3[10] = _mm_packs_epi32(s3_10_6, s3_10_7); + step3[11] = _mm_packs_epi32(s3_11_6, s3_11_7); + step3[12] = _mm_packs_epi32(s3_12_6, s3_12_7); + step3[13] = _mm_packs_epi32(s3_13_6, s3_13_7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x16( - &step3[16], &step3[17], &step3[18], &step3[19], - &step3[20], &step3[21], &step3[22], &step3[23], - &step3[24], &step3[25], &step3[26], &step3[27], - &step3[28], &step3[29], &step3[30], &step3[31]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; + overflow = check_epi16_overflow_x4(&step3[10], &step3[11], &step3[12], + &step3[13]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } +#endif // DCT_HIGH_BIT_DEPTH } + { + step3[16] = ADD_EPI16(step2[23], step1[16]); + step3[17] = ADD_EPI16(step2[22], step1[17]); + step3[18] = ADD_EPI16(step2[21], step1[18]); + step3[19] = ADD_EPI16(step2[20], step1[19]); + step3[20] = SUB_EPI16(step1[19], step2[20]); + step3[21] = SUB_EPI16(step1[18], step2[21]); + step3[22] = SUB_EPI16(step1[17], step2[22]); + step3[23] = SUB_EPI16(step1[16], step2[23]); + step3[24] = SUB_EPI16(step1[31], step2[24]); + step3[25] = SUB_EPI16(step1[30], step2[25]); + step3[26] = SUB_EPI16(step1[29], step2[26]); + step3[27] = SUB_EPI16(step1[28], step2[27]); + step3[28] = ADD_EPI16(step2[27], step1[28]); + step3[29] = ADD_EPI16(step2[26], step1[29]); + step3[30] = ADD_EPI16(step2[25], step1[30]); + step3[31] = ADD_EPI16(step2[24], step1[31]); +#if DCT_HIGH_BIT_DEPTH + overflow = check_epi16_overflow_x16( + &step3[16], &step3[17], &step3[18], &step3[19], &step3[20], + &step3[21], &step3[22], &step3[23], &step3[24], &step3[25], + &step3[26], &step3[27], &step3[28], &step3[29], &step3[30], + &step3[31]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } + } - // Stage 4 - { - step1[0] = ADD_EPI16(step3[ 3], step3[ 0]); - step1[1] = ADD_EPI16(step3[ 2], step3[ 1]); - step1[2] = SUB_EPI16(step3[ 1], step3[ 2]); - step1[3] = SUB_EPI16(step3[ 0], step3[ 3]); - step1[8] = ADD_EPI16(step3[11], step2[ 8]); - step1[9] = ADD_EPI16(step3[10], step2[ 9]); - step1[10] = SUB_EPI16(step2[ 9], step3[10]); - step1[11] = SUB_EPI16(step2[ 8], step3[11]); - step1[12] = SUB_EPI16(step2[15], step3[12]); - step1[13] = SUB_EPI16(step2[14], step3[13]); - step1[14] = ADD_EPI16(step3[13], step2[14]); - step1[15] = ADD_EPI16(step3[12], step2[15]); + // Stage 4 + { + step1[0] = ADD_EPI16(step3[3], step3[0]); + step1[1] = ADD_EPI16(step3[2], step3[1]); + step1[2] = SUB_EPI16(step3[1], step3[2]); + step1[3] = SUB_EPI16(step3[0], step3[3]); + step1[8] = ADD_EPI16(step3[11], step2[8]); + step1[9] = ADD_EPI16(step3[10], step2[9]); + step1[10] = SUB_EPI16(step2[9], step3[10]); + step1[11] = SUB_EPI16(step2[8], step3[11]); + step1[12] = SUB_EPI16(step2[15], step3[12]); + step1[13] = SUB_EPI16(step2[14], step3[13]); + step1[14] = ADD_EPI16(step3[13], step2[14]); + step1[15] = ADD_EPI16(step3[12], step2[15]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x16( - &step1[0], &step1[1], &step1[2], &step1[3], - &step1[4], &step1[5], &step1[6], &step1[7], - &step1[8], &step1[9], &step1[10], &step1[11], - &step1[12], &step1[13], &step1[14], &step1[15]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = check_epi16_overflow_x16( + &step1[0], &step1[1], &step1[2], &step1[3], &step1[4], &step1[5], + &step1[6], &step1[7], &step1[8], &step1[9], &step1[10], + &step1[11], &step1[12], &step1[13], &step1[14], &step1[15]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i s1_05_0 = _mm_unpacklo_epi16(step3[6], step3[5]); - const __m128i s1_05_1 = _mm_unpackhi_epi16(step3[6], step3[5]); - const __m128i s1_05_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_m16); - const __m128i s1_05_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_m16); - const __m128i s1_06_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_p16); - const __m128i s1_06_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_p16); - // dct_const_round_shift - const __m128i s1_05_4 = _mm_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING); - const __m128i s1_05_5 = _mm_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING); - const __m128i s1_06_4 = _mm_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING); - const __m128i s1_06_5 = _mm_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING); - const __m128i s1_05_6 = _mm_srai_epi32(s1_05_4, DCT_CONST_BITS); - const __m128i s1_05_7 = _mm_srai_epi32(s1_05_5, DCT_CONST_BITS); - const __m128i s1_06_6 = _mm_srai_epi32(s1_06_4, DCT_CONST_BITS); - const __m128i s1_06_7 = _mm_srai_epi32(s1_06_5, DCT_CONST_BITS); - // Combine - step1[5] = _mm_packs_epi32(s1_05_6, s1_05_7); - step1[6] = _mm_packs_epi32(s1_06_6, s1_06_7); -#if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x2(&step1[5], &step1[6]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; } -#endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i s1_18_0 = _mm_unpacklo_epi16(step3[18], step3[29]); - const __m128i s1_18_1 = _mm_unpackhi_epi16(step3[18], step3[29]); - const __m128i s1_19_0 = _mm_unpacklo_epi16(step3[19], step3[28]); - const __m128i s1_19_1 = _mm_unpackhi_epi16(step3[19], step3[28]); - const __m128i s1_20_0 = _mm_unpacklo_epi16(step3[20], step3[27]); - const __m128i s1_20_1 = _mm_unpackhi_epi16(step3[20], step3[27]); - const __m128i s1_21_0 = _mm_unpacklo_epi16(step3[21], step3[26]); - const __m128i s1_21_1 = _mm_unpackhi_epi16(step3[21], step3[26]); - const __m128i s1_18_2 = _mm_madd_epi16(s1_18_0, k__cospi_m08_p24); - const __m128i s1_18_3 = _mm_madd_epi16(s1_18_1, k__cospi_m08_p24); - const __m128i s1_19_2 = _mm_madd_epi16(s1_19_0, k__cospi_m08_p24); - const __m128i s1_19_3 = _mm_madd_epi16(s1_19_1, k__cospi_m08_p24); - const __m128i s1_20_2 = _mm_madd_epi16(s1_20_0, k__cospi_m24_m08); - const __m128i s1_20_3 = _mm_madd_epi16(s1_20_1, k__cospi_m24_m08); - const __m128i s1_21_2 = _mm_madd_epi16(s1_21_0, k__cospi_m24_m08); - const __m128i s1_21_3 = _mm_madd_epi16(s1_21_1, k__cospi_m24_m08); - const __m128i s1_26_2 = _mm_madd_epi16(s1_21_0, k__cospi_m08_p24); - const __m128i s1_26_3 = _mm_madd_epi16(s1_21_1, k__cospi_m08_p24); - const __m128i s1_27_2 = _mm_madd_epi16(s1_20_0, k__cospi_m08_p24); - const __m128i s1_27_3 = _mm_madd_epi16(s1_20_1, k__cospi_m08_p24); - const __m128i s1_28_2 = _mm_madd_epi16(s1_19_0, k__cospi_p24_p08); - const __m128i s1_28_3 = _mm_madd_epi16(s1_19_1, k__cospi_p24_p08); - const __m128i s1_29_2 = _mm_madd_epi16(s1_18_0, k__cospi_p24_p08); - const __m128i s1_29_3 = _mm_madd_epi16(s1_18_1, k__cospi_p24_p08); - // dct_const_round_shift - const __m128i s1_18_4 = _mm_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING); - const __m128i s1_18_5 = _mm_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING); - const __m128i s1_19_4 = _mm_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING); - const __m128i s1_19_5 = _mm_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING); - const __m128i s1_20_4 = _mm_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING); - const __m128i s1_20_5 = _mm_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING); - const __m128i s1_21_4 = _mm_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING); - const __m128i s1_21_5 = _mm_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING); - const __m128i s1_26_4 = _mm_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING); - const __m128i s1_26_5 = _mm_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING); - const __m128i s1_27_4 = _mm_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING); - const __m128i s1_27_5 = _mm_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING); - const __m128i s1_28_4 = _mm_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING); - const __m128i s1_28_5 = _mm_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING); - const __m128i s1_29_4 = _mm_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING); - const __m128i s1_29_5 = _mm_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING); - const __m128i s1_18_6 = _mm_srai_epi32(s1_18_4, DCT_CONST_BITS); - const __m128i s1_18_7 = _mm_srai_epi32(s1_18_5, DCT_CONST_BITS); - const __m128i s1_19_6 = _mm_srai_epi32(s1_19_4, DCT_CONST_BITS); - const __m128i s1_19_7 = _mm_srai_epi32(s1_19_5, DCT_CONST_BITS); - const __m128i s1_20_6 = _mm_srai_epi32(s1_20_4, DCT_CONST_BITS); - const __m128i s1_20_7 = _mm_srai_epi32(s1_20_5, DCT_CONST_BITS); - const __m128i s1_21_6 = _mm_srai_epi32(s1_21_4, DCT_CONST_BITS); - const __m128i s1_21_7 = _mm_srai_epi32(s1_21_5, DCT_CONST_BITS); - const __m128i s1_26_6 = _mm_srai_epi32(s1_26_4, DCT_CONST_BITS); - const __m128i s1_26_7 = _mm_srai_epi32(s1_26_5, DCT_CONST_BITS); - const __m128i s1_27_6 = _mm_srai_epi32(s1_27_4, DCT_CONST_BITS); - const __m128i s1_27_7 = _mm_srai_epi32(s1_27_5, DCT_CONST_BITS); - const __m128i s1_28_6 = _mm_srai_epi32(s1_28_4, DCT_CONST_BITS); - const __m128i s1_28_7 = _mm_srai_epi32(s1_28_5, DCT_CONST_BITS); - const __m128i s1_29_6 = _mm_srai_epi32(s1_29_4, DCT_CONST_BITS); - const __m128i s1_29_7 = _mm_srai_epi32(s1_29_5, DCT_CONST_BITS); - // Combine - step1[18] = _mm_packs_epi32(s1_18_6, s1_18_7); - step1[19] = _mm_packs_epi32(s1_19_6, s1_19_7); - step1[20] = _mm_packs_epi32(s1_20_6, s1_20_7); - step1[21] = _mm_packs_epi32(s1_21_6, s1_21_7); - step1[26] = _mm_packs_epi32(s1_26_6, s1_26_7); - step1[27] = _mm_packs_epi32(s1_27_6, s1_27_7); - step1[28] = _mm_packs_epi32(s1_28_6, s1_28_7); - step1[29] = _mm_packs_epi32(s1_29_6, s1_29_7); + { + const __m128i s1_05_0 = _mm_unpacklo_epi16(step3[6], step3[5]); + const __m128i s1_05_1 = _mm_unpackhi_epi16(step3[6], step3[5]); + const __m128i s1_05_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_m16); + const __m128i s1_05_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_m16); + const __m128i s1_06_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_p16); + const __m128i s1_06_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_p16); + // dct_const_round_shift + const __m128i s1_05_4 = _mm_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING); + const __m128i s1_05_5 = _mm_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING); + const __m128i s1_06_4 = _mm_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING); + const __m128i s1_06_5 = _mm_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING); + const __m128i s1_05_6 = _mm_srai_epi32(s1_05_4, DCT_CONST_BITS); + const __m128i s1_05_7 = _mm_srai_epi32(s1_05_5, DCT_CONST_BITS); + const __m128i s1_06_6 = _mm_srai_epi32(s1_06_4, DCT_CONST_BITS); + const __m128i s1_06_7 = _mm_srai_epi32(s1_06_5, DCT_CONST_BITS); + // Combine + step1[5] = _mm_packs_epi32(s1_05_6, s1_05_7); + step1[6] = _mm_packs_epi32(s1_06_6, s1_06_7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step1[18], &step1[19], &step1[20], - &step1[21], &step1[26], &step1[27], - &step1[28], &step1[29]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = check_epi16_overflow_x2(&step1[5], &step1[6]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - // Stage 5 - { - step2[4] = ADD_EPI16(step1[5], step3[4]); - step2[5] = SUB_EPI16(step3[4], step1[5]); - step2[6] = SUB_EPI16(step3[7], step1[6]); - step2[7] = ADD_EPI16(step1[6], step3[7]); -#if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&step2[4], &step2[5], - &step2[6], &step2[7]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; } -#endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i out_00_0 = _mm_unpacklo_epi16(step1[0], step1[1]); - const __m128i out_00_1 = _mm_unpackhi_epi16(step1[0], step1[1]); - const __m128i out_08_0 = _mm_unpacklo_epi16(step1[2], step1[3]); - const __m128i out_08_1 = _mm_unpackhi_epi16(step1[2], step1[3]); - const __m128i out_00_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_p16); - const __m128i out_00_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_p16); - const __m128i out_16_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_m16); - const __m128i out_16_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_m16); - const __m128i out_08_2 = _mm_madd_epi16(out_08_0, k__cospi_p24_p08); - const __m128i out_08_3 = _mm_madd_epi16(out_08_1, k__cospi_p24_p08); - const __m128i out_24_2 = _mm_madd_epi16(out_08_0, k__cospi_m08_p24); - const __m128i out_24_3 = _mm_madd_epi16(out_08_1, k__cospi_m08_p24); - // dct_const_round_shift - const __m128i out_00_4 = _mm_add_epi32(out_00_2, k__DCT_CONST_ROUNDING); - const __m128i out_00_5 = _mm_add_epi32(out_00_3, k__DCT_CONST_ROUNDING); - const __m128i out_16_4 = _mm_add_epi32(out_16_2, k__DCT_CONST_ROUNDING); - const __m128i out_16_5 = _mm_add_epi32(out_16_3, k__DCT_CONST_ROUNDING); - const __m128i out_08_4 = _mm_add_epi32(out_08_2, k__DCT_CONST_ROUNDING); - const __m128i out_08_5 = _mm_add_epi32(out_08_3, k__DCT_CONST_ROUNDING); - const __m128i out_24_4 = _mm_add_epi32(out_24_2, k__DCT_CONST_ROUNDING); - const __m128i out_24_5 = _mm_add_epi32(out_24_3, k__DCT_CONST_ROUNDING); - const __m128i out_00_6 = _mm_srai_epi32(out_00_4, DCT_CONST_BITS); - const __m128i out_00_7 = _mm_srai_epi32(out_00_5, DCT_CONST_BITS); - const __m128i out_16_6 = _mm_srai_epi32(out_16_4, DCT_CONST_BITS); - const __m128i out_16_7 = _mm_srai_epi32(out_16_5, DCT_CONST_BITS); - const __m128i out_08_6 = _mm_srai_epi32(out_08_4, DCT_CONST_BITS); - const __m128i out_08_7 = _mm_srai_epi32(out_08_5, DCT_CONST_BITS); - const __m128i out_24_6 = _mm_srai_epi32(out_24_4, DCT_CONST_BITS); - const __m128i out_24_7 = _mm_srai_epi32(out_24_5, DCT_CONST_BITS); - // Combine - out[ 0] = _mm_packs_epi32(out_00_6, out_00_7); - out[16] = _mm_packs_epi32(out_16_6, out_16_7); - out[ 8] = _mm_packs_epi32(out_08_6, out_08_7); - out[24] = _mm_packs_epi32(out_24_6, out_24_7); + { + const __m128i s1_18_0 = _mm_unpacklo_epi16(step3[18], step3[29]); + const __m128i s1_18_1 = _mm_unpackhi_epi16(step3[18], step3[29]); + const __m128i s1_19_0 = _mm_unpacklo_epi16(step3[19], step3[28]); + const __m128i s1_19_1 = _mm_unpackhi_epi16(step3[19], step3[28]); + const __m128i s1_20_0 = _mm_unpacklo_epi16(step3[20], step3[27]); + const __m128i s1_20_1 = _mm_unpackhi_epi16(step3[20], step3[27]); + const __m128i s1_21_0 = _mm_unpacklo_epi16(step3[21], step3[26]); + const __m128i s1_21_1 = _mm_unpackhi_epi16(step3[21], step3[26]); + const __m128i s1_18_2 = _mm_madd_epi16(s1_18_0, k__cospi_m08_p24); + const __m128i s1_18_3 = _mm_madd_epi16(s1_18_1, k__cospi_m08_p24); + const __m128i s1_19_2 = _mm_madd_epi16(s1_19_0, k__cospi_m08_p24); + const __m128i s1_19_3 = _mm_madd_epi16(s1_19_1, k__cospi_m08_p24); + const __m128i s1_20_2 = _mm_madd_epi16(s1_20_0, k__cospi_m24_m08); + const __m128i s1_20_3 = _mm_madd_epi16(s1_20_1, k__cospi_m24_m08); + const __m128i s1_21_2 = _mm_madd_epi16(s1_21_0, k__cospi_m24_m08); + const __m128i s1_21_3 = _mm_madd_epi16(s1_21_1, k__cospi_m24_m08); + const __m128i s1_26_2 = _mm_madd_epi16(s1_21_0, k__cospi_m08_p24); + const __m128i s1_26_3 = _mm_madd_epi16(s1_21_1, k__cospi_m08_p24); + const __m128i s1_27_2 = _mm_madd_epi16(s1_20_0, k__cospi_m08_p24); + const __m128i s1_27_3 = _mm_madd_epi16(s1_20_1, k__cospi_m08_p24); + const __m128i s1_28_2 = _mm_madd_epi16(s1_19_0, k__cospi_p24_p08); + const __m128i s1_28_3 = _mm_madd_epi16(s1_19_1, k__cospi_p24_p08); + const __m128i s1_29_2 = _mm_madd_epi16(s1_18_0, k__cospi_p24_p08); + const __m128i s1_29_3 = _mm_madd_epi16(s1_18_1, k__cospi_p24_p08); + // dct_const_round_shift + const __m128i s1_18_4 = _mm_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING); + const __m128i s1_18_5 = _mm_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING); + const __m128i s1_19_4 = _mm_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING); + const __m128i s1_19_5 = _mm_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING); + const __m128i s1_20_4 = _mm_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING); + const __m128i s1_20_5 = _mm_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING); + const __m128i s1_21_4 = _mm_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING); + const __m128i s1_21_5 = _mm_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING); + const __m128i s1_26_4 = _mm_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING); + const __m128i s1_26_5 = _mm_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING); + const __m128i s1_27_4 = _mm_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING); + const __m128i s1_27_5 = _mm_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING); + const __m128i s1_28_4 = _mm_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING); + const __m128i s1_28_5 = _mm_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING); + const __m128i s1_29_4 = _mm_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING); + const __m128i s1_29_5 = _mm_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING); + const __m128i s1_18_6 = _mm_srai_epi32(s1_18_4, DCT_CONST_BITS); + const __m128i s1_18_7 = _mm_srai_epi32(s1_18_5, DCT_CONST_BITS); + const __m128i s1_19_6 = _mm_srai_epi32(s1_19_4, DCT_CONST_BITS); + const __m128i s1_19_7 = _mm_srai_epi32(s1_19_5, DCT_CONST_BITS); + const __m128i s1_20_6 = _mm_srai_epi32(s1_20_4, DCT_CONST_BITS); + const __m128i s1_20_7 = _mm_srai_epi32(s1_20_5, DCT_CONST_BITS); + const __m128i s1_21_6 = _mm_srai_epi32(s1_21_4, DCT_CONST_BITS); + const __m128i s1_21_7 = _mm_srai_epi32(s1_21_5, DCT_CONST_BITS); + const __m128i s1_26_6 = _mm_srai_epi32(s1_26_4, DCT_CONST_BITS); + const __m128i s1_26_7 = _mm_srai_epi32(s1_26_5, DCT_CONST_BITS); + const __m128i s1_27_6 = _mm_srai_epi32(s1_27_4, DCT_CONST_BITS); + const __m128i s1_27_7 = _mm_srai_epi32(s1_27_5, DCT_CONST_BITS); + const __m128i s1_28_6 = _mm_srai_epi32(s1_28_4, DCT_CONST_BITS); + const __m128i s1_28_7 = _mm_srai_epi32(s1_28_5, DCT_CONST_BITS); + const __m128i s1_29_6 = _mm_srai_epi32(s1_29_4, DCT_CONST_BITS); + const __m128i s1_29_7 = _mm_srai_epi32(s1_29_5, DCT_CONST_BITS); + // Combine + step1[18] = _mm_packs_epi32(s1_18_6, s1_18_7); + step1[19] = _mm_packs_epi32(s1_19_6, s1_19_7); + step1[20] = _mm_packs_epi32(s1_20_6, s1_20_7); + step1[21] = _mm_packs_epi32(s1_21_6, s1_21_7); + step1[26] = _mm_packs_epi32(s1_26_6, s1_26_7); + step1[27] = _mm_packs_epi32(s1_27_6, s1_27_7); + step1[28] = _mm_packs_epi32(s1_28_6, s1_28_7); + step1[29] = _mm_packs_epi32(s1_29_6, s1_29_7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&out[0], &out[16], - &out[8], &out[24]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = check_epi16_overflow_x8(&step1[18], &step1[19], &step1[20], + &step1[21], &step1[26], &step1[27], + &step1[28], &step1[29]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i s2_09_0 = _mm_unpacklo_epi16(step1[ 9], step1[14]); - const __m128i s2_09_1 = _mm_unpackhi_epi16(step1[ 9], step1[14]); - const __m128i s2_10_0 = _mm_unpacklo_epi16(step1[10], step1[13]); - const __m128i s2_10_1 = _mm_unpackhi_epi16(step1[10], step1[13]); - const __m128i s2_09_2 = _mm_madd_epi16(s2_09_0, k__cospi_m08_p24); - const __m128i s2_09_3 = _mm_madd_epi16(s2_09_1, k__cospi_m08_p24); - const __m128i s2_10_2 = _mm_madd_epi16(s2_10_0, k__cospi_m24_m08); - const __m128i s2_10_3 = _mm_madd_epi16(s2_10_1, k__cospi_m24_m08); - const __m128i s2_13_2 = _mm_madd_epi16(s2_10_0, k__cospi_m08_p24); - const __m128i s2_13_3 = _mm_madd_epi16(s2_10_1, k__cospi_m08_p24); - const __m128i s2_14_2 = _mm_madd_epi16(s2_09_0, k__cospi_p24_p08); - const __m128i s2_14_3 = _mm_madd_epi16(s2_09_1, k__cospi_p24_p08); - // dct_const_round_shift - const __m128i s2_09_4 = _mm_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING); - const __m128i s2_09_5 = _mm_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING); - const __m128i s2_10_4 = _mm_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING); - const __m128i s2_10_5 = _mm_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING); - const __m128i s2_13_4 = _mm_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING); - const __m128i s2_13_5 = _mm_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING); - const __m128i s2_14_4 = _mm_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING); - const __m128i s2_14_5 = _mm_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING); - const __m128i s2_09_6 = _mm_srai_epi32(s2_09_4, DCT_CONST_BITS); - const __m128i s2_09_7 = _mm_srai_epi32(s2_09_5, DCT_CONST_BITS); - const __m128i s2_10_6 = _mm_srai_epi32(s2_10_4, DCT_CONST_BITS); - const __m128i s2_10_7 = _mm_srai_epi32(s2_10_5, DCT_CONST_BITS); - const __m128i s2_13_6 = _mm_srai_epi32(s2_13_4, DCT_CONST_BITS); - const __m128i s2_13_7 = _mm_srai_epi32(s2_13_5, DCT_CONST_BITS); - const __m128i s2_14_6 = _mm_srai_epi32(s2_14_4, DCT_CONST_BITS); - const __m128i s2_14_7 = _mm_srai_epi32(s2_14_5, DCT_CONST_BITS); - // Combine - step2[ 9] = _mm_packs_epi32(s2_09_6, s2_09_7); - step2[10] = _mm_packs_epi32(s2_10_6, s2_10_7); - step2[13] = _mm_packs_epi32(s2_13_6, s2_13_7); - step2[14] = _mm_packs_epi32(s2_14_6, s2_14_7); -#if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&step2[9], &step2[10], - &step2[13], &step2[14]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; } -#endif // DCT_HIGH_BIT_DEPTH - } - { - step2[16] = ADD_EPI16(step1[19], step3[16]); - step2[17] = ADD_EPI16(step1[18], step3[17]); - step2[18] = SUB_EPI16(step3[17], step1[18]); - step2[19] = SUB_EPI16(step3[16], step1[19]); - step2[20] = SUB_EPI16(step3[23], step1[20]); - step2[21] = SUB_EPI16(step3[22], step1[21]); - step2[22] = ADD_EPI16(step1[21], step3[22]); - step2[23] = ADD_EPI16(step1[20], step3[23]); - step2[24] = ADD_EPI16(step1[27], step3[24]); - step2[25] = ADD_EPI16(step1[26], step3[25]); - step2[26] = SUB_EPI16(step3[25], step1[26]); - step2[27] = SUB_EPI16(step3[24], step1[27]); - step2[28] = SUB_EPI16(step3[31], step1[28]); - step2[29] = SUB_EPI16(step3[30], step1[29]); - step2[30] = ADD_EPI16(step1[29], step3[30]); - step2[31] = ADD_EPI16(step1[28], step3[31]); + // Stage 5 + { + step2[4] = ADD_EPI16(step1[5], step3[4]); + step2[5] = SUB_EPI16(step3[4], step1[5]); + step2[6] = SUB_EPI16(step3[7], step1[6]); + step2[7] = ADD_EPI16(step1[6], step3[7]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x16( - &step2[16], &step2[17], &step2[18], &step2[19], - &step2[20], &step2[21], &step2[22], &step2[23], - &step2[24], &step2[25], &step2[26], &step2[27], - &step2[28], &step2[29], &step2[30], &step2[31]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = check_epi16_overflow_x4(&step2[4], &step2[5], &step2[6], + &step2[7]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - // Stage 6 - { - const __m128i out_04_0 = _mm_unpacklo_epi16(step2[4], step2[7]); - const __m128i out_04_1 = _mm_unpackhi_epi16(step2[4], step2[7]); - const __m128i out_20_0 = _mm_unpacklo_epi16(step2[5], step2[6]); - const __m128i out_20_1 = _mm_unpackhi_epi16(step2[5], step2[6]); - const __m128i out_12_0 = _mm_unpacklo_epi16(step2[5], step2[6]); - const __m128i out_12_1 = _mm_unpackhi_epi16(step2[5], step2[6]); - const __m128i out_28_0 = _mm_unpacklo_epi16(step2[4], step2[7]); - const __m128i out_28_1 = _mm_unpackhi_epi16(step2[4], step2[7]); - const __m128i out_04_2 = _mm_madd_epi16(out_04_0, k__cospi_p28_p04); - const __m128i out_04_3 = _mm_madd_epi16(out_04_1, k__cospi_p28_p04); - const __m128i out_20_2 = _mm_madd_epi16(out_20_0, k__cospi_p12_p20); - const __m128i out_20_3 = _mm_madd_epi16(out_20_1, k__cospi_p12_p20); - const __m128i out_12_2 = _mm_madd_epi16(out_12_0, k__cospi_m20_p12); - const __m128i out_12_3 = _mm_madd_epi16(out_12_1, k__cospi_m20_p12); - const __m128i out_28_2 = _mm_madd_epi16(out_28_0, k__cospi_m04_p28); - const __m128i out_28_3 = _mm_madd_epi16(out_28_1, k__cospi_m04_p28); - // dct_const_round_shift - const __m128i out_04_4 = _mm_add_epi32(out_04_2, k__DCT_CONST_ROUNDING); - const __m128i out_04_5 = _mm_add_epi32(out_04_3, k__DCT_CONST_ROUNDING); - const __m128i out_20_4 = _mm_add_epi32(out_20_2, k__DCT_CONST_ROUNDING); - const __m128i out_20_5 = _mm_add_epi32(out_20_3, k__DCT_CONST_ROUNDING); - const __m128i out_12_4 = _mm_add_epi32(out_12_2, k__DCT_CONST_ROUNDING); - const __m128i out_12_5 = _mm_add_epi32(out_12_3, k__DCT_CONST_ROUNDING); - const __m128i out_28_4 = _mm_add_epi32(out_28_2, k__DCT_CONST_ROUNDING); - const __m128i out_28_5 = _mm_add_epi32(out_28_3, k__DCT_CONST_ROUNDING); - const __m128i out_04_6 = _mm_srai_epi32(out_04_4, DCT_CONST_BITS); - const __m128i out_04_7 = _mm_srai_epi32(out_04_5, DCT_CONST_BITS); - const __m128i out_20_6 = _mm_srai_epi32(out_20_4, DCT_CONST_BITS); - const __m128i out_20_7 = _mm_srai_epi32(out_20_5, DCT_CONST_BITS); - const __m128i out_12_6 = _mm_srai_epi32(out_12_4, DCT_CONST_BITS); - const __m128i out_12_7 = _mm_srai_epi32(out_12_5, DCT_CONST_BITS); - const __m128i out_28_6 = _mm_srai_epi32(out_28_4, DCT_CONST_BITS); - const __m128i out_28_7 = _mm_srai_epi32(out_28_5, DCT_CONST_BITS); - // Combine - out[4] = _mm_packs_epi32(out_04_6, out_04_7); - out[20] = _mm_packs_epi32(out_20_6, out_20_7); - out[12] = _mm_packs_epi32(out_12_6, out_12_7); - out[28] = _mm_packs_epi32(out_28_6, out_28_7); -#if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&out[4], &out[20], - &out[12], &out[28]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; } -#endif // DCT_HIGH_BIT_DEPTH - } - { - step3[8] = ADD_EPI16(step2[ 9], step1[ 8]); - step3[9] = SUB_EPI16(step1[ 8], step2[ 9]); - step3[10] = SUB_EPI16(step1[11], step2[10]); - step3[11] = ADD_EPI16(step2[10], step1[11]); - step3[12] = ADD_EPI16(step2[13], step1[12]); - step3[13] = SUB_EPI16(step1[12], step2[13]); - step3[14] = SUB_EPI16(step1[15], step2[14]); - step3[15] = ADD_EPI16(step2[14], step1[15]); + { + const __m128i out_00_0 = _mm_unpacklo_epi16(step1[0], step1[1]); + const __m128i out_00_1 = _mm_unpackhi_epi16(step1[0], step1[1]); + const __m128i out_08_0 = _mm_unpacklo_epi16(step1[2], step1[3]); + const __m128i out_08_1 = _mm_unpackhi_epi16(step1[2], step1[3]); + const __m128i out_00_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_p16); + const __m128i out_00_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_p16); + const __m128i out_16_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_m16); + const __m128i out_16_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_m16); + const __m128i out_08_2 = _mm_madd_epi16(out_08_0, k__cospi_p24_p08); + const __m128i out_08_3 = _mm_madd_epi16(out_08_1, k__cospi_p24_p08); + const __m128i out_24_2 = _mm_madd_epi16(out_08_0, k__cospi_m08_p24); + const __m128i out_24_3 = _mm_madd_epi16(out_08_1, k__cospi_m08_p24); + // dct_const_round_shift + const __m128i out_00_4 = + _mm_add_epi32(out_00_2, k__DCT_CONST_ROUNDING); + const __m128i out_00_5 = + _mm_add_epi32(out_00_3, k__DCT_CONST_ROUNDING); + const __m128i out_16_4 = + _mm_add_epi32(out_16_2, k__DCT_CONST_ROUNDING); + const __m128i out_16_5 = + _mm_add_epi32(out_16_3, k__DCT_CONST_ROUNDING); + const __m128i out_08_4 = + _mm_add_epi32(out_08_2, k__DCT_CONST_ROUNDING); + const __m128i out_08_5 = + _mm_add_epi32(out_08_3, k__DCT_CONST_ROUNDING); + const __m128i out_24_4 = + _mm_add_epi32(out_24_2, k__DCT_CONST_ROUNDING); + const __m128i out_24_5 = + _mm_add_epi32(out_24_3, k__DCT_CONST_ROUNDING); + const __m128i out_00_6 = _mm_srai_epi32(out_00_4, DCT_CONST_BITS); + const __m128i out_00_7 = _mm_srai_epi32(out_00_5, DCT_CONST_BITS); + const __m128i out_16_6 = _mm_srai_epi32(out_16_4, DCT_CONST_BITS); + const __m128i out_16_7 = _mm_srai_epi32(out_16_5, DCT_CONST_BITS); + const __m128i out_08_6 = _mm_srai_epi32(out_08_4, DCT_CONST_BITS); + const __m128i out_08_7 = _mm_srai_epi32(out_08_5, DCT_CONST_BITS); + const __m128i out_24_6 = _mm_srai_epi32(out_24_4, DCT_CONST_BITS); + const __m128i out_24_7 = _mm_srai_epi32(out_24_5, DCT_CONST_BITS); + // Combine + out[0] = _mm_packs_epi32(out_00_6, out_00_7); + out[16] = _mm_packs_epi32(out_16_6, out_16_7); + out[8] = _mm_packs_epi32(out_08_6, out_08_7); + out[24] = _mm_packs_epi32(out_24_6, out_24_7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step3[8], &step3[9], &step3[10], - &step3[11], &step3[12], &step3[13], - &step3[14], &step3[15]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = + check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i s3_17_0 = _mm_unpacklo_epi16(step2[17], step2[30]); - const __m128i s3_17_1 = _mm_unpackhi_epi16(step2[17], step2[30]); - const __m128i s3_18_0 = _mm_unpacklo_epi16(step2[18], step2[29]); - const __m128i s3_18_1 = _mm_unpackhi_epi16(step2[18], step2[29]); - const __m128i s3_21_0 = _mm_unpacklo_epi16(step2[21], step2[26]); - const __m128i s3_21_1 = _mm_unpackhi_epi16(step2[21], step2[26]); - const __m128i s3_22_0 = _mm_unpacklo_epi16(step2[22], step2[25]); - const __m128i s3_22_1 = _mm_unpackhi_epi16(step2[22], step2[25]); - const __m128i s3_17_2 = _mm_madd_epi16(s3_17_0, k__cospi_m04_p28); - const __m128i s3_17_3 = _mm_madd_epi16(s3_17_1, k__cospi_m04_p28); - const __m128i s3_18_2 = _mm_madd_epi16(s3_18_0, k__cospi_m28_m04); - const __m128i s3_18_3 = _mm_madd_epi16(s3_18_1, k__cospi_m28_m04); - const __m128i s3_21_2 = _mm_madd_epi16(s3_21_0, k__cospi_m20_p12); - const __m128i s3_21_3 = _mm_madd_epi16(s3_21_1, k__cospi_m20_p12); - const __m128i s3_22_2 = _mm_madd_epi16(s3_22_0, k__cospi_m12_m20); - const __m128i s3_22_3 = _mm_madd_epi16(s3_22_1, k__cospi_m12_m20); - const __m128i s3_25_2 = _mm_madd_epi16(s3_22_0, k__cospi_m20_p12); - const __m128i s3_25_3 = _mm_madd_epi16(s3_22_1, k__cospi_m20_p12); - const __m128i s3_26_2 = _mm_madd_epi16(s3_21_0, k__cospi_p12_p20); - const __m128i s3_26_3 = _mm_madd_epi16(s3_21_1, k__cospi_p12_p20); - const __m128i s3_29_2 = _mm_madd_epi16(s3_18_0, k__cospi_m04_p28); - const __m128i s3_29_3 = _mm_madd_epi16(s3_18_1, k__cospi_m04_p28); - const __m128i s3_30_2 = _mm_madd_epi16(s3_17_0, k__cospi_p28_p04); - const __m128i s3_30_3 = _mm_madd_epi16(s3_17_1, k__cospi_p28_p04); - // dct_const_round_shift - const __m128i s3_17_4 = _mm_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING); - const __m128i s3_17_5 = _mm_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING); - const __m128i s3_18_4 = _mm_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING); - const __m128i s3_18_5 = _mm_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING); - const __m128i s3_21_4 = _mm_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING); - const __m128i s3_21_5 = _mm_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING); - const __m128i s3_22_4 = _mm_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING); - const __m128i s3_22_5 = _mm_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING); - const __m128i s3_17_6 = _mm_srai_epi32(s3_17_4, DCT_CONST_BITS); - const __m128i s3_17_7 = _mm_srai_epi32(s3_17_5, DCT_CONST_BITS); - const __m128i s3_18_6 = _mm_srai_epi32(s3_18_4, DCT_CONST_BITS); - const __m128i s3_18_7 = _mm_srai_epi32(s3_18_5, DCT_CONST_BITS); - const __m128i s3_21_6 = _mm_srai_epi32(s3_21_4, DCT_CONST_BITS); - const __m128i s3_21_7 = _mm_srai_epi32(s3_21_5, DCT_CONST_BITS); - const __m128i s3_22_6 = _mm_srai_epi32(s3_22_4, DCT_CONST_BITS); - const __m128i s3_22_7 = _mm_srai_epi32(s3_22_5, DCT_CONST_BITS); - const __m128i s3_25_4 = _mm_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING); - const __m128i s3_25_5 = _mm_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING); - const __m128i s3_26_4 = _mm_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING); - const __m128i s3_26_5 = _mm_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING); - const __m128i s3_29_4 = _mm_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING); - const __m128i s3_29_5 = _mm_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING); - const __m128i s3_30_4 = _mm_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING); - const __m128i s3_30_5 = _mm_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING); - const __m128i s3_25_6 = _mm_srai_epi32(s3_25_4, DCT_CONST_BITS); - const __m128i s3_25_7 = _mm_srai_epi32(s3_25_5, DCT_CONST_BITS); - const __m128i s3_26_6 = _mm_srai_epi32(s3_26_4, DCT_CONST_BITS); - const __m128i s3_26_7 = _mm_srai_epi32(s3_26_5, DCT_CONST_BITS); - const __m128i s3_29_6 = _mm_srai_epi32(s3_29_4, DCT_CONST_BITS); - const __m128i s3_29_7 = _mm_srai_epi32(s3_29_5, DCT_CONST_BITS); - const __m128i s3_30_6 = _mm_srai_epi32(s3_30_4, DCT_CONST_BITS); - const __m128i s3_30_7 = _mm_srai_epi32(s3_30_5, DCT_CONST_BITS); - // Combine - step3[17] = _mm_packs_epi32(s3_17_6, s3_17_7); - step3[18] = _mm_packs_epi32(s3_18_6, s3_18_7); - step3[21] = _mm_packs_epi32(s3_21_6, s3_21_7); - step3[22] = _mm_packs_epi32(s3_22_6, s3_22_7); - // Combine - step3[25] = _mm_packs_epi32(s3_25_6, s3_25_7); - step3[26] = _mm_packs_epi32(s3_26_6, s3_26_7); - step3[29] = _mm_packs_epi32(s3_29_6, s3_29_7); - step3[30] = _mm_packs_epi32(s3_30_6, s3_30_7); -#if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step3[17], &step3[18], &step3[21], - &step3[22], &step3[25], &step3[26], - &step3[29], &step3[30]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; } + { + const __m128i s2_09_0 = _mm_unpacklo_epi16(step1[9], step1[14]); + const __m128i s2_09_1 = _mm_unpackhi_epi16(step1[9], step1[14]); + const __m128i s2_10_0 = _mm_unpacklo_epi16(step1[10], step1[13]); + const __m128i s2_10_1 = _mm_unpackhi_epi16(step1[10], step1[13]); + const __m128i s2_09_2 = _mm_madd_epi16(s2_09_0, k__cospi_m08_p24); + const __m128i s2_09_3 = _mm_madd_epi16(s2_09_1, k__cospi_m08_p24); + const __m128i s2_10_2 = _mm_madd_epi16(s2_10_0, k__cospi_m24_m08); + const __m128i s2_10_3 = _mm_madd_epi16(s2_10_1, k__cospi_m24_m08); + const __m128i s2_13_2 = _mm_madd_epi16(s2_10_0, k__cospi_m08_p24); + const __m128i s2_13_3 = _mm_madd_epi16(s2_10_1, k__cospi_m08_p24); + const __m128i s2_14_2 = _mm_madd_epi16(s2_09_0, k__cospi_p24_p08); + const __m128i s2_14_3 = _mm_madd_epi16(s2_09_1, k__cospi_p24_p08); + // dct_const_round_shift + const __m128i s2_09_4 = _mm_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING); + const __m128i s2_09_5 = _mm_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING); + const __m128i s2_10_4 = _mm_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING); + const __m128i s2_10_5 = _mm_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING); + const __m128i s2_13_4 = _mm_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING); + const __m128i s2_13_5 = _mm_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING); + const __m128i s2_14_4 = _mm_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING); + const __m128i s2_14_5 = _mm_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING); + const __m128i s2_09_6 = _mm_srai_epi32(s2_09_4, DCT_CONST_BITS); + const __m128i s2_09_7 = _mm_srai_epi32(s2_09_5, DCT_CONST_BITS); + const __m128i s2_10_6 = _mm_srai_epi32(s2_10_4, DCT_CONST_BITS); + const __m128i s2_10_7 = _mm_srai_epi32(s2_10_5, DCT_CONST_BITS); + const __m128i s2_13_6 = _mm_srai_epi32(s2_13_4, DCT_CONST_BITS); + const __m128i s2_13_7 = _mm_srai_epi32(s2_13_5, DCT_CONST_BITS); + const __m128i s2_14_6 = _mm_srai_epi32(s2_14_4, DCT_CONST_BITS); + const __m128i s2_14_7 = _mm_srai_epi32(s2_14_5, DCT_CONST_BITS); + // Combine + step2[9] = _mm_packs_epi32(s2_09_6, s2_09_7); + step2[10] = _mm_packs_epi32(s2_10_6, s2_10_7); + step2[13] = _mm_packs_epi32(s2_13_6, s2_13_7); + step2[14] = _mm_packs_epi32(s2_14_6, s2_14_7); +#if DCT_HIGH_BIT_DEPTH + overflow = check_epi16_overflow_x4(&step2[9], &step2[10], &step2[13], + &step2[14]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - // Stage 7 - { - const __m128i out_02_0 = _mm_unpacklo_epi16(step3[ 8], step3[15]); - const __m128i out_02_1 = _mm_unpackhi_epi16(step3[ 8], step3[15]); - const __m128i out_18_0 = _mm_unpacklo_epi16(step3[ 9], step3[14]); - const __m128i out_18_1 = _mm_unpackhi_epi16(step3[ 9], step3[14]); - const __m128i out_10_0 = _mm_unpacklo_epi16(step3[10], step3[13]); - const __m128i out_10_1 = _mm_unpackhi_epi16(step3[10], step3[13]); - const __m128i out_26_0 = _mm_unpacklo_epi16(step3[11], step3[12]); - const __m128i out_26_1 = _mm_unpackhi_epi16(step3[11], step3[12]); - const __m128i out_02_2 = _mm_madd_epi16(out_02_0, k__cospi_p30_p02); - const __m128i out_02_3 = _mm_madd_epi16(out_02_1, k__cospi_p30_p02); - const __m128i out_18_2 = _mm_madd_epi16(out_18_0, k__cospi_p14_p18); - const __m128i out_18_3 = _mm_madd_epi16(out_18_1, k__cospi_p14_p18); - const __m128i out_10_2 = _mm_madd_epi16(out_10_0, k__cospi_p22_p10); - const __m128i out_10_3 = _mm_madd_epi16(out_10_1, k__cospi_p22_p10); - const __m128i out_26_2 = _mm_madd_epi16(out_26_0, k__cospi_p06_p26); - const __m128i out_26_3 = _mm_madd_epi16(out_26_1, k__cospi_p06_p26); - const __m128i out_06_2 = _mm_madd_epi16(out_26_0, k__cospi_m26_p06); - const __m128i out_06_3 = _mm_madd_epi16(out_26_1, k__cospi_m26_p06); - const __m128i out_22_2 = _mm_madd_epi16(out_10_0, k__cospi_m10_p22); - const __m128i out_22_3 = _mm_madd_epi16(out_10_1, k__cospi_m10_p22); - const __m128i out_14_2 = _mm_madd_epi16(out_18_0, k__cospi_m18_p14); - const __m128i out_14_3 = _mm_madd_epi16(out_18_1, k__cospi_m18_p14); - const __m128i out_30_2 = _mm_madd_epi16(out_02_0, k__cospi_m02_p30); - const __m128i out_30_3 = _mm_madd_epi16(out_02_1, k__cospi_m02_p30); - // dct_const_round_shift - const __m128i out_02_4 = _mm_add_epi32(out_02_2, k__DCT_CONST_ROUNDING); - const __m128i out_02_5 = _mm_add_epi32(out_02_3, k__DCT_CONST_ROUNDING); - const __m128i out_18_4 = _mm_add_epi32(out_18_2, k__DCT_CONST_ROUNDING); - const __m128i out_18_5 = _mm_add_epi32(out_18_3, k__DCT_CONST_ROUNDING); - const __m128i out_10_4 = _mm_add_epi32(out_10_2, k__DCT_CONST_ROUNDING); - const __m128i out_10_5 = _mm_add_epi32(out_10_3, k__DCT_CONST_ROUNDING); - const __m128i out_26_4 = _mm_add_epi32(out_26_2, k__DCT_CONST_ROUNDING); - const __m128i out_26_5 = _mm_add_epi32(out_26_3, k__DCT_CONST_ROUNDING); - const __m128i out_06_4 = _mm_add_epi32(out_06_2, k__DCT_CONST_ROUNDING); - const __m128i out_06_5 = _mm_add_epi32(out_06_3, k__DCT_CONST_ROUNDING); - const __m128i out_22_4 = _mm_add_epi32(out_22_2, k__DCT_CONST_ROUNDING); - const __m128i out_22_5 = _mm_add_epi32(out_22_3, k__DCT_CONST_ROUNDING); - const __m128i out_14_4 = _mm_add_epi32(out_14_2, k__DCT_CONST_ROUNDING); - const __m128i out_14_5 = _mm_add_epi32(out_14_3, k__DCT_CONST_ROUNDING); - const __m128i out_30_4 = _mm_add_epi32(out_30_2, k__DCT_CONST_ROUNDING); - const __m128i out_30_5 = _mm_add_epi32(out_30_3, k__DCT_CONST_ROUNDING); - const __m128i out_02_6 = _mm_srai_epi32(out_02_4, DCT_CONST_BITS); - const __m128i out_02_7 = _mm_srai_epi32(out_02_5, DCT_CONST_BITS); - const __m128i out_18_6 = _mm_srai_epi32(out_18_4, DCT_CONST_BITS); - const __m128i out_18_7 = _mm_srai_epi32(out_18_5, DCT_CONST_BITS); - const __m128i out_10_6 = _mm_srai_epi32(out_10_4, DCT_CONST_BITS); - const __m128i out_10_7 = _mm_srai_epi32(out_10_5, DCT_CONST_BITS); - const __m128i out_26_6 = _mm_srai_epi32(out_26_4, DCT_CONST_BITS); - const __m128i out_26_7 = _mm_srai_epi32(out_26_5, DCT_CONST_BITS); - const __m128i out_06_6 = _mm_srai_epi32(out_06_4, DCT_CONST_BITS); - const __m128i out_06_7 = _mm_srai_epi32(out_06_5, DCT_CONST_BITS); - const __m128i out_22_6 = _mm_srai_epi32(out_22_4, DCT_CONST_BITS); - const __m128i out_22_7 = _mm_srai_epi32(out_22_5, DCT_CONST_BITS); - const __m128i out_14_6 = _mm_srai_epi32(out_14_4, DCT_CONST_BITS); - const __m128i out_14_7 = _mm_srai_epi32(out_14_5, DCT_CONST_BITS); - const __m128i out_30_6 = _mm_srai_epi32(out_30_4, DCT_CONST_BITS); - const __m128i out_30_7 = _mm_srai_epi32(out_30_5, DCT_CONST_BITS); - // Combine - out[ 2] = _mm_packs_epi32(out_02_6, out_02_7); - out[18] = _mm_packs_epi32(out_18_6, out_18_7); - out[10] = _mm_packs_epi32(out_10_6, out_10_7); - out[26] = _mm_packs_epi32(out_26_6, out_26_7); - out[ 6] = _mm_packs_epi32(out_06_6, out_06_7); - out[22] = _mm_packs_epi32(out_22_6, out_22_7); - out[14] = _mm_packs_epi32(out_14_6, out_14_7); - out[30] = _mm_packs_epi32(out_30_6, out_30_7); + } + { + step2[16] = ADD_EPI16(step1[19], step3[16]); + step2[17] = ADD_EPI16(step1[18], step3[17]); + step2[18] = SUB_EPI16(step3[17], step1[18]); + step2[19] = SUB_EPI16(step3[16], step1[19]); + step2[20] = SUB_EPI16(step3[23], step1[20]); + step2[21] = SUB_EPI16(step3[22], step1[21]); + step2[22] = ADD_EPI16(step1[21], step3[22]); + step2[23] = ADD_EPI16(step1[20], step3[23]); + step2[24] = ADD_EPI16(step1[27], step3[24]); + step2[25] = ADD_EPI16(step1[26], step3[25]); + step2[26] = SUB_EPI16(step3[25], step1[26]); + step2[27] = SUB_EPI16(step3[24], step1[27]); + step2[28] = SUB_EPI16(step3[31], step1[28]); + step2[29] = SUB_EPI16(step3[30], step1[29]); + step2[30] = ADD_EPI16(step1[29], step3[30]); + step2[31] = ADD_EPI16(step1[28], step3[31]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&out[2], &out[18], &out[10], - &out[26], &out[6], &out[22], - &out[14], &out[30]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; + overflow = check_epi16_overflow_x16( + &step2[16], &step2[17], &step2[18], &step2[19], &step2[20], + &step2[21], &step2[22], &step2[23], &step2[24], &step2[25], + &step2[26], &step2[27], &step2[28], &step2[29], &step2[30], + &step2[31]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } +#endif // DCT_HIGH_BIT_DEPTH } + // Stage 6 + { + const __m128i out_04_0 = _mm_unpacklo_epi16(step2[4], step2[7]); + const __m128i out_04_1 = _mm_unpackhi_epi16(step2[4], step2[7]); + const __m128i out_20_0 = _mm_unpacklo_epi16(step2[5], step2[6]); + const __m128i out_20_1 = _mm_unpackhi_epi16(step2[5], step2[6]); + const __m128i out_12_0 = _mm_unpacklo_epi16(step2[5], step2[6]); + const __m128i out_12_1 = _mm_unpackhi_epi16(step2[5], step2[6]); + const __m128i out_28_0 = _mm_unpacklo_epi16(step2[4], step2[7]); + const __m128i out_28_1 = _mm_unpackhi_epi16(step2[4], step2[7]); + const __m128i out_04_2 = _mm_madd_epi16(out_04_0, k__cospi_p28_p04); + const __m128i out_04_3 = _mm_madd_epi16(out_04_1, k__cospi_p28_p04); + const __m128i out_20_2 = _mm_madd_epi16(out_20_0, k__cospi_p12_p20); + const __m128i out_20_3 = _mm_madd_epi16(out_20_1, k__cospi_p12_p20); + const __m128i out_12_2 = _mm_madd_epi16(out_12_0, k__cospi_m20_p12); + const __m128i out_12_3 = _mm_madd_epi16(out_12_1, k__cospi_m20_p12); + const __m128i out_28_2 = _mm_madd_epi16(out_28_0, k__cospi_m04_p28); + const __m128i out_28_3 = _mm_madd_epi16(out_28_1, k__cospi_m04_p28); + // dct_const_round_shift + const __m128i out_04_4 = + _mm_add_epi32(out_04_2, k__DCT_CONST_ROUNDING); + const __m128i out_04_5 = + _mm_add_epi32(out_04_3, k__DCT_CONST_ROUNDING); + const __m128i out_20_4 = + _mm_add_epi32(out_20_2, k__DCT_CONST_ROUNDING); + const __m128i out_20_5 = + _mm_add_epi32(out_20_3, k__DCT_CONST_ROUNDING); + const __m128i out_12_4 = + _mm_add_epi32(out_12_2, k__DCT_CONST_ROUNDING); + const __m128i out_12_5 = + _mm_add_epi32(out_12_3, k__DCT_CONST_ROUNDING); + const __m128i out_28_4 = + _mm_add_epi32(out_28_2, k__DCT_CONST_ROUNDING); + const __m128i out_28_5 = + _mm_add_epi32(out_28_3, k__DCT_CONST_ROUNDING); + const __m128i out_04_6 = _mm_srai_epi32(out_04_4, DCT_CONST_BITS); + const __m128i out_04_7 = _mm_srai_epi32(out_04_5, DCT_CONST_BITS); + const __m128i out_20_6 = _mm_srai_epi32(out_20_4, DCT_CONST_BITS); + const __m128i out_20_7 = _mm_srai_epi32(out_20_5, DCT_CONST_BITS); + const __m128i out_12_6 = _mm_srai_epi32(out_12_4, DCT_CONST_BITS); + const __m128i out_12_7 = _mm_srai_epi32(out_12_5, DCT_CONST_BITS); + const __m128i out_28_6 = _mm_srai_epi32(out_28_4, DCT_CONST_BITS); + const __m128i out_28_7 = _mm_srai_epi32(out_28_5, DCT_CONST_BITS); + // Combine + out[4] = _mm_packs_epi32(out_04_6, out_04_7); + out[20] = _mm_packs_epi32(out_20_6, out_20_7); + out[12] = _mm_packs_epi32(out_12_6, out_12_7); + out[28] = _mm_packs_epi32(out_28_6, out_28_7); +#if DCT_HIGH_BIT_DEPTH + overflow = + check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - { - step1[16] = ADD_EPI16(step3[17], step2[16]); - step1[17] = SUB_EPI16(step2[16], step3[17]); - step1[18] = SUB_EPI16(step2[19], step3[18]); - step1[19] = ADD_EPI16(step3[18], step2[19]); - step1[20] = ADD_EPI16(step3[21], step2[20]); - step1[21] = SUB_EPI16(step2[20], step3[21]); - step1[22] = SUB_EPI16(step2[23], step3[22]); - step1[23] = ADD_EPI16(step3[22], step2[23]); - step1[24] = ADD_EPI16(step3[25], step2[24]); - step1[25] = SUB_EPI16(step2[24], step3[25]); - step1[26] = SUB_EPI16(step2[27], step3[26]); - step1[27] = ADD_EPI16(step3[26], step2[27]); - step1[28] = ADD_EPI16(step3[29], step2[28]); - step1[29] = SUB_EPI16(step2[28], step3[29]); - step1[30] = SUB_EPI16(step2[31], step3[30]); - step1[31] = ADD_EPI16(step3[30], step2[31]); + } + { + step3[8] = ADD_EPI16(step2[9], step1[8]); + step3[9] = SUB_EPI16(step1[8], step2[9]); + step3[10] = SUB_EPI16(step1[11], step2[10]); + step3[11] = ADD_EPI16(step2[10], step1[11]); + step3[12] = ADD_EPI16(step2[13], step1[12]); + step3[13] = SUB_EPI16(step1[12], step2[13]); + step3[14] = SUB_EPI16(step1[15], step2[14]); + step3[15] = ADD_EPI16(step2[14], step1[15]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x16( - &step1[16], &step1[17], &step1[18], &step1[19], - &step1[20], &step1[21], &step1[22], &step1[23], - &step1[24], &step1[25], &step1[26], &step1[27], - &step1[28], &step1[29], &step1[30], &step1[31]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; + overflow = check_epi16_overflow_x8(&step3[8], &step3[9], &step3[10], + &step3[11], &step3[12], &step3[13], + &step3[14], &step3[15]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } +#endif // DCT_HIGH_BIT_DEPTH } + { + const __m128i s3_17_0 = _mm_unpacklo_epi16(step2[17], step2[30]); + const __m128i s3_17_1 = _mm_unpackhi_epi16(step2[17], step2[30]); + const __m128i s3_18_0 = _mm_unpacklo_epi16(step2[18], step2[29]); + const __m128i s3_18_1 = _mm_unpackhi_epi16(step2[18], step2[29]); + const __m128i s3_21_0 = _mm_unpacklo_epi16(step2[21], step2[26]); + const __m128i s3_21_1 = _mm_unpackhi_epi16(step2[21], step2[26]); + const __m128i s3_22_0 = _mm_unpacklo_epi16(step2[22], step2[25]); + const __m128i s3_22_1 = _mm_unpackhi_epi16(step2[22], step2[25]); + const __m128i s3_17_2 = _mm_madd_epi16(s3_17_0, k__cospi_m04_p28); + const __m128i s3_17_3 = _mm_madd_epi16(s3_17_1, k__cospi_m04_p28); + const __m128i s3_18_2 = _mm_madd_epi16(s3_18_0, k__cospi_m28_m04); + const __m128i s3_18_3 = _mm_madd_epi16(s3_18_1, k__cospi_m28_m04); + const __m128i s3_21_2 = _mm_madd_epi16(s3_21_0, k__cospi_m20_p12); + const __m128i s3_21_3 = _mm_madd_epi16(s3_21_1, k__cospi_m20_p12); + const __m128i s3_22_2 = _mm_madd_epi16(s3_22_0, k__cospi_m12_m20); + const __m128i s3_22_3 = _mm_madd_epi16(s3_22_1, k__cospi_m12_m20); + const __m128i s3_25_2 = _mm_madd_epi16(s3_22_0, k__cospi_m20_p12); + const __m128i s3_25_3 = _mm_madd_epi16(s3_22_1, k__cospi_m20_p12); + const __m128i s3_26_2 = _mm_madd_epi16(s3_21_0, k__cospi_p12_p20); + const __m128i s3_26_3 = _mm_madd_epi16(s3_21_1, k__cospi_p12_p20); + const __m128i s3_29_2 = _mm_madd_epi16(s3_18_0, k__cospi_m04_p28); + const __m128i s3_29_3 = _mm_madd_epi16(s3_18_1, k__cospi_m04_p28); + const __m128i s3_30_2 = _mm_madd_epi16(s3_17_0, k__cospi_p28_p04); + const __m128i s3_30_3 = _mm_madd_epi16(s3_17_1, k__cospi_p28_p04); + // dct_const_round_shift + const __m128i s3_17_4 = _mm_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING); + const __m128i s3_17_5 = _mm_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING); + const __m128i s3_18_4 = _mm_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING); + const __m128i s3_18_5 = _mm_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING); + const __m128i s3_21_4 = _mm_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING); + const __m128i s3_21_5 = _mm_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING); + const __m128i s3_22_4 = _mm_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING); + const __m128i s3_22_5 = _mm_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING); + const __m128i s3_17_6 = _mm_srai_epi32(s3_17_4, DCT_CONST_BITS); + const __m128i s3_17_7 = _mm_srai_epi32(s3_17_5, DCT_CONST_BITS); + const __m128i s3_18_6 = _mm_srai_epi32(s3_18_4, DCT_CONST_BITS); + const __m128i s3_18_7 = _mm_srai_epi32(s3_18_5, DCT_CONST_BITS); + const __m128i s3_21_6 = _mm_srai_epi32(s3_21_4, DCT_CONST_BITS); + const __m128i s3_21_7 = _mm_srai_epi32(s3_21_5, DCT_CONST_BITS); + const __m128i s3_22_6 = _mm_srai_epi32(s3_22_4, DCT_CONST_BITS); + const __m128i s3_22_7 = _mm_srai_epi32(s3_22_5, DCT_CONST_BITS); + const __m128i s3_25_4 = _mm_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING); + const __m128i s3_25_5 = _mm_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING); + const __m128i s3_26_4 = _mm_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING); + const __m128i s3_26_5 = _mm_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING); + const __m128i s3_29_4 = _mm_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING); + const __m128i s3_29_5 = _mm_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING); + const __m128i s3_30_4 = _mm_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING); + const __m128i s3_30_5 = _mm_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING); + const __m128i s3_25_6 = _mm_srai_epi32(s3_25_4, DCT_CONST_BITS); + const __m128i s3_25_7 = _mm_srai_epi32(s3_25_5, DCT_CONST_BITS); + const __m128i s3_26_6 = _mm_srai_epi32(s3_26_4, DCT_CONST_BITS); + const __m128i s3_26_7 = _mm_srai_epi32(s3_26_5, DCT_CONST_BITS); + const __m128i s3_29_6 = _mm_srai_epi32(s3_29_4, DCT_CONST_BITS); + const __m128i s3_29_7 = _mm_srai_epi32(s3_29_5, DCT_CONST_BITS); + const __m128i s3_30_6 = _mm_srai_epi32(s3_30_4, DCT_CONST_BITS); + const __m128i s3_30_7 = _mm_srai_epi32(s3_30_5, DCT_CONST_BITS); + // Combine + step3[17] = _mm_packs_epi32(s3_17_6, s3_17_7); + step3[18] = _mm_packs_epi32(s3_18_6, s3_18_7); + step3[21] = _mm_packs_epi32(s3_21_6, s3_21_7); + step3[22] = _mm_packs_epi32(s3_22_6, s3_22_7); + // Combine + step3[25] = _mm_packs_epi32(s3_25_6, s3_25_7); + step3[26] = _mm_packs_epi32(s3_26_6, s3_26_7); + step3[29] = _mm_packs_epi32(s3_29_6, s3_29_7); + step3[30] = _mm_packs_epi32(s3_30_6, s3_30_7); +#if DCT_HIGH_BIT_DEPTH + overflow = check_epi16_overflow_x8(&step3[17], &step3[18], &step3[21], + &step3[22], &step3[25], &step3[26], + &step3[29], &step3[30]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - // Final stage --- outputs indices are bit-reversed. - { - const __m128i out_01_0 = _mm_unpacklo_epi16(step1[16], step1[31]); - const __m128i out_01_1 = _mm_unpackhi_epi16(step1[16], step1[31]); - const __m128i out_17_0 = _mm_unpacklo_epi16(step1[17], step1[30]); - const __m128i out_17_1 = _mm_unpackhi_epi16(step1[17], step1[30]); - const __m128i out_09_0 = _mm_unpacklo_epi16(step1[18], step1[29]); - const __m128i out_09_1 = _mm_unpackhi_epi16(step1[18], step1[29]); - const __m128i out_25_0 = _mm_unpacklo_epi16(step1[19], step1[28]); - const __m128i out_25_1 = _mm_unpackhi_epi16(step1[19], step1[28]); - const __m128i out_01_2 = _mm_madd_epi16(out_01_0, k__cospi_p31_p01); - const __m128i out_01_3 = _mm_madd_epi16(out_01_1, k__cospi_p31_p01); - const __m128i out_17_2 = _mm_madd_epi16(out_17_0, k__cospi_p15_p17); - const __m128i out_17_3 = _mm_madd_epi16(out_17_1, k__cospi_p15_p17); - const __m128i out_09_2 = _mm_madd_epi16(out_09_0, k__cospi_p23_p09); - const __m128i out_09_3 = _mm_madd_epi16(out_09_1, k__cospi_p23_p09); - const __m128i out_25_2 = _mm_madd_epi16(out_25_0, k__cospi_p07_p25); - const __m128i out_25_3 = _mm_madd_epi16(out_25_1, k__cospi_p07_p25); - const __m128i out_07_2 = _mm_madd_epi16(out_25_0, k__cospi_m25_p07); - const __m128i out_07_3 = _mm_madd_epi16(out_25_1, k__cospi_m25_p07); - const __m128i out_23_2 = _mm_madd_epi16(out_09_0, k__cospi_m09_p23); - const __m128i out_23_3 = _mm_madd_epi16(out_09_1, k__cospi_m09_p23); - const __m128i out_15_2 = _mm_madd_epi16(out_17_0, k__cospi_m17_p15); - const __m128i out_15_3 = _mm_madd_epi16(out_17_1, k__cospi_m17_p15); - const __m128i out_31_2 = _mm_madd_epi16(out_01_0, k__cospi_m01_p31); - const __m128i out_31_3 = _mm_madd_epi16(out_01_1, k__cospi_m01_p31); - // dct_const_round_shift - const __m128i out_01_4 = _mm_add_epi32(out_01_2, k__DCT_CONST_ROUNDING); - const __m128i out_01_5 = _mm_add_epi32(out_01_3, k__DCT_CONST_ROUNDING); - const __m128i out_17_4 = _mm_add_epi32(out_17_2, k__DCT_CONST_ROUNDING); - const __m128i out_17_5 = _mm_add_epi32(out_17_3, k__DCT_CONST_ROUNDING); - const __m128i out_09_4 = _mm_add_epi32(out_09_2, k__DCT_CONST_ROUNDING); - const __m128i out_09_5 = _mm_add_epi32(out_09_3, k__DCT_CONST_ROUNDING); - const __m128i out_25_4 = _mm_add_epi32(out_25_2, k__DCT_CONST_ROUNDING); - const __m128i out_25_5 = _mm_add_epi32(out_25_3, k__DCT_CONST_ROUNDING); - const __m128i out_07_4 = _mm_add_epi32(out_07_2, k__DCT_CONST_ROUNDING); - const __m128i out_07_5 = _mm_add_epi32(out_07_3, k__DCT_CONST_ROUNDING); - const __m128i out_23_4 = _mm_add_epi32(out_23_2, k__DCT_CONST_ROUNDING); - const __m128i out_23_5 = _mm_add_epi32(out_23_3, k__DCT_CONST_ROUNDING); - const __m128i out_15_4 = _mm_add_epi32(out_15_2, k__DCT_CONST_ROUNDING); - const __m128i out_15_5 = _mm_add_epi32(out_15_3, k__DCT_CONST_ROUNDING); - const __m128i out_31_4 = _mm_add_epi32(out_31_2, k__DCT_CONST_ROUNDING); - const __m128i out_31_5 = _mm_add_epi32(out_31_3, k__DCT_CONST_ROUNDING); - const __m128i out_01_6 = _mm_srai_epi32(out_01_4, DCT_CONST_BITS); - const __m128i out_01_7 = _mm_srai_epi32(out_01_5, DCT_CONST_BITS); - const __m128i out_17_6 = _mm_srai_epi32(out_17_4, DCT_CONST_BITS); - const __m128i out_17_7 = _mm_srai_epi32(out_17_5, DCT_CONST_BITS); - const __m128i out_09_6 = _mm_srai_epi32(out_09_4, DCT_CONST_BITS); - const __m128i out_09_7 = _mm_srai_epi32(out_09_5, DCT_CONST_BITS); - const __m128i out_25_6 = _mm_srai_epi32(out_25_4, DCT_CONST_BITS); - const __m128i out_25_7 = _mm_srai_epi32(out_25_5, DCT_CONST_BITS); - const __m128i out_07_6 = _mm_srai_epi32(out_07_4, DCT_CONST_BITS); - const __m128i out_07_7 = _mm_srai_epi32(out_07_5, DCT_CONST_BITS); - const __m128i out_23_6 = _mm_srai_epi32(out_23_4, DCT_CONST_BITS); - const __m128i out_23_7 = _mm_srai_epi32(out_23_5, DCT_CONST_BITS); - const __m128i out_15_6 = _mm_srai_epi32(out_15_4, DCT_CONST_BITS); - const __m128i out_15_7 = _mm_srai_epi32(out_15_5, DCT_CONST_BITS); - const __m128i out_31_6 = _mm_srai_epi32(out_31_4, DCT_CONST_BITS); - const __m128i out_31_7 = _mm_srai_epi32(out_31_5, DCT_CONST_BITS); - // Combine - out[ 1] = _mm_packs_epi32(out_01_6, out_01_7); - out[17] = _mm_packs_epi32(out_17_6, out_17_7); - out[ 9] = _mm_packs_epi32(out_09_6, out_09_7); - out[25] = _mm_packs_epi32(out_25_6, out_25_7); - out[ 7] = _mm_packs_epi32(out_07_6, out_07_7); - out[23] = _mm_packs_epi32(out_23_6, out_23_7); - out[15] = _mm_packs_epi32(out_15_6, out_15_7); - out[31] = _mm_packs_epi32(out_31_6, out_31_7); + } + // Stage 7 + { + const __m128i out_02_0 = _mm_unpacklo_epi16(step3[8], step3[15]); + const __m128i out_02_1 = _mm_unpackhi_epi16(step3[8], step3[15]); + const __m128i out_18_0 = _mm_unpacklo_epi16(step3[9], step3[14]); + const __m128i out_18_1 = _mm_unpackhi_epi16(step3[9], step3[14]); + const __m128i out_10_0 = _mm_unpacklo_epi16(step3[10], step3[13]); + const __m128i out_10_1 = _mm_unpackhi_epi16(step3[10], step3[13]); + const __m128i out_26_0 = _mm_unpacklo_epi16(step3[11], step3[12]); + const __m128i out_26_1 = _mm_unpackhi_epi16(step3[11], step3[12]); + const __m128i out_02_2 = _mm_madd_epi16(out_02_0, k__cospi_p30_p02); + const __m128i out_02_3 = _mm_madd_epi16(out_02_1, k__cospi_p30_p02); + const __m128i out_18_2 = _mm_madd_epi16(out_18_0, k__cospi_p14_p18); + const __m128i out_18_3 = _mm_madd_epi16(out_18_1, k__cospi_p14_p18); + const __m128i out_10_2 = _mm_madd_epi16(out_10_0, k__cospi_p22_p10); + const __m128i out_10_3 = _mm_madd_epi16(out_10_1, k__cospi_p22_p10); + const __m128i out_26_2 = _mm_madd_epi16(out_26_0, k__cospi_p06_p26); + const __m128i out_26_3 = _mm_madd_epi16(out_26_1, k__cospi_p06_p26); + const __m128i out_06_2 = _mm_madd_epi16(out_26_0, k__cospi_m26_p06); + const __m128i out_06_3 = _mm_madd_epi16(out_26_1, k__cospi_m26_p06); + const __m128i out_22_2 = _mm_madd_epi16(out_10_0, k__cospi_m10_p22); + const __m128i out_22_3 = _mm_madd_epi16(out_10_1, k__cospi_m10_p22); + const __m128i out_14_2 = _mm_madd_epi16(out_18_0, k__cospi_m18_p14); + const __m128i out_14_3 = _mm_madd_epi16(out_18_1, k__cospi_m18_p14); + const __m128i out_30_2 = _mm_madd_epi16(out_02_0, k__cospi_m02_p30); + const __m128i out_30_3 = _mm_madd_epi16(out_02_1, k__cospi_m02_p30); + // dct_const_round_shift + const __m128i out_02_4 = + _mm_add_epi32(out_02_2, k__DCT_CONST_ROUNDING); + const __m128i out_02_5 = + _mm_add_epi32(out_02_3, k__DCT_CONST_ROUNDING); + const __m128i out_18_4 = + _mm_add_epi32(out_18_2, k__DCT_CONST_ROUNDING); + const __m128i out_18_5 = + _mm_add_epi32(out_18_3, k__DCT_CONST_ROUNDING); + const __m128i out_10_4 = + _mm_add_epi32(out_10_2, k__DCT_CONST_ROUNDING); + const __m128i out_10_5 = + _mm_add_epi32(out_10_3, k__DCT_CONST_ROUNDING); + const __m128i out_26_4 = + _mm_add_epi32(out_26_2, k__DCT_CONST_ROUNDING); + const __m128i out_26_5 = + _mm_add_epi32(out_26_3, k__DCT_CONST_ROUNDING); + const __m128i out_06_4 = + _mm_add_epi32(out_06_2, k__DCT_CONST_ROUNDING); + const __m128i out_06_5 = + _mm_add_epi32(out_06_3, k__DCT_CONST_ROUNDING); + const __m128i out_22_4 = + _mm_add_epi32(out_22_2, k__DCT_CONST_ROUNDING); + const __m128i out_22_5 = + _mm_add_epi32(out_22_3, k__DCT_CONST_ROUNDING); + const __m128i out_14_4 = + _mm_add_epi32(out_14_2, k__DCT_CONST_ROUNDING); + const __m128i out_14_5 = + _mm_add_epi32(out_14_3, k__DCT_CONST_ROUNDING); + const __m128i out_30_4 = + _mm_add_epi32(out_30_2, k__DCT_CONST_ROUNDING); + const __m128i out_30_5 = + _mm_add_epi32(out_30_3, k__DCT_CONST_ROUNDING); + const __m128i out_02_6 = _mm_srai_epi32(out_02_4, DCT_CONST_BITS); + const __m128i out_02_7 = _mm_srai_epi32(out_02_5, DCT_CONST_BITS); + const __m128i out_18_6 = _mm_srai_epi32(out_18_4, DCT_CONST_BITS); + const __m128i out_18_7 = _mm_srai_epi32(out_18_5, DCT_CONST_BITS); + const __m128i out_10_6 = _mm_srai_epi32(out_10_4, DCT_CONST_BITS); + const __m128i out_10_7 = _mm_srai_epi32(out_10_5, DCT_CONST_BITS); + const __m128i out_26_6 = _mm_srai_epi32(out_26_4, DCT_CONST_BITS); + const __m128i out_26_7 = _mm_srai_epi32(out_26_5, DCT_CONST_BITS); + const __m128i out_06_6 = _mm_srai_epi32(out_06_4, DCT_CONST_BITS); + const __m128i out_06_7 = _mm_srai_epi32(out_06_5, DCT_CONST_BITS); + const __m128i out_22_6 = _mm_srai_epi32(out_22_4, DCT_CONST_BITS); + const __m128i out_22_7 = _mm_srai_epi32(out_22_5, DCT_CONST_BITS); + const __m128i out_14_6 = _mm_srai_epi32(out_14_4, DCT_CONST_BITS); + const __m128i out_14_7 = _mm_srai_epi32(out_14_5, DCT_CONST_BITS); + const __m128i out_30_6 = _mm_srai_epi32(out_30_4, DCT_CONST_BITS); + const __m128i out_30_7 = _mm_srai_epi32(out_30_5, DCT_CONST_BITS); + // Combine + out[2] = _mm_packs_epi32(out_02_6, out_02_7); + out[18] = _mm_packs_epi32(out_18_6, out_18_7); + out[10] = _mm_packs_epi32(out_10_6, out_10_7); + out[26] = _mm_packs_epi32(out_26_6, out_26_7); + out[6] = _mm_packs_epi32(out_06_6, out_06_7); + out[22] = _mm_packs_epi32(out_22_6, out_22_7); + out[14] = _mm_packs_epi32(out_14_6, out_14_7); + out[30] = _mm_packs_epi32(out_30_6, out_30_7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&out[1], &out[17], &out[9], - &out[25], &out[7], &out[23], - &out[15], &out[31]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; + overflow = + check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26], + &out[6], &out[22], &out[14], &out[30]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } +#endif // DCT_HIGH_BIT_DEPTH } + { + step1[16] = ADD_EPI16(step3[17], step2[16]); + step1[17] = SUB_EPI16(step2[16], step3[17]); + step1[18] = SUB_EPI16(step2[19], step3[18]); + step1[19] = ADD_EPI16(step3[18], step2[19]); + step1[20] = ADD_EPI16(step3[21], step2[20]); + step1[21] = SUB_EPI16(step2[20], step3[21]); + step1[22] = SUB_EPI16(step2[23], step3[22]); + step1[23] = ADD_EPI16(step3[22], step2[23]); + step1[24] = ADD_EPI16(step3[25], step2[24]); + step1[25] = SUB_EPI16(step2[24], step3[25]); + step1[26] = SUB_EPI16(step2[27], step3[26]); + step1[27] = ADD_EPI16(step3[26], step2[27]); + step1[28] = ADD_EPI16(step3[29], step2[28]); + step1[29] = SUB_EPI16(step2[28], step3[29]); + step1[30] = SUB_EPI16(step2[31], step3[30]); + step1[31] = ADD_EPI16(step3[30], step2[31]); +#if DCT_HIGH_BIT_DEPTH + overflow = check_epi16_overflow_x16( + &step1[16], &step1[17], &step1[18], &step1[19], &step1[20], + &step1[21], &step1[22], &step1[23], &step1[24], &step1[25], + &step1[26], &step1[27], &step1[28], &step1[29], &step1[30], + &step1[31]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } - { - const __m128i out_05_0 = _mm_unpacklo_epi16(step1[20], step1[27]); - const __m128i out_05_1 = _mm_unpackhi_epi16(step1[20], step1[27]); - const __m128i out_21_0 = _mm_unpacklo_epi16(step1[21], step1[26]); - const __m128i out_21_1 = _mm_unpackhi_epi16(step1[21], step1[26]); - const __m128i out_13_0 = _mm_unpacklo_epi16(step1[22], step1[25]); - const __m128i out_13_1 = _mm_unpackhi_epi16(step1[22], step1[25]); - const __m128i out_29_0 = _mm_unpacklo_epi16(step1[23], step1[24]); - const __m128i out_29_1 = _mm_unpackhi_epi16(step1[23], step1[24]); - const __m128i out_05_2 = _mm_madd_epi16(out_05_0, k__cospi_p27_p05); - const __m128i out_05_3 = _mm_madd_epi16(out_05_1, k__cospi_p27_p05); - const __m128i out_21_2 = _mm_madd_epi16(out_21_0, k__cospi_p11_p21); - const __m128i out_21_3 = _mm_madd_epi16(out_21_1, k__cospi_p11_p21); - const __m128i out_13_2 = _mm_madd_epi16(out_13_0, k__cospi_p19_p13); - const __m128i out_13_3 = _mm_madd_epi16(out_13_1, k__cospi_p19_p13); - const __m128i out_29_2 = _mm_madd_epi16(out_29_0, k__cospi_p03_p29); - const __m128i out_29_3 = _mm_madd_epi16(out_29_1, k__cospi_p03_p29); - const __m128i out_03_2 = _mm_madd_epi16(out_29_0, k__cospi_m29_p03); - const __m128i out_03_3 = _mm_madd_epi16(out_29_1, k__cospi_m29_p03); - const __m128i out_19_2 = _mm_madd_epi16(out_13_0, k__cospi_m13_p19); - const __m128i out_19_3 = _mm_madd_epi16(out_13_1, k__cospi_m13_p19); - const __m128i out_11_2 = _mm_madd_epi16(out_21_0, k__cospi_m21_p11); - const __m128i out_11_3 = _mm_madd_epi16(out_21_1, k__cospi_m21_p11); - const __m128i out_27_2 = _mm_madd_epi16(out_05_0, k__cospi_m05_p27); - const __m128i out_27_3 = _mm_madd_epi16(out_05_1, k__cospi_m05_p27); - // dct_const_round_shift - const __m128i out_05_4 = _mm_add_epi32(out_05_2, k__DCT_CONST_ROUNDING); - const __m128i out_05_5 = _mm_add_epi32(out_05_3, k__DCT_CONST_ROUNDING); - const __m128i out_21_4 = _mm_add_epi32(out_21_2, k__DCT_CONST_ROUNDING); - const __m128i out_21_5 = _mm_add_epi32(out_21_3, k__DCT_CONST_ROUNDING); - const __m128i out_13_4 = _mm_add_epi32(out_13_2, k__DCT_CONST_ROUNDING); - const __m128i out_13_5 = _mm_add_epi32(out_13_3, k__DCT_CONST_ROUNDING); - const __m128i out_29_4 = _mm_add_epi32(out_29_2, k__DCT_CONST_ROUNDING); - const __m128i out_29_5 = _mm_add_epi32(out_29_3, k__DCT_CONST_ROUNDING); - const __m128i out_03_4 = _mm_add_epi32(out_03_2, k__DCT_CONST_ROUNDING); - const __m128i out_03_5 = _mm_add_epi32(out_03_3, k__DCT_CONST_ROUNDING); - const __m128i out_19_4 = _mm_add_epi32(out_19_2, k__DCT_CONST_ROUNDING); - const __m128i out_19_5 = _mm_add_epi32(out_19_3, k__DCT_CONST_ROUNDING); - const __m128i out_11_4 = _mm_add_epi32(out_11_2, k__DCT_CONST_ROUNDING); - const __m128i out_11_5 = _mm_add_epi32(out_11_3, k__DCT_CONST_ROUNDING); - const __m128i out_27_4 = _mm_add_epi32(out_27_2, k__DCT_CONST_ROUNDING); - const __m128i out_27_5 = _mm_add_epi32(out_27_3, k__DCT_CONST_ROUNDING); - const __m128i out_05_6 = _mm_srai_epi32(out_05_4, DCT_CONST_BITS); - const __m128i out_05_7 = _mm_srai_epi32(out_05_5, DCT_CONST_BITS); - const __m128i out_21_6 = _mm_srai_epi32(out_21_4, DCT_CONST_BITS); - const __m128i out_21_7 = _mm_srai_epi32(out_21_5, DCT_CONST_BITS); - const __m128i out_13_6 = _mm_srai_epi32(out_13_4, DCT_CONST_BITS); - const __m128i out_13_7 = _mm_srai_epi32(out_13_5, DCT_CONST_BITS); - const __m128i out_29_6 = _mm_srai_epi32(out_29_4, DCT_CONST_BITS); - const __m128i out_29_7 = _mm_srai_epi32(out_29_5, DCT_CONST_BITS); - const __m128i out_03_6 = _mm_srai_epi32(out_03_4, DCT_CONST_BITS); - const __m128i out_03_7 = _mm_srai_epi32(out_03_5, DCT_CONST_BITS); - const __m128i out_19_6 = _mm_srai_epi32(out_19_4, DCT_CONST_BITS); - const __m128i out_19_7 = _mm_srai_epi32(out_19_5, DCT_CONST_BITS); - const __m128i out_11_6 = _mm_srai_epi32(out_11_4, DCT_CONST_BITS); - const __m128i out_11_7 = _mm_srai_epi32(out_11_5, DCT_CONST_BITS); - const __m128i out_27_6 = _mm_srai_epi32(out_27_4, DCT_CONST_BITS); - const __m128i out_27_7 = _mm_srai_epi32(out_27_5, DCT_CONST_BITS); - // Combine - out[ 5] = _mm_packs_epi32(out_05_6, out_05_7); - out[21] = _mm_packs_epi32(out_21_6, out_21_7); - out[13] = _mm_packs_epi32(out_13_6, out_13_7); - out[29] = _mm_packs_epi32(out_29_6, out_29_7); - out[ 3] = _mm_packs_epi32(out_03_6, out_03_7); - out[19] = _mm_packs_epi32(out_19_6, out_19_7); - out[11] = _mm_packs_epi32(out_11_6, out_11_7); - out[27] = _mm_packs_epi32(out_27_6, out_27_7); + } + // Final stage --- outputs indices are bit-reversed. + { + const __m128i out_01_0 = _mm_unpacklo_epi16(step1[16], step1[31]); + const __m128i out_01_1 = _mm_unpackhi_epi16(step1[16], step1[31]); + const __m128i out_17_0 = _mm_unpacklo_epi16(step1[17], step1[30]); + const __m128i out_17_1 = _mm_unpackhi_epi16(step1[17], step1[30]); + const __m128i out_09_0 = _mm_unpacklo_epi16(step1[18], step1[29]); + const __m128i out_09_1 = _mm_unpackhi_epi16(step1[18], step1[29]); + const __m128i out_25_0 = _mm_unpacklo_epi16(step1[19], step1[28]); + const __m128i out_25_1 = _mm_unpackhi_epi16(step1[19], step1[28]); + const __m128i out_01_2 = _mm_madd_epi16(out_01_0, k__cospi_p31_p01); + const __m128i out_01_3 = _mm_madd_epi16(out_01_1, k__cospi_p31_p01); + const __m128i out_17_2 = _mm_madd_epi16(out_17_0, k__cospi_p15_p17); + const __m128i out_17_3 = _mm_madd_epi16(out_17_1, k__cospi_p15_p17); + const __m128i out_09_2 = _mm_madd_epi16(out_09_0, k__cospi_p23_p09); + const __m128i out_09_3 = _mm_madd_epi16(out_09_1, k__cospi_p23_p09); + const __m128i out_25_2 = _mm_madd_epi16(out_25_0, k__cospi_p07_p25); + const __m128i out_25_3 = _mm_madd_epi16(out_25_1, k__cospi_p07_p25); + const __m128i out_07_2 = _mm_madd_epi16(out_25_0, k__cospi_m25_p07); + const __m128i out_07_3 = _mm_madd_epi16(out_25_1, k__cospi_m25_p07); + const __m128i out_23_2 = _mm_madd_epi16(out_09_0, k__cospi_m09_p23); + const __m128i out_23_3 = _mm_madd_epi16(out_09_1, k__cospi_m09_p23); + const __m128i out_15_2 = _mm_madd_epi16(out_17_0, k__cospi_m17_p15); + const __m128i out_15_3 = _mm_madd_epi16(out_17_1, k__cospi_m17_p15); + const __m128i out_31_2 = _mm_madd_epi16(out_01_0, k__cospi_m01_p31); + const __m128i out_31_3 = _mm_madd_epi16(out_01_1, k__cospi_m01_p31); + // dct_const_round_shift + const __m128i out_01_4 = + _mm_add_epi32(out_01_2, k__DCT_CONST_ROUNDING); + const __m128i out_01_5 = + _mm_add_epi32(out_01_3, k__DCT_CONST_ROUNDING); + const __m128i out_17_4 = + _mm_add_epi32(out_17_2, k__DCT_CONST_ROUNDING); + const __m128i out_17_5 = + _mm_add_epi32(out_17_3, k__DCT_CONST_ROUNDING); + const __m128i out_09_4 = + _mm_add_epi32(out_09_2, k__DCT_CONST_ROUNDING); + const __m128i out_09_5 = + _mm_add_epi32(out_09_3, k__DCT_CONST_ROUNDING); + const __m128i out_25_4 = + _mm_add_epi32(out_25_2, k__DCT_CONST_ROUNDING); + const __m128i out_25_5 = + _mm_add_epi32(out_25_3, k__DCT_CONST_ROUNDING); + const __m128i out_07_4 = + _mm_add_epi32(out_07_2, k__DCT_CONST_ROUNDING); + const __m128i out_07_5 = + _mm_add_epi32(out_07_3, k__DCT_CONST_ROUNDING); + const __m128i out_23_4 = + _mm_add_epi32(out_23_2, k__DCT_CONST_ROUNDING); + const __m128i out_23_5 = + _mm_add_epi32(out_23_3, k__DCT_CONST_ROUNDING); + const __m128i out_15_4 = + _mm_add_epi32(out_15_2, k__DCT_CONST_ROUNDING); + const __m128i out_15_5 = + _mm_add_epi32(out_15_3, k__DCT_CONST_ROUNDING); + const __m128i out_31_4 = + _mm_add_epi32(out_31_2, k__DCT_CONST_ROUNDING); + const __m128i out_31_5 = + _mm_add_epi32(out_31_3, k__DCT_CONST_ROUNDING); + const __m128i out_01_6 = _mm_srai_epi32(out_01_4, DCT_CONST_BITS); + const __m128i out_01_7 = _mm_srai_epi32(out_01_5, DCT_CONST_BITS); + const __m128i out_17_6 = _mm_srai_epi32(out_17_4, DCT_CONST_BITS); + const __m128i out_17_7 = _mm_srai_epi32(out_17_5, DCT_CONST_BITS); + const __m128i out_09_6 = _mm_srai_epi32(out_09_4, DCT_CONST_BITS); + const __m128i out_09_7 = _mm_srai_epi32(out_09_5, DCT_CONST_BITS); + const __m128i out_25_6 = _mm_srai_epi32(out_25_4, DCT_CONST_BITS); + const __m128i out_25_7 = _mm_srai_epi32(out_25_5, DCT_CONST_BITS); + const __m128i out_07_6 = _mm_srai_epi32(out_07_4, DCT_CONST_BITS); + const __m128i out_07_7 = _mm_srai_epi32(out_07_5, DCT_CONST_BITS); + const __m128i out_23_6 = _mm_srai_epi32(out_23_4, DCT_CONST_BITS); + const __m128i out_23_7 = _mm_srai_epi32(out_23_5, DCT_CONST_BITS); + const __m128i out_15_6 = _mm_srai_epi32(out_15_4, DCT_CONST_BITS); + const __m128i out_15_7 = _mm_srai_epi32(out_15_5, DCT_CONST_BITS); + const __m128i out_31_6 = _mm_srai_epi32(out_31_4, DCT_CONST_BITS); + const __m128i out_31_7 = _mm_srai_epi32(out_31_5, DCT_CONST_BITS); + // Combine + out[1] = _mm_packs_epi32(out_01_6, out_01_7); + out[17] = _mm_packs_epi32(out_17_6, out_17_7); + out[9] = _mm_packs_epi32(out_09_6, out_09_7); + out[25] = _mm_packs_epi32(out_25_6, out_25_7); + out[7] = _mm_packs_epi32(out_07_6, out_07_7); + out[23] = _mm_packs_epi32(out_23_6, out_23_7); + out[15] = _mm_packs_epi32(out_15_6, out_15_7); + out[31] = _mm_packs_epi32(out_31_6, out_31_7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&out[5], &out[21], &out[13], - &out[29], &out[3], &out[19], - &out[11], &out[27]); - if (overflow) { - if (pass == 0) - HIGH_FDCT32x32_2D_C(input, output_org, stride); - else - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; + overflow = + check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25], + &out[7], &out[23], &out[15], &out[31]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } +#endif // DCT_HIGH_BIT_DEPTH } + { + const __m128i out_05_0 = _mm_unpacklo_epi16(step1[20], step1[27]); + const __m128i out_05_1 = _mm_unpackhi_epi16(step1[20], step1[27]); + const __m128i out_21_0 = _mm_unpacklo_epi16(step1[21], step1[26]); + const __m128i out_21_1 = _mm_unpackhi_epi16(step1[21], step1[26]); + const __m128i out_13_0 = _mm_unpacklo_epi16(step1[22], step1[25]); + const __m128i out_13_1 = _mm_unpackhi_epi16(step1[22], step1[25]); + const __m128i out_29_0 = _mm_unpacklo_epi16(step1[23], step1[24]); + const __m128i out_29_1 = _mm_unpackhi_epi16(step1[23], step1[24]); + const __m128i out_05_2 = _mm_madd_epi16(out_05_0, k__cospi_p27_p05); + const __m128i out_05_3 = _mm_madd_epi16(out_05_1, k__cospi_p27_p05); + const __m128i out_21_2 = _mm_madd_epi16(out_21_0, k__cospi_p11_p21); + const __m128i out_21_3 = _mm_madd_epi16(out_21_1, k__cospi_p11_p21); + const __m128i out_13_2 = _mm_madd_epi16(out_13_0, k__cospi_p19_p13); + const __m128i out_13_3 = _mm_madd_epi16(out_13_1, k__cospi_p19_p13); + const __m128i out_29_2 = _mm_madd_epi16(out_29_0, k__cospi_p03_p29); + const __m128i out_29_3 = _mm_madd_epi16(out_29_1, k__cospi_p03_p29); + const __m128i out_03_2 = _mm_madd_epi16(out_29_0, k__cospi_m29_p03); + const __m128i out_03_3 = _mm_madd_epi16(out_29_1, k__cospi_m29_p03); + const __m128i out_19_2 = _mm_madd_epi16(out_13_0, k__cospi_m13_p19); + const __m128i out_19_3 = _mm_madd_epi16(out_13_1, k__cospi_m13_p19); + const __m128i out_11_2 = _mm_madd_epi16(out_21_0, k__cospi_m21_p11); + const __m128i out_11_3 = _mm_madd_epi16(out_21_1, k__cospi_m21_p11); + const __m128i out_27_2 = _mm_madd_epi16(out_05_0, k__cospi_m05_p27); + const __m128i out_27_3 = _mm_madd_epi16(out_05_1, k__cospi_m05_p27); + // dct_const_round_shift + const __m128i out_05_4 = + _mm_add_epi32(out_05_2, k__DCT_CONST_ROUNDING); + const __m128i out_05_5 = + _mm_add_epi32(out_05_3, k__DCT_CONST_ROUNDING); + const __m128i out_21_4 = + _mm_add_epi32(out_21_2, k__DCT_CONST_ROUNDING); + const __m128i out_21_5 = + _mm_add_epi32(out_21_3, k__DCT_CONST_ROUNDING); + const __m128i out_13_4 = + _mm_add_epi32(out_13_2, k__DCT_CONST_ROUNDING); + const __m128i out_13_5 = + _mm_add_epi32(out_13_3, k__DCT_CONST_ROUNDING); + const __m128i out_29_4 = + _mm_add_epi32(out_29_2, k__DCT_CONST_ROUNDING); + const __m128i out_29_5 = + _mm_add_epi32(out_29_3, k__DCT_CONST_ROUNDING); + const __m128i out_03_4 = + _mm_add_epi32(out_03_2, k__DCT_CONST_ROUNDING); + const __m128i out_03_5 = + _mm_add_epi32(out_03_3, k__DCT_CONST_ROUNDING); + const __m128i out_19_4 = + _mm_add_epi32(out_19_2, k__DCT_CONST_ROUNDING); + const __m128i out_19_5 = + _mm_add_epi32(out_19_3, k__DCT_CONST_ROUNDING); + const __m128i out_11_4 = + _mm_add_epi32(out_11_2, k__DCT_CONST_ROUNDING); + const __m128i out_11_5 = + _mm_add_epi32(out_11_3, k__DCT_CONST_ROUNDING); + const __m128i out_27_4 = + _mm_add_epi32(out_27_2, k__DCT_CONST_ROUNDING); + const __m128i out_27_5 = + _mm_add_epi32(out_27_3, k__DCT_CONST_ROUNDING); + const __m128i out_05_6 = _mm_srai_epi32(out_05_4, DCT_CONST_BITS); + const __m128i out_05_7 = _mm_srai_epi32(out_05_5, DCT_CONST_BITS); + const __m128i out_21_6 = _mm_srai_epi32(out_21_4, DCT_CONST_BITS); + const __m128i out_21_7 = _mm_srai_epi32(out_21_5, DCT_CONST_BITS); + const __m128i out_13_6 = _mm_srai_epi32(out_13_4, DCT_CONST_BITS); + const __m128i out_13_7 = _mm_srai_epi32(out_13_5, DCT_CONST_BITS); + const __m128i out_29_6 = _mm_srai_epi32(out_29_4, DCT_CONST_BITS); + const __m128i out_29_7 = _mm_srai_epi32(out_29_5, DCT_CONST_BITS); + const __m128i out_03_6 = _mm_srai_epi32(out_03_4, DCT_CONST_BITS); + const __m128i out_03_7 = _mm_srai_epi32(out_03_5, DCT_CONST_BITS); + const __m128i out_19_6 = _mm_srai_epi32(out_19_4, DCT_CONST_BITS); + const __m128i out_19_7 = _mm_srai_epi32(out_19_5, DCT_CONST_BITS); + const __m128i out_11_6 = _mm_srai_epi32(out_11_4, DCT_CONST_BITS); + const __m128i out_11_7 = _mm_srai_epi32(out_11_5, DCT_CONST_BITS); + const __m128i out_27_6 = _mm_srai_epi32(out_27_4, DCT_CONST_BITS); + const __m128i out_27_7 = _mm_srai_epi32(out_27_5, DCT_CONST_BITS); + // Combine + out[5] = _mm_packs_epi32(out_05_6, out_05_7); + out[21] = _mm_packs_epi32(out_21_6, out_21_7); + out[13] = _mm_packs_epi32(out_13_6, out_13_7); + out[29] = _mm_packs_epi32(out_29_6, out_29_7); + out[3] = _mm_packs_epi32(out_03_6, out_03_7); + out[19] = _mm_packs_epi32(out_19_6, out_19_7); + out[11] = _mm_packs_epi32(out_11_6, out_11_7); + out[27] = _mm_packs_epi32(out_27_6, out_27_7); +#if DCT_HIGH_BIT_DEPTH + overflow = + check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29], + &out[3], &out[19], &out[11], &out[27]); + if (overflow) { + if (pass == 0) + HIGH_FDCT32x32_2D_C(input, output_org, stride); + else + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - } + } #if FDCT32x32_HIGH_PRECISION } else { __m128i lstep1[64], lstep2[64], lstep3[64]; @@ -1458,32 +1513,32 @@ void FDCT32x32_2D(const int16_t *input, // stage 3 { // expanding to 32-bit length priori to addition operations - lstep2[ 0] = _mm_unpacklo_epi16(step2[ 0], kZero); - lstep2[ 1] = _mm_unpackhi_epi16(step2[ 0], kZero); - lstep2[ 2] = _mm_unpacklo_epi16(step2[ 1], kZero); - lstep2[ 3] = _mm_unpackhi_epi16(step2[ 1], kZero); - lstep2[ 4] = _mm_unpacklo_epi16(step2[ 2], kZero); - lstep2[ 5] = _mm_unpackhi_epi16(step2[ 2], kZero); - lstep2[ 6] = _mm_unpacklo_epi16(step2[ 3], kZero); - lstep2[ 7] = _mm_unpackhi_epi16(step2[ 3], kZero); - lstep2[ 8] = _mm_unpacklo_epi16(step2[ 4], kZero); - lstep2[ 9] = _mm_unpackhi_epi16(step2[ 4], kZero); - lstep2[10] = _mm_unpacklo_epi16(step2[ 5], kZero); - lstep2[11] = _mm_unpackhi_epi16(step2[ 5], kZero); - lstep2[12] = _mm_unpacklo_epi16(step2[ 6], kZero); - lstep2[13] = _mm_unpackhi_epi16(step2[ 6], kZero); - lstep2[14] = _mm_unpacklo_epi16(step2[ 7], kZero); - lstep2[15] = _mm_unpackhi_epi16(step2[ 7], kZero); - lstep2[ 0] = _mm_madd_epi16(lstep2[ 0], kOne); - lstep2[ 1] = _mm_madd_epi16(lstep2[ 1], kOne); - lstep2[ 2] = _mm_madd_epi16(lstep2[ 2], kOne); - lstep2[ 3] = _mm_madd_epi16(lstep2[ 3], kOne); - lstep2[ 4] = _mm_madd_epi16(lstep2[ 4], kOne); - lstep2[ 5] = _mm_madd_epi16(lstep2[ 5], kOne); - lstep2[ 6] = _mm_madd_epi16(lstep2[ 6], kOne); - lstep2[ 7] = _mm_madd_epi16(lstep2[ 7], kOne); - lstep2[ 8] = _mm_madd_epi16(lstep2[ 8], kOne); - lstep2[ 9] = _mm_madd_epi16(lstep2[ 9], kOne); + lstep2[0] = _mm_unpacklo_epi16(step2[0], kZero); + lstep2[1] = _mm_unpackhi_epi16(step2[0], kZero); + lstep2[2] = _mm_unpacklo_epi16(step2[1], kZero); + lstep2[3] = _mm_unpackhi_epi16(step2[1], kZero); + lstep2[4] = _mm_unpacklo_epi16(step2[2], kZero); + lstep2[5] = _mm_unpackhi_epi16(step2[2], kZero); + lstep2[6] = _mm_unpacklo_epi16(step2[3], kZero); + lstep2[7] = _mm_unpackhi_epi16(step2[3], kZero); + lstep2[8] = _mm_unpacklo_epi16(step2[4], kZero); + lstep2[9] = _mm_unpackhi_epi16(step2[4], kZero); + lstep2[10] = _mm_unpacklo_epi16(step2[5], kZero); + lstep2[11] = _mm_unpackhi_epi16(step2[5], kZero); + lstep2[12] = _mm_unpacklo_epi16(step2[6], kZero); + lstep2[13] = _mm_unpackhi_epi16(step2[6], kZero); + lstep2[14] = _mm_unpacklo_epi16(step2[7], kZero); + lstep2[15] = _mm_unpackhi_epi16(step2[7], kZero); + lstep2[0] = _mm_madd_epi16(lstep2[0], kOne); + lstep2[1] = _mm_madd_epi16(lstep2[1], kOne); + lstep2[2] = _mm_madd_epi16(lstep2[2], kOne); + lstep2[3] = _mm_madd_epi16(lstep2[3], kOne); + lstep2[4] = _mm_madd_epi16(lstep2[4], kOne); + lstep2[5] = _mm_madd_epi16(lstep2[5], kOne); + lstep2[6] = _mm_madd_epi16(lstep2[6], kOne); + lstep2[7] = _mm_madd_epi16(lstep2[7], kOne); + lstep2[8] = _mm_madd_epi16(lstep2[8], kOne); + lstep2[9] = _mm_madd_epi16(lstep2[9], kOne); lstep2[10] = _mm_madd_epi16(lstep2[10], kOne); lstep2[11] = _mm_madd_epi16(lstep2[11], kOne); lstep2[12] = _mm_madd_epi16(lstep2[12], kOne); @@ -1491,22 +1546,22 @@ void FDCT32x32_2D(const int16_t *input, lstep2[14] = _mm_madd_epi16(lstep2[14], kOne); lstep2[15] = _mm_madd_epi16(lstep2[15], kOne); - lstep3[ 0] = _mm_add_epi32(lstep2[14], lstep2[ 0]); - lstep3[ 1] = _mm_add_epi32(lstep2[15], lstep2[ 1]); - lstep3[ 2] = _mm_add_epi32(lstep2[12], lstep2[ 2]); - lstep3[ 3] = _mm_add_epi32(lstep2[13], lstep2[ 3]); - lstep3[ 4] = _mm_add_epi32(lstep2[10], lstep2[ 4]); - lstep3[ 5] = _mm_add_epi32(lstep2[11], lstep2[ 5]); - lstep3[ 6] = _mm_add_epi32(lstep2[ 8], lstep2[ 6]); - lstep3[ 7] = _mm_add_epi32(lstep2[ 9], lstep2[ 7]); - lstep3[ 8] = _mm_sub_epi32(lstep2[ 6], lstep2[ 8]); - lstep3[ 9] = _mm_sub_epi32(lstep2[ 7], lstep2[ 9]); - lstep3[10] = _mm_sub_epi32(lstep2[ 4], lstep2[10]); - lstep3[11] = _mm_sub_epi32(lstep2[ 5], lstep2[11]); - lstep3[12] = _mm_sub_epi32(lstep2[ 2], lstep2[12]); - lstep3[13] = _mm_sub_epi32(lstep2[ 3], lstep2[13]); - lstep3[14] = _mm_sub_epi32(lstep2[ 0], lstep2[14]); - lstep3[15] = _mm_sub_epi32(lstep2[ 1], lstep2[15]); + lstep3[0] = _mm_add_epi32(lstep2[14], lstep2[0]); + lstep3[1] = _mm_add_epi32(lstep2[15], lstep2[1]); + lstep3[2] = _mm_add_epi32(lstep2[12], lstep2[2]); + lstep3[3] = _mm_add_epi32(lstep2[13], lstep2[3]); + lstep3[4] = _mm_add_epi32(lstep2[10], lstep2[4]); + lstep3[5] = _mm_add_epi32(lstep2[11], lstep2[5]); + lstep3[6] = _mm_add_epi32(lstep2[8], lstep2[6]); + lstep3[7] = _mm_add_epi32(lstep2[9], lstep2[7]); + lstep3[8] = _mm_sub_epi32(lstep2[6], lstep2[8]); + lstep3[9] = _mm_sub_epi32(lstep2[7], lstep2[9]); + lstep3[10] = _mm_sub_epi32(lstep2[4], lstep2[10]); + lstep3[11] = _mm_sub_epi32(lstep2[5], lstep2[11]); + lstep3[12] = _mm_sub_epi32(lstep2[2], lstep2[12]); + lstep3[13] = _mm_sub_epi32(lstep2[3], lstep2[13]); + lstep3[14] = _mm_sub_epi32(lstep2[0], lstep2[14]); + lstep3[15] = _mm_sub_epi32(lstep2[1], lstep2[15]); } { const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]); @@ -1644,10 +1699,10 @@ void FDCT32x32_2D(const int16_t *input, // stage 4 { // expanding to 32-bit length priori to addition operations - lstep2[16] = _mm_unpacklo_epi16(step2[ 8], kZero); - lstep2[17] = _mm_unpackhi_epi16(step2[ 8], kZero); - lstep2[18] = _mm_unpacklo_epi16(step2[ 9], kZero); - lstep2[19] = _mm_unpackhi_epi16(step2[ 9], kZero); + lstep2[16] = _mm_unpacklo_epi16(step2[8], kZero); + lstep2[17] = _mm_unpackhi_epi16(step2[8], kZero); + lstep2[18] = _mm_unpacklo_epi16(step2[9], kZero); + lstep2[19] = _mm_unpackhi_epi16(step2[9], kZero); lstep2[28] = _mm_unpacklo_epi16(step2[14], kZero); lstep2[29] = _mm_unpackhi_epi16(step2[14], kZero); lstep2[30] = _mm_unpacklo_epi16(step2[15], kZero); @@ -1661,14 +1716,14 @@ void FDCT32x32_2D(const int16_t *input, lstep2[30] = _mm_madd_epi16(lstep2[30], kOne); lstep2[31] = _mm_madd_epi16(lstep2[31], kOne); - lstep1[ 0] = _mm_add_epi32(lstep3[ 6], lstep3[ 0]); - lstep1[ 1] = _mm_add_epi32(lstep3[ 7], lstep3[ 1]); - lstep1[ 2] = _mm_add_epi32(lstep3[ 4], lstep3[ 2]); - lstep1[ 3] = _mm_add_epi32(lstep3[ 5], lstep3[ 3]); - lstep1[ 4] = _mm_sub_epi32(lstep3[ 2], lstep3[ 4]); - lstep1[ 5] = _mm_sub_epi32(lstep3[ 3], lstep3[ 5]); - lstep1[ 6] = _mm_sub_epi32(lstep3[ 0], lstep3[ 6]); - lstep1[ 7] = _mm_sub_epi32(lstep3[ 1], lstep3[ 7]); + lstep1[0] = _mm_add_epi32(lstep3[6], lstep3[0]); + lstep1[1] = _mm_add_epi32(lstep3[7], lstep3[1]); + lstep1[2] = _mm_add_epi32(lstep3[4], lstep3[2]); + lstep1[3] = _mm_add_epi32(lstep3[5], lstep3[3]); + lstep1[4] = _mm_sub_epi32(lstep3[2], lstep3[4]); + lstep1[5] = _mm_sub_epi32(lstep3[3], lstep3[5]); + lstep1[6] = _mm_sub_epi32(lstep3[0], lstep3[6]); + lstep1[7] = _mm_sub_epi32(lstep3[1], lstep3[7]); lstep1[16] = _mm_add_epi32(lstep3[22], lstep2[16]); lstep1[17] = _mm_add_epi32(lstep3[23], lstep2[17]); lstep1[18] = _mm_add_epi32(lstep3[20], lstep2[18]); @@ -1687,64 +1742,64 @@ void FDCT32x32_2D(const int16_t *input, lstep1[31] = _mm_add_epi32(lstep3[25], lstep2[31]); } { - // to be continued... - // - const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64); - const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64); - - u[0] = _mm_unpacklo_epi32(lstep3[12], lstep3[10]); - u[1] = _mm_unpackhi_epi32(lstep3[12], lstep3[10]); - u[2] = _mm_unpacklo_epi32(lstep3[13], lstep3[11]); - u[3] = _mm_unpackhi_epi32(lstep3[13], lstep3[11]); - - // TODO(jingning): manually inline k_madd_epi32_ to further hide - // instruction latency. - v[0] = k_madd_epi32(u[0], k32_p16_m16); - v[1] = k_madd_epi32(u[1], k32_p16_m16); - v[2] = k_madd_epi32(u[2], k32_p16_m16); - v[3] = k_madd_epi32(u[3], k32_p16_m16); - v[4] = k_madd_epi32(u[0], k32_p16_p16); - v[5] = k_madd_epi32(u[1], k32_p16_p16); - v[6] = k_madd_epi32(u[2], k32_p16_p16); - v[7] = k_madd_epi32(u[3], k32_p16_p16); + // to be continued... + // + const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64); + const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64); + + u[0] = _mm_unpacklo_epi32(lstep3[12], lstep3[10]); + u[1] = _mm_unpackhi_epi32(lstep3[12], lstep3[10]); + u[2] = _mm_unpacklo_epi32(lstep3[13], lstep3[11]); + u[3] = _mm_unpackhi_epi32(lstep3[13], lstep3[11]); + + // TODO(jingning): manually inline k_madd_epi32_ to further hide + // instruction latency. + v[0] = k_madd_epi32(u[0], k32_p16_m16); + v[1] = k_madd_epi32(u[1], k32_p16_m16); + v[2] = k_madd_epi32(u[2], k32_p16_m16); + v[3] = k_madd_epi32(u[3], k32_p16_m16); + v[4] = k_madd_epi32(u[0], k32_p16_p16); + v[5] = k_madd_epi32(u[1], k32_p16_p16); + v[6] = k_madd_epi32(u[2], k32_p16_p16); + v[7] = k_madd_epi32(u[3], k32_p16_p16); #if DCT_HIGH_BIT_DEPTH - overflow = k_check_epi32_overflow_8(&v[0], &v[1], &v[2], &v[3], - &v[4], &v[5], &v[6], &v[7], &kZero); - if (overflow) { - HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); - return; - } + overflow = k_check_epi32_overflow_8(&v[0], &v[1], &v[2], &v[3], &v[4], + &v[5], &v[6], &v[7], &kZero); + if (overflow) { + HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); + return; + } #endif // DCT_HIGH_BIT_DEPTH - u[0] = k_packs_epi64(v[0], v[1]); - u[1] = k_packs_epi64(v[2], v[3]); - u[2] = k_packs_epi64(v[4], v[5]); - u[3] = k_packs_epi64(v[6], v[7]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - - lstep1[10] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - lstep1[11] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - lstep1[12] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - lstep1[13] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[0] = k_packs_epi64(v[0], v[1]); + u[1] = k_packs_epi64(v[2], v[3]); + u[2] = k_packs_epi64(v[4], v[5]); + u[3] = k_packs_epi64(v[6], v[7]); + + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + + lstep1[10] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + lstep1[11] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + lstep1[12] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + lstep1[13] = _mm_srai_epi32(v[3], DCT_CONST_BITS); } { const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64); const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64); const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64); - u[ 0] = _mm_unpacklo_epi32(lstep3[36], lstep3[58]); - u[ 1] = _mm_unpackhi_epi32(lstep3[36], lstep3[58]); - u[ 2] = _mm_unpacklo_epi32(lstep3[37], lstep3[59]); - u[ 3] = _mm_unpackhi_epi32(lstep3[37], lstep3[59]); - u[ 4] = _mm_unpacklo_epi32(lstep3[38], lstep3[56]); - u[ 5] = _mm_unpackhi_epi32(lstep3[38], lstep3[56]); - u[ 6] = _mm_unpacklo_epi32(lstep3[39], lstep3[57]); - u[ 7] = _mm_unpackhi_epi32(lstep3[39], lstep3[57]); - u[ 8] = _mm_unpacklo_epi32(lstep3[40], lstep3[54]); - u[ 9] = _mm_unpackhi_epi32(lstep3[40], lstep3[54]); + u[0] = _mm_unpacklo_epi32(lstep3[36], lstep3[58]); + u[1] = _mm_unpackhi_epi32(lstep3[36], lstep3[58]); + u[2] = _mm_unpacklo_epi32(lstep3[37], lstep3[59]); + u[3] = _mm_unpackhi_epi32(lstep3[37], lstep3[59]); + u[4] = _mm_unpacklo_epi32(lstep3[38], lstep3[56]); + u[5] = _mm_unpackhi_epi32(lstep3[38], lstep3[56]); + u[6] = _mm_unpacklo_epi32(lstep3[39], lstep3[57]); + u[7] = _mm_unpackhi_epi32(lstep3[39], lstep3[57]); + u[8] = _mm_unpacklo_epi32(lstep3[40], lstep3[54]); + u[9] = _mm_unpackhi_epi32(lstep3[40], lstep3[54]); u[10] = _mm_unpacklo_epi32(lstep3[41], lstep3[55]); u[11] = _mm_unpackhi_epi32(lstep3[41], lstep3[55]); u[12] = _mm_unpacklo_epi32(lstep3[42], lstep3[52]); @@ -1752,16 +1807,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_unpacklo_epi32(lstep3[43], lstep3[53]); u[15] = _mm_unpackhi_epi32(lstep3[43], lstep3[53]); - v[ 0] = k_madd_epi32(u[ 0], k32_m08_p24); - v[ 1] = k_madd_epi32(u[ 1], k32_m08_p24); - v[ 2] = k_madd_epi32(u[ 2], k32_m08_p24); - v[ 3] = k_madd_epi32(u[ 3], k32_m08_p24); - v[ 4] = k_madd_epi32(u[ 4], k32_m08_p24); - v[ 5] = k_madd_epi32(u[ 5], k32_m08_p24); - v[ 6] = k_madd_epi32(u[ 6], k32_m08_p24); - v[ 7] = k_madd_epi32(u[ 7], k32_m08_p24); - v[ 8] = k_madd_epi32(u[ 8], k32_m24_m08); - v[ 9] = k_madd_epi32(u[ 9], k32_m24_m08); + v[0] = k_madd_epi32(u[0], k32_m08_p24); + v[1] = k_madd_epi32(u[1], k32_m08_p24); + v[2] = k_madd_epi32(u[2], k32_m08_p24); + v[3] = k_madd_epi32(u[3], k32_m08_p24); + v[4] = k_madd_epi32(u[4], k32_m08_p24); + v[5] = k_madd_epi32(u[5], k32_m08_p24); + v[6] = k_madd_epi32(u[6], k32_m08_p24); + v[7] = k_madd_epi32(u[7], k32_m08_p24); + v[8] = k_madd_epi32(u[8], k32_m24_m08); + v[9] = k_madd_epi32(u[9], k32_m24_m08); v[10] = k_madd_epi32(u[10], k32_m24_m08); v[11] = k_madd_epi32(u[11], k32_m24_m08); v[12] = k_madd_epi32(u[12], k32_m24_m08); @@ -1772,41 +1827,40 @@ void FDCT32x32_2D(const int16_t *input, v[17] = k_madd_epi32(u[13], k32_m08_p24); v[18] = k_madd_epi32(u[14], k32_m08_p24); v[19] = k_madd_epi32(u[15], k32_m08_p24); - v[20] = k_madd_epi32(u[ 8], k32_m08_p24); - v[21] = k_madd_epi32(u[ 9], k32_m08_p24); + v[20] = k_madd_epi32(u[8], k32_m08_p24); + v[21] = k_madd_epi32(u[9], k32_m08_p24); v[22] = k_madd_epi32(u[10], k32_m08_p24); v[23] = k_madd_epi32(u[11], k32_m08_p24); - v[24] = k_madd_epi32(u[ 4], k32_p24_p08); - v[25] = k_madd_epi32(u[ 5], k32_p24_p08); - v[26] = k_madd_epi32(u[ 6], k32_p24_p08); - v[27] = k_madd_epi32(u[ 7], k32_p24_p08); - v[28] = k_madd_epi32(u[ 0], k32_p24_p08); - v[29] = k_madd_epi32(u[ 1], k32_p24_p08); - v[30] = k_madd_epi32(u[ 2], k32_p24_p08); - v[31] = k_madd_epi32(u[ 3], k32_p24_p08); + v[24] = k_madd_epi32(u[4], k32_p24_p08); + v[25] = k_madd_epi32(u[5], k32_p24_p08); + v[26] = k_madd_epi32(u[6], k32_p24_p08); + v[27] = k_madd_epi32(u[7], k32_p24_p08); + v[28] = k_madd_epi32(u[0], k32_p24_p08); + v[29] = k_madd_epi32(u[1], k32_p24_p08); + v[30] = k_madd_epi32(u[2], k32_p24_p08); + v[31] = k_madd_epi32(u[3], k32_p24_p08); #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_32( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &v[16], &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], - &v[24], &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16], + &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24], + &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH - u[ 0] = k_packs_epi64(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64(v[10], v[11]); - u[ 6] = k_packs_epi64(v[12], v[13]); - u[ 7] = k_packs_epi64(v[14], v[15]); - u[ 8] = k_packs_epi64(v[16], v[17]); - u[ 9] = k_packs_epi64(v[18], v[19]); + u[0] = k_packs_epi64(v[0], v[1]); + u[1] = k_packs_epi64(v[2], v[3]); + u[2] = k_packs_epi64(v[4], v[5]); + u[3] = k_packs_epi64(v[6], v[7]); + u[4] = k_packs_epi64(v[8], v[9]); + u[5] = k_packs_epi64(v[10], v[11]); + u[6] = k_packs_epi64(v[12], v[13]); + u[7] = k_packs_epi64(v[14], v[15]); + u[8] = k_packs_epi64(v[16], v[17]); + u[9] = k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); @@ -1814,16 +1868,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); - v[ 0] = _mm_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); @@ -1831,16 +1885,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - lstep1[36] = _mm_srai_epi32(v[ 0], DCT_CONST_BITS); - lstep1[37] = _mm_srai_epi32(v[ 1], DCT_CONST_BITS); - lstep1[38] = _mm_srai_epi32(v[ 2], DCT_CONST_BITS); - lstep1[39] = _mm_srai_epi32(v[ 3], DCT_CONST_BITS); - lstep1[40] = _mm_srai_epi32(v[ 4], DCT_CONST_BITS); - lstep1[41] = _mm_srai_epi32(v[ 5], DCT_CONST_BITS); - lstep1[42] = _mm_srai_epi32(v[ 6], DCT_CONST_BITS); - lstep1[43] = _mm_srai_epi32(v[ 7], DCT_CONST_BITS); - lstep1[52] = _mm_srai_epi32(v[ 8], DCT_CONST_BITS); - lstep1[53] = _mm_srai_epi32(v[ 9], DCT_CONST_BITS); + lstep1[36] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + lstep1[37] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + lstep1[38] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + lstep1[39] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + lstep1[40] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + lstep1[41] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + lstep1[42] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + lstep1[43] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + lstep1[52] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + lstep1[53] = _mm_srai_epi32(v[9], DCT_CONST_BITS); lstep1[54] = _mm_srai_epi32(v[10], DCT_CONST_BITS); lstep1[55] = _mm_srai_epi32(v[11], DCT_CONST_BITS); lstep1[56] = _mm_srai_epi32(v[12], DCT_CONST_BITS); @@ -1850,10 +1904,10 @@ void FDCT32x32_2D(const int16_t *input, } // stage 5 { - lstep2[ 8] = _mm_add_epi32(lstep1[10], lstep3[ 8]); - lstep2[ 9] = _mm_add_epi32(lstep1[11], lstep3[ 9]); - lstep2[10] = _mm_sub_epi32(lstep3[ 8], lstep1[10]); - lstep2[11] = _mm_sub_epi32(lstep3[ 9], lstep1[11]); + lstep2[8] = _mm_add_epi32(lstep1[10], lstep3[8]); + lstep2[9] = _mm_add_epi32(lstep1[11], lstep3[9]); + lstep2[10] = _mm_sub_epi32(lstep3[8], lstep1[10]); + lstep2[11] = _mm_sub_epi32(lstep3[9], lstep1[11]); lstep2[12] = _mm_sub_epi32(lstep3[14], lstep1[12]); lstep2[13] = _mm_sub_epi32(lstep3[15], lstep1[13]); lstep2[14] = _mm_add_epi32(lstep1[12], lstep3[14]); @@ -1876,16 +1930,16 @@ void FDCT32x32_2D(const int16_t *input, // TODO(jingning): manually inline k_madd_epi32_ to further hide // instruction latency. - v[ 0] = k_madd_epi32(u[0], k32_p16_p16); - v[ 1] = k_madd_epi32(u[1], k32_p16_p16); - v[ 2] = k_madd_epi32(u[2], k32_p16_p16); - v[ 3] = k_madd_epi32(u[3], k32_p16_p16); - v[ 4] = k_madd_epi32(u[0], k32_p16_m16); - v[ 5] = k_madd_epi32(u[1], k32_p16_m16); - v[ 6] = k_madd_epi32(u[2], k32_p16_m16); - v[ 7] = k_madd_epi32(u[3], k32_p16_m16); - v[ 8] = k_madd_epi32(u[4], k32_p24_p08); - v[ 9] = k_madd_epi32(u[5], k32_p24_p08); + v[0] = k_madd_epi32(u[0], k32_p16_p16); + v[1] = k_madd_epi32(u[1], k32_p16_p16); + v[2] = k_madd_epi32(u[2], k32_p16_p16); + v[3] = k_madd_epi32(u[3], k32_p16_p16); + v[4] = k_madd_epi32(u[0], k32_p16_m16); + v[5] = k_madd_epi32(u[1], k32_p16_m16); + v[6] = k_madd_epi32(u[2], k32_p16_m16); + v[7] = k_madd_epi32(u[3], k32_p16_m16); + v[8] = k_madd_epi32(u[4], k32_p24_p08); + v[9] = k_madd_epi32(u[5], k32_p24_p08); v[10] = k_madd_epi32(u[6], k32_p24_p08); v[11] = k_madd_epi32(u[7], k32_p24_p08); v[12] = k_madd_epi32(u[4], k32_m08_p24); @@ -1895,9 +1949,8 @@ void FDCT32x32_2D(const int16_t *input, #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_16( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -1967,13 +2020,13 @@ void FDCT32x32_2D(const int16_t *input, u[7] = _mm_srai_epi32(u[7], 2); // Combine - out[ 0] = _mm_packs_epi32(u[0], u[1]); + out[0] = _mm_packs_epi32(u[0], u[1]); out[16] = _mm_packs_epi32(u[2], u[3]); - out[ 8] = _mm_packs_epi32(u[4], u[5]); + out[8] = _mm_packs_epi32(u[4], u[5]); out[24] = _mm_packs_epi32(u[6], u[7]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&out[0], &out[16], - &out[8], &out[24]); + overflow = + check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -2002,8 +2055,8 @@ void FDCT32x32_2D(const int16_t *input, v[5] = k_madd_epi32(u[5], k32_m24_m08); v[6] = k_madd_epi32(u[6], k32_m24_m08); v[7] = k_madd_epi32(u[7], k32_m24_m08); - v[ 8] = k_madd_epi32(u[4], k32_m08_p24); - v[ 9] = k_madd_epi32(u[5], k32_m08_p24); + v[8] = k_madd_epi32(u[4], k32_m08_p24); + v[9] = k_madd_epi32(u[5], k32_m08_p24); v[10] = k_madd_epi32(u[6], k32_m08_p24); v[11] = k_madd_epi32(u[7], k32_m08_p24); v[12] = k_madd_epi32(u[0], k32_p24_p08); @@ -2013,9 +2066,8 @@ void FDCT32x32_2D(const int16_t *input, #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_16( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -2089,10 +2141,10 @@ void FDCT32x32_2D(const int16_t *input, const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64); const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64); - u[0] = _mm_unpacklo_epi32(lstep2[ 8], lstep2[14]); - u[1] = _mm_unpackhi_epi32(lstep2[ 8], lstep2[14]); - u[2] = _mm_unpacklo_epi32(lstep2[ 9], lstep2[15]); - u[3] = _mm_unpackhi_epi32(lstep2[ 9], lstep2[15]); + u[0] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]); + u[1] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]); + u[2] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]); + u[3] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]); u[4] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]); u[5] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]); u[6] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]); @@ -2101,10 +2153,10 @@ void FDCT32x32_2D(const int16_t *input, u[9] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]); u[10] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]); u[11] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]); - u[12] = _mm_unpacklo_epi32(lstep2[ 8], lstep2[14]); - u[13] = _mm_unpackhi_epi32(lstep2[ 8], lstep2[14]); - u[14] = _mm_unpacklo_epi32(lstep2[ 9], lstep2[15]); - u[15] = _mm_unpackhi_epi32(lstep2[ 9], lstep2[15]); + u[12] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]); + u[13] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]); + u[14] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]); + u[15] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]); v[0] = k_madd_epi32(u[0], k32_p28_p04); v[1] = k_madd_epi32(u[1], k32_p28_p04); @@ -2114,8 +2166,8 @@ void FDCT32x32_2D(const int16_t *input, v[5] = k_madd_epi32(u[5], k32_p12_p20); v[6] = k_madd_epi32(u[6], k32_p12_p20); v[7] = k_madd_epi32(u[7], k32_p12_p20); - v[ 8] = k_madd_epi32(u[ 8], k32_m20_p12); - v[ 9] = k_madd_epi32(u[ 9], k32_m20_p12); + v[8] = k_madd_epi32(u[8], k32_m20_p12); + v[9] = k_madd_epi32(u[9], k32_m20_p12); v[10] = k_madd_epi32(u[10], k32_m20_p12); v[11] = k_madd_epi32(u[11], k32_m20_p12); v[12] = k_madd_epi32(u[12], k32_m04_p28); @@ -2125,9 +2177,8 @@ void FDCT32x32_2D(const int16_t *input, #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_16( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -2196,13 +2247,13 @@ void FDCT32x32_2D(const int16_t *input, u[6] = _mm_srai_epi32(u[6], 2); u[7] = _mm_srai_epi32(u[7], 2); - out[ 4] = _mm_packs_epi32(u[0], u[1]); + out[4] = _mm_packs_epi32(u[0], u[1]); out[20] = _mm_packs_epi32(u[2], u[3]); out[12] = _mm_packs_epi32(u[4], u[5]); out[28] = _mm_packs_epi32(u[6], u[7]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&out[4], &out[20], - &out[12], &out[28]); + overflow = + check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -2231,21 +2282,21 @@ void FDCT32x32_2D(const int16_t *input, const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64); const __m128i k32_m28_m04 = pair_set_epi32(-cospi_28_64, -cospi_4_64); const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64); - const __m128i k32_m12_m20 = pair_set_epi32(-cospi_12_64, - -cospi_20_64); + const __m128i k32_m12_m20 = + pair_set_epi32(-cospi_12_64, -cospi_20_64); const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64); const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64); - u[ 0] = _mm_unpacklo_epi32(lstep2[34], lstep2[60]); - u[ 1] = _mm_unpackhi_epi32(lstep2[34], lstep2[60]); - u[ 2] = _mm_unpacklo_epi32(lstep2[35], lstep2[61]); - u[ 3] = _mm_unpackhi_epi32(lstep2[35], lstep2[61]); - u[ 4] = _mm_unpacklo_epi32(lstep2[36], lstep2[58]); - u[ 5] = _mm_unpackhi_epi32(lstep2[36], lstep2[58]); - u[ 6] = _mm_unpacklo_epi32(lstep2[37], lstep2[59]); - u[ 7] = _mm_unpackhi_epi32(lstep2[37], lstep2[59]); - u[ 8] = _mm_unpacklo_epi32(lstep2[42], lstep2[52]); - u[ 9] = _mm_unpackhi_epi32(lstep2[42], lstep2[52]); + u[0] = _mm_unpacklo_epi32(lstep2[34], lstep2[60]); + u[1] = _mm_unpackhi_epi32(lstep2[34], lstep2[60]); + u[2] = _mm_unpacklo_epi32(lstep2[35], lstep2[61]); + u[3] = _mm_unpackhi_epi32(lstep2[35], lstep2[61]); + u[4] = _mm_unpacklo_epi32(lstep2[36], lstep2[58]); + u[5] = _mm_unpackhi_epi32(lstep2[36], lstep2[58]); + u[6] = _mm_unpacklo_epi32(lstep2[37], lstep2[59]); + u[7] = _mm_unpackhi_epi32(lstep2[37], lstep2[59]); + u[8] = _mm_unpacklo_epi32(lstep2[42], lstep2[52]); + u[9] = _mm_unpackhi_epi32(lstep2[42], lstep2[52]); u[10] = _mm_unpacklo_epi32(lstep2[43], lstep2[53]); u[11] = _mm_unpackhi_epi32(lstep2[43], lstep2[53]); u[12] = _mm_unpacklo_epi32(lstep2[44], lstep2[50]); @@ -2253,16 +2304,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_unpacklo_epi32(lstep2[45], lstep2[51]); u[15] = _mm_unpackhi_epi32(lstep2[45], lstep2[51]); - v[ 0] = k_madd_epi32(u[ 0], k32_m04_p28); - v[ 1] = k_madd_epi32(u[ 1], k32_m04_p28); - v[ 2] = k_madd_epi32(u[ 2], k32_m04_p28); - v[ 3] = k_madd_epi32(u[ 3], k32_m04_p28); - v[ 4] = k_madd_epi32(u[ 4], k32_m28_m04); - v[ 5] = k_madd_epi32(u[ 5], k32_m28_m04); - v[ 6] = k_madd_epi32(u[ 6], k32_m28_m04); - v[ 7] = k_madd_epi32(u[ 7], k32_m28_m04); - v[ 8] = k_madd_epi32(u[ 8], k32_m20_p12); - v[ 9] = k_madd_epi32(u[ 9], k32_m20_p12); + v[0] = k_madd_epi32(u[0], k32_m04_p28); + v[1] = k_madd_epi32(u[1], k32_m04_p28); + v[2] = k_madd_epi32(u[2], k32_m04_p28); + v[3] = k_madd_epi32(u[3], k32_m04_p28); + v[4] = k_madd_epi32(u[4], k32_m28_m04); + v[5] = k_madd_epi32(u[5], k32_m28_m04); + v[6] = k_madd_epi32(u[6], k32_m28_m04); + v[7] = k_madd_epi32(u[7], k32_m28_m04); + v[8] = k_madd_epi32(u[8], k32_m20_p12); + v[9] = k_madd_epi32(u[9], k32_m20_p12); v[10] = k_madd_epi32(u[10], k32_m20_p12); v[11] = k_madd_epi32(u[11], k32_m20_p12); v[12] = k_madd_epi32(u[12], k32_m12_m20); @@ -2273,41 +2324,40 @@ void FDCT32x32_2D(const int16_t *input, v[17] = k_madd_epi32(u[13], k32_m20_p12); v[18] = k_madd_epi32(u[14], k32_m20_p12); v[19] = k_madd_epi32(u[15], k32_m20_p12); - v[20] = k_madd_epi32(u[ 8], k32_p12_p20); - v[21] = k_madd_epi32(u[ 9], k32_p12_p20); + v[20] = k_madd_epi32(u[8], k32_p12_p20); + v[21] = k_madd_epi32(u[9], k32_p12_p20); v[22] = k_madd_epi32(u[10], k32_p12_p20); v[23] = k_madd_epi32(u[11], k32_p12_p20); - v[24] = k_madd_epi32(u[ 4], k32_m04_p28); - v[25] = k_madd_epi32(u[ 5], k32_m04_p28); - v[26] = k_madd_epi32(u[ 6], k32_m04_p28); - v[27] = k_madd_epi32(u[ 7], k32_m04_p28); - v[28] = k_madd_epi32(u[ 0], k32_p28_p04); - v[29] = k_madd_epi32(u[ 1], k32_p28_p04); - v[30] = k_madd_epi32(u[ 2], k32_p28_p04); - v[31] = k_madd_epi32(u[ 3], k32_p28_p04); + v[24] = k_madd_epi32(u[4], k32_m04_p28); + v[25] = k_madd_epi32(u[5], k32_m04_p28); + v[26] = k_madd_epi32(u[6], k32_m04_p28); + v[27] = k_madd_epi32(u[7], k32_m04_p28); + v[28] = k_madd_epi32(u[0], k32_p28_p04); + v[29] = k_madd_epi32(u[1], k32_p28_p04); + v[30] = k_madd_epi32(u[2], k32_p28_p04); + v[31] = k_madd_epi32(u[3], k32_p28_p04); #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_32( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &v[16], &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], - &v[24], &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16], + &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24], + &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH - u[ 0] = k_packs_epi64(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64(v[10], v[11]); - u[ 6] = k_packs_epi64(v[12], v[13]); - u[ 7] = k_packs_epi64(v[14], v[15]); - u[ 8] = k_packs_epi64(v[16], v[17]); - u[ 9] = k_packs_epi64(v[18], v[19]); + u[0] = k_packs_epi64(v[0], v[1]); + u[1] = k_packs_epi64(v[2], v[3]); + u[2] = k_packs_epi64(v[4], v[5]); + u[3] = k_packs_epi64(v[6], v[7]); + u[4] = k_packs_epi64(v[8], v[9]); + u[5] = k_packs_epi64(v[10], v[11]); + u[6] = k_packs_epi64(v[12], v[13]); + u[7] = k_packs_epi64(v[14], v[15]); + u[8] = k_packs_epi64(v[16], v[17]); + u[9] = k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); @@ -2315,16 +2365,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); - v[ 0] = _mm_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); @@ -2332,16 +2382,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - lstep3[34] = _mm_srai_epi32(v[ 0], DCT_CONST_BITS); - lstep3[35] = _mm_srai_epi32(v[ 1], DCT_CONST_BITS); - lstep3[36] = _mm_srai_epi32(v[ 2], DCT_CONST_BITS); - lstep3[37] = _mm_srai_epi32(v[ 3], DCT_CONST_BITS); - lstep3[42] = _mm_srai_epi32(v[ 4], DCT_CONST_BITS); - lstep3[43] = _mm_srai_epi32(v[ 5], DCT_CONST_BITS); - lstep3[44] = _mm_srai_epi32(v[ 6], DCT_CONST_BITS); - lstep3[45] = _mm_srai_epi32(v[ 7], DCT_CONST_BITS); - lstep3[50] = _mm_srai_epi32(v[ 8], DCT_CONST_BITS); - lstep3[51] = _mm_srai_epi32(v[ 9], DCT_CONST_BITS); + lstep3[34] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + lstep3[35] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + lstep3[36] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + lstep3[37] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + lstep3[42] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + lstep3[43] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + lstep3[44] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + lstep3[45] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + lstep3[50] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + lstep3[51] = _mm_srai_epi32(v[9], DCT_CONST_BITS); lstep3[52] = _mm_srai_epi32(v[10], DCT_CONST_BITS); lstep3[53] = _mm_srai_epi32(v[11], DCT_CONST_BITS); lstep3[58] = _mm_srai_epi32(v[12], DCT_CONST_BITS); @@ -2354,22 +2404,22 @@ void FDCT32x32_2D(const int16_t *input, const __m128i k32_p30_p02 = pair_set_epi32(cospi_30_64, cospi_2_64); const __m128i k32_p14_p18 = pair_set_epi32(cospi_14_64, cospi_18_64); const __m128i k32_p22_p10 = pair_set_epi32(cospi_22_64, cospi_10_64); - const __m128i k32_p06_p26 = pair_set_epi32(cospi_6_64, cospi_26_64); + const __m128i k32_p06_p26 = pair_set_epi32(cospi_6_64, cospi_26_64); const __m128i k32_m26_p06 = pair_set_epi32(-cospi_26_64, cospi_6_64); const __m128i k32_m10_p22 = pair_set_epi32(-cospi_10_64, cospi_22_64); const __m128i k32_m18_p14 = pair_set_epi32(-cospi_18_64, cospi_14_64); const __m128i k32_m02_p30 = pair_set_epi32(-cospi_2_64, cospi_30_64); - u[ 0] = _mm_unpacklo_epi32(lstep3[16], lstep3[30]); - u[ 1] = _mm_unpackhi_epi32(lstep3[16], lstep3[30]); - u[ 2] = _mm_unpacklo_epi32(lstep3[17], lstep3[31]); - u[ 3] = _mm_unpackhi_epi32(lstep3[17], lstep3[31]); - u[ 4] = _mm_unpacklo_epi32(lstep3[18], lstep3[28]); - u[ 5] = _mm_unpackhi_epi32(lstep3[18], lstep3[28]); - u[ 6] = _mm_unpacklo_epi32(lstep3[19], lstep3[29]); - u[ 7] = _mm_unpackhi_epi32(lstep3[19], lstep3[29]); - u[ 8] = _mm_unpacklo_epi32(lstep3[20], lstep3[26]); - u[ 9] = _mm_unpackhi_epi32(lstep3[20], lstep3[26]); + u[0] = _mm_unpacklo_epi32(lstep3[16], lstep3[30]); + u[1] = _mm_unpackhi_epi32(lstep3[16], lstep3[30]); + u[2] = _mm_unpacklo_epi32(lstep3[17], lstep3[31]); + u[3] = _mm_unpackhi_epi32(lstep3[17], lstep3[31]); + u[4] = _mm_unpacklo_epi32(lstep3[18], lstep3[28]); + u[5] = _mm_unpackhi_epi32(lstep3[18], lstep3[28]); + u[6] = _mm_unpacklo_epi32(lstep3[19], lstep3[29]); + u[7] = _mm_unpackhi_epi32(lstep3[19], lstep3[29]); + u[8] = _mm_unpacklo_epi32(lstep3[20], lstep3[26]); + u[9] = _mm_unpackhi_epi32(lstep3[20], lstep3[26]); u[10] = _mm_unpacklo_epi32(lstep3[21], lstep3[27]); u[11] = _mm_unpackhi_epi32(lstep3[21], lstep3[27]); u[12] = _mm_unpacklo_epi32(lstep3[22], lstep3[24]); @@ -2377,16 +2427,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_unpacklo_epi32(lstep3[23], lstep3[25]); u[15] = _mm_unpackhi_epi32(lstep3[23], lstep3[25]); - v[ 0] = k_madd_epi32(u[ 0], k32_p30_p02); - v[ 1] = k_madd_epi32(u[ 1], k32_p30_p02); - v[ 2] = k_madd_epi32(u[ 2], k32_p30_p02); - v[ 3] = k_madd_epi32(u[ 3], k32_p30_p02); - v[ 4] = k_madd_epi32(u[ 4], k32_p14_p18); - v[ 5] = k_madd_epi32(u[ 5], k32_p14_p18); - v[ 6] = k_madd_epi32(u[ 6], k32_p14_p18); - v[ 7] = k_madd_epi32(u[ 7], k32_p14_p18); - v[ 8] = k_madd_epi32(u[ 8], k32_p22_p10); - v[ 9] = k_madd_epi32(u[ 9], k32_p22_p10); + v[0] = k_madd_epi32(u[0], k32_p30_p02); + v[1] = k_madd_epi32(u[1], k32_p30_p02); + v[2] = k_madd_epi32(u[2], k32_p30_p02); + v[3] = k_madd_epi32(u[3], k32_p30_p02); + v[4] = k_madd_epi32(u[4], k32_p14_p18); + v[5] = k_madd_epi32(u[5], k32_p14_p18); + v[6] = k_madd_epi32(u[6], k32_p14_p18); + v[7] = k_madd_epi32(u[7], k32_p14_p18); + v[8] = k_madd_epi32(u[8], k32_p22_p10); + v[9] = k_madd_epi32(u[9], k32_p22_p10); v[10] = k_madd_epi32(u[10], k32_p22_p10); v[11] = k_madd_epi32(u[11], k32_p22_p10); v[12] = k_madd_epi32(u[12], k32_p06_p26); @@ -2397,41 +2447,40 @@ void FDCT32x32_2D(const int16_t *input, v[17] = k_madd_epi32(u[13], k32_m26_p06); v[18] = k_madd_epi32(u[14], k32_m26_p06); v[19] = k_madd_epi32(u[15], k32_m26_p06); - v[20] = k_madd_epi32(u[ 8], k32_m10_p22); - v[21] = k_madd_epi32(u[ 9], k32_m10_p22); + v[20] = k_madd_epi32(u[8], k32_m10_p22); + v[21] = k_madd_epi32(u[9], k32_m10_p22); v[22] = k_madd_epi32(u[10], k32_m10_p22); v[23] = k_madd_epi32(u[11], k32_m10_p22); - v[24] = k_madd_epi32(u[ 4], k32_m18_p14); - v[25] = k_madd_epi32(u[ 5], k32_m18_p14); - v[26] = k_madd_epi32(u[ 6], k32_m18_p14); - v[27] = k_madd_epi32(u[ 7], k32_m18_p14); - v[28] = k_madd_epi32(u[ 0], k32_m02_p30); - v[29] = k_madd_epi32(u[ 1], k32_m02_p30); - v[30] = k_madd_epi32(u[ 2], k32_m02_p30); - v[31] = k_madd_epi32(u[ 3], k32_m02_p30); + v[24] = k_madd_epi32(u[4], k32_m18_p14); + v[25] = k_madd_epi32(u[5], k32_m18_p14); + v[26] = k_madd_epi32(u[6], k32_m18_p14); + v[27] = k_madd_epi32(u[7], k32_m18_p14); + v[28] = k_madd_epi32(u[0], k32_m02_p30); + v[29] = k_madd_epi32(u[1], k32_m02_p30); + v[30] = k_madd_epi32(u[2], k32_m02_p30); + v[31] = k_madd_epi32(u[3], k32_m02_p30); #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_32( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &v[16], &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], - &v[24], &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16], + &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24], + &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH - u[ 0] = k_packs_epi64(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64(v[10], v[11]); - u[ 6] = k_packs_epi64(v[12], v[13]); - u[ 7] = k_packs_epi64(v[14], v[15]); - u[ 8] = k_packs_epi64(v[16], v[17]); - u[ 9] = k_packs_epi64(v[18], v[19]); + u[0] = k_packs_epi64(v[0], v[1]); + u[1] = k_packs_epi64(v[2], v[3]); + u[2] = k_packs_epi64(v[4], v[5]); + u[3] = k_packs_epi64(v[6], v[7]); + u[4] = k_packs_epi64(v[8], v[9]); + u[5] = k_packs_epi64(v[10], v[11]); + u[6] = k_packs_epi64(v[12], v[13]); + u[7] = k_packs_epi64(v[14], v[15]); + u[8] = k_packs_epi64(v[16], v[17]); + u[9] = k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); @@ -2439,16 +2488,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); - v[ 0] = _mm_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); @@ -2456,16 +2505,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - u[ 0] = _mm_srai_epi32(v[ 0], DCT_CONST_BITS); - u[ 1] = _mm_srai_epi32(v[ 1], DCT_CONST_BITS); - u[ 2] = _mm_srai_epi32(v[ 2], DCT_CONST_BITS); - u[ 3] = _mm_srai_epi32(v[ 3], DCT_CONST_BITS); - u[ 4] = _mm_srai_epi32(v[ 4], DCT_CONST_BITS); - u[ 5] = _mm_srai_epi32(v[ 5], DCT_CONST_BITS); - u[ 6] = _mm_srai_epi32(v[ 6], DCT_CONST_BITS); - u[ 7] = _mm_srai_epi32(v[ 7], DCT_CONST_BITS); - u[ 8] = _mm_srai_epi32(v[ 8], DCT_CONST_BITS); - u[ 9] = _mm_srai_epi32(v[ 9], DCT_CONST_BITS); + u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); @@ -2473,16 +2522,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - v[ 0] = _mm_cmplt_epi32(u[ 0], kZero); - v[ 1] = _mm_cmplt_epi32(u[ 1], kZero); - v[ 2] = _mm_cmplt_epi32(u[ 2], kZero); - v[ 3] = _mm_cmplt_epi32(u[ 3], kZero); - v[ 4] = _mm_cmplt_epi32(u[ 4], kZero); - v[ 5] = _mm_cmplt_epi32(u[ 5], kZero); - v[ 6] = _mm_cmplt_epi32(u[ 6], kZero); - v[ 7] = _mm_cmplt_epi32(u[ 7], kZero); - v[ 8] = _mm_cmplt_epi32(u[ 8], kZero); - v[ 9] = _mm_cmplt_epi32(u[ 9], kZero); + v[0] = _mm_cmplt_epi32(u[0], kZero); + v[1] = _mm_cmplt_epi32(u[1], kZero); + v[2] = _mm_cmplt_epi32(u[2], kZero); + v[3] = _mm_cmplt_epi32(u[3], kZero); + v[4] = _mm_cmplt_epi32(u[4], kZero); + v[5] = _mm_cmplt_epi32(u[5], kZero); + v[6] = _mm_cmplt_epi32(u[6], kZero); + v[7] = _mm_cmplt_epi32(u[7], kZero); + v[8] = _mm_cmplt_epi32(u[8], kZero); + v[9] = _mm_cmplt_epi32(u[9], kZero); v[10] = _mm_cmplt_epi32(u[10], kZero); v[11] = _mm_cmplt_epi32(u[11], kZero); v[12] = _mm_cmplt_epi32(u[12], kZero); @@ -2490,16 +2539,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_cmplt_epi32(u[14], kZero); v[15] = _mm_cmplt_epi32(u[15], kZero); - u[ 0] = _mm_sub_epi32(u[ 0], v[ 0]); - u[ 1] = _mm_sub_epi32(u[ 1], v[ 1]); - u[ 2] = _mm_sub_epi32(u[ 2], v[ 2]); - u[ 3] = _mm_sub_epi32(u[ 3], v[ 3]); - u[ 4] = _mm_sub_epi32(u[ 4], v[ 4]); - u[ 5] = _mm_sub_epi32(u[ 5], v[ 5]); - u[ 6] = _mm_sub_epi32(u[ 6], v[ 6]); - u[ 7] = _mm_sub_epi32(u[ 7], v[ 7]); - u[ 8] = _mm_sub_epi32(u[ 8], v[ 8]); - u[ 9] = _mm_sub_epi32(u[ 9], v[ 9]); + u[0] = _mm_sub_epi32(u[0], v[0]); + u[1] = _mm_sub_epi32(u[1], v[1]); + u[2] = _mm_sub_epi32(u[2], v[2]); + u[3] = _mm_sub_epi32(u[3], v[3]); + u[4] = _mm_sub_epi32(u[4], v[4]); + u[5] = _mm_sub_epi32(u[5], v[5]); + u[6] = _mm_sub_epi32(u[6], v[6]); + u[7] = _mm_sub_epi32(u[7], v[7]); + u[8] = _mm_sub_epi32(u[8], v[8]); + u[9] = _mm_sub_epi32(u[9], v[9]); u[10] = _mm_sub_epi32(u[10], v[10]); u[11] = _mm_sub_epi32(u[11], v[11]); u[12] = _mm_sub_epi32(u[12], v[12]); @@ -2507,16 +2556,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_sub_epi32(u[14], v[14]); u[15] = _mm_sub_epi32(u[15], v[15]); - v[ 0] = _mm_add_epi32(u[ 0], K32One); - v[ 1] = _mm_add_epi32(u[ 1], K32One); - v[ 2] = _mm_add_epi32(u[ 2], K32One); - v[ 3] = _mm_add_epi32(u[ 3], K32One); - v[ 4] = _mm_add_epi32(u[ 4], K32One); - v[ 5] = _mm_add_epi32(u[ 5], K32One); - v[ 6] = _mm_add_epi32(u[ 6], K32One); - v[ 7] = _mm_add_epi32(u[ 7], K32One); - v[ 8] = _mm_add_epi32(u[ 8], K32One); - v[ 9] = _mm_add_epi32(u[ 9], K32One); + v[0] = _mm_add_epi32(u[0], K32One); + v[1] = _mm_add_epi32(u[1], K32One); + v[2] = _mm_add_epi32(u[2], K32One); + v[3] = _mm_add_epi32(u[3], K32One); + v[4] = _mm_add_epi32(u[4], K32One); + v[5] = _mm_add_epi32(u[5], K32One); + v[6] = _mm_add_epi32(u[6], K32One); + v[7] = _mm_add_epi32(u[7], K32One); + v[8] = _mm_add_epi32(u[8], K32One); + v[9] = _mm_add_epi32(u[9], K32One); v[10] = _mm_add_epi32(u[10], K32One); v[11] = _mm_add_epi32(u[11], K32One); v[12] = _mm_add_epi32(u[12], K32One); @@ -2524,16 +2573,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_add_epi32(u[14], K32One); v[15] = _mm_add_epi32(u[15], K32One); - u[ 0] = _mm_srai_epi32(v[ 0], 2); - u[ 1] = _mm_srai_epi32(v[ 1], 2); - u[ 2] = _mm_srai_epi32(v[ 2], 2); - u[ 3] = _mm_srai_epi32(v[ 3], 2); - u[ 4] = _mm_srai_epi32(v[ 4], 2); - u[ 5] = _mm_srai_epi32(v[ 5], 2); - u[ 6] = _mm_srai_epi32(v[ 6], 2); - u[ 7] = _mm_srai_epi32(v[ 7], 2); - u[ 8] = _mm_srai_epi32(v[ 8], 2); - u[ 9] = _mm_srai_epi32(v[ 9], 2); + u[0] = _mm_srai_epi32(v[0], 2); + u[1] = _mm_srai_epi32(v[1], 2); + u[2] = _mm_srai_epi32(v[2], 2); + u[3] = _mm_srai_epi32(v[3], 2); + u[4] = _mm_srai_epi32(v[4], 2); + u[5] = _mm_srai_epi32(v[5], 2); + u[6] = _mm_srai_epi32(v[6], 2); + u[7] = _mm_srai_epi32(v[7], 2); + u[8] = _mm_srai_epi32(v[8], 2); + u[9] = _mm_srai_epi32(v[9], 2); u[10] = _mm_srai_epi32(v[10], 2); u[11] = _mm_srai_epi32(v[11], 2); u[12] = _mm_srai_epi32(v[12], 2); @@ -2541,18 +2590,18 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_srai_epi32(v[14], 2); u[15] = _mm_srai_epi32(v[15], 2); - out[ 2] = _mm_packs_epi32(u[0], u[1]); + out[2] = _mm_packs_epi32(u[0], u[1]); out[18] = _mm_packs_epi32(u[2], u[3]); out[10] = _mm_packs_epi32(u[4], u[5]); out[26] = _mm_packs_epi32(u[6], u[7]); - out[ 6] = _mm_packs_epi32(u[8], u[9]); + out[6] = _mm_packs_epi32(u[8], u[9]); out[22] = _mm_packs_epi32(u[10], u[11]); out[14] = _mm_packs_epi32(u[12], u[13]); out[30] = _mm_packs_epi32(u[14], u[15]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&out[2], &out[18], &out[10], - &out[26], &out[6], &out[22], - &out[14], &out[30]); + overflow = + check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26], + &out[6], &out[22], &out[14], &out[30]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -2604,16 +2653,16 @@ void FDCT32x32_2D(const int16_t *input, const __m128i k32_m17_p15 = pair_set_epi32(-cospi_17_64, cospi_15_64); const __m128i k32_m01_p31 = pair_set_epi32(-cospi_1_64, cospi_31_64); - u[ 0] = _mm_unpacklo_epi32(lstep1[32], lstep1[62]); - u[ 1] = _mm_unpackhi_epi32(lstep1[32], lstep1[62]); - u[ 2] = _mm_unpacklo_epi32(lstep1[33], lstep1[63]); - u[ 3] = _mm_unpackhi_epi32(lstep1[33], lstep1[63]); - u[ 4] = _mm_unpacklo_epi32(lstep1[34], lstep1[60]); - u[ 5] = _mm_unpackhi_epi32(lstep1[34], lstep1[60]); - u[ 6] = _mm_unpacklo_epi32(lstep1[35], lstep1[61]); - u[ 7] = _mm_unpackhi_epi32(lstep1[35], lstep1[61]); - u[ 8] = _mm_unpacklo_epi32(lstep1[36], lstep1[58]); - u[ 9] = _mm_unpackhi_epi32(lstep1[36], lstep1[58]); + u[0] = _mm_unpacklo_epi32(lstep1[32], lstep1[62]); + u[1] = _mm_unpackhi_epi32(lstep1[32], lstep1[62]); + u[2] = _mm_unpacklo_epi32(lstep1[33], lstep1[63]); + u[3] = _mm_unpackhi_epi32(lstep1[33], lstep1[63]); + u[4] = _mm_unpacklo_epi32(lstep1[34], lstep1[60]); + u[5] = _mm_unpackhi_epi32(lstep1[34], lstep1[60]); + u[6] = _mm_unpacklo_epi32(lstep1[35], lstep1[61]); + u[7] = _mm_unpackhi_epi32(lstep1[35], lstep1[61]); + u[8] = _mm_unpacklo_epi32(lstep1[36], lstep1[58]); + u[9] = _mm_unpackhi_epi32(lstep1[36], lstep1[58]); u[10] = _mm_unpacklo_epi32(lstep1[37], lstep1[59]); u[11] = _mm_unpackhi_epi32(lstep1[37], lstep1[59]); u[12] = _mm_unpacklo_epi32(lstep1[38], lstep1[56]); @@ -2621,16 +2670,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_unpacklo_epi32(lstep1[39], lstep1[57]); u[15] = _mm_unpackhi_epi32(lstep1[39], lstep1[57]); - v[ 0] = k_madd_epi32(u[ 0], k32_p31_p01); - v[ 1] = k_madd_epi32(u[ 1], k32_p31_p01); - v[ 2] = k_madd_epi32(u[ 2], k32_p31_p01); - v[ 3] = k_madd_epi32(u[ 3], k32_p31_p01); - v[ 4] = k_madd_epi32(u[ 4], k32_p15_p17); - v[ 5] = k_madd_epi32(u[ 5], k32_p15_p17); - v[ 6] = k_madd_epi32(u[ 6], k32_p15_p17); - v[ 7] = k_madd_epi32(u[ 7], k32_p15_p17); - v[ 8] = k_madd_epi32(u[ 8], k32_p23_p09); - v[ 9] = k_madd_epi32(u[ 9], k32_p23_p09); + v[0] = k_madd_epi32(u[0], k32_p31_p01); + v[1] = k_madd_epi32(u[1], k32_p31_p01); + v[2] = k_madd_epi32(u[2], k32_p31_p01); + v[3] = k_madd_epi32(u[3], k32_p31_p01); + v[4] = k_madd_epi32(u[4], k32_p15_p17); + v[5] = k_madd_epi32(u[5], k32_p15_p17); + v[6] = k_madd_epi32(u[6], k32_p15_p17); + v[7] = k_madd_epi32(u[7], k32_p15_p17); + v[8] = k_madd_epi32(u[8], k32_p23_p09); + v[9] = k_madd_epi32(u[9], k32_p23_p09); v[10] = k_madd_epi32(u[10], k32_p23_p09); v[11] = k_madd_epi32(u[11], k32_p23_p09); v[12] = k_madd_epi32(u[12], k32_p07_p25); @@ -2641,41 +2690,40 @@ void FDCT32x32_2D(const int16_t *input, v[17] = k_madd_epi32(u[13], k32_m25_p07); v[18] = k_madd_epi32(u[14], k32_m25_p07); v[19] = k_madd_epi32(u[15], k32_m25_p07); - v[20] = k_madd_epi32(u[ 8], k32_m09_p23); - v[21] = k_madd_epi32(u[ 9], k32_m09_p23); + v[20] = k_madd_epi32(u[8], k32_m09_p23); + v[21] = k_madd_epi32(u[9], k32_m09_p23); v[22] = k_madd_epi32(u[10], k32_m09_p23); v[23] = k_madd_epi32(u[11], k32_m09_p23); - v[24] = k_madd_epi32(u[ 4], k32_m17_p15); - v[25] = k_madd_epi32(u[ 5], k32_m17_p15); - v[26] = k_madd_epi32(u[ 6], k32_m17_p15); - v[27] = k_madd_epi32(u[ 7], k32_m17_p15); - v[28] = k_madd_epi32(u[ 0], k32_m01_p31); - v[29] = k_madd_epi32(u[ 1], k32_m01_p31); - v[30] = k_madd_epi32(u[ 2], k32_m01_p31); - v[31] = k_madd_epi32(u[ 3], k32_m01_p31); + v[24] = k_madd_epi32(u[4], k32_m17_p15); + v[25] = k_madd_epi32(u[5], k32_m17_p15); + v[26] = k_madd_epi32(u[6], k32_m17_p15); + v[27] = k_madd_epi32(u[7], k32_m17_p15); + v[28] = k_madd_epi32(u[0], k32_m01_p31); + v[29] = k_madd_epi32(u[1], k32_m01_p31); + v[30] = k_madd_epi32(u[2], k32_m01_p31); + v[31] = k_madd_epi32(u[3], k32_m01_p31); #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_32( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &v[16], &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], - &v[24], &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16], + &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24], + &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH - u[ 0] = k_packs_epi64(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64(v[10], v[11]); - u[ 6] = k_packs_epi64(v[12], v[13]); - u[ 7] = k_packs_epi64(v[14], v[15]); - u[ 8] = k_packs_epi64(v[16], v[17]); - u[ 9] = k_packs_epi64(v[18], v[19]); + u[0] = k_packs_epi64(v[0], v[1]); + u[1] = k_packs_epi64(v[2], v[3]); + u[2] = k_packs_epi64(v[4], v[5]); + u[3] = k_packs_epi64(v[6], v[7]); + u[4] = k_packs_epi64(v[8], v[9]); + u[5] = k_packs_epi64(v[10], v[11]); + u[6] = k_packs_epi64(v[12], v[13]); + u[7] = k_packs_epi64(v[14], v[15]); + u[8] = k_packs_epi64(v[16], v[17]); + u[9] = k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); @@ -2683,16 +2731,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); - v[ 0] = _mm_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); @@ -2700,16 +2748,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - u[ 0] = _mm_srai_epi32(v[ 0], DCT_CONST_BITS); - u[ 1] = _mm_srai_epi32(v[ 1], DCT_CONST_BITS); - u[ 2] = _mm_srai_epi32(v[ 2], DCT_CONST_BITS); - u[ 3] = _mm_srai_epi32(v[ 3], DCT_CONST_BITS); - u[ 4] = _mm_srai_epi32(v[ 4], DCT_CONST_BITS); - u[ 5] = _mm_srai_epi32(v[ 5], DCT_CONST_BITS); - u[ 6] = _mm_srai_epi32(v[ 6], DCT_CONST_BITS); - u[ 7] = _mm_srai_epi32(v[ 7], DCT_CONST_BITS); - u[ 8] = _mm_srai_epi32(v[ 8], DCT_CONST_BITS); - u[ 9] = _mm_srai_epi32(v[ 9], DCT_CONST_BITS); + u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); @@ -2717,16 +2765,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - v[ 0] = _mm_cmplt_epi32(u[ 0], kZero); - v[ 1] = _mm_cmplt_epi32(u[ 1], kZero); - v[ 2] = _mm_cmplt_epi32(u[ 2], kZero); - v[ 3] = _mm_cmplt_epi32(u[ 3], kZero); - v[ 4] = _mm_cmplt_epi32(u[ 4], kZero); - v[ 5] = _mm_cmplt_epi32(u[ 5], kZero); - v[ 6] = _mm_cmplt_epi32(u[ 6], kZero); - v[ 7] = _mm_cmplt_epi32(u[ 7], kZero); - v[ 8] = _mm_cmplt_epi32(u[ 8], kZero); - v[ 9] = _mm_cmplt_epi32(u[ 9], kZero); + v[0] = _mm_cmplt_epi32(u[0], kZero); + v[1] = _mm_cmplt_epi32(u[1], kZero); + v[2] = _mm_cmplt_epi32(u[2], kZero); + v[3] = _mm_cmplt_epi32(u[3], kZero); + v[4] = _mm_cmplt_epi32(u[4], kZero); + v[5] = _mm_cmplt_epi32(u[5], kZero); + v[6] = _mm_cmplt_epi32(u[6], kZero); + v[7] = _mm_cmplt_epi32(u[7], kZero); + v[8] = _mm_cmplt_epi32(u[8], kZero); + v[9] = _mm_cmplt_epi32(u[9], kZero); v[10] = _mm_cmplt_epi32(u[10], kZero); v[11] = _mm_cmplt_epi32(u[11], kZero); v[12] = _mm_cmplt_epi32(u[12], kZero); @@ -2734,16 +2782,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_cmplt_epi32(u[14], kZero); v[15] = _mm_cmplt_epi32(u[15], kZero); - u[ 0] = _mm_sub_epi32(u[ 0], v[ 0]); - u[ 1] = _mm_sub_epi32(u[ 1], v[ 1]); - u[ 2] = _mm_sub_epi32(u[ 2], v[ 2]); - u[ 3] = _mm_sub_epi32(u[ 3], v[ 3]); - u[ 4] = _mm_sub_epi32(u[ 4], v[ 4]); - u[ 5] = _mm_sub_epi32(u[ 5], v[ 5]); - u[ 6] = _mm_sub_epi32(u[ 6], v[ 6]); - u[ 7] = _mm_sub_epi32(u[ 7], v[ 7]); - u[ 8] = _mm_sub_epi32(u[ 8], v[ 8]); - u[ 9] = _mm_sub_epi32(u[ 9], v[ 9]); + u[0] = _mm_sub_epi32(u[0], v[0]); + u[1] = _mm_sub_epi32(u[1], v[1]); + u[2] = _mm_sub_epi32(u[2], v[2]); + u[3] = _mm_sub_epi32(u[3], v[3]); + u[4] = _mm_sub_epi32(u[4], v[4]); + u[5] = _mm_sub_epi32(u[5], v[5]); + u[6] = _mm_sub_epi32(u[6], v[6]); + u[7] = _mm_sub_epi32(u[7], v[7]); + u[8] = _mm_sub_epi32(u[8], v[8]); + u[9] = _mm_sub_epi32(u[9], v[9]); u[10] = _mm_sub_epi32(u[10], v[10]); u[11] = _mm_sub_epi32(u[11], v[11]); u[12] = _mm_sub_epi32(u[12], v[12]); @@ -2785,18 +2833,18 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_srai_epi32(v[14], 2); u[15] = _mm_srai_epi32(v[15], 2); - out[ 1] = _mm_packs_epi32(u[0], u[1]); + out[1] = _mm_packs_epi32(u[0], u[1]); out[17] = _mm_packs_epi32(u[2], u[3]); - out[ 9] = _mm_packs_epi32(u[4], u[5]); + out[9] = _mm_packs_epi32(u[4], u[5]); out[25] = _mm_packs_epi32(u[6], u[7]); - out[ 7] = _mm_packs_epi32(u[8], u[9]); + out[7] = _mm_packs_epi32(u[8], u[9]); out[23] = _mm_packs_epi32(u[10], u[11]); out[15] = _mm_packs_epi32(u[12], u[13]); out[31] = _mm_packs_epi32(u[14], u[15]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&out[1], &out[17], &out[9], - &out[25], &out[7], &out[23], - &out[15], &out[31]); + overflow = + check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25], + &out[7], &out[23], &out[15], &out[31]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; @@ -2813,16 +2861,16 @@ void FDCT32x32_2D(const int16_t *input, const __m128i k32_m21_p11 = pair_set_epi32(-cospi_21_64, cospi_11_64); const __m128i k32_m05_p27 = pair_set_epi32(-cospi_5_64, cospi_27_64); - u[ 0] = _mm_unpacklo_epi32(lstep1[40], lstep1[54]); - u[ 1] = _mm_unpackhi_epi32(lstep1[40], lstep1[54]); - u[ 2] = _mm_unpacklo_epi32(lstep1[41], lstep1[55]); - u[ 3] = _mm_unpackhi_epi32(lstep1[41], lstep1[55]); - u[ 4] = _mm_unpacklo_epi32(lstep1[42], lstep1[52]); - u[ 5] = _mm_unpackhi_epi32(lstep1[42], lstep1[52]); - u[ 6] = _mm_unpacklo_epi32(lstep1[43], lstep1[53]); - u[ 7] = _mm_unpackhi_epi32(lstep1[43], lstep1[53]); - u[ 8] = _mm_unpacklo_epi32(lstep1[44], lstep1[50]); - u[ 9] = _mm_unpackhi_epi32(lstep1[44], lstep1[50]); + u[0] = _mm_unpacklo_epi32(lstep1[40], lstep1[54]); + u[1] = _mm_unpackhi_epi32(lstep1[40], lstep1[54]); + u[2] = _mm_unpacklo_epi32(lstep1[41], lstep1[55]); + u[3] = _mm_unpackhi_epi32(lstep1[41], lstep1[55]); + u[4] = _mm_unpacklo_epi32(lstep1[42], lstep1[52]); + u[5] = _mm_unpackhi_epi32(lstep1[42], lstep1[52]); + u[6] = _mm_unpacklo_epi32(lstep1[43], lstep1[53]); + u[7] = _mm_unpackhi_epi32(lstep1[43], lstep1[53]); + u[8] = _mm_unpacklo_epi32(lstep1[44], lstep1[50]); + u[9] = _mm_unpackhi_epi32(lstep1[44], lstep1[50]); u[10] = _mm_unpacklo_epi32(lstep1[45], lstep1[51]); u[11] = _mm_unpackhi_epi32(lstep1[45], lstep1[51]); u[12] = _mm_unpacklo_epi32(lstep1[46], lstep1[48]); @@ -2830,16 +2878,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_unpacklo_epi32(lstep1[47], lstep1[49]); u[15] = _mm_unpackhi_epi32(lstep1[47], lstep1[49]); - v[ 0] = k_madd_epi32(u[ 0], k32_p27_p05); - v[ 1] = k_madd_epi32(u[ 1], k32_p27_p05); - v[ 2] = k_madd_epi32(u[ 2], k32_p27_p05); - v[ 3] = k_madd_epi32(u[ 3], k32_p27_p05); - v[ 4] = k_madd_epi32(u[ 4], k32_p11_p21); - v[ 5] = k_madd_epi32(u[ 5], k32_p11_p21); - v[ 6] = k_madd_epi32(u[ 6], k32_p11_p21); - v[ 7] = k_madd_epi32(u[ 7], k32_p11_p21); - v[ 8] = k_madd_epi32(u[ 8], k32_p19_p13); - v[ 9] = k_madd_epi32(u[ 9], k32_p19_p13); + v[0] = k_madd_epi32(u[0], k32_p27_p05); + v[1] = k_madd_epi32(u[1], k32_p27_p05); + v[2] = k_madd_epi32(u[2], k32_p27_p05); + v[3] = k_madd_epi32(u[3], k32_p27_p05); + v[4] = k_madd_epi32(u[4], k32_p11_p21); + v[5] = k_madd_epi32(u[5], k32_p11_p21); + v[6] = k_madd_epi32(u[6], k32_p11_p21); + v[7] = k_madd_epi32(u[7], k32_p11_p21); + v[8] = k_madd_epi32(u[8], k32_p19_p13); + v[9] = k_madd_epi32(u[9], k32_p19_p13); v[10] = k_madd_epi32(u[10], k32_p19_p13); v[11] = k_madd_epi32(u[11], k32_p19_p13); v[12] = k_madd_epi32(u[12], k32_p03_p29); @@ -2850,41 +2898,40 @@ void FDCT32x32_2D(const int16_t *input, v[17] = k_madd_epi32(u[13], k32_m29_p03); v[18] = k_madd_epi32(u[14], k32_m29_p03); v[19] = k_madd_epi32(u[15], k32_m29_p03); - v[20] = k_madd_epi32(u[ 8], k32_m13_p19); - v[21] = k_madd_epi32(u[ 9], k32_m13_p19); + v[20] = k_madd_epi32(u[8], k32_m13_p19); + v[21] = k_madd_epi32(u[9], k32_m13_p19); v[22] = k_madd_epi32(u[10], k32_m13_p19); v[23] = k_madd_epi32(u[11], k32_m13_p19); - v[24] = k_madd_epi32(u[ 4], k32_m21_p11); - v[25] = k_madd_epi32(u[ 5], k32_m21_p11); - v[26] = k_madd_epi32(u[ 6], k32_m21_p11); - v[27] = k_madd_epi32(u[ 7], k32_m21_p11); - v[28] = k_madd_epi32(u[ 0], k32_m05_p27); - v[29] = k_madd_epi32(u[ 1], k32_m05_p27); - v[30] = k_madd_epi32(u[ 2], k32_m05_p27); - v[31] = k_madd_epi32(u[ 3], k32_m05_p27); + v[24] = k_madd_epi32(u[4], k32_m21_p11); + v[25] = k_madd_epi32(u[5], k32_m21_p11); + v[26] = k_madd_epi32(u[6], k32_m21_p11); + v[27] = k_madd_epi32(u[7], k32_m21_p11); + v[28] = k_madd_epi32(u[0], k32_m05_p27); + v[29] = k_madd_epi32(u[1], k32_m05_p27); + v[30] = k_madd_epi32(u[2], k32_m05_p27); + v[31] = k_madd_epi32(u[3], k32_m05_p27); #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_32( - &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], - &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], - &v[16], &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], - &v[24], &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], - &kZero); + &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], + &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16], + &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24], + &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH - u[ 0] = k_packs_epi64(v[ 0], v[ 1]); - u[ 1] = k_packs_epi64(v[ 2], v[ 3]); - u[ 2] = k_packs_epi64(v[ 4], v[ 5]); - u[ 3] = k_packs_epi64(v[ 6], v[ 7]); - u[ 4] = k_packs_epi64(v[ 8], v[ 9]); - u[ 5] = k_packs_epi64(v[10], v[11]); - u[ 6] = k_packs_epi64(v[12], v[13]); - u[ 7] = k_packs_epi64(v[14], v[15]); - u[ 8] = k_packs_epi64(v[16], v[17]); - u[ 9] = k_packs_epi64(v[18], v[19]); + u[0] = k_packs_epi64(v[0], v[1]); + u[1] = k_packs_epi64(v[2], v[3]); + u[2] = k_packs_epi64(v[4], v[5]); + u[3] = k_packs_epi64(v[6], v[7]); + u[4] = k_packs_epi64(v[8], v[9]); + u[5] = k_packs_epi64(v[10], v[11]); + u[6] = k_packs_epi64(v[12], v[13]); + u[7] = k_packs_epi64(v[14], v[15]); + u[8] = k_packs_epi64(v[16], v[17]); + u[9] = k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); @@ -2892,16 +2939,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); - v[ 0] = _mm_add_epi32(u[ 0], k__DCT_CONST_ROUNDING); - v[ 1] = _mm_add_epi32(u[ 1], k__DCT_CONST_ROUNDING); - v[ 2] = _mm_add_epi32(u[ 2], k__DCT_CONST_ROUNDING); - v[ 3] = _mm_add_epi32(u[ 3], k__DCT_CONST_ROUNDING); - v[ 4] = _mm_add_epi32(u[ 4], k__DCT_CONST_ROUNDING); - v[ 5] = _mm_add_epi32(u[ 5], k__DCT_CONST_ROUNDING); - v[ 6] = _mm_add_epi32(u[ 6], k__DCT_CONST_ROUNDING); - v[ 7] = _mm_add_epi32(u[ 7], k__DCT_CONST_ROUNDING); - v[ 8] = _mm_add_epi32(u[ 8], k__DCT_CONST_ROUNDING); - v[ 9] = _mm_add_epi32(u[ 9], k__DCT_CONST_ROUNDING); + v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); + v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); + v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); + v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); + v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); + v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); + v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); + v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); + v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); + v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); @@ -2909,16 +2956,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - u[ 0] = _mm_srai_epi32(v[ 0], DCT_CONST_BITS); - u[ 1] = _mm_srai_epi32(v[ 1], DCT_CONST_BITS); - u[ 2] = _mm_srai_epi32(v[ 2], DCT_CONST_BITS); - u[ 3] = _mm_srai_epi32(v[ 3], DCT_CONST_BITS); - u[ 4] = _mm_srai_epi32(v[ 4], DCT_CONST_BITS); - u[ 5] = _mm_srai_epi32(v[ 5], DCT_CONST_BITS); - u[ 6] = _mm_srai_epi32(v[ 6], DCT_CONST_BITS); - u[ 7] = _mm_srai_epi32(v[ 7], DCT_CONST_BITS); - u[ 8] = _mm_srai_epi32(v[ 8], DCT_CONST_BITS); - u[ 9] = _mm_srai_epi32(v[ 9], DCT_CONST_BITS); + u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); + u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); + u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); + u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); + u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); + u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); + u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); + u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); + u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); @@ -2926,16 +2973,16 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - v[ 0] = _mm_cmplt_epi32(u[ 0], kZero); - v[ 1] = _mm_cmplt_epi32(u[ 1], kZero); - v[ 2] = _mm_cmplt_epi32(u[ 2], kZero); - v[ 3] = _mm_cmplt_epi32(u[ 3], kZero); - v[ 4] = _mm_cmplt_epi32(u[ 4], kZero); - v[ 5] = _mm_cmplt_epi32(u[ 5], kZero); - v[ 6] = _mm_cmplt_epi32(u[ 6], kZero); - v[ 7] = _mm_cmplt_epi32(u[ 7], kZero); - v[ 8] = _mm_cmplt_epi32(u[ 8], kZero); - v[ 9] = _mm_cmplt_epi32(u[ 9], kZero); + v[0] = _mm_cmplt_epi32(u[0], kZero); + v[1] = _mm_cmplt_epi32(u[1], kZero); + v[2] = _mm_cmplt_epi32(u[2], kZero); + v[3] = _mm_cmplt_epi32(u[3], kZero); + v[4] = _mm_cmplt_epi32(u[4], kZero); + v[5] = _mm_cmplt_epi32(u[5], kZero); + v[6] = _mm_cmplt_epi32(u[6], kZero); + v[7] = _mm_cmplt_epi32(u[7], kZero); + v[8] = _mm_cmplt_epi32(u[8], kZero); + v[9] = _mm_cmplt_epi32(u[9], kZero); v[10] = _mm_cmplt_epi32(u[10], kZero); v[11] = _mm_cmplt_epi32(u[11], kZero); v[12] = _mm_cmplt_epi32(u[12], kZero); @@ -2943,16 +2990,16 @@ void FDCT32x32_2D(const int16_t *input, v[14] = _mm_cmplt_epi32(u[14], kZero); v[15] = _mm_cmplt_epi32(u[15], kZero); - u[ 0] = _mm_sub_epi32(u[ 0], v[ 0]); - u[ 1] = _mm_sub_epi32(u[ 1], v[ 1]); - u[ 2] = _mm_sub_epi32(u[ 2], v[ 2]); - u[ 3] = _mm_sub_epi32(u[ 3], v[ 3]); - u[ 4] = _mm_sub_epi32(u[ 4], v[ 4]); - u[ 5] = _mm_sub_epi32(u[ 5], v[ 5]); - u[ 6] = _mm_sub_epi32(u[ 6], v[ 6]); - u[ 7] = _mm_sub_epi32(u[ 7], v[ 7]); - u[ 8] = _mm_sub_epi32(u[ 8], v[ 8]); - u[ 9] = _mm_sub_epi32(u[ 9], v[ 9]); + u[0] = _mm_sub_epi32(u[0], v[0]); + u[1] = _mm_sub_epi32(u[1], v[1]); + u[2] = _mm_sub_epi32(u[2], v[2]); + u[3] = _mm_sub_epi32(u[3], v[3]); + u[4] = _mm_sub_epi32(u[4], v[4]); + u[5] = _mm_sub_epi32(u[5], v[5]); + u[6] = _mm_sub_epi32(u[6], v[6]); + u[7] = _mm_sub_epi32(u[7], v[7]); + u[8] = _mm_sub_epi32(u[8], v[8]); + u[9] = _mm_sub_epi32(u[9], v[9]); u[10] = _mm_sub_epi32(u[10], v[10]); u[11] = _mm_sub_epi32(u[11], v[11]); u[12] = _mm_sub_epi32(u[12], v[12]); @@ -2994,18 +3041,18 @@ void FDCT32x32_2D(const int16_t *input, u[14] = _mm_srai_epi32(v[14], 2); u[15] = _mm_srai_epi32(v[15], 2); - out[ 5] = _mm_packs_epi32(u[0], u[1]); + out[5] = _mm_packs_epi32(u[0], u[1]); out[21] = _mm_packs_epi32(u[2], u[3]); out[13] = _mm_packs_epi32(u[4], u[5]); out[29] = _mm_packs_epi32(u[6], u[7]); - out[ 3] = _mm_packs_epi32(u[8], u[9]); + out[3] = _mm_packs_epi32(u[8], u[9]); out[19] = _mm_packs_epi32(u[10], u[11]); out[11] = _mm_packs_epi32(u[12], u[13]); out[27] = _mm_packs_epi32(u[14], u[15]); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&out[5], &out[21], &out[13], - &out[29], &out[3], &out[19], - &out[11], &out[27]); + overflow = + check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29], + &out[3], &out[19], &out[11], &out[27]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; diff --git a/vp10/common/x86/vp10_fwd_txfm1d_sse4.c b/vp10/common/x86/vp10_fwd_txfm1d_sse4.c index 5ade8bd3f102c14debb8244d6ece83201df5b42d..caade2a0b67aaece280baeeb5aa08ae27b59c4d5 100644 --- a/vp10/common/x86/vp10_fwd_txfm1d_sse4.c +++ b/vp10/common/x86/vp10_fwd_txfm1d_sse4.c @@ -1,10 +1,10 @@ #include "vp10/common/x86/vp10_txfm1d_sse4.h" -void vp10_fdct4_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fdct4_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 4; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[4]; __m128i buf1[4]; int col_num = txfm_size / num_per_128; @@ -53,11 +53,11 @@ void vp10_fdct4_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fdct8_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fdct8_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 8; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[8]; __m128i buf1[8]; int col_num = txfm_size / num_per_128; @@ -152,11 +152,11 @@ void vp10_fdct8_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fdct16_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fdct16_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 16; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[16]; __m128i buf1[16]; int col_num = txfm_size / num_per_128; @@ -349,11 +349,11 @@ void vp10_fdct16_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fdct32_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fdct32_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 32; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[32]; __m128i buf1[32]; int col_num = txfm_size / num_per_128; @@ -764,11 +764,11 @@ void vp10_fdct32_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fadst4_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fadst4_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 4; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[4]; __m128i buf1[4]; int col_num = txfm_size / num_per_128; @@ -835,11 +835,11 @@ void vp10_fadst4_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fadst8_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fadst8_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 8; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[8]; __m128i buf1[8]; int col_num = txfm_size / num_per_128; @@ -960,11 +960,11 @@ void vp10_fadst8_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fadst16_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fadst16_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 16; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[16]; __m128i buf1[16]; int col_num = txfm_size / num_per_128; @@ -1199,11 +1199,11 @@ void vp10_fadst16_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fadst32_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fadst32_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 32; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[32]; __m128i buf1[32]; int col_num = txfm_size / num_per_128; @@ -1688,11 +1688,11 @@ void vp10_fadst32_new_sse4_1(const __m128i* input, __m128i* output, } } -void vp10_fdct64_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range) { +void vp10_fdct64_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range) { const int txfm_size = 64; const int num_per_128 = 4; - const int32_t* cospi; + const int32_t *cospi; __m128i buf0[64]; __m128i buf1[64]; int col_num = txfm_size / num_per_128; diff --git a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c index eb9662eb84bdc7554836705faa70122d4a85b27b..4d4e5e6e49c9d45b5a429fe5fe4f2a62f2991675 100644 --- a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c +++ b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c @@ -28,35 +28,16 @@ typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output, static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) { switch (txfm_type) { - case TXFM_TYPE_DCT4: - return vp10_fdct4_new_sse4_1; - break; - case TXFM_TYPE_DCT8: - return vp10_fdct8_new_sse4_1; - break; - case TXFM_TYPE_DCT16: - return vp10_fdct16_new_sse4_1; - break; - case TXFM_TYPE_DCT32: - return vp10_fdct32_new_sse4_1; - break; - case TXFM_TYPE_DCT64: - return vp10_fdct64_new_sse4_1; - break; - case TXFM_TYPE_ADST4: - return vp10_fadst4_new_sse4_1; - break; - case TXFM_TYPE_ADST8: - return vp10_fadst8_new_sse4_1; - break; - case TXFM_TYPE_ADST16: - return vp10_fadst16_new_sse4_1; - break; - case TXFM_TYPE_ADST32: - return vp10_fadst32_new_sse4_1; - break; - default: - assert(0); + case TXFM_TYPE_DCT4: return vp10_fdct4_new_sse4_1; break; + case TXFM_TYPE_DCT8: return vp10_fdct8_new_sse4_1; break; + case TXFM_TYPE_DCT16: return vp10_fdct16_new_sse4_1; break; + case TXFM_TYPE_DCT32: return vp10_fdct32_new_sse4_1; break; + case TXFM_TYPE_DCT64: return vp10_fdct64_new_sse4_1; break; + case TXFM_TYPE_ADST4: return vp10_fadst4_new_sse4_1; break; + case TXFM_TYPE_ADST8: return vp10_fadst8_new_sse4_1; break; + case TXFM_TYPE_ADST16: return vp10_fadst16_new_sse4_1; break; + case TXFM_TYPE_ADST32: return vp10_fadst32_new_sse4_1; break; + default: assert(0); } return NULL; } diff --git a/vp10/common/x86/vp10_fwd_txfm_impl_sse2.h b/vp10/common/x86/vp10_fwd_txfm_impl_sse2.h index 69889e2e98cdf6d10788db303f3266fbf10a3718..b3bcc4a99488984a4dd61d0f13ce20fea68c5f33 100644 --- a/vp10/common/x86/vp10_fwd_txfm_impl_sse2.h +++ b/vp10/common/x86/vp10_fwd_txfm_impl_sse2.h @@ -43,44 +43,36 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) { // These are the coefficients used for the multiplies. // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64), // where cospi_N_64 = cos(N pi /64) - const __m128i k__cospi_A = octa_set_epi16(cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64); - const __m128i k__cospi_B = octa_set_epi16(cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64); - const __m128i k__cospi_C = octa_set_epi16(cospi_8_64, cospi_24_64, - cospi_8_64, cospi_24_64, - cospi_24_64, -cospi_8_64, - cospi_24_64, -cospi_8_64); - const __m128i k__cospi_D = octa_set_epi16(cospi_24_64, -cospi_8_64, - cospi_24_64, -cospi_8_64, - cospi_8_64, cospi_24_64, - cospi_8_64, cospi_24_64); - const __m128i k__cospi_E = octa_set_epi16(cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64, - cospi_16_64, cospi_16_64); - const __m128i k__cospi_F = octa_set_epi16(cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64, - cospi_16_64, -cospi_16_64); - const __m128i k__cospi_G = octa_set_epi16(cospi_8_64, cospi_24_64, - cospi_8_64, cospi_24_64, - -cospi_8_64, -cospi_24_64, - -cospi_8_64, -cospi_24_64); - const __m128i k__cospi_H = octa_set_epi16(cospi_24_64, -cospi_8_64, - cospi_24_64, -cospi_8_64, - -cospi_24_64, cospi_8_64, - -cospi_24_64, cospi_8_64); + const __m128i k__cospi_A = + octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64, + cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64); + const __m128i k__cospi_B = + octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64, + cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64); + const __m128i k__cospi_C = + octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64, + cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64); + const __m128i k__cospi_D = + octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64, + cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64); + const __m128i k__cospi_E = + octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64); + const __m128i k__cospi_F = + octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64); + const __m128i k__cospi_G = + octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64, + -cospi_8_64, -cospi_24_64, -cospi_8_64, -cospi_24_64); + const __m128i k__cospi_H = + octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64, + -cospi_24_64, cospi_8_64, -cospi_24_64, cospi_8_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); // This second rounding constant saves doing some extra adds at the end - const __m128i k__DCT_CONST_ROUNDING2 = _mm_set1_epi32(DCT_CONST_ROUNDING - +(DCT_CONST_ROUNDING << 1)); - const int DCT_CONST_BITS2 = DCT_CONST_BITS + 2; + const __m128i k__DCT_CONST_ROUNDING2 = + _mm_set1_epi32(DCT_CONST_ROUNDING + (DCT_CONST_ROUNDING << 1)); + const int DCT_CONST_BITS2 = DCT_CONST_BITS + 2; const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); __m128i in0, in1; @@ -90,14 +82,14 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) { #endif // Load inputs. - in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); - in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); - in1 = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *) - (input + 2 * stride))); - in0 = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *) - (input + 3 * stride))); - // in0 = [i0 i1 i2 i3 iC iD iE iF] - // in1 = [i4 i5 i6 i7 i8 i9 iA iB] + in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in1 = _mm_unpacklo_epi64( + in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride))); + in0 = _mm_unpacklo_epi64( + in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride))); +// in0 = [i0 i1 i2 i3 iC iD iE iF] +// in1 = [i4 i5 i6 i7 i8 i9 iA iB] #if DCT_HIGH_BIT_DEPTH // Check inputs small enough to use optimised code cmp0 = _mm_xor_si128(_mm_cmpgt_epi16(in0, _mm_set1_epi16(0x3ff)), @@ -192,10 +184,10 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) { // vertical DCTs finished. Now we do the horizontal DCTs. // Stage 3: Add/subtract - const __m128i t0 = ADD_EPI16(in0, in1); - const __m128i t1 = SUB_EPI16(in0, in1); // t0 = [c0 c1 c8 c9 c4 c5 cC cD] // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE] + const __m128i t0 = ADD_EPI16(in0, in1); + const __m128i t1 = SUB_EPI16(in0, in1); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x2(&t0, &t1); if (overflow) { @@ -263,7 +255,6 @@ void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) { storeu_output(&in1, output + 2 * 4); } - void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) { int pass; // Constants @@ -283,14 +274,14 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) { int overflow; #endif // Load input - __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); - __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); - __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); - __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); - __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); - __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); - __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); - __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); + __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); + __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); + __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); + __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); + __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); + __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); + __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); + __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); // Pre-condition input (shift by two) in0 = _mm_slli_epi16(in0, 2); in1 = _mm_slli_epi16(in1, 2); @@ -319,8 +310,8 @@ void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) { const __m128i q7 = SUB_EPI16(in0, in7); #if DCT_HIGH_BIT_DEPTH if (pass == 1) { - overflow = check_epi16_overflow_x8(&q0, &q1, &q2, &q3, - &q4, &q5, &q6, &q7); + overflow = + check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; @@ -630,22 +621,22 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { __m128i res08, res09, res10, res11, res12, res13, res14, res15; // Load and pre-condition input. if (0 == pass) { - in00 = _mm_load_si128((const __m128i *)(in + 0 * stride)); - in01 = _mm_load_si128((const __m128i *)(in + 1 * stride)); - in02 = _mm_load_si128((const __m128i *)(in + 2 * stride)); - in03 = _mm_load_si128((const __m128i *)(in + 3 * stride)); - in04 = _mm_load_si128((const __m128i *)(in + 4 * stride)); - in05 = _mm_load_si128((const __m128i *)(in + 5 * stride)); - in06 = _mm_load_si128((const __m128i *)(in + 6 * stride)); - in07 = _mm_load_si128((const __m128i *)(in + 7 * stride)); - in08 = _mm_load_si128((const __m128i *)(in + 8 * stride)); - in09 = _mm_load_si128((const __m128i *)(in + 9 * stride)); - in10 = _mm_load_si128((const __m128i *)(in + 10 * stride)); - in11 = _mm_load_si128((const __m128i *)(in + 11 * stride)); - in12 = _mm_load_si128((const __m128i *)(in + 12 * stride)); - in13 = _mm_load_si128((const __m128i *)(in + 13 * stride)); - in14 = _mm_load_si128((const __m128i *)(in + 14 * stride)); - in15 = _mm_load_si128((const __m128i *)(in + 15 * stride)); + in00 = _mm_load_si128((const __m128i *)(in + 0 * stride)); + in01 = _mm_load_si128((const __m128i *)(in + 1 * stride)); + in02 = _mm_load_si128((const __m128i *)(in + 2 * stride)); + in03 = _mm_load_si128((const __m128i *)(in + 3 * stride)); + in04 = _mm_load_si128((const __m128i *)(in + 4 * stride)); + in05 = _mm_load_si128((const __m128i *)(in + 5 * stride)); + in06 = _mm_load_si128((const __m128i *)(in + 6 * stride)); + in07 = _mm_load_si128((const __m128i *)(in + 7 * stride)); + in08 = _mm_load_si128((const __m128i *)(in + 8 * stride)); + in09 = _mm_load_si128((const __m128i *)(in + 9 * stride)); + in10 = _mm_load_si128((const __m128i *)(in + 10 * stride)); + in11 = _mm_load_si128((const __m128i *)(in + 11 * stride)); + in12 = _mm_load_si128((const __m128i *)(in + 12 * stride)); + in13 = _mm_load_si128((const __m128i *)(in + 13 * stride)); + in14 = _mm_load_si128((const __m128i *)(in + 14 * stride)); + in15 = _mm_load_si128((const __m128i *)(in + 15 * stride)); // x = x << 2 in00 = _mm_slli_epi16(in00, 2); in01 = _mm_slli_epi16(in01, 2); @@ -664,22 +655,22 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { in14 = _mm_slli_epi16(in14, 2); in15 = _mm_slli_epi16(in15, 2); } else { - in00 = _mm_load_si128((const __m128i *)(in + 0 * 16)); - in01 = _mm_load_si128((const __m128i *)(in + 1 * 16)); - in02 = _mm_load_si128((const __m128i *)(in + 2 * 16)); - in03 = _mm_load_si128((const __m128i *)(in + 3 * 16)); - in04 = _mm_load_si128((const __m128i *)(in + 4 * 16)); - in05 = _mm_load_si128((const __m128i *)(in + 5 * 16)); - in06 = _mm_load_si128((const __m128i *)(in + 6 * 16)); - in07 = _mm_load_si128((const __m128i *)(in + 7 * 16)); - in08 = _mm_load_si128((const __m128i *)(in + 8 * 16)); - in09 = _mm_load_si128((const __m128i *)(in + 9 * 16)); - in10 = _mm_load_si128((const __m128i *)(in + 10 * 16)); - in11 = _mm_load_si128((const __m128i *)(in + 11 * 16)); - in12 = _mm_load_si128((const __m128i *)(in + 12 * 16)); - in13 = _mm_load_si128((const __m128i *)(in + 13 * 16)); - in14 = _mm_load_si128((const __m128i *)(in + 14 * 16)); - in15 = _mm_load_si128((const __m128i *)(in + 15 * 16)); + in00 = _mm_load_si128((const __m128i *)(in + 0 * 16)); + in01 = _mm_load_si128((const __m128i *)(in + 1 * 16)); + in02 = _mm_load_si128((const __m128i *)(in + 2 * 16)); + in03 = _mm_load_si128((const __m128i *)(in + 3 * 16)); + in04 = _mm_load_si128((const __m128i *)(in + 4 * 16)); + in05 = _mm_load_si128((const __m128i *)(in + 5 * 16)); + in06 = _mm_load_si128((const __m128i *)(in + 6 * 16)); + in07 = _mm_load_si128((const __m128i *)(in + 7 * 16)); + in08 = _mm_load_si128((const __m128i *)(in + 8 * 16)); + in09 = _mm_load_si128((const __m128i *)(in + 9 * 16)); + in10 = _mm_load_si128((const __m128i *)(in + 10 * 16)); + in11 = _mm_load_si128((const __m128i *)(in + 11 * 16)); + in12 = _mm_load_si128((const __m128i *)(in + 12 * 16)); + in13 = _mm_load_si128((const __m128i *)(in + 13 * 16)); + in14 = _mm_load_si128((const __m128i *)(in + 14 * 16)); + in15 = _mm_load_si128((const __m128i *)(in + 15 * 16)); // x = (x + 1) >> 2 in00 = _mm_add_epi16(in00, kOne); in01 = _mm_add_epi16(in01, kOne); @@ -745,10 +736,9 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { step1_6 = SUB_EPI16(in01, in14); step1_7 = SUB_EPI16(in00, in15); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step1_0, &step1_1, - &step1_2, &step1_3, - &step1_4, &step1_5, - &step1_6, &step1_7); + overflow = + check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3, + &step1_4, &step1_5, &step1_6, &step1_7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -767,8 +757,8 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { const __m128i q6 = SUB_EPI16(input1, input6); const __m128i q7 = SUB_EPI16(input0, input7); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&q0, &q1, &q2, &q3, - &q4, &q5, &q6, &q7); + overflow = + check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -818,12 +808,12 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { // into 32 bits. const __m128i d0 = _mm_unpacklo_epi16(q6, q5); const __m128i d1 = _mm_unpackhi_epi16(q6, q5); - const __m128i r0 = mult_round_shift(&d0, &d1, &k__cospi_p16_m16, - &k__DCT_CONST_ROUNDING, - DCT_CONST_BITS); - const __m128i r1 = mult_round_shift(&d0, &d1, &k__cospi_p16_p16, - &k__DCT_CONST_ROUNDING, - DCT_CONST_BITS); + const __m128i r0 = + mult_round_shift(&d0, &d1, &k__cospi_p16_m16, + &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); + const __m128i r1 = + mult_round_shift(&d0, &d1, &k__cospi_p16_p16, + &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x2(&r0, &r1); if (overflow) { @@ -860,8 +850,8 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { res06 = mult_round_shift(&t2, &t3, &k__cospi_m20_p12, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&res02, &res14, - &res10, &res06); + overflow = + check_epi16_overflow_x4(&res02, &res14, &res10, &res06); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -888,8 +878,8 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { step2_4 = mult_round_shift(&t2, &t3, &k__cospi_p16_p16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&step2_2, &step2_3, &step2_5, - &step2_4); + overflow = + check_epi16_overflow_x4(&step2_2, &step2_3, &step2_5, &step2_4); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -907,10 +897,9 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { step3_6 = ADD_EPI16(step1_6, step2_5); step3_7 = ADD_EPI16(step1_7, step2_4); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step3_0, &step3_1, - &step3_2, &step3_3, - &step3_4, &step3_5, - &step3_6, &step3_7); + overflow = + check_epi16_overflow_x8(&step3_0, &step3_1, &step3_2, &step3_3, + &step3_4, &step3_5, &step3_6, &step3_7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -932,8 +921,8 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { step2_5 = mult_round_shift(&t2, &t3, &k__cospi_p08_m24, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x4(&step2_1, &step2_2, &step2_6, - &step2_5); + overflow = + check_epi16_overflow_x4(&step2_1, &step2_2, &step2_6, &step2_5); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -951,10 +940,9 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { step1_6 = SUB_EPI16(step3_7, step2_6); step1_7 = ADD_EPI16(step3_7, step2_6); #if DCT_HIGH_BIT_DEPTH - overflow = check_epi16_overflow_x8(&step1_0, &step1_1, - &step1_2, &step1_3, - &step1_4, &step1_5, - &step1_6, &step1_7); + overflow = + check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3, + &step1_4, &step1_5, &step1_6, &step1_7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; @@ -1006,16 +994,14 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) { } } // Transpose the results, do it as two 8x8 transposes. - transpose_and_output8x8(&res00, &res01, &res02, &res03, - &res04, &res05, &res06, &res07, - pass, out0, out1); - transpose_and_output8x8(&res08, &res09, &res10, &res11, - &res12, &res13, &res14, &res15, - pass, out0 + 8, out1 + 8); + transpose_and_output8x8(&res00, &res01, &res02, &res03, &res04, &res05, + &res06, &res07, pass, out0, out1); + transpose_and_output8x8(&res08, &res09, &res10, &res11, &res12, &res13, + &res14, &res15, pass, out0 + 8, out1 + 8); if (pass == 0) { - out0 += 8*16; + out0 += 8 * 16; } else { - out1 += 8*16; + out1 += 8 * 16; } } // Setup in/out for next pass. diff --git a/vp10/common/x86/vp10_fwd_txfm_sse2.c b/vp10/common/x86/vp10_fwd_txfm_sse2.c index 30bce5f7fc83c56096c20653b628a9a95e212ad9..015e86ec429c0a2c9b7e1c30af8cfa313950956c 100644 --- a/vp10/common/x86/vp10_fwd_txfm_sse2.c +++ b/vp10/common/x86/vp10_fwd_txfm_sse2.c @@ -19,12 +19,12 @@ void vp10_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { __m128i in0, in1; __m128i tmp; const __m128i zero = _mm_setzero_si128(); - in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); - in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); - in1 = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *) - (input + 2 * stride))); - in0 = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *) - (input + 3 * stride))); + in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in1 = _mm_unpacklo_epi64( + in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride))); + in0 = _mm_unpacklo_epi64( + in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride))); tmp = _mm_add_epi16(in0, in1); in0 = _mm_unpacklo_epi16(zero, tmp); @@ -45,19 +45,19 @@ void vp10_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { } void vp10_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) { - __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); - __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); - __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); - __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); + __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); + __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); + __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); + __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); __m128i u0, u1, sum; u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); - in0 = _mm_load_si128((const __m128i *)(input + 4 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 5 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 6 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 7 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 4 * stride)); + in1 = _mm_load_si128((const __m128i *)(input + 5 * stride)); + in2 = _mm_load_si128((const __m128i *)(input + 6 * stride)); + in3 = _mm_load_si128((const __m128i *)(input + 7 * stride)); sum = _mm_add_epi16(u0, u1); @@ -65,7 +65,7 @@ void vp10_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) { in2 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, in0); - u0 = _mm_setzero_si128(); + u0 = _mm_setzero_si128(); sum = _mm_add_epi16(sum, in2); in0 = _mm_unpacklo_epi16(u0, sum); @@ -85,7 +85,7 @@ void vp10_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) { } void vp10_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, - int stride) { + int stride) { __m128i in0, in1, in2, in3; __m128i u0, u1; __m128i sum = _mm_setzero_si128(); @@ -93,49 +93,49 @@ void vp10_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, for (i = 0; i < 2; ++i) { input += 8 * i; - in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); + in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); + in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); + in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 4 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 5 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 6 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 7 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 4 * stride)); + in1 = _mm_load_si128((const __m128i *)(input + 5 * stride)); + in2 = _mm_load_si128((const __m128i *)(input + 6 * stride)); + in3 = _mm_load_si128((const __m128i *)(input + 7 * stride)); sum = _mm_add_epi16(sum, u1); - u0 = _mm_add_epi16(in0, in1); - u1 = _mm_add_epi16(in2, in3); + u0 = _mm_add_epi16(in0, in1); + u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 8 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 9 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 10 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 11 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 8 * stride)); + in1 = _mm_load_si128((const __m128i *)(input + 9 * stride)); + in2 = _mm_load_si128((const __m128i *)(input + 10 * stride)); + in3 = _mm_load_si128((const __m128i *)(input + 11 * stride)); sum = _mm_add_epi16(sum, u1); - u0 = _mm_add_epi16(in0, in1); - u1 = _mm_add_epi16(in2, in3); + u0 = _mm_add_epi16(in0, in1); + u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 12 * stride)); - in1 = _mm_load_si128((const __m128i *)(input + 13 * stride)); - in2 = _mm_load_si128((const __m128i *)(input + 14 * stride)); - in3 = _mm_load_si128((const __m128i *)(input + 15 * stride)); + in0 = _mm_load_si128((const __m128i *)(input + 12 * stride)); + in1 = _mm_load_si128((const __m128i *)(input + 13 * stride)); + in2 = _mm_load_si128((const __m128i *)(input + 14 * stride)); + in3 = _mm_load_si128((const __m128i *)(input + 15 * stride)); sum = _mm_add_epi16(sum, u1); - u0 = _mm_add_epi16(in0, in1); - u1 = _mm_add_epi16(in2, in3); + u0 = _mm_add_epi16(in0, in1); + u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); sum = _mm_add_epi16(sum, u1); } - u0 = _mm_setzero_si128(); + u0 = _mm_setzero_si128(); in0 = _mm_unpacklo_epi16(u0, sum); in1 = _mm_unpackhi_epi16(u0, sum); in0 = _mm_srai_epi32(in0, 16); @@ -154,60 +154,60 @@ void vp10_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, } void vp10_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, - int stride) { + int stride) { __m128i in0, in1, in2, in3; __m128i u0, u1; __m128i sum = _mm_setzero_si128(); int i; for (i = 0; i < 8; ++i) { - in0 = _mm_load_si128((const __m128i *)(input + 0)); - in1 = _mm_load_si128((const __m128i *)(input + 8)); - in2 = _mm_load_si128((const __m128i *)(input + 16)); - in3 = _mm_load_si128((const __m128i *)(input + 24)); + in0 = _mm_load_si128((const __m128i *)(input + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 16)); + in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 0)); - in1 = _mm_load_si128((const __m128i *)(input + 8)); - in2 = _mm_load_si128((const __m128i *)(input + 16)); - in3 = _mm_load_si128((const __m128i *)(input + 24)); + in0 = _mm_load_si128((const __m128i *)(input + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 16)); + in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; sum = _mm_add_epi16(sum, u1); - u0 = _mm_add_epi16(in0, in1); - u1 = _mm_add_epi16(in2, in3); + u0 = _mm_add_epi16(in0, in1); + u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 0)); - in1 = _mm_load_si128((const __m128i *)(input + 8)); - in2 = _mm_load_si128((const __m128i *)(input + 16)); - in3 = _mm_load_si128((const __m128i *)(input + 24)); + in0 = _mm_load_si128((const __m128i *)(input + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 16)); + in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; sum = _mm_add_epi16(sum, u1); - u0 = _mm_add_epi16(in0, in1); - u1 = _mm_add_epi16(in2, in3); + u0 = _mm_add_epi16(in0, in1); + u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); - in0 = _mm_load_si128((const __m128i *)(input + 0)); - in1 = _mm_load_si128((const __m128i *)(input + 8)); - in2 = _mm_load_si128((const __m128i *)(input + 16)); - in3 = _mm_load_si128((const __m128i *)(input + 24)); + in0 = _mm_load_si128((const __m128i *)(input + 0)); + in1 = _mm_load_si128((const __m128i *)(input + 8)); + in2 = _mm_load_si128((const __m128i *)(input + 16)); + in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; sum = _mm_add_epi16(sum, u1); - u0 = _mm_add_epi16(in0, in1); - u1 = _mm_add_epi16(in2, in3); + u0 = _mm_add_epi16(in0, in1); + u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); sum = _mm_add_epi16(sum, u1); } - u0 = _mm_setzero_si128(); + u0 = _mm_setzero_si128(); in0 = _mm_unpacklo_epi16(u0, sum); in1 = _mm_unpackhi_epi16(u0, sum); in0 = _mm_srai_epi32(in0, 16); @@ -230,43 +230,43 @@ void vp10_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, #define FDCT8x8_2D vp10_fdct8x8_sse2 #define FDCT16x16_2D vp10_fdct16x16_sse2 #include "vp10/common/x86/vp10_fwd_txfm_impl_sse2.h" -#undef FDCT4x4_2D -#undef FDCT8x8_2D -#undef FDCT16x16_2D +#undef FDCT4x4_2D +#undef FDCT8x8_2D +#undef FDCT16x16_2D #define FDCT32x32_2D vp10_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 #include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" -#undef FDCT32x32_2D -#undef FDCT32x32_HIGH_PRECISION +#undef FDCT32x32_2D +#undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vp10_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 #include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT -#undef FDCT32x32_2D -#undef FDCT32x32_HIGH_PRECISION -#undef DCT_HIGH_BIT_DEPTH +#undef FDCT32x32_2D +#undef FDCT32x32_HIGH_PRECISION +#undef DCT_HIGH_BIT_DEPTH #if CONFIG_VP9_HIGHBITDEPTH #define DCT_HIGH_BIT_DEPTH 1 #define FDCT4x4_2D vp10_highbd_fdct4x4_sse2 #define FDCT8x8_2D vp10_highbd_fdct8x8_sse2 #define FDCT16x16_2D vp10_highbd_fdct16x16_sse2 -#include "vp10/common/x86/vp10_fwd_txfm_impl_sse2.h" // NOLINT -#undef FDCT4x4_2D -#undef FDCT8x8_2D -#undef FDCT16x16_2D +#include "vp10/common/x86/vp10_fwd_txfm_impl_sse2.h" // NOLINT +#undef FDCT4x4_2D +#undef FDCT8x8_2D +#undef FDCT16x16_2D #define FDCT32x32_2D vp10_highbd_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 -#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT -#undef FDCT32x32_2D -#undef FDCT32x32_HIGH_PRECISION +#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT +#undef FDCT32x32_2D +#undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vp10_highbd_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 -#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT -#undef FDCT32x32_2D -#undef FDCT32x32_HIGH_PRECISION -#undef DCT_HIGH_BIT_DEPTH +#include "vp10/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT +#undef FDCT32x32_2D +#undef FDCT32x32_HIGH_PRECISION +#undef DCT_HIGH_BIT_DEPTH #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c b/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c index 0251022197d2fd5ba42e6dfbc44be4606c471097..ed043455f767161ba87313548a405c58a5c1785b 100644 --- a/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c +++ b/vp10/common/x86/vp10_highbd_convolve_filters_sse4.c @@ -13,126 +13,126 @@ #if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_EXT_INTERP DECLARE_ALIGNED(16, const int16_t, - sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = { + sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = { { - { 0, 0, 0, 0, 0, 0, 0, 0, }, - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 127, -6, 127, -6, 127, -6, 127, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 127, -6, 127, -6, 127, -6, 127 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 5, -2, 5, -2, 5, -2, 5, }, - {-12, 124, -12, 124, -12, 124, -12, 124, }, - { 18, -7, 18, -7, 18, -7, 18, -7, }, - { 3, -2, 3, -2, 3, -2, 3, -2, }, - { 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 5, -2, 5, -2, 5, -2, 5 }, + { -12, 124, -12, 124, -12, 124, -12, 124 }, + { 18, -7, 18, -7, 18, -7, 18, -7 }, + { 3, -2, 3, -2, 3, -2, 3, -2 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -3, 7, -3, 7, -3, 7, -3, 7, }, - {-17, 119, -17, 119, -17, 119, -17, 119, }, - { 28, -11, 28, -11, 28, -11, 28, -11, }, - { 5, -2, 5, -2, 5, -2, 5, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -3, 7, -3, 7, -3, 7, -3, 7 }, + { -17, 119, -17, 119, -17, 119, -17, 119 }, + { 28, -11, 28, -11, 28, -11, 28, -11 }, + { 5, -2, 5, -2, 5, -2, 5, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {-20, 114, -20, 114, -20, 114, -20, 114, }, - { 38, -14, 38, -14, 38, -14, 38, -14, }, - { 7, -3, 7, -3, 7, -3, 7, -3, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { -20, 114, -20, 114, -20, 114, -20, 114 }, + { 38, -14, 38, -14, 38, -14, 38, -14 }, + { 7, -3, 7, -3, 7, -3, 7, -3 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -4, 9, -4, 9, -4, 9, -4, 9, }, - {-22, 107, -22, 107, -22, 107, -22, 107, }, - { 49, -17, 49, -17, 49, -17, 49, -17, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -4, 9, -4, 9, -4, 9, -4, 9 }, + { -22, 107, -22, 107, -22, 107, -22, 107 }, + { 49, -17, 49, -17, 49, -17, 49, -17 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, }, - {-24, 99, -24, 99, -24, 99, -24, 99, }, - { 59, -20, 59, -20, 59, -20, 59, -20, }, - { 9, -4, 9, -4, 9, -4, 9, -4, }, - { 2, 0, 2, 0, 2, 0, 2, 0, }, + { 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10 }, + { -24, 99, -24, 99, -24, 99, -24, 99 }, + { 59, -20, 59, -20, 59, -20, 59, -20 }, + { 9, -4, 9, -4, 9, -4, 9, -4 }, + { 2, 0, 2, 0, 2, 0, 2, 0 }, }, { - { 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, }, - {-24, 90, -24, 90, -24, 90, -24, 90, }, - { 70, -22, 70, -22, 70, -22, 70, -22, }, - { 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, }, + { 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10 }, + { -24, 90, -24, 90, -24, 90, -24, 90 }, + { 70, -22, 70, -22, 70, -22, 70, -22 }, + { 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0 }, }, { - { 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, }, - {-23, 80, -23, 80, -23, 80, -23, 80, }, - { 80, -23, 80, -23, 80, -23, 80, -23, }, - { 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, }, + { 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10 }, + { -23, 80, -23, 80, -23, 80, -23, 80 }, + { 80, -23, 80, -23, 80, -23, 80, -23 }, + { 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0 }, }, { - { 0, 2, 0, 2, 0, 2, 0, 2, }, - { -5, 10, -5, 10, -5, 10, -5, 10, }, - {-22, 70, -22, 70, -22, 70, -22, 70, }, - { 90, -24, 90, -24, 90, -24, 90, -24, }, - { 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, }, + { 0, 2, 0, 2, 0, 2, 0, 2 }, + { -5, 10, -5, 10, -5, 10, -5, 10 }, + { -22, 70, -22, 70, -22, 70, -22, 70 }, + { 90, -24, 90, -24, 90, -24, 90, -24 }, + { 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0 }, }, { - { 0, 2, 0, 2, 0, 2, 0, 2, }, - { -4, 9, -4, 9, -4, 9, -4, 9, }, - {-20, 59, -20, 59, -20, 59, -20, 59, }, - { 99, -24, 99, -24, 99, -24, 99, -24, }, - { 10, -5, 10, -5, 10, -5, 10, -5, }, - { 2, 0, 2, 0, 2, 0, 2, 0, }, + { 0, 2, 0, 2, 0, 2, 0, 2 }, + { -4, 9, -4, 9, -4, 9, -4, 9 }, + { -20, 59, -20, 59, -20, 59, -20, 59 }, + { 99, -24, 99, -24, 99, -24, 99, -24 }, + { 10, -5, 10, -5, 10, -5, 10, -5 }, + { 2, 0, 2, 0, 2, 0, 2, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {-17, 49, -17, 49, -17, 49, -17, 49, }, - {107, -22, 107, -22, 107, -22, 107, -22, }, - { 9, -4, 9, -4, 9, -4, 9, -4, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { -17, 49, -17, 49, -17, 49, -17, 49 }, + { 107, -22, 107, -22, 107, -22, 107, -22 }, + { 9, -4, 9, -4, 9, -4, 9, -4 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -3, 7, -3, 7, -3, 7, -3, 7, }, - {-14, 38, -14, 38, -14, 38, -14, 38, }, - {114, -20, 114, -20, 114, -20, 114, -20, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -3, 7, -3, 7, -3, 7, -3, 7 }, + { -14, 38, -14, 38, -14, 38, -14, 38 }, + { 114, -20, 114, -20, 114, -20, 114, -20 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 5, -2, 5, -2, 5, -2, 5, }, - {-11, 28, -11, 28, -11, 28, -11, 28, }, - {119, -17, 119, -17, 119, -17, 119, -17, }, - { 7, -3, 7, -3, 7, -3, 7, -3, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 5, -2, 5, -2, 5, -2, 5 }, + { -11, 28, -11, 28, -11, 28, -11, 28 }, + { 119, -17, 119, -17, 119, -17, 119, -17 }, + { 7, -3, 7, -3, 7, -3, 7, -3 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 0, 0, 0, 0, 0, 0, 0, }, - { -2, 3, -2, 3, -2, 3, -2, 3, }, - { -7, 18, -7, 18, -7, 18, -7, 18, }, - {124, -12, 124, -12, 124, -12, 124, -12, }, - { 5, -2, 5, -2, 5, -2, 5, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { -2, 3, -2, 3, -2, 3, -2, 3 }, + { -7, 18, -7, 18, -7, 18, -7, 18 }, + { 124, -12, 124, -12, 124, -12, 124, -12 }, + { 5, -2, 5, -2, 5, -2, 5, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { 0, 0, 0, 0, 0, 0, 0, 0, }, - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {127, -6, 127, -6, 127, -6, 127, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { 127, -6, 127, -6, 127, -6, 127, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, }, }; #endif @@ -140,253 +140,254 @@ DECLARE_ALIGNED(16, const int16_t, #if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_EXT_INTERP DECLARE_ALIGNED(16, const int16_t, - sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = { + sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = { { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 3, -2, 3, -2, 3, -2, 3, }, - { -7, 127, -7, 127, -7, 127, -7, 127, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 3, -2, 3, -2, 3, -2, 3 }, + { -7, 127, -7, 127, -7, 127, -7, 127 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -3, 6, -3, 6, -3, 6, -3, 6, }, - {-13, 124, -13, 124, -13, 124, -13, 124, }, - { 18, -8, 18, -8, 18, -8, 18, -8, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -3, 6, -3, 6, -3, 6, -3, 6 }, + { -13, 124, -13, 124, -13, 124, -13, 124 }, + { 18, -8, 18, -8, 18, -8, 18, -8 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, }, { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {-18, 120, -18, 120, -18, 120, -18, 120, }, - { 28, -12, 28, -12, 28, -12, 28, -12, }, - { 7, -4, 7, -4, 7, -4, 7, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { -18, 120, -18, 120, -18, 120, -18, 120 }, + { 28, -12, 28, -12, 28, -12, 28, -12 }, + { 7, -4, 7, -4, 7, -4, 7, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, }, { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 10, -6, 10, -6, 10, -6, 10, }, - {-21, 115, -21, 115, -21, 115, -21, 115, }, - { 38, -15, 38, -15, 38, -15, 38, -15, }, - { 8, -5, 8, -5, 8, -5, 8, -5, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 10, -6, 10, -6, 10, -6, 10 }, + { -21, 115, -21, 115, -21, 115, -21, 115 }, + { 38, -15, 38, -15, 38, -15, 38, -15 }, + { 8, -5, 8, -5, 8, -5, 8, -5 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, }, { - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -6, 12, -6, 12, -6, 12, -6, 12, }, - {-24, 108, -24, 108, -24, 108, -24, 108, }, - { 49, -18, 49, -18, 49, -18, 49, -18, }, - { 10, -6, 10, -6, 10, -6, 10, -6, }, - { 3, -2, 3, -2, 3, -2, 3, -2, }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -6, 12, -6, 12, -6, 12, -6, 12 }, + { -24, 108, -24, 108, -24, 108, -24, 108 }, + { 49, -18, 49, -18, 49, -18, 49, -18 }, + { 10, -6, 10, -6, 10, -6, 10, -6 }, + { 3, -2, 3, -2, 3, -2, 3, -2 }, }, { - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, }, - {-25, 100, -25, 100, -25, 100, -25, 100, }, - { 60, -21, 60, -21, 60, -21, 60, -21, }, - { 11, -7, 11, -7, 11, -7, 11, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13 }, + { -25, 100, -25, 100, -25, 100, -25, 100 }, + { 60, -21, 60, -21, 60, -21, 60, -21 }, + { 11, -7, 11, -7, 11, -7, 11, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, }, { - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, }, - {-26, 91, -26, 91, -26, 91, -26, 91, }, - { 71, -24, 71, -24, 71, -24, 71, -24, }, - { 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13 }, + { -26, 91, -26, 91, -26, 91, -26, 91 }, + { 71, -24, 71, -24, 71, -24, 71, -24 }, + { 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, }, { - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, }, - {-25, 81, -25, 81, -25, 81, -25, 81, }, - { 81, -25, 81, -25, 81, -25, 81, -25, }, - { 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13 }, + { -25, 81, -25, 81, -25, 81, -25, 81 }, + { 81, -25, 81, -25, 81, -25, 81, -25 }, + { 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, }, { - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 13, -7, 13, -7, 13, -7, 13, }, - {-24, 71, -24, 71, -24, 71, -24, 71, }, - { 91, -26, 91, -26, 91, -26, 91, -26, }, - { 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 13, -7, 13, -7, 13, -7, 13 }, + { -24, 71, -24, 71, -24, 71, -24, 71 }, + { 91, -26, 91, -26, 91, -26, 91, -26 }, + { 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, }, { - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -7, 11, -7, 11, -7, 11, -7, 11, }, - {-21, 60, -21, 60, -21, 60, -21, 60, }, - {100, -25, 100, -25, 100, -25, 100, -25, }, - { 13, -7, 13, -7, 13, -7, 13, -7, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -7, 11, -7, 11, -7, 11, -7, 11 }, + { -21, 60, -21, 60, -21, 60, -21, 60 }, + { 100, -25, 100, -25, 100, -25, 100, -25 }, + { 13, -7, 13, -7, 13, -7, 13, -7 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, }, { - { -2, 3, -2, 3, -2, 3, -2, 3, }, - { -6, 10, -6, 10, -6, 10, -6, 10, }, - {-18, 49, -18, 49, -18, 49, -18, 49, }, - {108, -24, 108, -24, 108, -24, 108, -24, }, - { 12, -6, 12, -6, 12, -6, 12, -6, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, + { -2, 3, -2, 3, -2, 3, -2, 3 }, + { -6, 10, -6, 10, -6, 10, -6, 10 }, + { -18, 49, -18, 49, -18, 49, -18, 49 }, + { 108, -24, 108, -24, 108, -24, 108, -24 }, + { 12, -6, 12, -6, 12, -6, 12, -6 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, }, { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -5, 8, -5, 8, -5, 8, -5, 8, }, - {-15, 38, -15, 38, -15, 38, -15, 38, }, - {115, -21, 115, -21, 115, -21, 115, -21, }, - { 10, -6, 10, -6, 10, -6, 10, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -5, 8, -5, 8, -5, 8, -5, 8 }, + { -15, 38, -15, 38, -15, 38, -15, 38 }, + { 115, -21, 115, -21, 115, -21, 115, -21 }, + { 10, -6, 10, -6, 10, -6, 10, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, }, { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 7, -4, 7, -4, 7, -4, 7, }, - {-12, 28, -12, 28, -12, 28, -12, 28, }, - {120, -18, 120, -18, 120, -18, 120, -18, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 7, -4, 7, -4, 7, -4, 7 }, + { -12, 28, -12, 28, -12, 28, -12, 28 }, + { 120, -18, 120, -18, 120, -18, 120, -18 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, }, { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -8, 18, -8, 18, -8, 18, -8, 18, }, - {124, -13, 124, -13, 124, -13, 124, -13, }, - { 6, -3, 6, -3, 6, -3, 6, -3, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -8, 18, -8, 18, -8, 18, -8, 18 }, + { 124, -13, 124, -13, 124, -13, 124, -13 }, + { 6, -3, 6, -3, 6, -3, 6, -3 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {127, -7, 127, -7, 127, -7, 127, -7, }, - { 3, -2, 3, -2, 3, -2, 3, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { 127, -7, 127, -7, 127, -7, 127, -7 }, + { 3, -2, 3, -2, 3, -2, 3, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, }; #endif #endif #if CONFIG_VP9_HIGHBITDEPTH #if USE_TEMPORALFILTER_12TAP -DECLARE_ALIGNED(16, const int16_t, - sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = { - { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -7, 127, -7, 127, -7, 127, -7, 127, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, - { 0, 0, 0, 0, 0, 0, 0, 0, }, +DECLARE_ALIGNED( + 16, const int16_t, + sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = { + { + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -7, 127, -7, 127, -7, 127, -7, 127 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, }, - { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -3, 5, -3, 5, -3, 5, -3, 5, }, - {-12, 124, -12, 124, -12, 124, -12, 124, }, - { 18, -8, 18, -8, 18, -8, 18, -8, }, - { 4, -2, 4, -2, 4, -2, 4, -2, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {-17, 120, -17, 120, -17, 120, -17, 120, }, - { 28, -11, 28, -11, 28, -11, 28, -11, }, - { 6, -3, 6, -3, 6, -3, 6, -3, }, - { 1, -1, 1, -1, 1, -1, 1, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 10, -4, 10, -4, 10, -4, 10, }, - {-21, 114, -21, 114, -21, 114, -21, 114, }, - { 38, -15, 38, -15, 38, -15, 38, -15, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -5, 11, -5, 11, -5, 11, -5, 11, }, - {-23, 107, -23, 107, -23, 107, -23, 107, }, - { 49, -18, 49, -18, 49, -18, 49, -18, }, - { 9, -5, 9, -5, 9, -5, 9, -5, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, }, - {-25, 99, -25, 99, -25, 99, -25, 99, }, - { 60, -21, 60, -21, 60, -21, 60, -21, }, - { 11, -6, 11, -6, 11, -6, 11, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, }, - {-25, 90, -25, 90, -25, 90, -25, 90, }, - { 70, -23, 70, -23, 70, -23, 70, -23, }, - { 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, }, - {-24, 80, -24, 80, -24, 80, -24, 80, }, - { 80, -24, 80, -24, 80, -24, 80, -24, }, - { 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 12, -6, 12, -6, 12, -6, 12, }, - {-23, 70, -23, 70, -23, 70, -23, 70, }, - { 90, -25, 90, -25, 90, -25, 90, -25, }, - { 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 3, -1, 3, -1, 3, -1, 3, }, - { -6, 11, -6, 11, -6, 11, -6, 11, }, - {-21, 60, -21, 60, -21, 60, -21, 60, }, - { 99, -25, 99, -25, 99, -25, 99, -25, }, - { 12, -6, 12, -6, 12, -6, 12, -6, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -5, 9, -5, 9, -5, 9, -5, 9, }, - {-18, 49, -18, 49, -18, 49, -18, 49, }, - {107, -23, 107, -23, 107, -23, 107, -23, }, - { 11, -5, 11, -5, 11, -5, 11, -5, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - }, - { - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {-15, 38, -15, 38, -15, 38, -15, 38, }, - {114, -21, 114, -21, 114, -21, 114, -21, }, - { 10, -4, 10, -4, 10, -4, 10, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, - }, - { - { -1, 1, -1, 1, -1, 1, -1, 1, }, - { -3, 6, -3, 6, -3, 6, -3, 6, }, - {-11, 28, -11, 28, -11, 28, -11, 28, }, - {120, -17, 120, -17, 120, -17, 120, -17, }, - { 8, -4, 8, -4, 8, -4, 8, -4, }, - { 2, -1, 2, -1, 2, -1, 2, -1, }, + { + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -3, 5, -3, 5, -3, 5, -3, 5 }, + { -12, 124, -12, 124, -12, 124, -12, 124 }, + { 18, -8, 18, -8, 18, -8, 18, -8 }, + { 4, -2, 4, -2, 4, -2, 4, -2 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { -17, 120, -17, 120, -17, 120, -17, 120 }, + { 28, -11, 28, -11, 28, -11, 28, -11 }, + { 6, -3, 6, -3, 6, -3, 6, -3 }, + { 1, -1, 1, -1, 1, -1, 1, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 10, -4, 10, -4, 10, -4, 10 }, + { -21, 114, -21, 114, -21, 114, -21, 114 }, + { 38, -15, 38, -15, 38, -15, 38, -15 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -5, 11, -5, 11, -5, 11, -5, 11 }, + { -23, 107, -23, 107, -23, 107, -23, 107 }, + { 49, -18, 49, -18, 49, -18, 49, -18 }, + { 9, -5, 9, -5, 9, -5, 9, -5 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12 }, + { -25, 99, -25, 99, -25, 99, -25, 99 }, + { 60, -21, 60, -21, 60, -21, 60, -21 }, + { 11, -6, 11, -6, 11, -6, 11, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12 }, + { -25, 90, -25, 90, -25, 90, -25, 90 }, + { 70, -23, 70, -23, 70, -23, 70, -23 }, + { 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12 }, + { -24, 80, -24, 80, -24, 80, -24, 80 }, + { 80, -24, 80, -24, 80, -24, 80, -24 }, + { 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 12, -6, 12, -6, 12, -6, 12 }, + { -23, 70, -23, 70, -23, 70, -23, 70 }, + { 90, -25, 90, -25, 90, -25, 90, -25 }, + { 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 3, -1, 3, -1, 3, -1, 3 }, + { -6, 11, -6, 11, -6, 11, -6, 11 }, + { -21, 60, -21, 60, -21, 60, -21, 60 }, + { 99, -25, 99, -25, 99, -25, 99, -25 }, + { 12, -6, 12, -6, 12, -6, 12, -6 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -5, 9, -5, 9, -5, 9, -5, 9 }, + { -18, 49, -18, 49, -18, 49, -18, 49 }, + { 107, -23, 107, -23, 107, -23, 107, -23 }, + { 11, -5, 11, -5, 11, -5, 11, -5 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + }, + { + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { -15, 38, -15, 38, -15, 38, -15, 38 }, + { 114, -21, 114, -21, 114, -21, 114, -21 }, + { 10, -4, 10, -4, 10, -4, 10, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, + }, + { + { -1, 1, -1, 1, -1, 1, -1, 1 }, + { -3, 6, -3, 6, -3, 6, -3, 6 }, + { -11, 28, -11, 28, -11, 28, -11, 28 }, + { 120, -17, 120, -17, 120, -17, 120, -17 }, + { 8, -4, 8, -4, 8, -4, 8, -4 }, + { 2, -1, 2, -1, 2, -1, 2, -1 }, }, { - { 0, 1, 0, 1, 0, 1, 0, 1, }, - { -2, 4, -2, 4, -2, 4, -2, 4, }, - { -8, 18, -8, 18, -8, 18, -8, 18, }, - {124, -12, 124, -12, 124, -12, 124, -12, }, - { 5, -3, 5, -3, 5, -3, 5, -3, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, - }, + { 0, 1, 0, 1, 0, 1, 0, 1 }, + { -2, 4, -2, 4, -2, 4, -2, 4 }, + { -8, 18, -8, 18, -8, 18, -8, 18 }, + { 124, -12, 124, -12, 124, -12, 124, -12 }, + { 5, -3, 5, -3, 5, -3, 5, -3 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, + }, { - { 0, 0, 0, 0, 0, 0, 0, 0, }, - { -1, 2, -1, 2, -1, 2, -1, 2, }, - { -4, 8, -4, 8, -4, 8, -4, 8, }, - {127, -7, 127, -7, 127, -7, 127, -7, }, - { 3, -1, 3, -1, 3, -1, 3, -1, }, - { 1, 0, 1, 0, 1, 0, 1, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { -1, 2, -1, 2, -1, 2, -1, 2 }, + { -4, 8, -4, 8, -4, 8, -4, 8 }, + { 127, -7, 127, -7, 127, -7, 127, -7 }, + { 3, -1, 3, -1, 3, -1, 3, -1 }, + { 1, 0, 1, 0, 1, 0, 1, 0 }, }, }; #endif diff --git a/vp10/common/x86/vp10_highbd_convolve_sse4.c b/vp10/common/x86/vp10_highbd_convolve_sse4.c index e8e4f77bb891046a4b2899736a2aa2c1fc1fb394..e8009a810369ab18e55cac58e93b9ba2f6cda013 100644 --- a/vp10/common/x86/vp10_highbd_convolve_sse4.c +++ b/vp10/common/x86/vp10_highbd_convolve_sse4.c @@ -14,15 +14,14 @@ #include "./vp10_rtcd.h" #include "vp10/common/filter.h" -typedef void (*TransposeSave)(const int width, int pixelsNum, - uint32_t *src, int src_stride, - uint16_t *dst, int dst_stride, +typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src, + int src_stride, uint16_t *dst, int dst_stride, int bd); // pixelsNum 0: write all 4 pixels // 1/2/3: residual pixels 1/2/3 -static void writePixel(__m128i *u, int width, int pixelsNum, - uint16_t *dst, int dst_stride) { +static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst, + int dst_stride) { if (2 == width) { if (0 == pixelsNum) { *(int *)dst = _mm_cvtsi128_si32(u[0]); @@ -112,18 +111,15 @@ static void transClipPixel(uint32_t *src, int src_stride, __m128i *u, int bd) { // pixelsNum = 0 : all 4 rows of pixels will be saved. // pixelsNum = 1/2/3 : residual 1/2/4 rows of pixels will be saved. -void trans_save_4x4(const int width, int pixelsNum, - uint32_t *src, int src_stride, - uint16_t *dst, int dst_stride, - int bd) { +void trans_save_4x4(const int width, int pixelsNum, uint32_t *src, + int src_stride, uint16_t *dst, int dst_stride, int bd) { __m128i u[4]; transClipPixel(src, src_stride, u, bd); writePixel(u, width, pixelsNum, dst, dst_stride); } -void trans_accum_save_4x4(const int width, int pixelsNum, - uint32_t *src, int src_stride, - uint16_t *dst, int dst_stride, +void trans_accum_save_4x4(const int width, int pixelsNum, uint32_t *src, + int src_stride, uint16_t *dst, int dst_stride, int bd) { __m128i u[4], v[4]; const __m128i ones = _mm_set1_epi16(1); @@ -153,8 +149,7 @@ void trans_accum_save_4x4(const int width, int pixelsNum, writePixel(u, width, pixelsNum, dst, dst_stride); } -static TransposeSave transSaveTab[2] = { - trans_save_4x4, trans_accum_save_4x4}; +static TransposeSave transSaveTab[2] = { trans_save_4x4, trans_accum_save_4x4 }; static INLINE void transpose_pair(__m128i *in, __m128i *out) { __m128i x0, x1; @@ -178,8 +173,8 @@ static INLINE void transpose_pair(__m128i *in, __m128i *out) { out[5] = _mm_unpackhi_epi64(x0, x1); } -static void highbd_filter_horiz(const uint16_t *src, int src_stride, - __m128i *f, int tapsNum, uint32_t *buf) { +static void highbd_filter_horiz(const uint16_t *src, int src_stride, __m128i *f, + int tapsNum, uint32_t *buf) { __m128i u[8], v[6]; if (tapsNum == 10) { @@ -218,8 +213,8 @@ static void highbd_filter_horiz(const uint16_t *src, int src_stride, } void vp10_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, - int w, int h, + uint16_t *dst, int dst_stride, int w, + int h, const InterpFilterParams filter_params, const int subpel_x_q4, int x_step_q4, int avg, int bd) { @@ -239,8 +234,8 @@ void vp10_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride, return; } - vCoeffs = vp10_hbd_get_subpel_filter_ver_signal_dir( - filter_params, subpel_x_q4 - 1); + vCoeffs = + vp10_hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1); if (!vCoeffs) { vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4, avg, @@ -276,8 +271,7 @@ void vp10_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride, blkHeight--; } - if (blkResidu == 0) - return; + if (blkResidu == 0) return; for (col = 0; col < w; col += 4) { for (i = 0; i < 4; ++i) { @@ -318,7 +312,7 @@ static void write2pixelsAccum(__m128i *u, int bd, uint16_t *dst) { *(uint32_t *)dst = _mm_cvtsi128_si32(v); } -WritePixels write2pixelsTab[2] = {write2pixelsOnly, write2pixelsAccum}; +WritePixels write2pixelsTab[2] = { write2pixelsOnly, write2pixelsAccum }; static void write4pixelsOnly(__m128i *u, int bd, uint16_t *dst) { highbdRndingPacks(u); @@ -339,7 +333,7 @@ static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) { _mm_storel_epi64((__m128i *)dst, v); } -WritePixels write4pixelsTab[2] = {write4pixelsOnly, write4pixelsAccum}; +WritePixels write4pixelsTab[2] = { write4pixelsOnly, write4pixelsAccum }; static void filter_vert_horiz_parallel(const uint16_t *src, int src_stride, const __m128i *f, int taps, @@ -388,9 +382,8 @@ static void filter_vert_horiz_parallel(const uint16_t *src, int src_stride, } static void highbd_filter_vert_compute_large(const uint16_t *src, - int src_stride, - const __m128i *f, int taps, - int w, int h, + int src_stride, const __m128i *f, + int taps, int w, int h, uint16_t *dst, int dst_stride, int avg, int bd) { int col; @@ -402,8 +395,8 @@ static void highbd_filter_vert_compute_large(const uint16_t *src, do { for (col = 0; col < w; col += step) { - filter_vert_horiz_parallel(src_ptr, src_stride, f, taps, - dst_ptr, write4pixels, bd); + filter_vert_horiz_parallel(src_ptr, src_stride, f, taps, dst_ptr, + write4pixels, bd); src_ptr += step; dst_ptr += step; } @@ -414,9 +407,8 @@ static void highbd_filter_vert_compute_large(const uint16_t *src, } static void highbd_filter_vert_compute_small(const uint16_t *src, - int src_stride, - const __m128i *f, int taps, - int w, int h, + int src_stride, const __m128i *f, + int taps, int w, int h, uint16_t *dst, int dst_stride, int avg, int bd) { int rowIndex = 0; @@ -424,8 +416,7 @@ static void highbd_filter_vert_compute_small(const uint16_t *src, (void)w; do { - filter_vert_horiz_parallel(src, src_stride, f, taps, dst, write2pixels, - bd); + filter_vert_horiz_parallel(src, src_stride, f, taps, dst, write2pixels, bd); rowIndex++; src += src_stride; dst += dst_stride; @@ -433,8 +424,8 @@ static void highbd_filter_vert_compute_small(const uint16_t *src, } void vp10_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, - uint16_t *dst, int dst_stride, - int w, int h, + uint16_t *dst, int dst_stride, int w, + int h, const InterpFilterParams filter_params, const int subpel_y_q4, int y_step_q4, int avg, int bd) { @@ -444,17 +435,15 @@ void vp10_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, if (0 == subpel_y_q4 || 16 != y_step_q4) { vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, - filter_params, subpel_y_q4, y_step_q4, avg, - bd); + filter_params, subpel_y_q4, y_step_q4, avg, bd); return; } - vCoeffs = vp10_hbd_get_subpel_filter_ver_signal_dir( - filter_params, subpel_y_q4 - 1); + vCoeffs = + vp10_hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1); if (!vCoeffs) { vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, - filter_params, subpel_y_q4, y_step_q4, avg, - bd); + filter_params, subpel_y_q4, y_step_q4, avg, bd); return; } @@ -468,10 +457,10 @@ void vp10_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride, src -= src_stride * ((tapsNum >> 1) - 1); if (w > 2) { - highbd_filter_vert_compute_large(src, src_stride, verf, tapsNum, w, h, - dst, dst_stride, avg, bd); + highbd_filter_vert_compute_large(src, src_stride, verf, tapsNum, w, h, dst, + dst_stride, avg, bd); } else { - highbd_filter_vert_compute_small(src, src_stride, verf, tapsNum, w, h, - dst, dst_stride, avg, bd); + highbd_filter_vert_compute_small(src, src_stride, verf, tapsNum, w, h, dst, + dst_stride, avg, bd); } } diff --git a/vp10/common/x86/vp10_inv_txfm_sse2.c b/vp10/common/x86/vp10_inv_txfm_sse2.c index 0e9ee55a0d64f7730d1f8b8e8e3bb56a72235702..b731cdf945a2de5ae32ed7d53d521cf6bde2d316 100644 --- a/vp10/common/x86/vp10_inv_txfm_sse2.c +++ b/vp10/common/x86/vp10_inv_txfm_sse2.c @@ -12,14 +12,14 @@ #include "vp10/common/x86/vp10_inv_txfm_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" -#define RECON_AND_STORE4X4(dest, in_x) \ -{ \ - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - *(int *)(dest) = _mm_cvtsi128_si32(d0); \ -} +#define RECON_AND_STORE4X4(dest, in_x) \ + { \ + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ + d0 = _mm_unpacklo_epi8(d0, zero); \ + d0 = _mm_add_epi16(in_x, d0); \ + d0 = _mm_packus_epi16(d0, d0); \ + *(int *)(dest) = _mm_cvtsi128_si32(d0); \ + } void vp10_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); @@ -261,192 +261,189 @@ void vp10_iadst4_sse2(__m128i *in) { in[1] = _mm_packs_epi32(u[2], u[3]); } -#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \ - const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \ - const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \ - const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \ - const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \ - const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); \ - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \ - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \ - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \ - \ - out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ - out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ - out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ - out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ - out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \ - out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \ - out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \ - out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \ +#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ + out2, out3, out4, out5, out6, out7) \ + { \ + const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ + const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \ + const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \ + const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \ + const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \ + const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \ + const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \ + \ + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ + const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); \ + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ + const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \ + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ + const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \ + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ + const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \ + \ + out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ + out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ + out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ + out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ + out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \ + out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \ + out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \ + out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \ } -#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, \ - out0, out1, out2, out3) \ - { \ - const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0); \ - const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3); \ - const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ - \ - out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ - out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ - out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ - out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ +#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, out0, out1, out2, out3) \ + { \ + const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0); \ + const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3); \ + const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2); \ + \ + const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ + const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ + const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ + const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ + \ + out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ + out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ + out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ + out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ } #define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ + { \ + const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ + const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ + out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ + out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ } // Define Macro for multiplying elements by constants and adding them together. -#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \ - cst0, cst1, cst2, cst3, res0, res1, res2, res3) \ - { \ - tmp0 = _mm_madd_epi16(lo_0, cst0); \ - tmp1 = _mm_madd_epi16(hi_0, cst0); \ - tmp2 = _mm_madd_epi16(lo_0, cst1); \ - tmp3 = _mm_madd_epi16(hi_0, cst1); \ - tmp4 = _mm_madd_epi16(lo_1, cst2); \ - tmp5 = _mm_madd_epi16(hi_1, cst2); \ - tmp6 = _mm_madd_epi16(lo_1, cst3); \ - tmp7 = _mm_madd_epi16(hi_1, cst3); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - tmp4 = _mm_add_epi32(tmp4, rounding); \ - tmp5 = _mm_add_epi32(tmp5, rounding); \ - tmp6 = _mm_add_epi32(tmp6, rounding); \ - tmp7 = _mm_add_epi32(tmp7, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \ - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \ - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \ - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \ - \ - res0 = _mm_packs_epi32(tmp0, tmp1); \ - res1 = _mm_packs_epi32(tmp2, tmp3); \ - res2 = _mm_packs_epi32(tmp4, tmp5); \ - res3 = _mm_packs_epi32(tmp6, tmp7); \ +#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, cst0, cst1, cst2, cst3, \ + res0, res1, res2, res3) \ + { \ + tmp0 = _mm_madd_epi16(lo_0, cst0); \ + tmp1 = _mm_madd_epi16(hi_0, cst0); \ + tmp2 = _mm_madd_epi16(lo_0, cst1); \ + tmp3 = _mm_madd_epi16(hi_0, cst1); \ + tmp4 = _mm_madd_epi16(lo_1, cst2); \ + tmp5 = _mm_madd_epi16(hi_1, cst2); \ + tmp6 = _mm_madd_epi16(lo_1, cst3); \ + tmp7 = _mm_madd_epi16(hi_1, cst3); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + tmp4 = _mm_add_epi32(tmp4, rounding); \ + tmp5 = _mm_add_epi32(tmp5, rounding); \ + tmp6 = _mm_add_epi32(tmp6, rounding); \ + tmp7 = _mm_add_epi32(tmp7, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \ + tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \ + tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \ + tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \ + \ + res0 = _mm_packs_epi32(tmp0, tmp1); \ + res1 = _mm_packs_epi32(tmp2, tmp3); \ + res2 = _mm_packs_epi32(tmp4, tmp5); \ + res3 = _mm_packs_epi32(tmp6, tmp7); \ } #define MULTIPLICATION_AND_ADD_2(lo_0, hi_0, cst0, cst1, res0, res1) \ - { \ - tmp0 = _mm_madd_epi16(lo_0, cst0); \ - tmp1 = _mm_madd_epi16(hi_0, cst0); \ - tmp2 = _mm_madd_epi16(lo_0, cst1); \ - tmp3 = _mm_madd_epi16(hi_0, cst1); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - res0 = _mm_packs_epi32(tmp0, tmp1); \ - res1 = _mm_packs_epi32(tmp2, tmp3); \ + { \ + tmp0 = _mm_madd_epi16(lo_0, cst0); \ + tmp1 = _mm_madd_epi16(hi_0, cst0); \ + tmp2 = _mm_madd_epi16(lo_0, cst1); \ + tmp3 = _mm_madd_epi16(hi_0, cst1); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + res0 = _mm_packs_epi32(tmp0, tmp1); \ + res1 = _mm_packs_epi32(tmp2, tmp3); \ } -#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) \ - { \ - /* Stage1 */ \ - { \ - const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \ - const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \ - const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \ - const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \ - \ - MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, \ - stg1_1, stg1_2, stg1_3, stp1_4, \ - stp1_7, stp1_5, stp1_6) \ - } \ - \ - /* Stage2 */ \ - { \ - const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \ - const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \ - const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \ - const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \ - \ - MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, \ - stg2_1, stg2_2, stg2_3, stp2_0, \ - stp2_1, stp2_2, stp2_3) \ - \ - stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_56, stg2_1); \ - tmp1 = _mm_madd_epi16(hi_56, stg2_1); \ - tmp2 = _mm_madd_epi16(lo_56, stg2_0); \ - tmp3 = _mm_madd_epi16(hi_56, stg2_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - } \ - \ - /* Stage4 */ \ - out0 = _mm_adds_epi16(stp1_0, stp2_7); \ - out1 = _mm_adds_epi16(stp1_1, stp1_6); \ - out2 = _mm_adds_epi16(stp1_2, stp1_5); \ - out3 = _mm_adds_epi16(stp1_3, stp2_4); \ - out4 = _mm_subs_epi16(stp1_3, stp2_4); \ - out5 = _mm_subs_epi16(stp1_2, stp1_5); \ - out6 = _mm_subs_epi16(stp1_1, stp1_6); \ - out7 = _mm_subs_epi16(stp1_0, stp2_7); \ +#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, \ + out4, out5, out6, out7) \ + { \ + /* Stage1 */ \ + { \ + const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \ + const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \ + const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \ + const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \ + \ + MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, stg1_1, \ + stg1_2, stg1_3, stp1_4, stp1_7, stp1_5, stp1_6) \ + } \ + \ + /* Stage2 */ \ + { \ + const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \ + const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \ + const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \ + const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \ + \ + MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, stg2_1, \ + stg2_2, stg2_3, stp2_0, stp2_1, stp2_2, stp2_3) \ + \ + stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); \ + stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \ + stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \ + stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \ + stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \ + stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \ + stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \ + \ + tmp0 = _mm_madd_epi16(lo_56, stg2_1); \ + tmp1 = _mm_madd_epi16(hi_56, stg2_1); \ + tmp2 = _mm_madd_epi16(lo_56, stg2_0); \ + tmp3 = _mm_madd_epi16(hi_56, stg2_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + } \ + \ + /* Stage4 */ \ + out0 = _mm_adds_epi16(stp1_0, stp2_7); \ + out1 = _mm_adds_epi16(stp1_1, stp1_6); \ + out2 = _mm_adds_epi16(stp1_2, stp1_5); \ + out3 = _mm_adds_epi16(stp1_3, stp2_4); \ + out4 = _mm_subs_epi16(stp1_3, stp2_4); \ + out5 = _mm_subs_epi16(stp1_2, stp1_5); \ + out6 = _mm_subs_epi16(stp1_1, stp1_6); \ + out7 = _mm_subs_epi16(stp1_0, stp2_7); \ } void vp10_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { @@ -481,12 +478,12 @@ void vp10_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { // 2-D for (i = 0; i < 2; i++) { // 8x8 Transpose is copied from vp10_fdct8x8_sse2() - TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, + in4, in5, in6, in7); // 4-stage 1D vp10_idct8x8 - IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); + IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, + in6, in7); } // Final rounding and shift @@ -556,12 +553,12 @@ void vp10_idct8_sse2(__m128i *in) { __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // 8x8 Transpose is copied from vp10_fdct8x8_sse2() - TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], - in0, in1, in2, in3, in4, in5, in6, in7); + TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], in0, + in1, in2, in3, in4, in5, in6, in7); // 4-stage 1D vp10_idct8x8 - IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); + IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in[0], in[1], in[2], in[3], + in[4], in[5], in[6], in[7]); } void vp10_iadst8_sse2(__m128i *in) { @@ -901,8 +898,8 @@ void vp10_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3) - IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, - in0, in1, in2, in3, in4, in5, in6, in7); + IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, in0, in1, in2, in3, in4, + in5, in6, in7); // Final rounding and shift in0 = _mm_adds_epi16(in0, final_rounding); in1 = _mm_adds_epi16(in1, final_rounding); @@ -932,245 +929,237 @@ void vp10_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) { RECON_AND_STORE(dest + 7 * stride, in7); } -#define IDCT16 \ - /* Stage2 */ \ - { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \ - const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]); \ - const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \ - const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \ - const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \ - stg2_0, stg2_1, stg2_2, stg2_3, \ - stp2_8, stp2_15, stp2_9, stp2_14) \ - \ - MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, \ - stg2_4, stg2_5, stg2_6, stg2_7, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \ - const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \ - const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \ - \ - MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \ - stg3_0, stg3_1, stg3_2, stg3_3, \ - stp1_4, stp1_7, stp1_5, stp1_6) \ - \ - stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \ - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ - \ - stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \ - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ - } \ - \ - /* Stage4 */ \ - { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \ - const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \ - const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, \ - stg4_0, stg4_1, stg4_2, stg4_3, \ - stp2_0, stp2_1, stp2_2, stp2_3) \ - \ - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ - stg4_4, stg4_5, stg4_6, stg4_7, \ - stp2_9, stp2_14, stp2_10, stp2_13) \ - } \ - \ - /* Stage5 */ \ - { \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ - \ - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ - } \ - \ - /* Stage6 */ \ - { \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ +#define IDCT16 \ + /* Stage2 */ \ + { \ + const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ + const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \ + const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]); \ + const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \ + const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \ + const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \ + const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \ + const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, stg2_0, stg2_1, \ + stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, stp2_14) \ + \ + MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, stg2_4, stg2_5, \ + stg2_6, stg2_7, stp2_10, stp2_13, stp2_11, stp2_12) \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \ + const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \ + const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \ + const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \ + \ + MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, stg3_0, stg3_1, \ + stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, stp1_6) \ + \ + stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \ + stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ + stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ + \ + stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \ + stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \ + const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \ + const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \ + const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, stg4_0, stg4_1, \ + stg4_2, stg4_3, stp2_0, stp2_1, stp2_2, stp2_3) \ + \ + stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ + stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ + stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ + stg4_5, stg4_6, stg4_7, stp2_9, stp2_14, stp2_10, \ + stp2_13) \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ + stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ + stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ + \ + stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \ + stp2_12) \ } -#define IDCT16_10 \ - /* Stage2 */ \ - { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, \ - stg2_0, stg2_1, stg2_6, stg2_7, \ - stp1_8_0, stp1_15, stp1_11, stp1_12_0) \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \ - \ - MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, \ - stg3_0, stg3_1, \ - stp2_4, stp2_7) \ - \ - stp1_9 = stp1_8_0; \ - stp1_10 = stp1_11; \ - \ - stp1_13 = stp1_12_0; \ - stp1_14 = stp1_15; \ - } \ - \ - /* Stage4 */ \ - { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, \ - stg4_0, stg4_1, \ - stp1_0, stp1_1) \ - stp2_5 = stp2_4; \ - stp2_6 = stp2_7; \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ - stg4_4, stg4_5, stg4_6, stg4_7, \ - stp2_9, stp2_14, stp2_10, stp2_13) \ - } \ - \ - /* Stage5 */ \ - { \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_2 = stp1_1; \ - stp1_3 = stp1_0; \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ - \ - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ - } \ - \ - /* Stage6 */ \ - { \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } +#define IDCT16_10 \ + /* Stage2 */ \ + { \ + const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ + const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \ + const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \ + const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, stg2_0, stg2_1, \ + stg2_6, stg2_7, stp1_8_0, stp1_15, stp1_11, \ + stp1_12_0) \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \ + const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \ + \ + MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, stg3_0, stg3_1, stp2_4, stp2_7) \ + \ + stp1_9 = stp1_8_0; \ + stp1_10 = stp1_11; \ + \ + stp1_13 = stp1_12_0; \ + stp1_14 = stp1_15; \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \ + const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, stg4_0, stg4_1, stp1_0, stp1_1) \ + stp2_5 = stp2_4; \ + stp2_6 = stp2_7; \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ + stg4_5, stg4_6, stg4_7, stp2_9, stp2_14, stp2_10, \ + stp2_13) \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + \ + stp1_2 = stp1_1; \ + stp1_3 = stp1_0; \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ + \ + stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \ + stp2_12) \ + } void vp10_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, - int stride) { + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); @@ -1202,10 +1191,10 @@ void vp10_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, __m128i in[16], l[16], r[16], *curr1; __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_8_0, stp1_12_0; __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; @@ -1294,8 +1283,7 @@ void vp10_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest, } } -void vp10_idct16x16_1_add_sse2(const int16_t *input, - uint8_t *dest, +void vp10_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); @@ -1308,16 +1296,16 @@ void vp10_idct16x16_1_add_sse2(const int16_t *input, dc_value = _mm_set1_epi16(a); for (i = 0; i < 2; ++i) { - RECON_AND_STORE(dest + 0 * stride, dc_value); - RECON_AND_STORE(dest + 1 * stride, dc_value); - RECON_AND_STORE(dest + 2 * stride, dc_value); - RECON_AND_STORE(dest + 3 * stride, dc_value); - RECON_AND_STORE(dest + 4 * stride, dc_value); - RECON_AND_STORE(dest + 5 * stride, dc_value); - RECON_AND_STORE(dest + 6 * stride, dc_value); - RECON_AND_STORE(dest + 7 * stride, dc_value); - RECON_AND_STORE(dest + 8 * stride, dc_value); - RECON_AND_STORE(dest + 9 * stride, dc_value); + RECON_AND_STORE(dest + 0 * stride, dc_value); + RECON_AND_STORE(dest + 1 * stride, dc_value); + RECON_AND_STORE(dest + 2 * stride, dc_value); + RECON_AND_STORE(dest + 3 * stride, dc_value); + RECON_AND_STORE(dest + 4 * stride, dc_value); + RECON_AND_STORE(dest + 5 * stride, dc_value); + RECON_AND_STORE(dest + 6 * stride, dc_value); + RECON_AND_STORE(dest + 7 * stride, dc_value); + RECON_AND_STORE(dest + 8 * stride, dc_value); + RECON_AND_STORE(dest + 9 * stride, dc_value); RECON_AND_STORE(dest + 10 * stride, dc_value); RECON_AND_STORE(dest + 11 * stride, dc_value); RECON_AND_STORE(dest + 12 * stride, dc_value); @@ -1901,9 +1889,9 @@ static void vp10_idct16_8col(__m128i *in) { u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - s[8] = _mm_packs_epi32(u[0], u[1]); + s[8] = _mm_packs_epi32(u[0], u[1]); s[15] = _mm_packs_epi32(u[2], u[3]); - s[9] = _mm_packs_epi32(u[4], u[5]); + s[9] = _mm_packs_epi32(u[4], u[5]); s[14] = _mm_packs_epi32(u[6], u[7]); s[10] = _mm_packs_epi32(u[8], u[9]); s[13] = _mm_packs_epi32(u[10], u[11]); @@ -2031,7 +2019,7 @@ static void vp10_idct16_8col(__m128i *in) { s[7] = _mm_add_epi16(t[6], t[7]); s[8] = t[8]; s[15] = t[15]; - s[9] = _mm_packs_epi32(u[8], u[9]); + s[9] = _mm_packs_epi32(u[8], u[9]); s[14] = _mm_packs_epi32(u[10], u[11]); s[10] = _mm_packs_epi32(u[12], u[13]); s[13] = _mm_packs_epi32(u[14], u[15]); @@ -2155,7 +2143,7 @@ void vp10_iadst16_sse2(__m128i *in0, __m128i *in1) { } void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, - int stride) { + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); @@ -2177,11 +2165,11 @@ void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); __m128i in[16], l[16]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; + __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_8, + stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, stp1_8_0, + stp1_12_0; __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; // First 1-D inverse DCT @@ -2213,7 +2201,7 @@ void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - stp2_8 = _mm_packs_epi32(tmp0, tmp2); + stp2_8 = _mm_packs_epi32(tmp0, tmp2); stp2_11 = _mm_packs_epi32(tmp5, tmp7); } @@ -2277,9 +2265,9 @@ void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, tmp2 = _mm_add_epi16(stp2_9, stp2_10); tmp3 = _mm_sub_epi16(stp2_9, stp2_10); - stp1_9 = _mm_unpacklo_epi64(tmp2, zero); + stp1_9 = _mm_unpacklo_epi64(tmp2, zero); stp1_10 = _mm_unpacklo_epi64(tmp3, zero); - stp1_8 = _mm_unpacklo_epi64(tmp0, zero); + stp1_8 = _mm_unpacklo_epi64(tmp0, zero); stp1_11 = _mm_unpacklo_epi64(tmp1, zero); stp1_13 = _mm_unpackhi_epi64(tmp3, zero); @@ -2391,650 +2379,647 @@ void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest, } } -#define LOAD_DQCOEFF(reg, input) \ - { \ - reg = _mm_load_si128((const __m128i *) input); \ - input += 8; \ - } \ - -#define IDCT32_34 \ -/* Stage1 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \ - const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \ - \ - const __m128i lo_25_7= _mm_unpacklo_epi16(zero, in[7]); \ - const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]); \ - \ - const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero); \ - const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero); \ - \ - const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]); \ - const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]); \ - \ - MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, \ - stg1_1, stp1_16, stp1_31); \ - MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, \ - stg1_7, stp1_19, stp1_28); \ - MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, \ - stg1_9, stp1_20, stp1_27); \ - MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, \ - stg1_15, stp1_23, stp1_24); \ -} \ -\ -/* Stage2 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \ - const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \ - \ - const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]); \ - const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]); \ - \ - MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, \ - stg2_1, stp2_8, stp2_15); \ - MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, \ - stg2_7, stp2_11, stp2_12); \ - \ - stp2_16 = stp1_16; \ - stp2_19 = stp1_19; \ - \ - stp2_20 = stp1_20; \ - stp2_23 = stp1_23; \ - \ - stp2_24 = stp1_24; \ - stp2_27 = stp1_27; \ - \ - stp2_28 = stp1_28; \ - stp2_31 = stp1_31; \ -} \ -\ -/* Stage3 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \ - const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \ - \ - const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31); \ - const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27); \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp2_24); \ - \ - MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, \ - stg3_1, stp1_4, stp1_7); \ - \ - stp1_8 = stp2_8; \ - stp1_11 = stp2_11; \ - stp1_12 = stp2_12; \ - stp1_15 = stp2_15; \ - \ - MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ - stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ - stp1_18, stp1_29) \ - MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ - stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ - stp1_22, stp1_25) \ - \ - stp1_16 = stp2_16; \ - stp1_31 = stp2_31; \ - stp1_19 = stp2_19; \ - stp1_20 = stp2_20; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_27 = stp2_27; \ - stp1_28 = stp2_28; \ -} \ -\ -/* Stage4 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \ - const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12); \ - \ - MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, \ - stg4_1, stp2_0, stp2_1); \ - \ - stp2_4 = stp1_4; \ - stp2_5 = stp1_4; \ - stp2_6 = stp1_7; \ - stp2_7 = stp1_7; \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ - stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ - stp2_10, stp2_13) \ - \ - stp2_8 = stp1_8; \ - stp2_15 = stp1_15; \ - stp2_11 = stp1_11; \ - stp2_12 = stp1_12; \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ - stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ - stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ - stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ - stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ - stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ - stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ - stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ - stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ -} \ -\ -/* Stage5 */ \ -{ \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ - const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - stp1_0 = stp2_0; \ - stp1_1 = stp2_1; \ - stp1_2 = stp2_1; \ - stp1_3 = stp2_0; \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_4 = stp2_4; \ - stp1_7 = stp2_7; \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - \ - MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ - stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ - stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - \ - stp1_22 = stp2_22; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_25 = stp2_25; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} \ -\ -/* Stage6 */ \ -{ \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ - \ - stp2_8 = stp1_8; \ - stp2_9 = stp1_9; \ - stp2_14 = stp1_14; \ - stp2_15 = stp1_15; \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ - stp2_13, stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ - stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ - stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ - stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ - \ - stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ - stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ - stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ - stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ -} \ -\ -/* Stage7 */ \ -{ \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ - const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ - stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ - stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ - stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ - stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ - stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ - stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ - stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ - stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ - stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - stp1_18 = stp2_18; \ - stp1_19 = stp2_19; \ - \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ - \ - stp1_28 = stp2_28; \ - stp1_29 = stp2_29; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} +#define LOAD_DQCOEFF(reg, input) \ + { \ + reg = _mm_load_si128((const __m128i *)input); \ + input += 8; \ + } +#define IDCT32_34 \ + /* Stage1 */ \ + { \ + const __m128i zero = _mm_setzero_si128(); \ + const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \ + const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \ + \ + const __m128i lo_25_7 = _mm_unpacklo_epi16(zero, in[7]); \ + const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]); \ + \ + const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero); \ + const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero); \ + \ + const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]); \ + const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]); \ + \ + MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, stg1_1, stp1_16, \ + stp1_31); \ + MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, stg1_7, stp1_19, \ + stp1_28); \ + MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, stg1_9, stp1_20, \ + stp1_27); \ + MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, stg1_15, stp1_23, \ + stp1_24); \ + } \ + \ + /* Stage2 */ \ + { \ + const __m128i zero = _mm_setzero_si128(); \ + const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \ + const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \ + \ + const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]); \ + const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]); \ + \ + MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, stg2_1, stp2_8, \ + stp2_15); \ + MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, stg2_7, stp2_11, \ + stp2_12); \ + \ + stp2_16 = stp1_16; \ + stp2_19 = stp1_19; \ + \ + stp2_20 = stp1_20; \ + stp2_23 = stp1_23; \ + \ + stp2_24 = stp1_24; \ + stp2_27 = stp1_27; \ + \ + stp2_28 = stp1_28; \ + stp2_31 = stp1_31; \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i zero = _mm_setzero_si128(); \ + const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \ + const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \ + \ + const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31); \ + const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27); \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp2_24); \ + \ + MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, stg3_1, stp1_4, \ + stp1_7); \ + \ + stp1_8 = stp2_8; \ + stp1_11 = stp2_11; \ + stp1_12 = stp2_12; \ + stp1_15 = stp2_15; \ + \ + MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ + stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, stp1_18, \ + stp1_29) \ + MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ + stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, stp1_22, \ + stp1_25) \ + \ + stp1_16 = stp2_16; \ + stp1_31 = stp2_31; \ + stp1_19 = stp2_19; \ + stp1_20 = stp2_20; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_27 = stp2_27; \ + stp1_28 = stp2_28; \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i zero = _mm_setzero_si128(); \ + const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \ + const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12); \ + \ + MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, stg4_1, stp2_0, \ + stp2_1); \ + \ + stp2_4 = stp1_4; \ + stp2_5 = stp1_4; \ + stp2_6 = stp1_7; \ + stp2_7 = stp1_7; \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ + stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, stp2_10, \ + stp2_13) \ + \ + stp2_8 = stp1_8; \ + stp2_15 = stp1_15; \ + stp2_11 = stp1_11; \ + stp2_12 = stp1_12; \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ + stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ + stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ + stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ + stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ + stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ + \ + stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ + stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ + stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ + stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ + stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ + \ + const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ + const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + stp1_0 = stp2_0; \ + stp1_1 = stp2_1; \ + stp1_2 = stp2_1; \ + stp1_3 = stp2_0; \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_4 = stp2_4; \ + stp1_7 = stp2_7; \ + \ + stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + \ + MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ + stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, stp1_19, \ + stp1_28) \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ + stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, stp1_21, \ + stp1_26) \ + \ + stp1_22 = stp2_22; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_25 = stp2_25; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ + \ + stp2_8 = stp1_8; \ + stp2_9 = stp1_9; \ + stp2_14 = stp1_14; \ + stp2_15 = stp1_15; \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \ + stp2_12) \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ + stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ + stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ + stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ + stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ + stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ + \ + stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ + stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ + stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ + stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ + stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ + } \ + \ + /* Stage7 */ \ + { \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ + const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ + const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ + stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ + stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ + stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ + stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ + stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ + stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ + stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ + stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ + stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + stp1_18 = stp2_18; \ + stp1_19 = stp2_19; \ + \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, stp1_21, \ + stp1_26) \ + MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, stp1_23, \ + stp1_24) \ + \ + stp1_28 = stp2_28; \ + stp1_29 = stp2_29; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ + } -#define IDCT32 \ -/* Stage1 */ \ -{ \ - const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \ - const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]); \ - const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]); \ - const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]); \ - \ - const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]); \ - const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]); \ - const __m128i lo_25_7= _mm_unpacklo_epi16(in[25], in[7]); \ - const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]); \ - \ - const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]); \ - const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]); \ - const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]); \ - const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]); \ - \ - const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]); \ - const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]); \ - const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]); \ - const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, \ - stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, \ - stp1_17, stp1_30) \ - MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, \ - stg1_5, stg1_6, stg1_7, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, \ - stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, \ - stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ -} \ -\ -/* Stage2 */ \ -{ \ - const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], in[30]); \ - const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]); \ - const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]); \ - const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]); \ - \ - const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]); \ - const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]); \ - const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]); \ - const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]); \ - \ - MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, \ - stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, \ - stp2_14) \ - MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, \ - stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, \ - stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_17); \ - stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); \ - stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_18); \ - \ - stp2_20 = _mm_add_epi16(stp1_20, stp1_21); \ - stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_22); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_25); \ - stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); \ - stp2_27 = _mm_add_epi16(stp1_27, stp1_26); \ - \ - stp2_28 = _mm_add_epi16(stp1_28, stp1_29); \ - stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); \ - stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_31, stp1_30); \ -} \ -\ -/* Stage3 */ \ -{ \ - const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]); \ - const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]); \ - const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]); \ - const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]); \ - \ - const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); \ - const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - \ - MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, \ - stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, \ - stp1_6) \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_9); \ - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ - stp1_12 = _mm_add_epi16(stp2_12, stp2_13); \ - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ - \ - MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ - stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ - stp1_18, stp1_29) \ - MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ - stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ - stp1_22, stp1_25) \ - \ - stp1_16 = stp2_16; \ - stp1_31 = stp2_31; \ - stp1_19 = stp2_19; \ - stp1_20 = stp2_20; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_27 = stp2_27; \ - stp1_28 = stp2_28; \ -} \ -\ -/* Stage4 */ \ -{ \ - const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]); \ - const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]); \ - const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]); \ - const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, \ - stg4_1, stg4_2, stg4_3, stp2_0, stp2_1, \ - stp2_2, stp2_3) \ - \ - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ - stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ - stp2_10, stp2_13) \ - \ - stp2_8 = stp1_8; \ - stp2_15 = stp1_15; \ - stp2_11 = stp1_11; \ - stp2_12 = stp1_12; \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ - stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ - stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ - stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ - stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ - stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ - stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ - stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ - stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ -} \ -\ -/* Stage5 */ \ -{ \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ - const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_4 = stp2_4; \ - stp1_7 = stp2_7; \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - \ - MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ - stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ - stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - \ - stp1_22 = stp2_22; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_25 = stp2_25; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} \ -\ -/* Stage6 */ \ -{ \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ - \ - stp2_8 = stp1_8; \ - stp2_9 = stp1_9; \ - stp2_14 = stp1_14; \ - stp2_15 = stp1_15; \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ - stp2_13, stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ - stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ - stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ - stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ - \ - stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ - stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ - stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ - stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ -} \ -\ -/* Stage7 */ \ -{ \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ - const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ - stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ - stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ - stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ - stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ - stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ - stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ - stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ - stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ - stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - stp1_18 = stp2_18; \ - stp1_19 = stp2_19; \ - \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ - \ - stp1_28 = stp2_28; \ - stp1_29 = stp2_29; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} +#define IDCT32 \ + /* Stage1 */ \ + { \ + const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \ + const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]); \ + const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]); \ + const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]); \ + \ + const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]); \ + const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]); \ + const __m128i lo_25_7 = _mm_unpacklo_epi16(in[25], in[7]); \ + const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]); \ + \ + const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]); \ + const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]); \ + const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]); \ + const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]); \ + \ + const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]); \ + const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]); \ + const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]); \ + const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]); \ + \ + MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, \ + stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, stp1_17, \ + stp1_30) \ + MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, stg1_5, \ + stg1_6, stg1_7, stp1_18, stp1_29, stp1_19, stp1_28) \ + MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, \ + stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, \ + stp1_21, stp1_26) \ + MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, \ + stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, \ + stp1_23, stp1_24) \ + } \ + \ + /* Stage2 */ \ + { \ + const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], in[30]); \ + const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]); \ + const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]); \ + const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]); \ + \ + const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]); \ + const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]); \ + const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]); \ + const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]); \ + \ + MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, \ + stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, \ + stp2_14) \ + MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, \ + stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, stp2_11, \ + stp2_12) \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_17); \ + stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); \ + stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); \ + stp2_19 = _mm_add_epi16(stp1_19, stp1_18); \ + \ + stp2_20 = _mm_add_epi16(stp1_20, stp1_21); \ + stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); \ + stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); \ + stp2_23 = _mm_add_epi16(stp1_23, stp1_22); \ + \ + stp2_24 = _mm_add_epi16(stp1_24, stp1_25); \ + stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); \ + stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); \ + stp2_27 = _mm_add_epi16(stp1_27, stp1_26); \ + \ + stp2_28 = _mm_add_epi16(stp1_28, stp1_29); \ + stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); \ + stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_31, stp1_30); \ + } \ + \ + /* Stage3 */ \ + { \ + const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]); \ + const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]); \ + const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]); \ + const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]); \ + \ + const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); \ + const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ + \ + MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, \ + stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, \ + stp1_6) \ + \ + stp1_8 = _mm_add_epi16(stp2_8, stp2_9); \ + stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ + stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ + stp1_12 = _mm_add_epi16(stp2_12, stp2_13); \ + stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ + \ + MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ + stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, stp1_18, \ + stp1_29) \ + MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ + stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, stp1_22, \ + stp1_25) \ + \ + stp1_16 = stp2_16; \ + stp1_31 = stp2_31; \ + stp1_19 = stp2_19; \ + stp1_20 = stp2_20; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_27 = stp2_27; \ + stp1_28 = stp2_28; \ + } \ + \ + /* Stage4 */ \ + { \ + const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]); \ + const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]); \ + const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]); \ + const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]); \ + \ + const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ + const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + \ + MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, stg4_1, \ + stg4_2, stg4_3, stp2_0, stp2_1, stp2_2, stp2_3) \ + \ + stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ + stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ + stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ + \ + MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ + stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, stp2_10, \ + stp2_13) \ + \ + stp2_8 = stp1_8; \ + stp2_15 = stp1_15; \ + stp2_11 = stp1_11; \ + stp2_12 = stp1_12; \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ + stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ + stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ + stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ + stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ + stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ + \ + stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ + stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ + stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ + stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ + stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ + } \ + \ + /* Stage5 */ \ + { \ + const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ + const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ + const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ + const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ + \ + const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ + const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ + stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ + stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ + \ + tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ + tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ + tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ + tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ + \ + tmp0 = _mm_add_epi32(tmp0, rounding); \ + tmp1 = _mm_add_epi32(tmp1, rounding); \ + tmp2 = _mm_add_epi32(tmp2, rounding); \ + tmp3 = _mm_add_epi32(tmp3, rounding); \ + \ + tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ + tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ + tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ + tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ + \ + stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ + stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ + \ + stp1_4 = stp2_4; \ + stp1_7 = stp2_7; \ + \ + stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ + stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ + stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ + stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ + stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + \ + MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ + stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, stp1_19, \ + stp1_28) \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ + stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, stp1_21, \ + stp1_26) \ + \ + stp1_22 = stp2_22; \ + stp1_23 = stp2_23; \ + stp1_24 = stp2_24; \ + stp1_25 = stp2_25; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ + } \ + \ + /* Stage6 */ \ + { \ + const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ + const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ + const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ + const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ + \ + stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ + stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ + stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ + stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ + stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ + stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ + stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ + stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ + \ + stp2_8 = stp1_8; \ + stp2_9 = stp1_9; \ + stp2_14 = stp1_14; \ + stp2_15 = stp1_15; \ + \ + MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \ + stp2_12) \ + \ + stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ + stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ + stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ + stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ + stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ + stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ + stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ + stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ + \ + stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ + stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ + stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ + stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ + stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ + stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ + stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ + stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ + } \ + \ + /* Stage7 */ \ + { \ + const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ + const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ + const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ + const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ + \ + const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ + const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ + const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ + const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ + \ + stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ + stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ + stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ + stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ + stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ + stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ + stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ + stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ + stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ + stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ + stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ + stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ + stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ + stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ + stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ + stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ + \ + stp1_16 = stp2_16; \ + stp1_17 = stp2_17; \ + stp1_18 = stp2_18; \ + stp1_19 = stp2_19; \ + \ + MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, stp1_21, \ + stp1_26) \ + MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ + stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, stp1_23, \ + stp1_24) \ + \ + stp1_28 = stp2_28; \ + stp1_29 = stp2_29; \ + stp1_30 = stp2_30; \ + stp1_31 = stp2_31; \ + } // Only upper-left 8x8 has non-zero coeff void vp10_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, - int stride) { + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<5); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); // vp10_idct constants for each stage const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); @@ -3070,15 +3055,13 @@ void vp10_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, __m128i in[32], col[32]; __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, - stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, - stp1_30, stp1_31; + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, stp1_23, + stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, stp1_30, stp1_31; __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, - stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, - stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, - stp2_30, stp2_31; + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, + stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, stp2_23, + stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i; @@ -3191,7 +3174,7 @@ void vp10_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest, } void vp10_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, - int stride) { + int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); @@ -3246,15 +3229,13 @@ void vp10_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, __m128i in[32], col[128], zero_idx[16]; __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, - stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, - stp1_30, stp1_31; + stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, + stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, stp1_23, + stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, stp1_30, stp1_31; __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, - stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, - stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, - stp2_30, stp2_31; + stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, + stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, stp2_23, + stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31; __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int i, j, i32; @@ -3466,8 +3447,7 @@ void vp10_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest, } } -void vp10_idct32x32_1_add_sse2(const int16_t *input, - uint8_t *dest, +void vp10_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) { __m128i dc_value; const __m128i zero = _mm_setzero_si128(); @@ -3503,7 +3483,7 @@ static INLINE __m128i clamp_high_sse2(__m128i value, int bd) { } void vp10_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[4 * 4]; tran_low_t *outptr = out; int i, j; @@ -3607,8 +3587,7 @@ void vp10_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, tran_low_t temp_in[4], temp_out[4]; // Columns for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; + for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; vp10_highbd_idct4_c(temp_in, temp_out, bd); for (j = 0; j < 4; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -3619,7 +3598,7 @@ void vp10_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[8 * 8]; tran_low_t *outptr = out; int i, j, test; @@ -3697,19 +3676,18 @@ void vp10_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, __m128i d[8]; for (i = 0; i < 8; i++) { inptr[i] = _mm_add_epi16(inptr[i], sixteen); - d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); + d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i)); inptr[i] = _mm_srai_epi16(inptr[i], 5); d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); + _mm_storeu_si128((__m128i *)(dest + stride * i), d[i]); } } } else { // Run the un-optimised column transform tran_low_t temp_in[8], temp_out[8]; for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vp10_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -3720,7 +3698,7 @@ void vp10_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; int i, j, test; @@ -3801,19 +3779,18 @@ void vp10_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, __m128i d[8]; for (i = 0; i < 8; i++) { inptr[i] = _mm_add_epi16(inptr[i], sixteen); - d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); + d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i)); inptr[i] = _mm_srai_epi16(inptr[i], 5); d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); + _mm_storeu_si128((__m128i *)(dest + stride * i), d[i]); } } } else { // Run the un-optimised column transform tran_low_t temp_in[8], temp_out[8]; for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; + for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vp10_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -3824,7 +3801,7 @@ void vp10_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[16 * 16]; tran_low_t *outptr = out; int i, j, test; @@ -3909,25 +3886,24 @@ void vp10_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, { __m128i d[2]; for (i = 0; i < 16; i++) { - inptr[i ] = _mm_add_epi16(inptr[i ], rounding); - inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); - d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); - inptr[i ] = _mm_srai_epi16(inptr[i ], 6); - inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); - d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); - d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); + inptr[i] = _mm_add_epi16(inptr[i], rounding); + inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding); + d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i)); + d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8)); + inptr[i] = _mm_srai_epi16(inptr[i], 6); + inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6); + d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd); + d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd); // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); - _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); + _mm_storeu_si128((__m128i *)(dest + stride * i), d[0]); + _mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]); } } } else { // Run the un-optimised column transform tran_low_t temp_in[16], temp_out[16]; for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vp10_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( @@ -3938,7 +3914,7 @@ void vp10_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8, } void vp10_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { + int stride, int bd) { tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; int i, j, test; @@ -4028,25 +4004,24 @@ void vp10_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8, { __m128i d[2]; for (i = 0; i < 16; i++) { - inptr[i ] = _mm_add_epi16(inptr[i ], rounding); - inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); - d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); - inptr[i ] = _mm_srai_epi16(inptr[i ], 6); - inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); - d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); - d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); + inptr[i] = _mm_add_epi16(inptr[i], rounding); + inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding); + d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i)); + d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8)); + inptr[i] = _mm_srai_epi16(inptr[i], 6); + inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6); + d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd); + d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd); // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); - _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); + _mm_storeu_si128((__m128i *)(dest + stride * i), d[0]); + _mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]); } } } else { // Run the un-optimised column transform tran_low_t temp_in[16], temp_out[16]; for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; + for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vp10_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( diff --git a/vp10/common/x86/vp10_inv_txfm_sse2.h b/vp10/common/x86/vp10_inv_txfm_sse2.h index b79781aeeb277d1ee57e2cfacd3b44196ca67f4a..9d8c46bfc0fb5be5d64348111bd58908ca059036 100644 --- a/vp10/common/x86/vp10_inv_txfm_sse2.h +++ b/vp10/common/x86/vp10_inv_txfm_sse2.h @@ -46,16 +46,16 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7); } -#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ +#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ { \ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ \ - in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ - in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ + in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ + in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ } -static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) { +static INLINE void array_transpose_4X8(__m128i *in, __m128i *out) { const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); @@ -90,36 +90,36 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { } static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) { - in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16)); - in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16)); - in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16)); - in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16)); - in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16)); - in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16)); - in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16)); - in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16)); - - in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16)); - in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16)); - in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16)); - in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16)); - in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16)); - in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16)); - in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16)); - in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); + in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16)); + in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16)); + in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16)); + in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16)); + in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16)); + in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16)); + in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16)); + in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16)); + + in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16)); + in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16)); + in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16)); + in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16)); + in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16)); + in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16)); + in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16)); + in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16)); } -#define RECON_AND_STORE(dest, in_x) \ - { \ - __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - _mm_storel_epi64((__m128i *)(dest), d0); \ +#define RECON_AND_STORE(dest, in_x) \ + { \ + __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ + d0 = _mm_unpacklo_epi8(d0, zero); \ + d0 = _mm_add_epi16(in_x, d0); \ + d0 = _mm_packus_epi16(d0, d0); \ + _mm_storel_epi64((__m128i *)(dest), d0); \ } static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { - const __m128i final_rounding = _mm_set1_epi16(1<<5); + const __m128i final_rounding = _mm_set1_epi16(1 << 5); const __m128i zero = _mm_setzero_si128(); // Final rounding and shift in[0] = _mm_adds_epi16(in[0], final_rounding); @@ -156,16 +156,16 @@ static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { in[14] = _mm_srai_epi16(in[14], 6); in[15] = _mm_srai_epi16(in[15], 6); - RECON_AND_STORE(dest + 0 * stride, in[0]); - RECON_AND_STORE(dest + 1 * stride, in[1]); - RECON_AND_STORE(dest + 2 * stride, in[2]); - RECON_AND_STORE(dest + 3 * stride, in[3]); - RECON_AND_STORE(dest + 4 * stride, in[4]); - RECON_AND_STORE(dest + 5 * stride, in[5]); - RECON_AND_STORE(dest + 6 * stride, in[6]); - RECON_AND_STORE(dest + 7 * stride, in[7]); - RECON_AND_STORE(dest + 8 * stride, in[8]); - RECON_AND_STORE(dest + 9 * stride, in[9]); + RECON_AND_STORE(dest + 0 * stride, in[0]); + RECON_AND_STORE(dest + 1 * stride, in[1]); + RECON_AND_STORE(dest + 2 * stride, in[2]); + RECON_AND_STORE(dest + 3 * stride, in[3]); + RECON_AND_STORE(dest + 4 * stride, in[4]); + RECON_AND_STORE(dest + 5 * stride, in[5]); + RECON_AND_STORE(dest + 6 * stride, in[6]); + RECON_AND_STORE(dest + 7 * stride, in[7]); + RECON_AND_STORE(dest + 8 * stride, in[8]); + RECON_AND_STORE(dest + 9 * stride, in[9]); RECON_AND_STORE(dest + 10 * stride, in[10]); RECON_AND_STORE(dest + 11 * stride, in[11]); RECON_AND_STORE(dest + 12 * stride, in[12]); diff --git a/vp10/common/x86/vp10_txfm1d_sse4.h b/vp10/common/x86/vp10_txfm1d_sse4.h index 86ab66011d6f20dc5c6756cbf7740e37e33eb475..dec72bda9363ccad92300a88422fffefe212e76a 100644 --- a/vp10/common/x86/vp10_txfm1d_sse4.h +++ b/vp10/common/x86/vp10_txfm1d_sse4.h @@ -8,48 +8,48 @@ extern "C" { #endif -void vp10_fdct4_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fdct8_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fdct16_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fdct32_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fdct64_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); - -void vp10_fadst4_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fadst8_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fadst16_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_fadst32_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); - -void vp10_idct4_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_idct8_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_idct16_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_idct32_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_idct64_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); - -void vp10_iadst4_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_iadst8_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_iadst16_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); -void vp10_iadst32_new_sse4_1(const __m128i* input, __m128i* output, - const int8_t* cos_bit, const int8_t* stage_range); - -static INLINE void transpose_32_4x4(int stride, const __m128i* input, - __m128i* output) { +void vp10_fdct4_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct8_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct16_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct32_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fdct64_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); + +void vp10_fadst4_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fadst8_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fadst16_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_fadst32_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); + +void vp10_idct4_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_idct8_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_idct16_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_idct32_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_idct64_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); + +void vp10_iadst4_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_iadst8_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_iadst16_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); +void vp10_iadst32_new_sse4_1(const __m128i *input, __m128i *output, + const int8_t *cos_bit, const int8_t *stage_range); + +static INLINE void transpose_32_4x4(int stride, const __m128i *input, + __m128i *output) { __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]); __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]); __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]); @@ -65,8 +65,8 @@ static INLINE void transpose_32_4x4(int stride, const __m128i* input, // each 4x4 blocks can be represent by 4 vertical __m128i // we first transpose each 4x4 block internally // than transpose the grid -static INLINE void transpose_32(int txfm_size, const __m128i* input, - __m128i* output) { +static INLINE void transpose_32(int txfm_size, const __m128i *input, + __m128i *output) { const int num_per_128 = 4; const int row_size = txfm_size; const int col_size = txfm_size / num_per_128;