Commit 4efb9771 authored by James Zern's avatar James Zern Committed by Gerrit Code Review
Browse files

Merge "vp10/common: apply clang-format" into nextgenv2

parents ca502bf0 7feae8e8
......@@ -35,8 +35,7 @@ static int alloc_seg_map(VP10_COMMON *cm, int seg_map_size) {
for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1);
if (cm->seg_map_array[i] == NULL)
return 1;
if (cm->seg_map_array[i] == NULL) return 1;
}
cm->seg_map_alloc_size = seg_map_size;
......@@ -91,7 +90,7 @@ void vp10_free_context_buffers(VP10_COMMON *cm) {
int i;
cm->free_mi(cm);
free_seg_map(cm);
for (i = 0 ; i < MAX_MB_PLANE ; i++) {
for (i = 0; i < MAX_MB_PLANE; i++) {
vpx_free(cm->above_context[i]);
cm->above_context[i] = NULL;
}
......@@ -110,15 +109,13 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
if (cm->mi_alloc_size < new_mi_size) {
cm->free_mi(cm);
if (cm->alloc_mi(cm, new_mi_size))
goto fail;
if (cm->alloc_mi(cm, new_mi_size)) goto fail;
}
if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) {
// Create the segmentation map structure and set to 0.
free_seg_map(cm);
if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols))
goto fail;
if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
}
if (cm->above_context_alloc_cols < cm->mi_cols) {
......@@ -129,7 +126,7 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
int i;
for (i = 0 ; i < MAX_MB_PLANE ; i++) {
for (i = 0; i < MAX_MB_PLANE; i++) {
vpx_free(cm->above_context[i]);
cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
2 * aligned_mi_cols, sizeof(*cm->above_context[0]));
......@@ -153,7 +150,7 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
return 0;
fail:
fail:
vp10_free_context_buffers(cm);
return 1;
}
......
......@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_COMMON_ALLOCCOMMON_H_
#define VP10_COMMON_ALLOCCOMMON_H_
......
......@@ -23,20 +23,18 @@
#if ANS_DIVIDE_BY_MULTIPLY
#include "vp10/common/divide.h"
#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
do { \
quotient = fastdiv(dividend, divisor); \
remainder = dividend - quotient * divisor; \
do { \
quotient = fastdiv(dividend, divisor); \
remainder = dividend - quotient * divisor; \
} while (0)
#define ANS_DIV(dividend, divisor) \
fastdiv(dividend, divisor)
#define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor)
#else
#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
do { \
quotient = dividend / divisor; \
remainder = dividend % divisor; \
do { \
quotient = dividend / divisor; \
remainder = dividend % divisor; \
} while (0)
#define ANS_DIV(dividend, divisor) \
((dividend) / (divisor))
#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
#endif
#ifdef __cplusplus
......@@ -245,8 +243,7 @@ static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) {
// TODO(aconverse): Investigate ways to read/write literals faster,
// e.g. 8-bit chunks.
for (bit = bits - 1; bit >= 0; bit--)
literal |= uabs_read_bit(ans) << bit;
for (bit = bits - 1; bit >= 0; bit--) literal |= uabs_read_bit(ans) << bit;
return literal;
}
......@@ -257,8 +254,7 @@ static INLINE int uabs_read_tree(struct AnsDecoder *ans,
const AnsP8 *probs) {
vpx_tree_index i = 0;
while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0)
continue;
while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) continue;
return -i;
}
......@@ -288,8 +284,7 @@ static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
assert(cdf_tab[i - 1] == rans_precision);
}
static INLINE int ans_find_largest(const AnsP10 *const pdf_tab,
int num_syms) {
static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, int num_syms) {
int largest_idx = -1;
int largest_p = -1;
int i;
......@@ -365,8 +360,7 @@ static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
out->cum_prob = (AnsP10)cdf[i - 1];
}
static INLINE int rans_read(struct AnsDecoder *ans,
const rans_dec_lut tab) {
static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) {
unsigned rem;
unsigned quo;
struct rans_dec_sym sym;
......@@ -381,8 +375,7 @@ static INLINE int rans_read(struct AnsDecoder *ans,
}
static INLINE int ans_read_init(struct AnsDecoder *const ans,
const uint8_t *const buf,
int offset) {
const uint8_t *const buf, int offset) {
unsigned x;
if (offset < 1) return 1;
ans->buf = buf;
......@@ -403,8 +396,7 @@ static INLINE int ans_read_init(struct AnsDecoder *const ans,
return 1;
}
ans->state += l_base;
if (ans->state >= l_base * io_base)
return 1;
if (ans->state >= l_base * io_base) return 1;
return 0;
}
......
......@@ -23,226 +23,211 @@ static int16_t cospi_8_64 = 0x3b21;
static int16_t cospi_16_64 = 0x2d41;
static int16_t cospi_24_64 = 0x187e;
static INLINE void TRANSPOSE4X4(
int16x8_t *q8s16,
int16x8_t *q9s16) {
int32x4_t q8s32, q9s32;
int16x4x2_t d0x2s16, d1x2s16;
int32x4x2_t q0x2s32;
d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
q0x2s32 = vtrnq_s32(q8s32, q9s32);
*q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
*q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
return;
static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
int32x4_t q8s32, q9s32;
int16x4x2_t d0x2s16, d1x2s16;
int32x4x2_t q0x2s32;
d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
q0x2s32 = vtrnq_s32(q8s32, q9s32);
*q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
*q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
return;
}
static INLINE void GENERATE_COSINE_CONSTANTS(
int16x4_t *d0s16,
int16x4_t *d1s16,
int16x4_t *d2s16) {
*d0s16 = vdup_n_s16(cospi_8_64);
*d1s16 = vdup_n_s16(cospi_16_64);
*d2s16 = vdup_n_s16(cospi_24_64);
return;
static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16) {
*d0s16 = vdup_n_s16(cospi_8_64);
*d1s16 = vdup_n_s16(cospi_16_64);
*d2s16 = vdup_n_s16(cospi_24_64);
return;
}
static INLINE void GENERATE_SINE_CONSTANTS(
int16x4_t *d3s16,
int16x4_t *d4s16,
int16x4_t *d5s16,
int16x8_t *q3s16) {
*d3s16 = vdup_n_s16(sinpi_1_9);
*d4s16 = vdup_n_s16(sinpi_2_9);
*q3s16 = vdupq_n_s16(sinpi_3_9);
*d5s16 = vdup_n_s16(sinpi_4_9);
return;
static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x8_t *q3s16) {
*d3s16 = vdup_n_s16(sinpi_1_9);
*d4s16 = vdup_n_s16(sinpi_2_9);
*q3s16 = vdupq_n_s16(sinpi_3_9);
*d5s16 = vdup_n_s16(sinpi_4_9);
return;
}
static INLINE void IDCT4x4_1D(
int16x4_t *d0s16,
int16x4_t *d1s16,
int16x4_t *d2s16,
int16x8_t *q8s16,
int16x8_t *q9s16) {
int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
int16x4_t d26s16, d27s16, d28s16, d29s16;
int32x4_t q10s32, q13s32, q14s32, q15s32;
int16x8_t q13s16, q14s16;
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d23s16 = vadd_s16(d16s16, d18s16);
d24s16 = vsub_s16(d16s16, d18s16);
q15s32 = vmull_s16(d17s16, *d2s16);
q10s32 = vmull_s16(d17s16, *d0s16);
q13s32 = vmull_s16(d23s16, *d1s16);
q14s32 = vmull_s16(d24s16, *d1s16);
q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
d26s16 = vqrshrn_n_s32(q13s32, 14);
d27s16 = vqrshrn_n_s32(q14s32, 14);
d29s16 = vqrshrn_n_s32(q15s32, 14);
d28s16 = vqrshrn_n_s32(q10s32, 14);
q13s16 = vcombine_s16(d26s16, d27s16);
q14s16 = vcombine_s16(d28s16, d29s16);
*q8s16 = vaddq_s16(q13s16, q14s16);
*q9s16 = vsubq_s16(q13s16, q14s16);
*q9s16 = vcombine_s16(vget_high_s16(*q9s16),
vget_low_s16(*q9s16)); // vswp
return;
static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
int16x4_t *d2s16, int16x8_t *q8s16,
int16x8_t *q9s16) {
int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
int16x4_t d26s16, d27s16, d28s16, d29s16;
int32x4_t q10s32, q13s32, q14s32, q15s32;
int16x8_t q13s16, q14s16;
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
d23s16 = vadd_s16(d16s16, d18s16);
d24s16 = vsub_s16(d16s16, d18s16);
q15s32 = vmull_s16(d17s16, *d2s16);
q10s32 = vmull_s16(d17s16, *d0s16);
q13s32 = vmull_s16(d23s16, *d1s16);
q14s32 = vmull_s16(d24s16, *d1s16);
q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
d26s16 = vqrshrn_n_s32(q13s32, 14);
d27s16 = vqrshrn_n_s32(q14s32, 14);
d29s16 = vqrshrn_n_s32(q15s32, 14);
d28s16 = vqrshrn_n_s32(q10s32, 14);
q13s16 = vcombine_s16(d26s16, d27s16);
q14s16 = vcombine_s16(d28s16, d29s16);
*q8s16 = vaddq_s16(q13s16, q14s16);
*q9s16 = vsubq_s16(q13s16, q14s16);
*q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
return;
}
static INLINE void IADST4x4_1D(
int16x4_t *d3s16,
int16x4_t *d4s16,
int16x4_t *d5s16,
int16x8_t *q3s16,
int16x8_t *q8s16,
int16x8_t *q9s16) {
int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
d6s16 = vget_low_s16(*q3s16);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
q10s32 = vmull_s16(*d3s16, d16s16);
q11s32 = vmull_s16(*d4s16, d16s16);
q12s32 = vmull_s16(d6s16, d17s16);
q13s32 = vmull_s16(*d5s16, d18s16);
q14s32 = vmull_s16(*d3s16, d18s16);
q15s32 = vmovl_s16(d16s16);
q15s32 = vaddw_s16(q15s32, d19s16);
q8s32 = vmull_s16(*d4s16, d19s16);
q15s32 = vsubw_s16(q15s32, d18s16);
q9s32 = vmull_s16(*d5s16, d19s16);
q10s32 = vaddq_s32(q10s32, q13s32);
q10s32 = vaddq_s32(q10s32, q8s32);
q11s32 = vsubq_s32(q11s32, q14s32);
q8s32 = vdupq_n_s32(sinpi_3_9);
q11s32 = vsubq_s32(q11s32, q9s32);
q15s32 = vmulq_s32(q15s32, q8s32);
q13s32 = vaddq_s32(q10s32, q12s32);
q10s32 = vaddq_s32(q10s32, q11s32);
q14s32 = vaddq_s32(q11s32, q12s32);
q10s32 = vsubq_s32(q10s32, q12s32);
d16s16 = vqrshrn_n_s32(q13s32, 14);
d17s16 = vqrshrn_n_s32(q14s32, 14);
d18s16 = vqrshrn_n_s32(q15s32, 14);
d19s16 = vqrshrn_n_s32(q10s32, 14);
*q8s16 = vcombine_s16(d16s16, d17s16);
*q9s16 = vcombine_s16(d18s16, d19s16);
return;
static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x8_t *q3s16,
int16x8_t *q8s16, int16x8_t *q9s16) {
int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
d6s16 = vget_low_s16(*q3s16);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
d18s16 = vget_low_s16(*q9s16);
d19s16 = vget_high_s16(*q9s16);
q10s32 = vmull_s16(*d3s16, d16s16);
q11s32 = vmull_s16(*d4s16, d16s16);
q12s32 = vmull_s16(d6s16, d17s16);
q13s32 = vmull_s16(*d5s16, d18s16);
q14s32 = vmull_s16(*d3s16, d18s16);
q15s32 = vmovl_s16(d16s16);
q15s32 = vaddw_s16(q15s32, d19s16);
q8s32 = vmull_s16(*d4s16, d19s16);
q15s32 = vsubw_s16(q15s32, d18s16);
q9s32 = vmull_s16(*d5s16, d19s16);
q10s32 = vaddq_s32(q10s32, q13s32);
q10s32 = vaddq_s32(q10s32, q8s32);
q11s32 = vsubq_s32(q11s32, q14s32);
q8s32 = vdupq_n_s32(sinpi_3_9);
q11s32 = vsubq_s32(q11s32, q9s32);
q15s32 = vmulq_s32(q15s32, q8s32);
q13s32 = vaddq_s32(q10s32, q12s32);
q10s32 = vaddq_s32(q10s32, q11s32);
q14s32 = vaddq_s32(q11s32, q12s32);
q10s32 = vsubq_s32(q10s32, q12s32);
d16s16 = vqrshrn_n_s32(q13s32, 14);
d17s16 = vqrshrn_n_s32(q14s32, 14);
d18s16 = vqrshrn_n_s32(q15s32, 14);
d19s16 = vqrshrn_n_s32(q10s32, 14);
*q8s16 = vcombine_s16(d16s16, d17s16);
*q9s16 = vcombine_s16(d18s16, d19s16);
return;
}
void vp10_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
int dest_stride, int tx_type) {
uint8x8_t d26u8, d27u8;
int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
uint32x2_t d26u32, d27u32;
int16x8_t q3s16, q8s16, q9s16;
uint16x8_t q8u16, q9u16;
d26u32 = d27u32 = vdup_n_u32(0);
q8s16 = vld1q_s16(input);
q9s16 = vld1q_s16(input + 8);
TRANSPOSE4X4(&q8s16, &q9s16);
switch (tx_type) {
case 0: // idct_idct is not supported. Fall back to C
vp10_iht4x4_16_add_c(input, dest, dest_stride, tx_type);
return;
break;
case 1: // iadst_idct
// generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
case 2: // idct_iadst
// generate constantsyy
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
break;
case 3: // iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
default: // iadst_idct
assert(0);
break;
}
q8s16 = vrshrq_n_s16(q8s16, 4);
q9s16 = vrshrq_n_s16(q9s16, 4);
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
dest += dest_stride;
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
dest += dest_stride;
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
dest += dest_stride;
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
dest -= dest_stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
dest -= dest_stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
dest -= dest_stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
return;
int dest_stride, int tx_type) {
uint8x8_t d26u8, d27u8;
int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
uint32x2_t d26u32, d27u32;
int16x8_t q3s16, q8s16, q9s16;
uint16x8_t q8u16, q9u16;
d26u32 = d27u32 = vdup_n_u32(0);
q8s16 = vld1q_s16(input);
q9s16 = vld1q_s16(input + 8);
TRANSPOSE4X4(&q8s16, &q9s16);
switch (tx_type) {
case 0: // idct_idct is not supported. Fall back to C
vp10_iht4x4_16_add_c(input, dest, dest_stride, tx_type);
return;
break;
case 1: // iadst_idct
// generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
case 2: // idct_iadst
// generate constantsyy
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
break;
case 3: // iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
// first transform rows
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
// transpose the matrix
TRANSPOSE4X4(&q8s16, &q9s16);
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
default: // iadst_idct
assert(0);
break;
}
q8s16 = vrshrq_n_s16(q8s16, 4);
q9s16 = vrshrq_n_s16(q9s16, 4);
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
dest += dest_stride;
d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
dest += dest_stride;
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
dest += dest_stride;
d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
dest -= dest_stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
dest -= dest_stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
dest -= dest_stride;
vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
return;
}
This diff is collapsed.
......@@ -15,10 +15,9 @@
#include "vp10/common/blockd.h"
PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
const MODE_INFO *left_mi, int b) {
const MODE_INFO *left_mi, int b) {
if (b == 0 || b == 2) {
if (!left_mi || is_inter_block(&left_mi->mbmi))
return DC_PRED;
if (!left_mi || is_inter_block(&left_mi->mbmi)) return DC_PRED;
return get_y_mode(left_mi, b + 1);
} else {
......@@ -28,10 +27,9 @@ PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
}
PREDICTION_MODE vp10_above_block_mode(const