...
 
Commits (52)
......@@ -747,7 +747,8 @@ foreach (aom_source_var ${aom_source_vars})
# For every file in the current source list that does not match
# AOM_CONFIG_DIR, rewrite its AOM_ROOT prefix to a //third_party/... style
# path and append the quoted entry to the file named by libaom_srcs_gni_file.
# NOTE(review): both string(REPLACE) calls below search for "${AOM_ROOT}".
# Once the first call has rewritten the prefix, the second is expected to
# find nothing left to replace -- confirm which destination path is intended.
foreach (file ${${aom_source_var}})
if (NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
string(REPLACE "${AOM_ROOT}" "//third_party/aom/src" file "${file}")
string(REPLACE "${AOM_ROOT}" "//third_party/libaom/source/libaom"
file "${file}")
file(APPEND "${libaom_srcs_gni_file}" " \"${file}\",\n")
endif ()
endforeach ()
......@@ -768,7 +769,8 @@ foreach (aom_source_var ${aom_source_vars})
endif ()
# For every file in the current source list that does not match AOM_ROOT,
# rewrite its AOM_CONFIG_DIR prefix to a //third_party/... style path and
# append the quoted entry to the file named by libaom_srcs_gni_file.
# NOTE(review): both string(REPLACE) calls below search for
# "${AOM_CONFIG_DIR}". Once the first call has rewritten the prefix, the
# second is expected to find nothing left to replace -- confirm which
# destination path is intended.
foreach (file ${${aom_source_var}})
if (NOT "${file}" MATCHES "${AOM_ROOT}")
string(REPLACE "${AOM_CONFIG_DIR}" "//third_party/aom/src" file "${file}")
string(REPLACE "${AOM_CONFIG_DIR}" "//third_party/libaom/source/libaom"
file "${file}")
file(APPEND "${libaom_srcs_gni_file}" " \"${file}\",\n")
endif ()
endforeach ()
......
......@@ -77,6 +77,7 @@ extern "C" {
#endif
/*!\brief Decorator indicating that given struct/union/enum is packed */
#if 0
#ifndef ATTRIBUTE_PACKED
#if defined(__GNUC__) && __GNUC__
#define ATTRIBUTE_PACKED __attribute__((packed))
......@@ -86,6 +87,9 @@ extern "C" {
#define ATTRIBUTE_PACKED
#endif
#endif /* ATTRIBUTE_PACKED */
#else
#define ATTRIBUTE_PACKED
#endif
/*!\brief Current ABI version number
*
......
......@@ -38,19 +38,6 @@ extern "C" {
/*!\brief List of supported image formats */
typedef enum aom_img_fmt {
AOM_IMG_FMT_NONE,
AOM_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */
AOM_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */
AOM_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */
AOM_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */
AOM_IMG_FMT_UYVY, /**< UYVY packed YUV */
AOM_IMG_FMT_YUY2, /**< YUYV packed YUV */
AOM_IMG_FMT_YVYU, /**< YVYU packed YUV */
AOM_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */
AOM_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
AOM_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
AOM_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
AOM_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
AOM_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
AOM_IMG_FMT_YV12 =
AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
AOM_IMG_FMT_I420 = AOM_IMG_FMT_PLANAR | 2,
......
......@@ -118,6 +118,24 @@ extern aom_codec_iface_t *aom_codec_av1_cx(void);
* model based on the entropy of this frame.
*/
#define AOM_EFLAG_NO_UPD_ENTROPY (1 << 26)
/*!\brief Disable ref frame mvs
*
* When this flag is set, the encoder will not allow frames to
* be encoded using mfmv.
*/
#define AOM_EFLAG_NO_REF_FRAME_MVS (1 << 27)
/*!\brief Enable error resilient frame
*
* When this flag is set, the encoder will code frames as error
* resilient.
*/
#define AOM_EFLAG_ERROR_RESILIENT (1 << 28)
/*!\brief Enable s frame mode
*
* When this flag is set, the encoder will code frames as an
* s frame.
*/
#define AOM_EFLAG_SET_S_FRAME (1 << 29)
/*!\brief AVx encoder control functions
*
......
......@@ -39,19 +39,6 @@ static aom_image_t *img_alloc_helper(aom_image_t *img, aom_img_fmt_t fmt,
/* Get sample size for this format */
switch (fmt) {
case AOM_IMG_FMT_RGB32:
case AOM_IMG_FMT_RGB32_LE:
case AOM_IMG_FMT_ARGB:
case AOM_IMG_FMT_ARGB_LE: bps = 32; break;
case AOM_IMG_FMT_RGB24:
case AOM_IMG_FMT_BGR24: bps = 24; break;
case AOM_IMG_FMT_RGB565:
case AOM_IMG_FMT_RGB565_LE:
case AOM_IMG_FMT_RGB555:
case AOM_IMG_FMT_RGB555_LE:
case AOM_IMG_FMT_UYVY:
case AOM_IMG_FMT_YUY2:
case AOM_IMG_FMT_YVYU: bps = 16; break;
case AOM_IMG_FMT_I420:
case AOM_IMG_FMT_YV12:
case AOM_IMG_FMT_AOMI420:
......
......@@ -87,6 +87,7 @@ specialize qw/aom_dc_top_predictor_8x4 sse2/;
specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_top_predictor_8x16 sse2/;
specialize qw/aom_dc_top_predictor_8x32 sse2/;
specialize qw/aom_dc_top_predictor_16x4 sse2/;
specialize qw/aom_dc_top_predictor_16x8 sse2/;
specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_top_predictor_16x32 sse2/;
......@@ -105,6 +106,7 @@ specialize qw/aom_dc_left_predictor_8x4 sse2/;
specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_left_predictor_8x16 sse2/;
specialize qw/aom_dc_left_predictor_8x32 sse2/;
specialize qw/aom_dc_left_predictor_16x4 sse2/;
specialize qw/aom_dc_left_predictor_16x8 sse2/;
specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_left_predictor_16x32 sse2/;
......@@ -123,6 +125,7 @@ specialize qw/aom_dc_128_predictor_8x4 sse2/;
specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_128_predictor_8x16 sse2/;
specialize qw/aom_dc_128_predictor_8x32 sse2/;
specialize qw/aom_dc_128_predictor_16x4 sse2/;
specialize qw/aom_dc_128_predictor_16x8 sse2/;
specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_128_predictor_16x32 sse2/;
......@@ -141,6 +144,7 @@ specialize qw/aom_v_predictor_8x4 sse2/;
specialize qw/aom_v_predictor_8x8 neon msa sse2/;
specialize qw/aom_v_predictor_8x16 sse2/;
specialize qw/aom_v_predictor_8x32 sse2/;
specialize qw/aom_v_predictor_16x4 sse2/;
specialize qw/aom_v_predictor_16x8 sse2/;
specialize qw/aom_v_predictor_16x16 neon msa sse2/;
specialize qw/aom_v_predictor_16x32 sse2/;
......@@ -159,6 +163,7 @@ specialize qw/aom_h_predictor_8x4 sse2/;
specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_8x16 sse2/;
specialize qw/aom_h_predictor_8x32 sse2/;
specialize qw/aom_h_predictor_16x4 sse2/;
specialize qw/aom_h_predictor_16x8 sse2/;
specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_16x32 sse2/;
......@@ -177,6 +182,7 @@ specialize qw/aom_paeth_predictor_8x4 ssse3/;
specialize qw/aom_paeth_predictor_8x8 ssse3/;
specialize qw/aom_paeth_predictor_8x16 ssse3/;
specialize qw/aom_paeth_predictor_8x32 ssse3/;
specialize qw/aom_paeth_predictor_16x4 ssse3/;
specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
......@@ -200,6 +206,7 @@ specialize qw/aom_smooth_predictor_8x4 ssse3/;
specialize qw/aom_smooth_predictor_8x8 ssse3/;
specialize qw/aom_smooth_predictor_8x16 ssse3/;
specialize qw/aom_smooth_predictor_8x32 ssse3/;
specialize qw/aom_smooth_predictor_16x4 ssse3/;
specialize qw/aom_smooth_predictor_16x8 ssse3/;
specialize qw/aom_smooth_predictor_16x16 ssse3/;
specialize qw/aom_smooth_predictor_16x32 ssse3/;
......@@ -261,6 +268,7 @@ specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_dc_predictor_8x32 sse2/;
specialize qw/aom_dc_predictor_16x4 sse2/;
specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
......
......@@ -364,7 +364,7 @@ void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
#undef DC_MULTIPLIER_1X2
#undef DC_MULTIPLIER_1X4
static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
int bh, const uint16_t *above,
const uint16_t *left, int bd) {
int r;
......@@ -376,7 +376,7 @@ static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
}
}
static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
int bh, const uint16_t *above,
const uint16_t *left, int bd) {
int r;
......@@ -388,7 +388,7 @@ static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
}
}
static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh, const uint16_t *above,
const uint16_t *left, int bd) {
int r, c;
......@@ -402,7 +402,7 @@ static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
......@@ -435,7 +435,7 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
......@@ -466,7 +466,7 @@ static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
......@@ -511,7 +511,7 @@ static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
......@@ -528,7 +528,7 @@ static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
......@@ -545,7 +545,7 @@ static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
int bh, const uint16_t *above,
const uint16_t *left, int bd) {
int i, r, expected_dc, sum = 0;
......
......@@ -66,7 +66,8 @@ void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
if (tmp32) eob = i;
}
......@@ -126,7 +127,8 @@ void highbd_quantize_b_helper_c(
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
if (abs_qcoeff) eob = idx_arr[i];
}
}
......
This diff is collapsed.
......@@ -184,6 +184,19 @@ void aom_dc_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
dc_store_8xh(&row, 32, dst, stride);
}
// DC predictor for a 16x4 block: fills four 16-byte rows of dst with
// (sum(above) + sum(left) + 10) / 20.
// NOTE(review): dc_sum_16()/dc_sum_4() are assumed to leave the pixel sum in
// the low lanes of the returned register -- confirm against their definitions.
void aom_dc_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const __m128i left_total = dc_sum_4(left);
const __m128i top_total = dc_sum_16(above);
const __m128i total = _mm_add_epi16(top_total, left_total);
// Add half of the divisor before dividing so the result is rounded.
const uint32_t raw = _mm_cvtsi128_si32(total);
const uint32_t dc = (raw + 10) / 20;
const __m128i fill = _mm_set1_epi8((uint8_t)dc);
dc_store_16xh(&fill, 4, dst, stride);
}
void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const __m128i sum_left = dc_sum_8(left);
......@@ -368,6 +381,19 @@ void aom_dc_top_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
dc_store_8xh(&row, 32, dst, stride);
}
void aom_dc_top_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_16(above);
const __m128i eight = _mm_set1_epi16((uint16_t)8);
sum_above = _mm_add_epi16(sum_above, eight);
sum_above = _mm_srai_epi16(sum_above, 4);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
sum_above = _mm_shufflelo_epi16(sum_above, 0);
const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
dc_store_16xh(&row, 4, dst, stride);
}
void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
......@@ -562,6 +588,20 @@ void aom_dc_left_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
dc_store_8xh(&row, 32, dst, stride);
}
void aom_dc_left_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_4(left);
const __m128i two = _mm_set1_epi16((uint16_t)2);
sum_left = _mm_add_epi16(sum_left, two);
sum_left = _mm_srai_epi16(sum_left, 2);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
sum_left = _mm_shufflelo_epi16(sum_left, 0);
const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
dc_store_16xh(&row, 4, dst, stride);
}
void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
......@@ -731,6 +771,14 @@ void aom_dc_128_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
dc_store_8xh(&row, 32, dst, stride);
}
// DC-128 predictor for a 16x4 block: ignores both neighbours and writes the
// constant byte 128 to 4 rows of 16 bytes.
void aom_dc_128_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
(void)above;
const __m128i mid_gray = _mm_set1_epi8((uint8_t)128);
dc_store_16xh(&mid_gray, 4, dst, stride);
}
void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
......@@ -848,6 +896,13 @@ void aom_v_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
dc_store_8xh(&row, 32, dst, stride);
}
// Vertical predictor for a 16x4 block: copies the 16 bytes at `above` into
// each of the 4 output rows.
void aom_v_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
// _mm_load_si128 is an aligned load, so `above` must be 16-byte aligned.
const __m128i top_row = _mm_load_si128((__m128i const *)above);
dc_store_16xh(&top_row, 4, dst, stride);
}
void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const __m128i row = _mm_load_si128((__m128i const *)above);
......@@ -1171,6 +1226,14 @@ static INLINE void h_prediction_16x8_2(const __m128i *left, uint8_t *dst,
h_pred_store_16xh(row, 4, dst, stride);
}
// Horizontal predictor for a 16x4 block: duplicates each byte of `left` into
// a 16-bit lane and hands the result to h_prediction_16x8_1() to emit rows.
// NOTE(review): _mm_loadl_epi64 reads 8 bytes from `left` although the block
// is only 4 rows tall -- confirm callers always provide at least 8 readable
// bytes there.
void aom_h_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
const __m128i left8 = _mm_loadl_epi64((const __m128i *)left);
// Each left pixel now occupies two adjacent bytes (one 16-bit lane).
const __m128i left_pairs = _mm_unpacklo_epi8(left8, left8);
h_prediction_16x8_1(&left_pairs, dst, stride);
}
void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
......
......@@ -196,6 +196,27 @@ static INLINE __m128i paeth_16x1_pred(const __m128i *left, const __m128i *top0,
return _mm_packus_epi16(p0, p1);
}
// Paeth predictor for a 16x4 block. The 16 top pixels are widened to two
// registers of 16-bit lanes once; each of the 4 rows is then produced by
// paeth_16x1_pred() from that row's left pixel, the top row and the top-left
// pixel (above[-1]), and stored with an aligned 16-byte store.
void aom_paeth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const __m128i k_zero = _mm_setzero_si128();
const __m128i k_one = _mm_set1_epi16(1);
const __m128i left4 = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
const __m128i top = _mm_load_si128((const __m128i *)above);
const __m128i top_lo = _mm_unpacklo_epi8(top, k_zero);
const __m128i top_hi = _mm_unpackhi_epi8(top, k_zero);
const __m128i top_left = _mm_set1_epi16((uint16_t)above[-1]);
// Shuffle control: in every 16-bit lane the low byte selects a source byte
// (initially byte 0) and the high byte 0x80 forces a zero, so the shuffle
// yields left[row] zero-extended in all lanes. Adding 1 advances the row.
__m128i select = _mm_set1_epi16(0x8000);
int rows_left = 4;
while (rows_left-- > 0) {
const __m128i left16 = _mm_shuffle_epi8(left4, select);
const __m128i pred = paeth_16x1_pred(&left16, &top_lo, &top_hi, &top_left);
_mm_store_si128((__m128i *)dst, pred);
dst += stride;
select = _mm_add_epi16(select, k_one);
}
}
void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
__m128i l = _mm_loadl_epi64((const __m128i *)left);
......@@ -919,6 +940,12 @@ static INLINE void smooth_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
}
}
// Smooth predictor entry point for the 16x4 block size; forwards to the
// generic smooth_predictor_wxh() with the two size arguments 16 and 4.
void aom_smooth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
// Named after the "16x4" in the function name.
enum { kFirstDim = 16, kSecondDim = 4 };
smooth_predictor_wxh(dst, stride, above, left, kFirstDim, kSecondDim);
}
void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
......
......@@ -16,64 +16,120 @@
#include "./aom_config.h"
static INLINE void highbd_transpose6x6(uint16_t *src[], int in_p,
uint16_t *dst[], int out_p,
int num_6x6_to_transpose) {
int idx6x6 = 0;
__m128i p0, p1, p2, p3, p4, p5, x0, x1, x2, x3, x4, x5;
do {
uint16_t *in = src[idx6x6];
uint16_t *out = dst[idx6x6];
// Transposes a 6x6 block of 16-bit values.
//
// Inputs x0..x5 each hold one row in their low six 16-bit lanes (lanes 6 and
// 7 are ignored). Outputs d0..d5 each receive one column in their low six
// lanes; the contents of output lanes 6 and 7 are unspecified.
//
// All inputs are read before any output is written, so the function also
// works when each dN points at the same register as xN.
static INLINE void highbd_transpose6x6_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *x4, __m128i *x5,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3,
__m128i *d4, __m128i *d5) {
__m128i w0, w1, w2, w3, w4, w5, ww0, ww1, ww2;
// 00 01 02 03 04 05 xx xx
// 10 11 12 13 14 15 xx xx
// 20 21 22 23 24 25 xx xx
// 30 31 32 33 34 35 xx xx
// 40 41 42 43 44 45 xx xx
// 50 51 52 53 54 55 xx xx
w0 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13
w1 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33
w2 = _mm_unpacklo_epi16(*x4, *x5); // 40 50 41 51 42 52 43 53
w3 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 xx xx xx xx
w4 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 xx xx xx xx
w5 = _mm_unpackhi_epi16(*x4, *x5); // 44 54 45 55 xx xx xx xx
ww0 = _mm_unpacklo_epi32(w0, w1); // 00 10 20 30 01 11 21 31
ww1 = _mm_unpackhi_epi32(w0, w1); // 02 12 22 32 03 13 23 33
ww2 = _mm_unpacklo_epi32(w3, w4); // 04 14 24 34 05 15 25 35
*d0 = _mm_unpacklo_epi64(ww0, w2); // 00 10 20 30 40 50 41 51
// The high half is taken from both operands, so "41 51" must first be moved
// up into the high half of w2: shift LEFT by 4 bytes (two lanes).
*d1 = _mm_unpackhi_epi64(ww0,
_mm_slli_si128(w2, 4)); // 01 11 21 31 41 51 xx xx
*d2 = _mm_unpacklo_epi64(ww1,
_mm_srli_si128(w2, 8)); // 02 12 22 32 42 52 xx xx
// Here "43 53" must sit at the bottom of the high half: shift RIGHT by 4.
*d3 = _mm_unpackhi_epi64(ww1,
_mm_srli_si128(w2, 4)); // 03 13 23 33 43 53 xx xx
*d4 = _mm_unpacklo_epi64(ww2, w5); // 04 14 24 34 44 54 45 55
*d5 = _mm_unpackhi_epi64(ww2,
_mm_slli_si128(w5, 4)); // 05 15 25 35 45 55 xx xx
}
p0 =
_mm_loadu_si128((__m128i *)(in + 0 * in_p)); // 00 01 02 03 04 05 xx xx
p1 =
_mm_loadu_si128((__m128i *)(in + 1 * in_p)); // 10 11 12 13 14 15 xx xx
p2 =
_mm_loadu_si128((__m128i *)(in + 2 * in_p)); // 20 21 22 23 24 25 xx xx
p3 =
_mm_loadu_si128((__m128i *)(in + 3 * in_p)); // 30 31 32 33 34 35 xx xx
p4 =
_mm_loadu_si128((__m128i *)(in + 4 * in_p)); // 40 41 42 43 44 45 xx xx
p5 =
_mm_loadu_si128((__m128i *)(in + 5 * in_p)); // 50 51 52 53 54 55 xx xx
x0 = _mm_unpacklo_epi16(p0, p1); // 00 10 01 11 02 12 03 13
x1 = _mm_unpacklo_epi16(p2, p3); // 20 30 21 31 22 32 23 33
x2 = _mm_unpacklo_epi16(p4, p5); // 40 50 41 51 42 52 43 53
x3 = _mm_unpackhi_epi16(p0, p1); // 04 14 05 15 xx xx xx xx
x4 = _mm_unpackhi_epi16(p2, p3); // 24 34 25 35 xx xx xx xx
x5 = _mm_unpackhi_epi16(p4, p5); // 44 54 45 55 xx xx xx xx
p5 = _mm_unpacklo_epi32(x0, x1); // 00 10 20 30 01 11 21 31
_mm_storel_epi64((__m128i *)(out + 0 * out_p), p5); // 00 10 20 30
*(int *)(out + 4 + 0 * out_p) = _mm_cvtsi128_si32(x2); // 40 50
_mm_storel_epi64((__m128i *)(out + 1 * out_p),
_mm_srli_si128(p5, 8)); // 01 11 21 31
*(int *)(out + 4 + 1 * out_p) =
_mm_cvtsi128_si32(_mm_srli_si128(x2, 4)); // 41 51
p1 = _mm_unpackhi_epi32(x0, x1); // 02 12 22 32 03 13 23 33
_mm_storel_epi64((__m128i *)(out + 2 * out_p), p1); // 02 12 22 32
*(int *)(out + 4 + 2 * out_p) =
_mm_cvtsi128_si32(_mm_srli_si128(x2, 8)); // 42 52
_mm_storel_epi64((__m128i *)(out + 3 * out_p),
_mm_srli_si128(p1, 8)); // 03 13 23 33
*(int *)(out + 4 + 3 * out_p) =
_mm_cvtsi128_si32(_mm_srli_si128(x2, 12)); // 43 53
p2 = _mm_unpacklo_epi32(x3, x4); // 04 14 24 34 05 15 25 35
_mm_storel_epi64((__m128i *)(out + 4 * out_p), p2); // 04 14 24 34
*(int *)(out + 4 + 4 * out_p) = _mm_cvtsi128_si32(x5); // 44 54
_mm_storel_epi64((__m128i *)(out + 5 * out_p),
_mm_srli_si128(p2, 8)); // 05 15 25 35
*(int *)(out + 4 + 5 * out_p) =
_mm_cvtsi128_si32(_mm_srli_si128(x5, 4)); // 45 55
} while (++idx6x6 < num_6x6_to_transpose);
// Produces the first four columns of an 8x8 transpose of 16-bit values.
//
// x0..x7 each hold one full row (8 lanes). On return d0..d3 hold columns
// 0..3, i.e. dK = { x0[K], x1[K], ..., x7[K] }. All inputs are read before
// any output is written.
static INLINE void highbd_transpose8x8_low_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *x4, __m128i *x5,
__m128i *x6, __m128i *x7,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3) {
// Stage 1: interleave the low four lanes of each pair of adjacent rows.
const __m128i rows01 = _mm_unpacklo_epi16(*x0, *x1); // 00 10 01 11 02 12 03 13
const __m128i rows23 = _mm_unpacklo_epi16(*x2, *x3); // 20 30 21 31 22 32 23 33
const __m128i rows45 = _mm_unpacklo_epi16(*x4, *x5); // 40 50 41 51 42 52 43 53
const __m128i rows67 = _mm_unpacklo_epi16(*x6, *x7); // 60 70 61 71 62 72 63 73
// Stage 2: interleave 32-bit pairs to gather four rows per column.
const __m128i cols01_top = _mm_unpacklo_epi32(rows01, rows23); // 00 10 20 30 01 11 21 31
const __m128i cols01_bot = _mm_unpacklo_epi32(rows45, rows67); // 40 50 60 70 41 51 61 71
const __m128i cols23_top = _mm_unpackhi_epi32(rows01, rows23); // 02 12 22 32 03 13 23 33
const __m128i cols23_bot = _mm_unpackhi_epi32(rows45, rows67); // 42 52 62 72 43 53 63 73
// Stage 3: join the top and bottom halves of each column.
*d0 = _mm_unpacklo_epi64(cols01_top, cols01_bot); // 00 10 20 30 40 50 60 70
*d1 = _mm_unpackhi_epi64(cols01_top, cols01_bot); // 01 11 21 31 41 51 61 71
*d2 = _mm_unpacklo_epi64(cols23_top, cols23_bot); // 02 12 22 32 42 52 62 72
*d3 = _mm_unpackhi_epi64(cols23_top, cols23_bot); // 03 13 23 33 43 53 63 73
}
// Produces the last four columns of an 8x8 transpose of 16-bit values.
//
// x0..x7 each hold one full row (8 lanes). On return d4..d7 hold columns
// 4..7, i.e. dK = { x0[K], x1[K], ..., x7[K] }. All inputs are read before
// any output is written.
static INLINE void highbd_transpose8x8_high_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *x4, __m128i *x5,
__m128i *x6, __m128i *x7,
__m128i *d4, __m128i *d5,
__m128i *d6, __m128i *d7) {
// Stage 1: interleave the high four lanes of each pair of adjacent rows.
const __m128i rows01 = _mm_unpackhi_epi16(*x0, *x1); // 04 14 05 15 06 16 07 17
const __m128i rows23 = _mm_unpackhi_epi16(*x2, *x3); // 24 34 25 35 26 36 27 37
const __m128i rows45 = _mm_unpackhi_epi16(*x4, *x5); // 44 54 45 55 46 56 47 57
const __m128i rows67 = _mm_unpackhi_epi16(*x6, *x7); // 64 74 65 75 66 76 67 77
// Stage 2: interleave 32-bit pairs to gather four rows per column.
const __m128i cols45_top = _mm_unpacklo_epi32(rows01, rows23); // 04 14 24 34 05 15 25 35
const __m128i cols45_bot = _mm_unpacklo_epi32(rows45, rows67); // 44 54 64 74 45 55 65 75
const __m128i cols67_top = _mm_unpackhi_epi32(rows01, rows23); // 06 16 26 36 07 17 27 37
const __m128i cols67_bot = _mm_unpackhi_epi32(rows45, rows67); // 46 56 66 76 47 57 67 77
// Stage 3: join the top and bottom halves of each column.
*d4 = _mm_unpacklo_epi64(cols45_top, cols45_bot); // 04 14 24 34 44 54 64 74
*d5 = _mm_unpackhi_epi64(cols45_top, cols45_bot); // 05 15 25 35 45 55 65 75
*d6 = _mm_unpacklo_epi64(cols67_top, cols67_bot); // 06 16 26 36 46 56 66 76
*d7 = _mm_unpackhi_epi64(cols67_top, cols67_bot); // 07 17 27 37 47 57 67 77
}
// Full 8x8 transpose of 16-bit values: x0..x7 are the input rows and d0..d7
// receive the output columns. Implemented as two passes over the same inputs:
// the "low" helper fills d0..d3 and the "high" helper fills d4..d7.
// NOTE(review): the second call re-reads x0..x7 after d0..d3 have been
// written, so passing outputs that alias the inputs looks unsafe -- confirm
// that callers always use distinct registers.
static INLINE void highbd_transpose8x8_sse2(
__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
__m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3, __m128i *d4, __m128i *d5, __m128i *d6,
__m128i *d7) {
highbd_transpose8x8_low_sse2(x0, x1, x2, x3, x4, x5, x6, x7, d0, d1, d2, d3);
highbd_transpose8x8_high_sse2(x0, x1, x2, x3, x4, x5, x6, x7, d4, d5, d6, d7);
}
static INLINE void highbd_transpose8x8(uint16_t *src[], int in_p,
......
......@@ -166,7 +166,8 @@ set(AOM_AV1_COMMON_INTRIN_SSSE3
set(AOM_AV1_COMMON_INTRIN_SSE4_1
"${AOM_ROOT}/av1/common/x86/av1_txfm_sse4.c"
"${AOM_ROOT}/av1/common/x86/av1_txfm_sse4.h"
"${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_sse4.c")
"${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_sse4.c"
"${AOM_ROOT}/av1/common/x86/reconinter_sse4.c")
set(AOM_AV1_COMMON_INTRIN_AVX2
"${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_avx2.c"
......
......@@ -1270,6 +1270,55 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
if (frame_size) {
if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
const int write_temporal_delimiter =
!cpi->common.enhancement_layer_id && !ctx->pending_frame_count;
if (write_temporal_delimiter) {
uint32_t obu_header_size = 1;
const uint32_t obu_payload_size = 0;
const size_t length_field_size =
aom_uleb_size_in_bytes(obu_payload_size);
if (ctx->pending_cx_data) {
const size_t move_offset = length_field_size + 1;
memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
frame_size);
}
const uint32_t obu_header_offset = 0;
obu_header_size = write_obu_header(
OBU_TEMPORAL_DELIMITER, 0,
(uint8_t *)(ctx->pending_cx_data + obu_header_offset));
// OBUs are preceded/succeeded by an unsigned leb128 coded integer.
if (write_uleb_obu_size(obu_header_size, obu_payload_size,
ctx->pending_cx_data) != AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
}
frame_size += obu_header_size + obu_payload_size + length_field_size;
}
if (ctx->oxcf.save_as_annexb) {
size_t curr_frame_size = frame_size;
if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
}
frame_size = curr_frame_size;
// B_PRIME (add frame size)
const size_t length_field_size = aom_uleb_size_in_bytes(frame_size);
if (ctx->pending_cx_data) {
const size_t move_offset = length_field_size;
memmove(cx_data + move_offset, cx_data, frame_size);
}
if (write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
}
frame_size += length_field_size;
}
ctx->pending_frame_sizes[ctx->pending_frame_count++] = frame_size;
ctx->pending_cx_data_sz += frame_size;
......@@ -1285,50 +1334,27 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
// Add the frame packet to the list of returned packets.
aom_codec_cx_pkt_t pkt;
pkt.kind = AOM_CODEC_CX_FRAME_PKT;
pkt.data.frame.buf = ctx->pending_cx_data;
pkt.data.frame.sz = ctx->pending_cx_data_sz;
pkt.data.frame.partition_id = -1;
int write_temporal_delimiter = 1;
// only write OBU_TD if base layer
write_temporal_delimiter = !cpi->common.enhancement_layer_id;
if (write_temporal_delimiter) {
// move data and insert OBU_TD preceded by optional 4 byte size
uint32_t obu_header_size = 1;
const uint32_t obu_payload_size = 0;
const size_t length_field_size =
aom_uleb_size_in_bytes(obu_payload_size);
if (ctx->oxcf.save_as_annexb) {
// B_PRIME (add TU size)
size_t tu_size = ctx->pending_cx_data_sz;
const size_t length_field_size = aom_uleb_size_in_bytes(tu_size);
if (ctx->pending_cx_data) {
const size_t move_offset = length_field_size + 1;
const size_t move_offset = length_field_size;
memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
ctx->pending_cx_data_sz);
tu_size);
}
const uint32_t obu_header_offset = 0;
obu_header_size = write_obu_header(
OBU_TEMPORAL_DELIMITER, 0,
(uint8_t *)(ctx->pending_cx_data + obu_header_offset));
// OBUs are preceded/succeeded by an unsigned leb128 coded integer.
if (write_uleb_obu_size(obu_header_size, obu_payload_size,
ctx->pending_cx_data) != AOM_CODEC_OK) {
if (write_uleb_obu_size(0, (uint32_t)tu_size, ctx->pending_cx_data) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
}
pkt.data.frame.sz +=
obu_header_size + obu_payload_size + length_field_size;
ctx->pending_cx_data_sz += length_field_size;
}
if (ctx->oxcf.save_as_annexb) {
size_t curr_frame_size = pkt.data.frame.sz;
if (av1_convert_sect5obus_to_annexb(ctx->pending_cx_data,
&curr_frame_size) != AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
}
pkt.data.frame.sz = curr_frame_size;
}
pkt.kind = AOM_CODEC_CX_FRAME_PKT;
pkt.data.frame.buf = ctx->pending_cx_data;
pkt.data.frame.sz = ctx->pending_cx_data_sz;
pkt.data.frame.partition_id = -1;
pkt.data.frame.pts = ticks_to_timebase_units(timebase, dst_time_stamp);
pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
......
......@@ -477,6 +477,17 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
if (res != AOM_CODEC_OK) return res;
}
if (ctx->is_annexb) {
// read the size of this temporal unit
size_t length_of_size;
uint64_t size_of_unit;
if (aom_uleb_decode(data_start, data_sz, &size_of_unit, &length_of_size) !=
0) {
return AOM_CODEC_CORRUPT_FRAME;
}
data_start += length_of_size;
}
// Decode in serial mode.
if (frame_count > 0) {
int i;
......@@ -496,6 +507,17 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
}
} else {
while (data_start < data_end) {
if (ctx->is_annexb) {
// read the size of this frame unit
size_t length_of_size;
uint64_t size_of_frame_unit;
if (aom_uleb_decode(data_start, (uint32_t)(data_end - data_start),
&size_of_frame_unit, &length_of_size) != 0) {
return AOM_CODEC_CORRUPT_FRAME;
}
data_start += length_of_size;
}
const uint32_t frame_size = (uint32_t)(data_end - data_start);
res = decode_one(ctx, &data_start, frame_size, user_priv);
if (res != AOM_CODEC_OK) return res;
......
......@@ -144,6 +144,11 @@ add_proto qw/void av1_highbd_dr_prediction_z1/, "uint16_t *dst, ptrdiff_t stride
add_proto qw/void av1_highbd_dr_prediction_z2/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd";
add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd";
# build compound seg mask functions
add_proto qw/void av1_build_compound_diffwtd_mask/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w";
add_proto qw/void av1_build_compound_diffwtd_mask_highbd/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd";
specialize qw/av1_build_compound_diffwtd_mask sse4_1/;
#
# Encoder functions below this point.
#
......@@ -232,7 +237,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# End av1_high encoder functions
# txb
add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_TYPE tx_type, int8_t *const coeff_contexts";
add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts";
specialize qw/av1_get_nz_map_contexts sse2/;
add_proto qw/void av1_txb_init_levels/, "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels";
specialize qw/av1_txb_init_levels sse4_1/;
......
......@@ -13,6 +13,7 @@
#define _CDEF_BLOCK_H (1)
#include "./odintrin.h"
#include "av1/common/enums.h"
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
......
......@@ -28,8 +28,8 @@
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const int16_t *x_filters, const int x0_qn,
const int x_step_qn) {
const int16_t *x_filters, int x0_qn,
int x_step_qn) {
src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
for (int y = 0; y < h; ++y) {
int x_qn = x0_qn;
......
......@@ -17,7 +17,7 @@
#include "av1/common/seg_common.h"
#include "av1/common/txb_common.h"
static const aom_cdf_prob
const aom_cdf_prob
default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][CDF_SIZE(
INTRA_MODES)] = {
{ { AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
......@@ -72,7 +72,7 @@ static const aom_cdf_prob
31355, 31802, 32593) } }
};
static const aom_cdf_prob default_angle_delta_cdf[DIRECTIONAL_MODES][CDF_SIZE(
const aom_cdf_prob default_angle_delta_cdf[DIRECTIONAL_MODES][CDF_SIZE(
2 * MAX_ANGLE_DELTA + 1)] = {
{ AOM_CDF7(2180, 5032, 7567, 22776, 26989, 30217) },
{ AOM_CDF7(2301, 5608, 8801, 23487, 26974, 30330) },
......@@ -84,7 +84,7 @@ static const aom_cdf_prob default_angle_delta_cdf[DIRECTIONAL_MODES][CDF_SIZE(
{ AOM_CDF7(3605, 10428, 12459, 17676, 21244, 30655) }
};
static const aom_cdf_prob default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
const aom_cdf_prob default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
INTRA_MODES)] = { { AOM_CDF13(22801, 23489, 24293, 24756, 25601, 26123,
26606, 27418, 27945, 29228, 29685, 30349) },
{ AOM_CDF13(18673, 19845, 22631, 23318, 23950, 24649,
......@@ -94,7 +94,7 @@ static const aom_cdf_prob default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
{ AOM_CDF13(20155, 21301, 22838, 23178, 23261, 23533,
23703, 24804, 25352, 26575, 27016, 28049) } };
static const aom_cdf_prob
const aom_cdf_prob
default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES][CDF_SIZE(
UV_INTRA_MODES)] = {
{ { AOM_CDF13(22631, 24152, 25378, 25661, 25986, 26520, 27055, 27923,
......@@ -151,7 +151,7 @@ static const aom_cdf_prob
9875, 10521, 29048) } }
};
static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(
const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(
EXT_PARTITION_TYPES)] = {
{ AOM_CDF4(19132, 25510, 30392) },
{ AOM_CDF4(13928, 19855, 28540) },
......@@ -175,7 +175,7 @@ static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(
{ AOM_CDF8(711, 966, 1172, 32448, 32538, 32617, 32664) },
};
static const aom_cdf_prob default_intra_ext_tx_cdf
const aom_cdf_prob default_intra_ext_tx_cdf
[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)] = {
{
{
......@@ -365,7 +365,7 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
},
};
static const aom_cdf_prob
const aom_cdf_prob
default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE(
TX_TYPES)] = {
{
......@@ -570,7 +570,7 @@ static const aom_cdf_prob default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
{ AOM_CDF2(26879) }
};
static const aom_cdf_prob default_intra_inter_cdf[INTRA_INTER_CONTEXTS]
const aom_cdf_prob default_intra_inter_cdf[INTRA_INTER_CONTEXTS]
[CDF_SIZE(2)] = {
{ AOM_CDF2(806) },
{ AOM_CDF2(16662) },
......@@ -799,11 +799,11 @@ static const aom_cdf_prob
{ AOM_CDF2(28165) }, { AOM_CDF2(22401) }, { AOM_CDF2(16088) }
};
static const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = {
const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = {
{ AOM_CDF2(31671) }, { AOM_CDF2(16515) }, { AOM_CDF2(4576) }
};
static const aom_cdf_prob default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(
const aom_cdf_prob default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(
2)] = { { AOM_CDF2(32621) }, { AOM_CDF2(20708) }, { AOM_CDF2(8127) } };
static const aom_cdf_prob
......@@ -821,10 +821,10 @@ static const aom_cdf_prob
static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)] = { AOM_CDF2(
30531) };
static const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE(
const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE(
FILTER_INTRA_MODES)] = { AOM_CDF5(8949, 12776, 17211, 29558) };
static const aom_cdf_prob default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(
const aom_cdf_prob default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(
2)] = { { AOM_CDF2(4621) }, { AOM_CDF2(6743) }, { AOM_CDF2(5893) },
{ AOM_CDF2(7866) }, { AOM_CDF2(12551) }, { AOM_CDF2(9394) },
{ AOM_CDF2(12408) }, { AOM_CDF2(14301) }, { AOM_CDF2(12756) },
......
......@@ -281,7 +281,7 @@ typedef enum {
SHARP_SHARP,
} DUAL_FILTER_TYPE;
typedef enum ATTRIBUTE_PACKED {
typedef enum {
// DCT only
EXT_TX_SET_DCTONLY,
// DCT + Identity only
......
......@@ -187,7 +187,7 @@ static void scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
// TODO(jingning): Revisit this part after cb4x4 is stable.
if (abs(row_offset) > 1) {
col_offset = 1;
if (mi_col & 0x01 && xd->n8_w < n8_w_8) --col_offset;
if ((mi_col & 0x01) && xd->n8_w < n8_w_8) --col_offset;
}
const int use_step_16 = (xd->n8_w >= 16);
MB_MODE_INFO **const candidate_mi0 = xd->mi + row_offset * xd->mi_stride;
......@@ -244,7 +244,7 @@ static void scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const int shift = 0;
if (abs(col_offset) > 1) {
row_offset = 1;
if (mi_row & 0x01 && xd->n8_h < n8_h_8) --row_offset;
if ((mi_row & 0x01) && xd->n8_h < n8_h_8) --row_offset;
}
const int use_step_16 = (xd->n8_h >= 16);
(void)mi_col;
......@@ -518,7 +518,7 @@ static void setup_ref_mv_list(
// Find valid maximum row/col offset.
if (xd->up_available) {
max_row_offset = -(MVREF_ROWS << 1) + row_adj;
max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj;
if (xd->n8_h < mi_size_high[BLOCK_8X8])
max_row_offset = -(2 << 1) + row_adj;
......@@ -528,7 +528,7 @@ static void setup_ref_mv_list(
}
if (xd->left_available) {
max_col_offset = -(MVREF_COLS << 1) + col_adj;
max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj;
if (xd->n8_w < mi_size_wide[BLOCK_8X8])
max_col_offset = -(2 << 1) + col_adj;
......@@ -635,7 +635,7 @@ static void setup_ref_mv_list(
#endif // USE_CUR_GM_REFMV
refmv_count);
for (int idx = 2; idx <= MVREF_ROWS; ++idx) {
for (int idx = 2; idx <= MVREF_ROW_COLS; ++idx) {
const int row_offset = -(idx << 1) + 1 + row_adj;
const int col_offset = -(idx << 1) + 1 + col_adj;
......@@ -658,16 +658,6 @@ static void setup_ref_mv_list(
max_col_offset, &processed_cols);
}
const int col_offset = -(MVREF_COLS << 1) + 1 + col_adj;
if (abs(col_offset) <= abs(max_col_offset) &&
abs(col_offset) > processed_cols)
scan_col_mbmi(cm, xd, mi_row, mi_col, rf, col_offset, ref_mv_stack,
refmv_count, col_match_count, dummy_newmv_count,
#if USE_CUR_GM_REFMV
gm_mv_candidates,
#endif // USE_CUR_GM_REFMV
max_col_offset, &processed_cols);
ref_match_count[ref_frame] =
(row_match_count[ref_frame] > 0) + (col_match_count[ref_frame] > 0);
......
......@@ -18,8 +18,7 @@
extern "C" {
#endif
#define MVREF_ROWS 3
#define MVREF_COLS 3
#define MVREF_ROW_COLS 3
// Set the upper limit of the motion vector component magnitude.
// This would make a motion vector fit in 26 bits. Plus 3 bits for the
......
......@@ -20,7 +20,7 @@
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/bitops.h"
#include "av1/common/enums.h"
//#include "av1/common/enums.h"
#ifdef __cplusplus
extern "C" {
......
......@@ -65,8 +65,12 @@ extern "C" {
#define MAX_NUM_TEMPORAL_LAYERS 8
#define MAX_NUM_SPATIAL_LAYERS 4
/* clang-format off */
// clang-format seems to think this is a pointer dereference and not a
// multiplication.
#define MAX_NUM_OPERATING_POINTS \
MAX_NUM_TEMPORAL_LAYERS + MAX_NUM_SPATIAL_LAYERS
MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS
/* clang-format on*/
// TODO(jingning): Turning this on to set up transform coefficient
// processing timer.
......@@ -193,7 +197,8 @@ typedef struct SequenceHeader {
int force_integer_mv; // 0 - Not to force. MV can be in 1/4 or 1/8
// 1 - force to integer
// 2 - adaptive
int monochrome;
int still_picture; // Video is a single frame still picture
int monochrome; // Monochrome video
int enable_filter_intra; // enables/disables filterintra
int enable_intra_edge_filter; // enables/disables corner/edge/upsampling
int enable_interintra_compound; // enables/disables interintra_compound
......@@ -252,6 +257,7 @@ typedef struct AV1Common {
int largest_tile_id;
size_t largest_tile_size;
int context_update_tile_id;
// Scale of the current frame with respect to itself.
struct scale_factors sf_identity;
......
......@@ -16,7 +16,7 @@
#include "av1/common/seg_common.h"
#include "av1/common/blockd.h"
static const int16_t dc_qlookup_Q3[QINDEX_RANGE] = {
const int16_t dc_qlookup_Q3[QINDEX_RANGE] = {
4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
......@@ -88,7 +88,7 @@ static const int16_t dc_qlookup_12_Q3[QINDEX_RANGE] = {
19718, 20521, 21387,
};
static const int16_t ac_qlookup_Q3[QINDEX_RANGE] = {
const int16_t ac_qlookup_Q3[QINDEX_RANGE] = {
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
......
......@@ -305,36 +305,33 @@ const uint8_t *av1_get_compound_type_mask(
// Builds a difference-weighted blend mask from two CONV_BUF_TYPE prediction
// buffers.
//
// For every (i, j) in an h x w region, the absolute difference between
// src0 and src1 is rounded down by `round` bits, divided by DIFF_FACTOR,
// offset by mask_base and clamped to [0, AOM_BLEND_A64_MAX_ALPHA]. When
// which_inverse is non-zero the complement (AOM_BLEND_A64_MAX_ALPHA - m) is
// stored instead.
//
// NOTE(review): this listing interleaves the pre-change and post-change
// lines of a diff. The signature appears both with and without the
// `BLOCK_SIZE sb_type` parameter, and the mask store appears both with a
// row stride of block_size_wide[sb_type] and with a row stride of w.
// Resolve to one side before compiling.
static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
const CONV_BUF_TYPE *src0, int src0_stride,
const CONV_BUF_TYPE *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w,
ConvolveParams *conv_params, int bd) {
const CONV_BUF_TYPE *src1, int src1_stride, int h,
int w, ConvolveParams *conv_params, int bd) {
// Number of bits the raw difference is rounded down by; depends on the
// convolve rounding stages and on the bit depth.
// NOTE(review): presumably brings the intermediate-precision difference
// back to an 8-bit scale -- confirm against the convolve code.
int round =
2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
int i, j, m, diff;
// Mask row stride used by the pre-change store below.
int block_stride = block_size_wide[sb_type];
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
diff = ROUND_POWER_OF_TWO(diff, round);
// Larger differences give a larger weight, saturating at the maximum alpha.
m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
// Pre-change store: rows are block_stride apart.
mask[i * block_stride + j] =
which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
// Post-change store: rows are packed w apart.
mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
}
}
}
static void build_compound_diffwtd_mask_d16(
uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w, ConvolveParams *conv_params, int bd) {
int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
ConvolveParams *conv_params, int bd) {
switch (mask_type) {
case DIFFWTD_38:
diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
sb_type, h, w, conv_params, bd);
diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
conv_params, bd);
break;
case DIFFWTD_38_INV:
diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
sb_type, h, w, conv_params, bd);
diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
conv_params, bd);
break;
default: assert(0);
}
......@@ -342,33 +339,29 @@ static void build_compound_diffwtd_mask_d16(
// Builds a difference-weighted blend mask from two 8-bit pixel buffers.
//
// For every (i, j) in an h x w region, the absolute difference between
// src0 and src1 is divided by DIFF_FACTOR, offset by mask_base and clamped
// to [0, AOM_BLEND_A64_MAX_ALPHA]. When which_inverse is non-zero the
// complement (AOM_BLEND_A64_MAX_ALPHA - m) is stored instead.
//
// NOTE(review): this listing interleaves the pre-change and post-change
// lines of a diff. The signature appears both with and without the
// `BLOCK_SIZE sb_type` parameter, and the mask store appears both with a
// row stride of block_size_wide[sb_type] and with a row stride of w.
// Resolve to one side before compiling.
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w) {
const uint8_t *src1, int src1_stride, int h, int w) {
int i, j, m, diff;
// Mask row stride used by the pre-change store below.
int block_stride = block_size_wide[sb_type];
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
// Cast to int before subtracting so the difference can go negative.
diff =
abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
// Pre-change store: rows are block_stride apart.
mask[i * block_stride + j] =
which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
// Post-change store: rows are packed w apart.
mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
}
}
}
void build_compound_diffwtd_mask(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w) {
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
DIFFWTD_MASK_TYPE mask_type,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
int h, int w) {
switch (mask_type) {
case DIFFWTD_38:
diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
h, w);
diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
break;
case DIFFWTD_38_INV:
diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
h, w);
diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
break;
default: assert(0);
}
......@@ -376,38 +369,32 @@ void build_compound_diffwtd_mask(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type,
// Builds a difference-weighted blend mask from two 16-bit pixel buffers.
//
// For every (i, j) in an h x w region, the absolute difference between
// src0 and src1 is right-shifted by (bd - 8), divided by DIFF_FACTOR,
// offset by mask_base and clamped to [0, AOM_BLEND_A64_MAX_ALPHA]. When
// which_inverse is non-zero the complement (AOM_BLEND_A64_MAX_ALPHA - m) is
// stored instead.
//
// NOTE(review): this listing interleaves the pre-change and post-change
// lines of a diff. The signature appears both with and without the
// `BLOCK_SIZE sb_type` parameter, and the mask store appears both with a
// row stride of block_size_wide[sb_type] and with a row stride of w.
// Resolve to one side before compiling.
static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
const uint16_t *src0, int src0_stride,
const uint16_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w, int bd) {
const uint16_t *src1, int src1_stride, int h,
int w, int bd) {
int i, j, m, diff;
// Mask row stride used by the pre-change store below.
int block_stride = block_size_wide[sb_type];
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
// Shift by (bd - 8) to drop the extra bit-depth precision from the
// difference before it is weighted.
diff = abs((int)src0[i * src0_stride + j] -
(int)src1[i * src1_stride + j]) >>
(bd - 8);
m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
// Pre-change store: rows are block_stride apart.
mask[i * block_stride + j] =
which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
// Post-change store: rows are packed w apart.
mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
}
}
}
void build_compound_diffwtd_mask_highbd(uint8_t *mask,
DIFFWTD_MASK_TYPE mask_type,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w,
int bd) {
void av1_build_compound_diffwtd_mask_highbd_c(
uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
int bd) {
switch (mask_type) {
case DIFFWTD_38:
diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
bd);
CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
break;
case DIFFWTD_38_INV:
diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
bd);
CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
break;
default: assert(0);
}
......@@ -645,9 +632,9 @@ void av1_make_masked_inter_predictor(
xd, can_use_previous);
if (!plane && comp_data.interinter_compound_type == COMPOUND_DIFFWTD) {
build_compound_diffwtd_mask_d16(
comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
tmp_buf16, tmp_buf_stride, mi->sb_type, h, w, conv_params, xd->bd);
build_compound_diffwtd_mask_d16(comp_data.seg_mask, comp_data.mask_type,
org_dst, org_dst_stride, tmp_buf16,
tmp_buf_stride, h, w, conv_params, xd->bd);
}
build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
tmp_buf16, tmp_buf_stride, &comp_data,
......@@ -1861,15 +1848,14 @@ static void build_wedge_inter_predictor_from_buf(
if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type)) {
if (!plane && comp_data.interinter_compound_type == COMPOUND_DIFFWTD) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_compound_diffwtd_mask_highbd(
av1_build_compound_diffwtd_mask_highbd(
comp_data.seg_mask, comp_data.mask_type,
CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, mbmi->sb_type, h, w,
xd->bd);
CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd);
else
build_compound_diffwtd_mask(comp_data.seg_mask, comp_data.mask_type,
ext_dst0, ext_dst_stride0, ext_dst1,
ext_dst_stride1, mbmi->sb_type, h, w);
av1_build_compound_diffwtd_mask(comp_data.seg_mask, comp_data.mask_type,
ext_dst0, ext_dst_stride0, ext_dst1,
ext_dst_stride1, h, w);
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
......
......@@ -176,17 +176,6 @@ static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
return wedge_params_lookup[sb_type].bits;
}
void build_compound_diffwtd_mask(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w);
void build_compound_diffwtd_mask_highbd(uint8_t *mask,
DIFFWTD_MASK_TYPE mask_type,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w,
int bd);
void av1_make_masked_inter_predictor(
const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
......
This diff is collapsed.
......@@ -14,7 +14,7 @@
#include "av1