Commit b26232eb authored by Linfeng Zhang

Update filter_selectively_vert_row2()

Reduce operations and jumps. perf shows this function's CPU time dropping
from 1.9% to 1.6% when decoding fdJc1_IBKJA.248.webm on a Xeon E5.
Will apply the changes to vp10 after code review.

Change-Id: I9351509922855d8896ddef1ed093b3ca12619a61
parent f80d8011
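
As a rough illustration of the mask arithmetic (a minimal sketch, not the
libvpx code): the two rows' per-column bits already arrive packed into one
mask, so testing them with a combined dual_one bit pair lets each loop
iteration do a single shift instead of eight separate ones and skip columns
where neither row needs filtering. walk_dual_mask() below is a hypothetical
helper that only mirrors the loop shape; it makes no libvpx filter calls.

#include <stdio.h>

/* Minimal sketch of the combined-mask loop; walk_dual_mask() is a
 * hypothetical helper, not libvpx code.  mask_row0/mask_row1 hold one
 * bit per 8x8 column for the two rows handled together. */
static void walk_dual_mask(unsigned int mask_row0, unsigned int mask_row1,
                           int subsampling_factor) {
  const int lfl_forward = subsampling_factor ? 4 : 8;
  const unsigned int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
  const unsigned int dual_one = 1u | (1u << lfl_forward);
  /* Row 1's bits sit above row 0's bits, matching the masks the
   * function receives. */
  unsigned int mask =
      (mask_row0 | (mask_row1 << lfl_forward)) & dual_mask_cutoff;
  int col = 0;

  /* One shift per column instead of eight; "& ~dual_one" clears the two
   * bits just handled, so the loop ends as soon as no work remains. */
  for (; mask; mask = (mask & ~dual_one) >> 1, ++col) {
    if (mask & dual_one) {
      printf("col %d: row0=%u row1=%u\n", col, mask & 1u,
             (mask >> lfl_forward) & 1u);
    }
  }
}

int main(void) {
  walk_dual_mask(0x5, 0xa, 1); /* arbitrary 4:2:0-style example masks */
  return 0;
}
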
@@ -298,196 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
static void filter_selectively_vert_row2(int subsampling_factor,
uint8_t *s, int pitch,
unsigned int mask_16x16_l,
unsigned int mask_8x8_l,
unsigned int mask_4x4_l,
unsigned int mask_4x4_int_l,
const loop_filter_info_n *lfi_n,
unsigned int mask_16x16,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
const int mask_shift = subsampling_factor ? 4 : 8;
const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
mask; mask >>= 1) {
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
uint8_t *ss[2];
ss[0] = s;
for (mask =
(mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
mask; mask = (mask & ~dual_one) >> 1) {
if (mask & dual_one) {
const loop_filter_thresh *lfis[2];
lfis[0] = lfthr + *lfl;
lfis[1] = lfthr + *(lfl + lfl_forward);
ss[1] = ss[0] + 8 * pitch;
if (mask_16x16 & dual_one) {
if ((mask_16x16 & dual_one) == dual_one) {
vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
lfis[0]->hev_thr);
} else {
vpx_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
if (mask_8x8 & dual_one) {
if ((mask_8x8 & dual_one) == dual_one) {
vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
lfis[0]->hev_thr, lfis[1]->mblim,
lfis[1]->lim, lfis[1]->hev_thr);
} else {
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
if (mask_4x4 & dual_one) {
if ((mask_4x4 & dual_one) == dual_one) {
vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
lfis[0]->hev_thr, lfis[1]->mblim,
lfis[1]->lim, lfis[1]->hev_thr);
} else {
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
if (mask_4x4_int & dual_one) {
if ((mask_4x4_int & dual_one) == dual_one) {
vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr);
} else {
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr);
}
}
}
s += 8;
ss[0] += 8;
lfl += 1;
mask_16x16_0 >>= 1;
mask_8x8_0 >>= 1;
mask_4x4_0 >>= 1;
mask_4x4_int_0 >>= 1;
mask_16x16_1 >>= 1;
mask_8x8_1 >>= 1;
mask_4x4_1 >>= 1;
mask_4x4_int_1 >>= 1;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
mask_4x4_int >>= 1;
}
}
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(int subsampling_factor,
uint16_t *s, int pitch,
unsigned int mask_16x16_l,
unsigned int mask_8x8_l,
unsigned int mask_4x4_l,
unsigned int mask_4x4_int_l,
const loop_filter_info_n *lfi_n,
unsigned int mask_16x16,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
const int mask_shift = subsampling_factor ? 4 : 8;
const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
mask; mask >>= 1) {
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else if (mask_16x16_0 & 1) {
vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
uint16_t *ss[2];
ss[0] = s;
for (mask =
(mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
mask; mask = (mask & ~dual_one) >> 1) {
if (mask & dual_one) {
const loop_filter_thresh *lfis[2];
lfis[0] = lfthr + *lfl;
lfis[1] = lfthr + *(lfl + lfl_forward);
ss[1] = ss[0] + 8 * pitch;
if (mask_16x16 & dual_one) {
if ((mask_16x16 & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, bd);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_8x8_0 & 1) {
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_8x8 & dual_one) {
if ((mask_8x8 & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, bd);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_0 & 1) {
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_4x4 & dual_one) {
if ((mask_4x4 & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, bd);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_int_0 & 1) {
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_4x4_int & dual_one) {
if ((mask_4x4_int & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
lfi->mblim, lfi->lim, lfi->hev_thr, bd);
}
}
}
s += 8;
ss[0] += 8;
lfl += 1;
mask_16x16_0 >>= 1;
mask_8x8_0 >>= 1;
mask_4x4_0 >>= 1;
mask_4x4_int_0 >>= 1;
mask_16x16_1 >>= 1;
mask_8x8_1 >>= 1;
mask_4x4_1 >>= 1;
mask_4x4_int_1 >>= 1;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
mask_4x4_int >>= 1;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -497,17 +469,17 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
count = 1;
if (mask & 1) {
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@@ -520,7 +492,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -549,7 +521,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -574,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
} else if (mask_4x4_int & 1) {
} else {
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
@@ -594,17 +566,17 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
count = 1;
if (mask & 1) {
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@@ -617,7 +589,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -650,7 +622,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -679,7 +651,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4_int & 1) {
} else {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
}
@@ -1079,13 +1051,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@@ -1113,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@@ -1250,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3],
cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert(dst->buf, dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]);
cm->lf_info.lfthr, &lfl[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_vert(dst->buf, dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
@@ -1299,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3],
cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]);
cm->lf_info.lfthr, &lfl[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_horiz(dst->buf, dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
}
@@ -1337,27 +1299,20 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
// Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
unsigned int mask_16x16_l = mask_16x16 & 0xffff;
unsigned int mask_8x8_l = mask_8x8 & 0xffff;
unsigned int mask_4x4_l = mask_4x4 & 0xffff;
unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
// Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
mask_16x16, mask_8x8, mask_4x4, mask_4x4_int, cm->lf_info.lfthr,
&lfm->lfl_y[r << 3], (int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
plane->subsampling_x, dst->buf, dst->stride, mask_16x16, mask_8x8,
mask_4x4, mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 16;
@@ -1390,19 +1345,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
(int)cm->bit_depth);
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff,
cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]);
mask_4x4_r, mask_4x4_int & 0xff,
cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@@ -1436,38 +1390,29 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
}
{
unsigned int mask_16x16_l = mask_16x16 & 0xff;
unsigned int mask_8x8_l = mask_8x8 & 0xff;
unsigned int mask_4x4_l = mask_4x4 & 0xff;
unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
// Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfl_uv[r << 1], (int)cm->bit_depth);
} else {
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfl_uv[r << 1]);
}
#else
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfl_uv[r << 1]);
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(plane->subsampling_x,
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16, mask_8x8,
mask_4x4, mask_4x4_int,
cm->lf_info.lfthr, &lfl_uv[r << 1],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 8;
mask_8x8 >>= 8;
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
mask_16x16, mask_8x8, mask_4x4, mask_4x4_int,
cm->lf_info.lfthr, &lfl_uv[r << 1]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 8;
mask_8x8 >>= 8;
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
}