Guillaume Martres / aom-rav1e / Commits
7acb2a87
Commit
7acb2a87
authored
Apr 01, 2015
by
Jingning Han
Committed by
Gerrit Code Review
Apr 01, 2015
Browse files
Merge "Refactor block_yrd function for RTC coding mode"
parents
a0043c6d
1470529f
Changes
4
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_rtcd_defs.pl

@@ -1171,6 +1171,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
 specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
 
+add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+specialize qw/vp9_block_error_fp sse2/;
+
 add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
 specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
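Reviewer note: add_proto declares the C signature and specialize lists the SIMD variants; libvpx's rtcd generator then emits per-variant prototypes and a runtime-dispatched symbol in the generated rtcd header. The snippet below is an illustrative sketch of that output for the new pair of lines, not verbatim generator output (RTCD_EXTERN is stubbed so it compiles standalone; the _c/_sse2 suffixes follow the generator's naming convention).

#include <stdint.h>

/* Sketch of generated RTCD declarations (illustrative, not verbatim). */
#ifndef RTCD_EXTERN
#define RTCD_EXTERN extern
#endif

int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size);
int64_t vp9_block_error_fp_sse2(const int16_t *coeff, const int16_t *dqcoeff,
                                int block_size);
/* Runtime-dispatched entry point, resolved to the best variant for the CPU. */
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const int16_t *coeff,
                                          const int16_t *dqcoeff,
                                          int block_size);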
vp9/encoder/vp9_pickmode.c

@@ -557,6 +557,20 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
     *out_dist_sum += dist << 4;
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
+                      int *skippable, int64_t *sse, int plane,
+                      BLOCK_SIZE bsize, TX_SIZE tx_size) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  unsigned int var_y, sse_y;
+  (void)plane;
+  (void)tx_size;
+  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
+  *sse = INT_MAX;
+  *skippable = 0;
+  return;
+}
+#else
 static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
                       int *skippable, int64_t *sse, int plane,
                       BLOCK_SIZE bsize, TX_SIZE tx_size) {
@@ -574,23 +588,9 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
   const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
       xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-#if CONFIG_VP9_HIGHBITDEPTH
-  unsigned int var_y, sse_y;
-  model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
-  *sse = INT_MAX;
-  *skippable = 0;
-  return;
-#else
-  (void)cpi;
-#endif
 
   vp9_subtract_plane(x, bsize, plane);
   *skippable = 1;
-  *rate = 0;
-  *dist = 0;
-  *sse = 0;
 
   // Keep track of the row and column of the blocks we use so that we know
   // if we are in the unrestricted motion border.
   for (r = 0; r < max_blocks_high; r += block_step) {
@@ -604,7 +604,6 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
         const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
         int i, j;
         const int16_t *src_diff;
-        int64_t this_sse;
         txfrm_block_to_raster_xy(bsize, tx_size, block, &i, &j);
         src_diff = &p->src_diff[4 * (j * diff_stride + i)];
@@ -641,16 +640,36 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
           assert(0);
           break;
       }
        *skippable &= (*eob == 0);
      }
      block += step;
    }
  }
 
+  if (*skippable && *sse < INT64_MAX) {
+    *dist = (*sse << 6) >> shift;
+    *sse = *dist;
+    return;
+  }
-        *dist += vp9_block_error(coeff, dqcoeff, step << 4, &this_sse) >> shift;
+
+  block = 0;
+  *rate = 0;
+  *dist = 0;
+  *sse = (*sse << 6) >> shift;
+  for (r = 0; r < max_blocks_high; r += block_step) {
+    for (c = 0; c < num_4x4_w; c += block_step) {
+      if (c < max_blocks_wide) {
+        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+        uint16_t *const eob = &p->eobs[block];
+
+        if (*eob == 1)
+          *rate += (int)abs(qcoeff[0]);
+        else if (*eob > 1)
+          *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
-        *sse += (this_sse >> shift);
-        *skippable &= (*eob == 0);
+
+        *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
+      }
+      block += step;
+    }
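Reviewer note: the new rate term leans on vp9_satd, which for this call pattern amounts to a sum of absolute quantized coefficients. A minimal standalone restatement of that assumed reference behavior (my sketch, not the tree's vp9_satd_c source):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Assumed behavior of vp9_satd as called above: sum of absolute values
 * over `length` quantized coefficients. */
static int satd_ref(const int16_t *coeff, int length) {
  int i, satd = 0;
  for (i = 0; i < length; ++i) satd += abs(coeff[i]);
  return satd;
}

int main(void) {
  const int16_t q[4] = { 3, -4, 0, 1 };
  printf("satd = %d\n", satd_ref(q, 4)); /* 3 + 4 + 0 + 1 = 8 */
  return 0;
}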
@@ -659,6 +678,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
   *rate <<= 8;
   *rate *= 6;
 }
+#endif
 
 static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                MACROBLOCK *x, MACROBLOCKD *xd,

@@ -866,7 +886,7 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
   int i, j;
   int rate;
   int64_t dist;
-  int64_t this_sse;
+  int64_t this_sse = INT64_MAX;
   int is_skippable;
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);

@@ -1328,6 +1348,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     }
 
     if (bsize <= BLOCK_16X16) {
+      this_sse = (int64_t)sse_y;
      block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable,
                &this_sse, 0, bsize, mbmi->tx_size);
      x->skip_txfm[0] = is_skippable;
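Reviewer note: the last hunk seeds this_sse with the model's sse_y before calling block_yrd, so the early-skip path (`if (*skippable && *sse < INT64_MAX)`) has a usable SSE to scale instead of the INT64_MAX sentinel. A toy restatement of that path's arithmetic; the shift value is a stand-in, since the per-tx_size shift is defined outside this diff:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Seeded from the model's sse_y at the call site, as in the hunk above. */
  int64_t sse = 1000;
  /* Stand-in value: the actual shift depends on tx_size. */
  const int shift = 2;
  /* block_yrd's early-skip path: distortion is the scaled model SSE. */
  int64_t dist = (sse << 6) >> shift;  /* (1000 * 64) / 4 = 16000 */
  sse = dist;
  printf("dist = %lld, sse = %lld\n", (long long)dist, (long long)sse);
  return 0;
}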
vp9/encoder/vp9_rdopt.c

@@ -292,6 +292,18 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
   return error;
 }
 
+int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
+                             int block_size) {
+  int i;
+  int64_t error = 0;
+
+  for (i = 0; i < block_size; i++) {
+    const int diff = coeff[i] - dqcoeff[i];
+    error += diff * diff;
+  }
+  return error;
+}
+
 #if CONFIG_VP9_HIGHBITDEPTH
 int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
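Reviewer note: as a sanity check on the reference loop above, here is a tiny standalone harness. The local ssd_ref is my copy of the vp9_block_error_fp_c body, renamed so it builds outside the tree:

#include <stdint.h>
#include <stdio.h>

/* Local copy of the vp9_block_error_fp_c loop above, renamed for a
 * standalone build. */
static int64_t ssd_ref(const int16_t *coeff, const int16_t *dqcoeff,
                       int block_size) {
  int i;
  int64_t error = 0;
  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }
  return error;
}

int main(void) {
  const int16_t coeff[4]   = { 100, -50, 25, 0 };
  const int16_t dqcoeff[4] = {  96, -48, 24, 0 };
  /* Differences are 4, -2, 1, 0 -> SSD = 16 + 4 + 1 + 0 = 21. */
  printf("ssd = %lld\n", (long long)ssd_ref(coeff, dqcoeff, 4));
  return 0;
}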
vp9/encoder/x86/vp9_error_sse2.asm

@@ -72,3 +72,49 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
   movd  edx, m5
 %endif
   RET
+
+; Compute the sum of squared difference between two int16_t vectors.
+; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
+;                            intptr_t block_size)
+
+INIT_XMM sse2
+cglobal block_error_fp, 3, 3, 8, uqc, dqc, size
+  pxor      m4, m4                 ; sse accumulator
+  pxor      m5, m5                 ; dedicated zero register
+  lea       uqcq, [uqcq+sizeq*2]
+  lea       dqcq, [dqcq+sizeq*2]
+  neg       sizeq
+.loop:
+  mova      m2, [uqcq+sizeq*2]
+  mova      m0, [dqcq+sizeq*2]
+  mova      m3, [uqcq+sizeq*2+mmsize]
+  mova      m1, [dqcq+sizeq*2+mmsize]
+  psubw     m0, m2
+  psubw     m1, m3
+  ; individual errors are max. 15bit+sign, so squares are 30bit, and
+  ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+  pmaddwd   m0, m0
+  pmaddwd   m1, m1
+  ; accumulate in 64bit
+  punpckldq m7, m0, m5
+  punpckhdq m0, m5
+  paddq     m4, m7
+  punpckldq m7, m1, m5
+  paddq     m4, m0
+  punpckhdq m1, m5
+  paddq     m4, m7
+  paddq     m4, m1
+  add       sizeq, mmsize
+  jl .loop
+
+  ; accumulate horizontally and store in return value
+  movhlps   m5, m4
+  paddq     m4, m5
+%if ARCH_X86_64
+  movq      rax, m4
+%else
+  pshufd    m5, m4, 0x1
+  movd      eax, m4
+  movd      edx, m5
+%endif
+  RET
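Reviewer note: for readers who do not speak SSE2, here is a rough scalar C mirror of what each .loop iteration accumulates. The kernel squares int16 differences, sums adjacent pairs into 32-bit lanes (pmaddwd), then widens each 32-bit partial sum to 64 bits before adding to the running total (the punpck + paddq sequence). This sketch mirrors the data flow only, not the instruction-level behavior:

#include <stdint.h>
#include <stdio.h>

/* Scalar mirror of the SSE2 kernel's inner step: square the int16
 * differences, sum adjacent pairs into 32-bit values (pmaddwd), then
 * zero-extend each 32-bit partial sum to 64 bits before accumulating
 * (the punpck-with-zero + paddq sequence above). */
static int64_t ssd_step(const int16_t *coeff, const int16_t *dqcoeff,
                        int n /* multiple of 2 */, int64_t acc) {
  int i;
  for (i = 0; i < n; i += 2) {
    const int32_t d0 = coeff[i] - dqcoeff[i];
    const int32_t d1 = coeff[i + 1] - dqcoeff[i + 1];
    /* |d| fits in 15 bits + sign, so d0*d0 + d1*d1 fits in 31 bits. */
    const int32_t pair = d0 * d0 + d1 * d1;
    acc += (int64_t)(uint32_t)pair;  /* widen, as punpck with the zero reg */
  }
  return acc;
}

int main(void) {
  const int16_t c[4] = { 5, -3, 2, 0 };
  const int16_t d[4] = { 4, -1, 2, 1 };
  /* diffs: 1, -2, 0, -1 -> pairs (1+4) and (0+1) -> total 6 */
  printf("ssd = %lld\n", (long long)ssd_step(c, d, 4, 0));
  return 0;
}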