Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
70884269
Commit
70884269
authored
Jun 18, 2013
by
Jingning Han
Committed by
Gerrit Code Review
Jun 18, 2013
Browse files
Merge "Make fdct32 computation flow within 16bit range"
parents
f231a3ed
a41a4860
Changes
6
Hide whitespace changes
Inline
Side-by-side
vp9/common/vp9_idct.h
View file @
70884269
...
...
@@ -71,12 +71,6 @@ static INLINE int dct_const_round_shift(int input) {
return
rv
;
}
// Applies ROUND_POWER_OF_TWO(input, DCT_CONST_BITS) to `input` and returns the
// result. When assert() is active, the rounded value is checked to lie in
// [-131072, 131071], i.e. the signed 18-bit range.
static INLINE int dct_32_round(int input) {
  const int rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  assert(rounded >= -131072 && rounded <= 131071);
  return rounded;
}
// Pointer to a function that takes two int16_t buffers and returns nothing.
// NOTE(review): presumably (input, output) of a 1-D transform — confirm
// against the functions assigned to this type.
typedef
void
(
*
transform_1d
)(
int16_t
*
,
int16_t
*
);
typedef
struct
{
...
...
vp9/common/vp9_rtcd_defs.sh
View file @
70884269
...
...
@@ -572,6 +572,9 @@ specialize vp9_short_fdct8x4 sse2
prototype void vp9_short_fdct32x32
"int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct32x32
prototype void vp9_short_fdct32x32_rd
"int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct32x32_rd
prototype void vp9_short_fdct16x16
"int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct16x16 sse2
...
...
vp9/encoder/vp9_block.h
View file @
70884269
...
...
@@ -139,6 +139,9 @@ struct macroblock {
int
optimize
;
// Non-zero while in the RD search loop; zero during the actual encoding pass.
int
rd_search
;
// TODO(jingning): Need to refactor the structure arrays that buffer the
// coding mode decisions of each partition type.
PICK_MODE_CONTEXT
ab4x4_context
[
4
][
4
][
4
];
...
...
vp9/encoder/vp9_dct.c
View file @
70884269
...
...
@@ -991,8 +991,18 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
}
}
// Applies ROUND_POWER_OF_TWO(input, DCT_CONST_BITS) to `input` and returns the
// result. When assert() is active, the rounded value is checked to lie in
// [-131072, 131071], i.e. the signed 18-bit range.
static INLINE int dct_32_round(int input) {
  const int rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  assert(rounded >= -131072 && rounded <= 131071);
  return rounded;
}
// Adds a rounding offset to `input` (1 for non-negative values, 2 for
// negative values) and then shifts the sum right by two bits.
static INLINE int half_round_shift(int input) {
  const int offset = (input < 0) ? 2 : 1;
  return (input + offset) >> 2;
}
static
void
dct32_1d
(
int
*
input
,
int
*
output
)
{
static
void
dct32_1d
(
int
*
input
,
int
*
output
,
int
round
)
{
int
step
[
32
];
// Stage 1
step
[
0
]
=
input
[
0
]
+
input
[(
32
-
1
)];
...
...
@@ -1101,6 +1111,44 @@ static void dct32_1d(int *input, int *output) {
step
[
30
]
=
output
[
30
]
+
output
[
25
];
step
[
31
]
=
output
[
31
]
+
output
[
24
];
// damp the magnitude by half, hence the intermediate values are within
// the range of 16 bits.
if
(
round
)
{
step
[
0
]
=
half_round_shift
(
step
[
0
]);
step
[
1
]
=
half_round_shift
(
step
[
1
]);
step
[
2
]
=
half_round_shift
(
step
[
2
]);
step
[
3
]
=
half_round_shift
(
step
[
3
]);
step
[
4
]
=
half_round_shift
(
step
[
4
]);
step
[
5
]
=
half_round_shift
(
step
[
5
]);
step
[
6
]
=
half_round_shift
(
step
[
6
]);
step
[
7
]
=
half_round_shift
(
step
[
7
]);
step
[
8
]
=
half_round_shift
(
step
[
8
]);
step
[
9
]
=
half_round_shift
(
step
[
9
]);
step
[
10
]
=
half_round_shift
(
step
[
10
]);
step
[
11
]
=
half_round_shift
(
step
[
11
]);
step
[
12
]
=
half_round_shift
(
step
[
12
]);
step
[
13
]
=
half_round_shift
(
step
[
13
]);
step
[
14
]
=
half_round_shift
(
step
[
14
]);
step
[
15
]
=
half_round_shift
(
step
[
15
]);
step
[
16
]
=
half_round_shift
(
step
[
16
]);
step
[
17
]
=
half_round_shift
(
step
[
17
]);
step
[
18
]
=
half_round_shift
(
step
[
18
]);
step
[
19
]
=
half_round_shift
(
step
[
19
]);
step
[
20
]
=
half_round_shift
(
step
[
20
]);
step
[
21
]
=
half_round_shift
(
step
[
21
]);
step
[
22
]
=
half_round_shift
(
step
[
22
]);
step
[
23
]
=
half_round_shift
(
step
[
23
]);
step
[
24
]
=
half_round_shift
(
step
[
24
]);
step
[
25
]
=
half_round_shift
(
step
[
25
]);
step
[
26
]
=
half_round_shift
(
step
[
26
]);
step
[
27
]
=
half_round_shift
(
step
[
27
]);
step
[
28
]
=
half_round_shift
(
step
[
28
]);
step
[
29
]
=
half_round_shift
(
step
[
29
]);
step
[
30
]
=
half_round_shift
(
step
[
30
]);
step
[
31
]
=
half_round_shift
(
step
[
31
]);
}
// Stage 4
output
[
0
]
=
step
[
0
]
+
step
[
3
];
output
[
1
]
=
step
[
1
]
+
step
[
2
];
...
...
@@ -1283,12 +1331,12 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
int
output
[
32
*
32
];
// Columns
for
(
i
=
0
;
i
<
32
;
i
++
)
{
for
(
i
=
0
;
i
<
32
;
++
i
)
{
int
temp_in
[
32
],
temp_out
[
32
];
for
(
j
=
0
;
j
<
32
;
j
++
)
for
(
j
=
0
;
j
<
32
;
++
j
)
temp_in
[
j
]
=
input
[
j
*
shortpitch
+
i
]
<<
2
;
dct32_1d
(
temp_in
,
temp_out
);
for
(
j
=
0
;
j
<
32
;
j
++
)
dct32_1d
(
temp_in
,
temp_out
,
0
);
for
(
j
=
0
;
j
<
32
;
++
j
)
output
[
j
*
32
+
i
]
=
(
temp_out
[
j
]
+
1
+
(
temp_out
[
j
]
>
0
))
>>
2
;
}
...
...
@@ -1297,8 +1345,37 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
int
temp_in
[
32
],
temp_out
[
32
];
for
(
j
=
0
;
j
<
32
;
++
j
)
temp_in
[
j
]
=
output
[
j
+
i
*
32
];
dct32_1d
(
temp_in
,
temp_out
);
dct32_1d
(
temp_in
,
temp_out
,
0
);
for
(
j
=
0
;
j
<
32
;
++
j
)
out
[
j
+
i
*
32
]
=
(
temp_out
[
j
]
+
1
+
(
temp_out
[
j
]
<
0
))
>>
2
;
}
}
// 2-D 32x32 forward DCT used by the rate-distortion optimization loop.
//
// Note that although dct_32_round is used in the dct32_1d computation flow,
// this variant operates within 16 bits of precision: the row pass calls
// dct32_1d with round = 1, which scales the intermediate values down.
//
//   input - source block; element (row j, column i) is read from
//           input[j * (pitch >> 1) + i].
//   out   - receives the 32 * 32 coefficients, written row by row.
//   pitch - row stride of `input`; it is halved to get the element stride
//           (presumably a byte count — confirm against callers).
void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {
  int shortpitch = pitch >> 1;
  int i, j;
  int output[32 * 32];

  // Columns
  for (i = 0; i < 32; ++i) {
    int temp_in[32], temp_out[32];
    // Scale up by 4 with a multiply: left-shifting a negative sample is
    // undefined behaviour in C, while the multiplication is well defined.
    for (j = 0; j < 32; ++j)
      temp_in[j] = input[j * shortpitch + i] * 4;
    dct32_1d(temp_in, temp_out, 0);
    // Rounding offset is 2 for positive values and 1 otherwise.
    for (j = 0; j < 32; ++j)
      output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
  }

  // Rows
  for (i = 0; i < 32; ++i) {
    int temp_in[32], temp_out[32];
    for (j = 0; j < 32; ++j)
      temp_in[j] = output[j + i * 32];
    // round = 1: dct32_1d damps its intermediate values itself, so the
    // results are stored without a further rounding shift.
    dct32_1d(temp_in, temp_out, 1);
    for (j = 0; j < 32; ++j)
      out[j + i * 32] = temp_out[j];
  }
}
vp9/encoder/vp9_encodeframe.c
View file @
70884269
...
...
@@ -602,6 +602,8 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
MACROBLOCK
*
const
x
=
&
cpi
->
mb
;
MACROBLOCKD
*
const
xd
=
&
x
->
e_mbd
;
x
->
rd_search
=
1
;
if
(
bsize
<
BLOCK_SIZE_SB8X8
)
if
(
xd
->
ab_index
!=
0
)
return
;
...
...
@@ -1974,6 +1976,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
const
int
mis
=
cm
->
mode_info_stride
;
const
int
bwl
=
mi_width_log2
(
bsize
);
const
int
bw
=
1
<<
bwl
,
bh
=
1
<<
mi_height_log2
(
bsize
);
x
->
rd_search
=
0
;
if
(
cm
->
frame_type
==
KEY_FRAME
)
{
if
(
cpi
->
oxcf
.
tuning
==
VP8_TUNE_SSIM
)
{
...
...
vp9/encoder/vp9_encodemb.c
View file @
70884269
...
...
@@ -462,7 +462,10 @@ static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
switch
(
ss_txfrm_size
/
2
)
{
case
TX_32X32
:
vp9_short_fdct32x32
(
src_diff
,
coeff
,
bw
*
2
);
if
(
x
->
rd_search
)
vp9_short_fdct32x32_rd
(
src_diff
,
coeff
,
bw
*
2
);
else
vp9_short_fdct32x32
(
src_diff
,
coeff
,
bw
*
2
);
break
;
case
TX_16X16
:
tx_type
=
plane
==
0
?
get_tx_type_16x16
(
xd
,
raster_block
)
:
DCT_DCT
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment