Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
bf0570a7
Commit
bf0570a7
authored
Feb 22, 2013
by
Yaowu Xu
Committed by
Gerrit Code Review
Feb 22, 2013
Browse files
Merge "optimize 8x8 fdct rounding for accuracy" into experimental
parents
8bf1c4dd
22012ee9
Changes
3
Hide whitespace changes
Inline
Side-by-side
test/fdct8x8_test.cc
View file @
bf0570a7
...
...
@@ -141,7 +141,7 @@ TEST(VP9Fdct8x8Test, ExtremalCheck) {
// Initialize a test block with input range {-255, 255}.
for
(
int
j
=
0
;
j
<
64
;
++
j
)
test_input_block
[
j
]
=
rnd
.
Rand8
()
%
2
?
255
:
-
25
5
;
test_input_block
[
j
]
=
rnd
.
Rand8
()
%
2
?
255
:
-
25
6
;
const
int
pitch
=
16
;
vp9_short_fdct8x8_c
(
test_input_block
,
test_temp_block
,
pitch
);
...
...
test/test.mk
View file @
bf0570a7
...
...
@@ -72,7 +72,7 @@ endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9)
+=
convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
fdct4x4_test.cc
#
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
fdct8x8_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
variance_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
...
...
vp9/encoder/vp9_dct.c
View file @
bf0570a7
...
...
@@ -323,247 +323,6 @@ static const int16_t adst_i16[256] = {
};
#endif
#define NEW_FDCT8x8 1
#if !NEW_FDCT8x8
/*
 * Legacy fixed-point 8x8 forward DCT.
 *
 * The multipliers below are Q14 fixed-point constants (they are consumed
 * together with SHIFT_BITS == 14).  Numerically they match
 * cos(k * pi / 16) * 2^14 for k = 1..7, which is what the CkS(8-k) naming
 * suggests.
 */
static const int xC1S7 = 16069;
static const int xC2S6 = 15137;
static const int xC3S5 = 13623;
static const int xC4S4 = 11585;
static const int xC5S3 = 9102;
static const int xC6S2 = 6270;
static const int xC7S1 = 3196;

/* Number of fractional bits carried by every constant multiplication. */
#define SHIFT_BITS 14
/*
 * Kept verbatim for any remaining user of the macro.  Note that it expands to
 * a full statement (it carries its own ';').  The transform below uses
 * fdct8x8_round_shift() instead.
 */
#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
/* Down-scaling applied (with rounding) to the final coefficients. */
#define FINAL_SHIFT 3
#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
/* Up-scaling applied to the input samples before the row pass. */
#define IN_SHIFT (FINAL_SHIFT+1)

/*
 * Round a Q14 product to the nearest integer: add half an LSB, then drop the
 * SHIFT_BITS fractional bits.  Right-shifting a negative int is
 * implementation-defined; like the original code this relies on an
 * arithmetic shift.
 */
static int fdct8x8_round_shift(int product) {
  return (product + (1 << (SHIFT_BITS - 1))) >> SHIFT_BITS;
}

/*
 * One 8-point butterfly pass shared by the row and the column stage.
 *
 * in  - eight input samples (must not alias out).
 * out - eight transformed values, before any final rounding.
 */
static void fdct8x8_pass_1d(const int in[8], int out[8]) {
  /* Sums and differences of the sample pairs used by the butterflies. */
  const int is07 = in[0] + in[7];
  const int is12 = in[1] + in[2];
  const int is34 = in[3] + in[4];
  const int is56 = in[5] + in[6];

  const int id07 = in[0] - in[7];
  const int id12 = in[1] - in[2];
  const int id34 = in[3] - in[4];
  const int id56 = in[5] - in[6];

  const int is0734 = is07 + is34;
  const int is1256 = is12 + is56;

  /* Re-used products: c4s4 * (s12 - s56) and c4s4 * (d12 + d56). */
  const int icommon_product1 = fdct8x8_round_shift(xC4S4 * (is12 - is56));
  const int icommon_product2 = fdct8x8_round_shift(xC4S4 * (id12 + id56));

  int rot_x;
  int rot_y;

  /* Outputs 0 and 4. */
  out[0] = fdct8x8_round_shift(xC4S4 * (is0734 + is1256));
  out[4] = fdct8x8_round_shift(xC4S4 * (is0734 - is1256));

  /* Rotation for outputs 2 and 6. */
  rot_x = id12 - id56;
  rot_y = is07 - is34;
  out[2] = fdct8x8_round_shift(xC6S2 * rot_x) +
           fdct8x8_round_shift(xC2S6 * rot_y);
  out[6] = fdct8x8_round_shift(xC6S2 * rot_y) -
           fdct8x8_round_shift(xC2S6 * rot_x);

  /* Rotation for outputs 1 and 7. */
  rot_x = icommon_product1 + id07;
  rot_y = -(id34 + icommon_product2);
  out[1] = fdct8x8_round_shift(xC1S7 * rot_x) -
           fdct8x8_round_shift(xC7S1 * rot_y);
  out[7] = fdct8x8_round_shift(xC7S1 * rot_x) +
           fdct8x8_round_shift(xC1S7 * rot_y);

  /* Rotation for outputs 3 and 5. */
  rot_x = id07 - icommon_product1;
  rot_y = id34 - icommon_product2;
  out[3] = fdct8x8_round_shift(xC3S5 * rot_x) -
           fdct8x8_round_shift(xC5S3 * rot_y);
  out[5] = fdct8x8_round_shift(xC5S3 * rot_x) +
           fdct8x8_round_shift(xC3S5 * rot_y);
}

/*
 * Forward 8x8 DCT (C reference implementation).
 *
 * InputData  - 8 rows of 8 samples; consecutive rows are (pitch >> 1)
 *              elements apart.
 * OutputData - receives the 64 coefficients, stored with a row stride of 8.
 * pitch      - input row pitch; halved to obtain the stride in elements.
 *
 * The rows are transformed first into an int work buffer, then the columns.
 * Input samples are scaled up by 2^IN_SHIFT before the row pass, and the
 * column results are rounded and scaled down by 2^FINAL_SHIFT.
 */
void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
  const int short_pitch = pitch >> 1;
  int InterData[64];
  int lane[8];
  int result[8];
  int row;
  int col;
  int k;

  /* Row pass. */
  for (row = 0; row < 8; row++) {
    for (k = 0; k < 8; k++) {
      /*
       * Scale by multiplication rather than '<<': left-shifting a negative
       * value is undefined behaviour, the multiplication yields the same
       * result for every sample value.
       */
      lane[k] = InputData[row * short_pitch + k] * (1 << IN_SHIFT);
    }
    fdct8x8_pass_1d(lane, &InterData[row * 8]);
  }

  /* Column pass, followed by the final rounding and down-scaling. */
  for (col = 0; col < 8; col++) {
    for (k = 0; k < 8; k++) {
      lane[k] = InterData[k * 8 + col];
    }
    fdct8x8_pass_1d(lane, result);
    for (k = 0; k < 8; k++) {
      OutputData[k * 8 + col] =
          (short)((result[k] + FINAL_ROUNDING) >> FINAL_SHIFT);
    }
  }
}
#endif
/* For test */
#define TEST_INT 1
#if TEST_INT
...
...
@@ -918,7 +677,6 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
vp9_short_fdct4x4_c
(
input
+
4
,
output
+
16
,
pitch
);
}
#if NEW_FDCT8x8
static
void
fdct8_1d
(
int16_t
*
input
,
int16_t
*
output
)
{
int16_t
step
[
8
];
int
temp1
,
temp2
;
...
...
@@ -986,10 +744,9 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) {
temp_in
[
j
]
=
out
[
j
+
i
*
8
];
fdct8_1d
(
temp_in
,
temp_out
);
for
(
j
=
0
;
j
<
8
;
++
j
)
output
[
j
+
i
*
8
]
=
temp_out
[
j
]
>>
1
;
output
[
j
+
i
*
8
]
=
temp_out
[
j
]
/
2
;
}
}
#endif
#if CONFIG_INTHT
static
void
fadst8_1d
(
int16_t
*
input
,
int16_t
*
output
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment