Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
f65473c0
Commit
f65473c0
authored
Jul 20, 2015
by
Yunqing Wang
Committed by
Gerrit Code Review
Jul 20, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Migrate quantization functions from vp9/ to vpx_dsp/"
parents
b0e6811a
38f1fbbb
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
904 additions
and
804 deletions
+904
-804
test/vp9_quantize_test.cc
test/vp9_quantize_test.cc
+1
-1
vp9/common/vp9_common.h
vp9/common/vp9_common.h
+0
-14
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+0
-18
vp9/encoder/vp9_encodemb.c
vp9/encoder/vp9_encodemb.c
+1
-1
vp9/encoder/vp9_quantize.c
vp9/encoder/vp9_quantize.c
+1
-326
vp9/encoder/vp9_quantize.h
vp9/encoder/vp9_quantize.h
+0
-25
vp9/encoder/x86/vp9_quantize_sse2.c
vp9/encoder/x86/vp9_quantize_sse2.c
+0
-208
vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+0
-200
vp9/vp9cx.mk
vp9/vp9cx.mk
+0
-1
vpx_dsp/quantize.c
vpx_dsp/quantize.c
+337
-0
vpx_dsp/quantize.h
vpx_dsp/quantize.h
+51
-0
vpx_dsp/vpx_dsp.mk
vpx_dsp/vpx_dsp.mk
+15
-0
vpx_dsp/vpx_dsp_common.h
vpx_dsp/vpx_dsp_common.h
+14
-0
vpx_dsp/vpx_dsp_rtcd_defs.pl
vpx_dsp/vpx_dsp_rtcd_defs.pl
+42
-8
vpx_dsp/x86/highbd_quantize_intrin_sse2.c
vpx_dsp/x86/highbd_quantize_intrin_sse2.c
+3
-2
vpx_dsp/x86/quantize_sse2.c
vpx_dsp/x86/quantize_sse2.c
+223
-0
vpx_dsp/x86/quantize_ssse3_x86_64.asm
vpx_dsp/x86/quantize_ssse3_x86_64.asm
+216
-0
No files found.
test/vp9_quantize_test.cc
View file @
f65473c0
...
...
@@ -19,7 +19,7 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "./vpx_config.h"
#include "./vp
9
_rtcd.h"
#include "./vp
x_dsp
_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
...
...
vp9/common/vp9_common.h
View file @
f65473c0
...
...
@@ -56,20 +56,6 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
return
(
uint16_t
)
clamp
(
val
,
0
,
4095
);
}
}
// Note:
// tran_low_t is the datatype used for final transform coefficients.
// tran_high_t is the datatype used for intermediate transform stages.
typedef
int64_t
tran_high_t
;
typedef
int32_t
tran_low_t
;
#else
// Note:
// tran_low_t is the datatype used for final transform coefficients.
// tran_high_t is the datatype used for intermediate transform stages.
typedef
int32_t
tran_high_t
;
typedef
int16_t
tran_low_t
;
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_DEBUG
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
f65473c0
...
...
@@ -781,12 +781,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vp9_quantize_fp_32x32/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_quantize_fp_32x32/
;
add_proto
qw/void vp9_quantize_b/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_quantize_b/
;
add_proto
qw/void vp9_quantize_b_32x32/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_quantize_b_32x32/
;
add_proto
qw/void vp9_fdct8x8_quant/
,
"
const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_fdct8x8_quant/
;
}
else
{
...
...
@@ -802,12 +796,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vp9_quantize_fp_32x32/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_quantize_fp_32x32/
,
"
$ssse3_x86_64_x86inc
";
add_proto
qw/void vp9_quantize_b/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_quantize_b sse2/
,
"
$ssse3_x86_64_x86inc
";
add_proto
qw/void vp9_quantize_b_32x32/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_quantize_b_32x32/
,
"
$ssse3_x86_64_x86inc
";
add_proto
qw/void vp9_fdct8x8_quant/
,
"
const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_fdct8x8_quant sse2 ssse3 neon/
;
}
...
...
@@ -935,12 +923,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto
qw/void vp9_highbd_quantize_fp_32x32/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_highbd_quantize_fp_32x32/
;
add_proto
qw/void vp9_highbd_quantize_b/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_highbd_quantize_b sse2/
;
add_proto
qw/void vp9_highbd_quantize_b_32x32/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan
";
specialize
qw/vp9_highbd_quantize_b_32x32 sse2/
;
#
# Structured Similarity (SSIM)
#
...
...
vp9/encoder/vp9_encodemb.c
View file @
f65473c0
...
...
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
...
...
@@ -23,7 +24,6 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"
...
...
vp9/encoder/vp9_quantize.c
View file @
f65473c0
...
...
@@ -9,7 +9,7 @@
*/
#include <math.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
...
...
@@ -20,113 +20,6 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
// Quantizes only the first (DC) coefficient of a block.
//
// Both output buffers are cleared for n_coeffs entries. Unless skip_block is
// non-zero, |coeff_ptr[0]| is offset by round_ptr[0], clamped to the int16_t
// range, multiplied by quant and shifted right by 16. The result is written
// with the input's sign to qcoeff_ptr[0], and dqcoeff_ptr[0] receives that
// stored value times dequant_ptr (a scalar, despite the name).
// *eob_ptr is set to 1 when the quantized magnitude is non-zero, else 0.
void vp9_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
                     const int16_t *round_ptr, const int16_t quant,
                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                     const int16_t dequant_ptr, uint16_t *eob_ptr) {
  // The input is sampled before the outputs are cleared.
  const int dc = coeff_ptr[0];
  // Sign mask from the top bit; (x ^ m) - m then gives |x| for m == -1 and
  // x for m == 0. Relies on an arithmetic right shift of a 32-bit int.
  const int sign_mask = dc >> 31;
  const int magnitude = (dc ^ sign_mask) - sign_mask;
  int eob_count = 0;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    const int rounded = clamp(magnitude + round_ptr[0], INT16_MIN, INT16_MAX);
    const int level = (rounded * quant) >> 16;

    // Re-apply the sign with the same xor/subtract pair.
    qcoeff_ptr[0] = (level ^ sign_mask) - sign_mask;
    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
    eob_count = (level != 0);
  }

  *eob_ptr = eob_count;
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth variant of the DC-only quantizer.
//
// Clears n_coeffs entries of both outputs. Unless skip_block is non-zero,
// |coeff_ptr[0]| + round_ptr[0] is widened to 64 bits, multiplied by quant
// and shifted right by 16 (no int16_t clamp is applied here). The result is
// stored with the input's sign in qcoeff_ptr[0]; dqcoeff_ptr[0] is that
// stored value times dequant_ptr (a scalar, despite the name).
// *eob_ptr is set to 1 when the quantized magnitude is non-zero, else 0.
void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                            int skip_block, const int16_t *round_ptr,
                            const int16_t quant, tran_low_t *qcoeff_ptr,
                            tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr) {
  int eob_count = 0;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    const int dc = coeff_ptr[0];
    // Sign mask from the top bit; relies on an arithmetic right shift.
    const int sign_mask = dc >> 31;
    const int magnitude = (dc ^ sign_mask) - sign_mask;
    // The sum is formed in int and only then widened for the multiply.
    const int64_t rounded = magnitude + round_ptr[0];
    const uint32_t level = (uint32_t)((rounded * quant) >> 16);

    qcoeff_ptr[0] = (tran_low_t)((level ^ sign_mask) - sign_mask);
    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
    eob_count = (level != 0);
  }

  *eob_ptr = eob_count;
}
#endif
// DC-only quantizer for a 1024-coefficient block.
//
// Differs from vp9_quantize_dc in three places: the coefficient count is
// fixed at 1024, the rounding term is ROUND_POWER_OF_TWO(round_ptr[0], 1),
// the product with quant is shifted right by 15 instead of 16, and the
// dequantized value is divided by 2.
// *eob_ptr is set to 1 when the quantized magnitude is non-zero, else 0.
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant_ptr, uint16_t *eob_ptr) {
  const int n_coeffs = 1024;
  // The input is sampled before the outputs are cleared.
  const int dc = coeff_ptr[0];
  // Sign mask from the top bit; relies on an arithmetic right shift.
  const int sign_mask = dc >> 31;
  const int magnitude = (dc ^ sign_mask) - sign_mask;
  int eob_count = 0;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    const int rounded =
        clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[0], 1), INT16_MIN,
              INT16_MAX);
    const int level = (rounded * quant) >> 15;

    qcoeff_ptr[0] = (level ^ sign_mask) - sign_mask;
    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
    eob_count = (level != 0);
  }

  *eob_ptr = eob_count;
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth DC-only quantizer for a 1024-coefficient block.
//
// Clears 1024 entries of both outputs. Unless skip_block is non-zero,
// |coeff_ptr[0]| + ROUND_POWER_OF_TWO(round_ptr[0], 1) is widened to 64 bits,
// multiplied by quant and shifted right by 15. The signed result goes to
// qcoeff_ptr[0]; dqcoeff_ptr[0] is that stored value times dequant_ptr,
// divided by 2.
// *eob_ptr is set to 1 when the quantized magnitude is non-zero, else 0.
void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                                  const int16_t *round_ptr,
                                  const int16_t quant, tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr) {
  const int n_coeffs = 1024;
  int eob_count = 0;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    const int dc = coeff_ptr[0];
    // Sign mask from the top bit; relies on an arithmetic right shift.
    const int sign_mask = dc >> 31;
    const int magnitude = (dc ^ sign_mask) - sign_mask;
    // The sum is formed in int and only then widened for the multiply.
    const int64_t rounded = magnitude + ROUND_POWER_OF_TWO(round_ptr[0], 1);
    const uint32_t level = (uint32_t)((rounded * quant) >> 15);

    qcoeff_ptr[0] = (tran_low_t)((level ^ sign_mask) - sign_mask);
    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
    eob_count = (level != 0);
  }

  *eob_ptr = eob_count;
}
#endif
void
vp9_quantize_fp_c
(
const
tran_low_t
*
coeff_ptr
,
intptr_t
n_coeffs
,
int
skip_block
,
const
int16_t
*
zbin_ptr
,
const
int16_t
*
round_ptr
,
...
...
@@ -298,224 +191,6 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
}
#endif
// Quantizes a block of n_coeffs coefficients visited in scan order.
//
// The two-entry tables (zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
// dequant_ptr) are indexed with 0 for raster position 0 and with 1 for every
// other position. Both output buffers are cleared first. When skip_block is
// non-zero nothing else is written and *eob_ptr becomes 0. Otherwise
// *eob_ptr is one past the last scan index whose quantized magnitude is
// non-zero (0 when there is none). iscan is unused.
void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                      int skip_block, const int16_t *zbin_ptr,
                      const int16_t *round_ptr, const int16_t *quant_ptr,
                      const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
                      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                      uint16_t *eob_ptr, const int16_t *scan,
                      const int16_t *iscan) {
  const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
  const int nzbins[2] = {-zbins[0], -zbins[1]};
  // Number of leading scan positions that still need to be examined.
  int active = (int)n_coeffs;
  int eob_count = 0;
  int pos;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Pre-scan: walk backwards from the end of the scan and drop every
    // trailing coefficient that lies strictly between -zbin and +zbin.
    while (active > 0) {
      const int rc = scan[active - 1];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];

      if (coeff >= zbins[is_ac] || coeff <= nzbins[is_ac]) break;
      --active;
    }

    // Quantization pass over the remaining prefix; `active` may be zero.
    for (pos = 0; pos < active; ++pos) {
      const int rc = scan[pos];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];
      // Sign mask from the top bit; relies on an arithmetic right shift.
      const int sign_mask = coeff >> 31;
      const int magnitude = (coeff ^ sign_mask) - sign_mask;
      int level;

      // Coefficients below the zero-bin threshold keep their cleared value.
      if (magnitude < zbins[is_ac]) continue;

      level = clamp(magnitude + round_ptr[is_ac], INT16_MIN, INT16_MAX);
      level = ((((level * quant_ptr[is_ac]) >> 16) + level) *
               quant_shift_ptr[is_ac]) >>
              16;

      qcoeff_ptr[rc] = (level ^ sign_mask) - sign_mask;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac];

      if (level) eob_count = pos + 1;
    }
  }

  *eob_ptr = eob_count;
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth block quantizer; same flow as the 16-bit version but the
// rounding sum is widened to 64 bits and is not clamped to int16_t.
//
// The two-entry tables are indexed with 0 for raster position 0 and 1 for
// every other position. Both outputs are cleared first. When skip_block is
// non-zero *eob_ptr becomes 0; otherwise it is one past the last scan index
// whose quantized magnitude is non-zero. iscan is unused.
void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             int skip_block, const int16_t *zbin_ptr,
                             const int16_t *round_ptr,
                             const int16_t *quant_ptr,
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
                             const int16_t *scan, const int16_t *iscan) {
  const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
  const int nzbins[2] = {-zbins[0], -zbins[1]};
  // Number of leading scan positions that still need to be examined.
  int active = (int)n_coeffs;
  int eob_count = 0;
  int pos;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Pre-scan: drop trailing coefficients strictly inside (-zbin, +zbin).
    while (active > 0) {
      const int rc = scan[active - 1];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];

      if (coeff >= zbins[is_ac] || coeff <= nzbins[is_ac]) break;
      --active;
    }

    // Quantization pass over the remaining prefix; `active` may be zero.
    for (pos = 0; pos < active; ++pos) {
      const int rc = scan[pos];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];
      // Sign mask from the top bit; relies on an arithmetic right shift.
      const int sign_mask = coeff >> 31;
      const int magnitude = (coeff ^ sign_mask) - sign_mask;

      if (magnitude >= zbins[is_ac]) {
        // The sum is formed in int and only then widened.
        const int64_t rounded = magnitude + round_ptr[is_ac];
        const int64_t scaled =
            ((rounded * quant_ptr[is_ac]) >> 16) + rounded;
        const uint32_t level =
            (uint32_t)((scaled * quant_shift_ptr[is_ac]) >> 16);

        qcoeff_ptr[rc] = (tran_low_t)((level ^ sign_mask) - sign_mask);
        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac];

        if (level) eob_count = pos + 1;
      }
    }
  }

  *eob_ptr = eob_count;
}
#endif
// Block quantizer variant named for 32x32 blocks.
//
// Zero-bin thresholds and rounding terms are passed through
// ROUND_POWER_OF_TWO(x, 1), the final shift is 15 rather than 16, and the
// dequantized value is divided by 2. A first pass records the scan indices
// of all coefficients at or outside the zero-bin thresholds; a second pass
// quantizes only those. The index list holds 1024 entries, so n_coeffs is
// presumably never larger than that -- confirm against callers.
// When skip_block is non-zero only the clearing of the outputs happens and
// *eob_ptr becomes 0; otherwise it is one past the last recorded scan index
// whose quantized value is non-zero. iscan is unused.
void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t *round_ptr,
                            const int16_t *quant_ptr,
                            const int16_t *quant_shift_ptr,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t *dequant_ptr, uint16_t *eob_ptr,
                            const int16_t *scan, const int16_t *iscan) {
  const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
                        ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
  const int nzbins[2] = {-zbins[0], -zbins[1]};
  int kept[1024];  // scan indices selected by the pre-scan pass
  int kept_count = 0;
  int eob_count = 0;
  int k;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Pre-scan pass: remember every coefficient at or beyond a threshold.
    for (k = 0; k < n_coeffs; ++k) {
      const int rc = scan[k];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];

      if (coeff < zbins[is_ac] && coeff > nzbins[is_ac]) continue;
      kept[kept_count++] = k;
    }

    // Quantization pass: only the remembered positions; the list may be
    // empty.
    for (k = 0; k < kept_count; ++k) {
      const int pos = kept[k];
      const int rc = scan[pos];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];
      // Sign mask from the top bit; relies on an arithmetic right shift.
      const int sign_mask = coeff >> 31;
      const int magnitude = (coeff ^ sign_mask) - sign_mask;
      const int rounded =
          clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[is_ac], 1),
                INT16_MIN, INT16_MAX);
      const int level = ((((rounded * quant_ptr[is_ac]) >> 16) + rounded) *
                         quant_shift_ptr[is_ac]) >>
                        15;

      qcoeff_ptr[rc] = (level ^ sign_mask) - sign_mask;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac] / 2;

      if (level) eob_count = pos + 1;
    }
  }

  *eob_ptr = eob_count;
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth block quantizer variant named for 32x32 blocks.
//
// Same two-pass structure as the 16-bit 32x32 version: thresholds and
// rounding terms go through ROUND_POWER_OF_TWO(x, 1), the final shift is 15,
// and the dequantized value is divided by 2. The rounding sum is widened to
// 64 bits and is not clamped to int16_t. The index list holds 1024 entries,
// so n_coeffs is presumably never larger than that -- confirm against
// callers.
// When skip_block is non-zero only the clearing of the outputs happens and
// *eob_ptr becomes 0; otherwise it is one past the last recorded scan index
// whose quantized magnitude is non-zero. iscan is unused.
void vp9_highbd_quantize_b_32x32_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
    const int16_t *zbin_ptr, const int16_t *round_ptr,
    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
    const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,
    const int16_t *iscan) {
  const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
                        ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
  const int nzbins[2] = {-zbins[0], -zbins[1]};
  int kept[1024];  // scan indices selected by the pre-scan pass
  int kept_count = 0;
  int eob_count = 0;
  int k;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Pre-scan pass: remember every coefficient at or beyond a threshold.
    for (k = 0; k < n_coeffs; ++k) {
      const int rc = scan[k];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];

      if (coeff < zbins[is_ac] && coeff > nzbins[is_ac]) continue;
      kept[kept_count++] = k;
    }

    // Quantization pass: only the remembered positions; the list may be
    // empty.
    for (k = 0; k < kept_count; ++k) {
      const int pos = kept[k];
      const int rc = scan[pos];
      const int is_ac = (rc != 0);
      const int coeff = coeff_ptr[rc];
      // Sign mask from the top bit; relies on an arithmetic right shift.
      const int sign_mask = coeff >> 31;
      const int magnitude = (coeff ^ sign_mask) - sign_mask;
      // The sum is formed in int and only then widened.
      const int64_t rounded =
          magnitude + ROUND_POWER_OF_TWO(round_ptr[is_ac], 1);
      const int64_t scaled = ((rounded * quant_ptr[is_ac]) >> 16) + rounded;
      const uint32_t level =
          (uint32_t)((scaled * quant_shift_ptr[is_ac]) >> 15);

      qcoeff_ptr[rc] = (tran_low_t)((level ^ sign_mask) - sign_mask);
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac] / 2;

      if (level) eob_count = pos + 1;
    }
  }

  *eob_ptr = eob_count;
}
#endif
void
vp9_regular_quantize_b_4x4
(
MACROBLOCK
*
x
,
int
plane
,
int
block
,
const
int16_t
*
scan
,
const
int16_t
*
iscan
)
{
MACROBLOCKD
*
const
xd
=
&
x
->
e_mbd
;
...
...
vp9/encoder/vp9_quantize.h
View file @
f65473c0
...
...
@@ -37,34 +37,9 @@ typedef struct {
DECLARE_ALIGNED
(
16
,
int16_t
,
uv_round
[
QINDEX_RANGE
][
8
]);
}
QUANTS
;
// DC-only quantizer. Note that quant_ptr and dequant_ptr are int16_t values
// passed by value, not pointers, despite their names.
void vp9_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
                     const int16_t *round_ptr, const int16_t quant_ptr,
                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                     const int16_t dequant_ptr, uint16_t *eob_ptr);

// DC-only quantizer without an n_coeffs argument. quant_ptr and dequant_ptr
// are again scalars passed by value.
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant_ptr,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant_ptr, uint16_t *eob_ptr);

// Quantizes one block of plane `plane` in macroblock `x`, given the scan and
// inverse-scan tables.
// NOTE(review): presumably a 4x4 block, going by the name -- confirm against
// the definition.
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                const int16_t *scan, const int16_t *iscan);
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth DC-only quantizer. quant_ptr and dequant_ptr are int16_t
// values passed by value, not pointers, despite their names.
void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                            int skip_block, const int16_t *round_ptr,
                            const int16_t quant_ptr, tran_low_t *qcoeff_ptr,
                            tran_low_t *dqcoeff_ptr,
                            const int16_t dequant_ptr, uint16_t *eob_ptr);

// High-bitdepth DC-only quantizer without an n_coeffs argument. quant_ptr
// and dequant_ptr are again scalars passed by value.
void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                                  const int16_t *round_ptr,
                                  const int16_t quant_ptr,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr);
#endif
struct
VP9_COMP
;
struct
VP9Common
;
...
...
vp9/encoder/x86/vp9_quantize_sse2.c
View file @
f65473c0
...
...
@@ -14,214 +14,6 @@
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
void
vp9_quantize_b_sse2
(
const
int16_t
*
coeff_ptr
,
intptr_t
n_coeffs
,
int
skip_block
,
const
int16_t
*
zbin_ptr
,
const
int16_t
*
round_ptr
,
const
int16_t
*
quant_ptr
,
const
int16_t
*
quant_shift_ptr
,
int16_t
*
qcoeff_ptr
,
int16_t
*
dqcoeff_ptr
,
const
int16_t
*
dequant_ptr
,
uint16_t
*
eob_ptr
,
const
int16_t
*
scan_ptr
,
const
int16_t
*
iscan_ptr
)
{
__m128i
zero
;
(
void
)
scan_ptr
;
coeff_ptr
+=
n_coeffs
;
iscan_ptr
+=
n_coeffs
;
qcoeff_ptr
+=
n_coeffs
;
dqcoeff_ptr
+=
n_coeffs
;
n_coeffs
=
-
n_coeffs
;
zero
=
_mm_setzero_si128
();
if
(
!
skip_block
)
{
__m128i
eob
;
__m128i
zbin
;
__m128i
round
,
quant
,
dequant
,
shift
;
{
__m128i
coeff0
,
coeff1
;
// Setup global values
{
__m128i
pw_1
;
zbin
=
_mm_load_si128
((
const
__m128i
*
)
zbin_ptr
);
round
=
_mm_load_si128
((
const
__m128i
*
)
round_ptr
);
quant
=
_mm_load_si128
((
const
__m128i
*
)
quant_ptr
);
pw_1
=
_mm_set1_epi16
(
1
);
zbin
=
_mm_sub_epi16
(
zbin
,
pw_1
);
dequant
=
_mm_load_si128
((
const
__m128i
*
)
dequant_ptr
);
shift
=
_mm_load_si128
((
const
__m128i
*
)
quant_shift_ptr
);
}
{
__m128i
coeff0_sign
,
coeff1_sign
;
__m128i
qcoeff0
,
qcoeff1
;
__m128i
qtmp0
,
qtmp1
;
__m128i
cmp_mask0
,
cmp_mask1
;
// Do DC and first 15 AC
coeff0
=
_mm_load_si128
((
const
__m128i
*
)(
coeff_ptr
+
n_coeffs
));
coeff1
=
_mm_load_si128
((
const
__m128i
*
)(
coeff_ptr
+
n_coeffs
)
+
1
);
// Poor man's sign extract
coeff0_sign
=
_mm_srai_epi16
(
coeff0
,
15
);
coeff1_sign
=
_mm_srai_epi16
(
coeff1
,
15
);
qcoeff0
=
_mm_xor_si128
(
coeff0
,
coeff0_sign
);
qcoeff1
=
_mm_xor_si128
(
coeff1
,
coeff1_sign
);
qcoeff0
=
_mm_sub_epi16
(
qcoeff0
,
coeff0_sign
);
qcoeff1
=
_mm_sub_epi16
(
qcoeff1
,
coeff1_sign
);
cmp_mask0
=
_mm_cmpgt_epi16
(
qcoeff0
,
zbin
);
zbin
=
_mm_unpackhi_epi64
(
zbin
,
zbin
);
// Switch DC to AC
cmp_mask1
=
_mm_cmpgt_epi16
(
qcoeff1
,
zbin
);
qcoeff0
=
_mm_adds_epi16
(
qcoeff0
,
round
);
round
=
_mm_unpackhi_epi64
(
round
,
round
);
qcoeff1
=
_mm_adds_epi16
(
qcoeff1
,
round
);
qtmp0
=
_mm_mulhi_epi16
(
qcoeff0
,
quant
);
quant
=
_mm_unpackhi_epi64
(
quant
,
quant
);
qtmp1
=
_mm_mulhi_epi16
(
qcoeff1
,
quant
);
qtmp0
=
_mm_add_epi16
(
qtmp0
,
qcoeff0
);
qtmp1
=
_mm_add_epi16
(
qtmp1
,
qcoeff1
);
qcoeff0
=
_mm_mulhi_epi16
(
qtmp0
,
shift
);
shift
=
_mm_unpackhi_epi64
(
shift
,
shift
);
qcoeff1
=
_mm_mulhi_epi16
(
qtmp1
,
shift
);
// Reinsert signs
qcoeff0
=
_mm_xor_si128
(
qcoeff0
,
coeff0_sign
);
qcoeff1
=
_mm_xor_si128
(
qcoeff1
,
coeff1_sign
);
qcoeff0
=
_mm_sub_epi16
(
qcoeff0
,
coeff0_sign
);
qcoeff1
=
_mm_sub_epi16
(
qcoeff1
,
coeff1_sign
);
// Mask out zbin threshold coeffs
qcoeff0
=
_mm_and_si128
(
qcoeff0
,
cmp_mask0
);
qcoeff1
=
_mm_and_si128
(
qcoeff1
,
cmp_mask1
);
_mm_store_si128
((
__m128i
*
)(
qcoeff_ptr
+
n_coeffs
),
qcoeff0
);
_mm_store_si128
((
__m128i
*
)(
qcoeff_ptr
+
n_coeffs
)
+
1
,
qcoeff1
);
coeff0
=
_mm_mullo_epi16
(
qcoeff0
,
dequant
);
dequant
=
_mm_unpackhi_epi64
(
dequant
,
dequant
);
coeff1
=
_mm_mullo_epi16
(
qcoeff1
,
dequant
);
_mm_store_si128
((
__m128i
*
)(
dqcoeff_ptr
+
n_coeffs
),
coeff0
);
_mm_store_si128
((
__m128i
*
)(
dqcoeff_ptr
+
n_coeffs
)
+
1
,
coeff1
);
}
{
// Scan for eob
__m128i
zero_coeff0
,
zero_coeff1
;
__m128i
nzero_coeff0
,
nzero_coeff1
;
__m128i
iscan0
,
iscan1
;
__m128i
eob1
;
zero_coeff0
=
_mm_cmpeq_epi16
(
coeff0
,
zero
);
zero_coeff1
=
_mm_cmpeq_epi16
(
coeff1
,
zero
);
nzero_coeff0
=
_mm_cmpeq_epi16
(
zero_coeff0
,
zero
);
nzero_coeff1
=
_mm_cmpeq_epi16
(
zero_coeff1
,
zero
);
iscan0
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
n_coeffs
));
iscan1
=
_mm_load_si128
((
const
__m128i
*
)(
iscan_ptr
+
n_coeffs
)
+
1
);
// Add one to convert from indices to counts
iscan0
=
_mm_sub_epi16
(
iscan0
,
nzero_coeff0
);
iscan1
=
_mm_sub_epi16
(
iscan1
,
nzero_coeff1
);
eob
=
_mm_and_si128
(
iscan0
,
nzero_coeff0
);
eob1
=
_mm_and_si128
(
iscan1
,
nzero_coeff1
);
eob
=
_mm_max_epi16
(
eob
,
eob1
);
}
n_coeffs
+=
8
*
2
;
}
// AC only loop
while
(
n_coeffs
<
0
)
{
__m128i
coeff0
,
coeff1
;
{
__m128i
coeff0_sign
,
coeff1_sign
;
__m128i
qcoeff0
,
qcoeff1
;
__m128i
qtmp0
,
qtmp1
;
__m128i
cmp_mask0
,
cmp_mask1
;