Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
f9c01c7b
Commit
f9c01c7b
authored
Jul 20, 2016
by
Yi Luo
Committed by
Gerrit Code Review
Jul 20, 2016
Browse files
Options
Browse Files
Download
Plain Diff
Merge "HBD fast path quantization speed improvement" into nextgenv2
parents
c03268b4
b2663a8a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
433 additions
and
1 deletion
+433
-1
test/test.mk
test/test.mk
+1
-0
test/vp10_quantize_test.cc
test/vp10_quantize_test.cc
+230
-0
vp10/common/vp10_rtcd_defs.pl
vp10/common/vp10_rtcd_defs.pl
+1
-1
vp10/encoder/x86/vp10_highbd_quantize_sse4.c
vp10/encoder/x86/vp10_highbd_quantize_sse4.c
+200
-0
vp10/vp10cx.mk
vp10/vp10cx.mk
+1
-0
No files found.
test/test.mk
View file @
f9c01c7b
...
...
@@ -198,6 +198,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += obmc_variance_test.cc
endif
ifeq
($(CONFIG_VP9_HIGHBITDEPTH),yes)
LIBVPX_TEST_SRCS-$(HAVE_SSE4_1)
+=
vp10_quantize_test.cc
LIBVPX_TEST_SRCS-$(HAVE_SSE4_1)
+=
vp10_highbd_iht_test.cc
endif
# CONFIG_VP9_HIGHBITDEPTH
endif
# VP10
...
...
test/vp10_quantize_test.cc
0 → 100644
View file @
f9c01c7b
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "./vp10_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "vp10/common/scan.h"
namespace
{
// Pointer to a high-bitdepth "fp" quantizer. Both the reference function and
// the optimized function exercised by this test are called through this type.
//   coeff_ptr           input coefficients (count entries)
//   count               number of coefficients in the block
//   skip_block          forwarded unchanged to the quantizer
//   zbin_ptr..quant_shift_ptr, dequant_ptr
//                       two-entry quantizer tables (the tests allocate 2 each)
//   qcoeff_ptr          output: quantized coefficients
//   dqcoeff_ptr         output: dequantized coefficients
//   eob_ptr             output: end-of-block value
//   scan / iscan        scan-order tables taken from vp10_default_scan_orders
//   log_scale           1 when the transform size is TX_32X32, otherwise 0
typedef void (*QuantizeFpFunc)(
    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
    const int16_t *zbin_ptr, const int16_t *round_ptr,
    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
    const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,
    const int16_t *iscan, const int log_scale);
struct
QuantizeFuncParams
{
QuantizeFuncParams
(
QuantizeFpFunc
qF
=
NULL
,
QuantizeFpFunc
qRefF
=
NULL
,
int
count
=
16
)
:
qFunc
(
qF
),
qFuncRef
(
qRefF
),
coeffCount
(
count
)
{}
QuantizeFpFunc
qFunc
;
QuantizeFpFunc
qFuncRef
;
int
coeffCount
;
};
using libvpx_test::ACMRandom;

// Number of random blocks generated by each test.
const int numTests = 1000;
// Capacity of the coefficient buffers; the largest instantiated block has
// 1024 coefficients.
const int maxSize = 1024;
// Exclusive bound passed to the random generator when deriving round_ptr.
const int roundFactorRange = 127;
// Exclusive bound passed to the random generator when drawing dequant_ptr.
const int dequantRange = 32768;
// Bound passed to the random generator when drawing input coefficients.
const int coeffRange = (1 << 20) - 1;
// Value-parameterized fixture that runs an optimized high-bitdepth "fp"
// quantizer and its reference on identical random input and requires the
// outputs to match.
class VP10QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
 public:
  // Quantizes numTests blocks of random coefficients with both functions and
  // expects identical qcoeff, dqcoeff and eob results.
  void RunQuantizeTest() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
    DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
    DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
    DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
    uint16_t eob;
    uint16_t ref_eob;
    int err_count_total = 0;
    int first_failure = -1;
    int skip_block = 0;
    int count = params_.coeffCount;
    const TX_SIZE txSize = getTxSize(count);
    int log_scale = (txSize == TX_32X32);
    QuantizeFpFunc quanFunc = params_.qFunc;
    QuantizeFpFunc quanFuncRef = params_.qFuncRef;
    const scan_order scanOrder = vp10_default_scan_orders[txSize];

    for (int i = 0; i < numTests; i++) {
      int err_count = 0;
      // Both start from the same out-of-range value (0xFFFF after conversion)
      // so that a function which never writes eob shows up as a mismatch.
      ref_eob = eob = -1;
      for (int j = 0; j < count; j++) {
        coeff_ptr[j] = rnd(coeffRange);
      }

      InitQuantParams(&rnd, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                      dequant_ptr);

      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
                  quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr,
                  dequant_ptr, &ref_eob, scanOrder.scan, scanOrder.iscan,
                  log_scale);

      ASM_REGISTER_STATE_CHECK(quanFunc(
          coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
          quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
          scanOrder.scan, scanOrder.iscan, log_scale));

      for (int j = 0; j < count; ++j) {
        err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
                     (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
        EXPECT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j])
            << "qcoeff error: i = " << i << " j = " << j << "\n";
        EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j])
            << "dqcoeff error: i = " << i << " j = " << j << "\n";
      }
      EXPECT_EQ(ref_eob, eob) << "eob error: "
                              << "i = " << i << "\n";
      err_count += (ref_eob != eob);
      // Remember only the first failing iteration.
      if (err_count && !err_count_total) {
        first_failure = i;
      }
      err_count_total += err_count;
    }
    // The fixture is not tied to one instruction set, so the message names
    // the two roles rather than a specific SIMD flavour.
    EXPECT_EQ(0, err_count_total)
        << "Error: Quantization Test, reference output doesn't match "
        << "optimized output. "
        << "First failed at test case " << first_failure;
  }

  // Quantizes numTests mostly-zero blocks (three randomly placed random
  // coefficients; positions may coincide) and expects both functions to
  // report the same eob.
  void RunEobTest() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
    DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
    DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
    DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
    DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
    uint16_t eob;
    uint16_t ref_eob;
    int skip_block = 0;
    int count = params_.coeffCount;
    const TX_SIZE txSize = getTxSize(count);
    int log_scale = (txSize == TX_32X32);
    QuantizeFpFunc quanFunc = params_.qFunc;
    QuantizeFpFunc quanFuncRef = params_.qFuncRef;
    const scan_order scanOrder = vp10_default_scan_orders[txSize];

    for (int i = 0; i < numTests; i++) {
      ref_eob = eob = -1;
      for (int j = 0; j < count; j++) {
        coeff_ptr[j] = 0;
      }

      coeff_ptr[rnd(count)] = rnd(coeffRange);
      coeff_ptr[rnd(count)] = rnd(coeffRange);
      coeff_ptr[rnd(count)] = rnd(coeffRange);

      InitQuantParams(&rnd, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                      dequant_ptr);

      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
                  quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr,
                  dequant_ptr, &ref_eob, scanOrder.scan, scanOrder.iscan,
                  log_scale);

      ASM_REGISTER_STATE_CHECK(quanFunc(
          coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
          quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
          scanOrder.scan, scanOrder.iscan, log_scale));
      EXPECT_EQ(ref_eob, eob) << "eob error: "
                              << "i = " << i << "\n";
    }
  }

  virtual void SetUp() { params_ = GetParam(); }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

  virtual ~VP10QuantizeTest() {}

 private:
  // Fills the five two-entry quantizer tables with random values. The order
  // of random draws is the same as in the loops this helper replaced.
  static void InitQuantParams(ACMRandom *rnd, int16_t *zbin_ptr,
                              int16_t *round_ptr, int16_t *quant_ptr,
                              int16_t *quant_shift_ptr, int16_t *dequant_ptr) {
    for (int j = 0; j < 2; j++) {
      zbin_ptr[j] = rnd->Rand16();
      quant_shift_ptr[j] = rnd->Rand16();
      // int16_t positive. The draw can be 0, which would make the reciprocal
      // below divide by zero, so 0 is replaced by the smallest valid step.
      const int dequant = abs((*rnd)(dequantRange));
      dequant_ptr[j] = (dequant == 0) ? 1 : dequant;
      quant_ptr[j] = (1 << 16) / dequant_ptr[j];
      round_ptr[j] = (abs((*rnd)(roundFactorRange)) * dequant_ptr[j]) >> 7;
    }
  }

  // Maps a coefficient count to the transform-size index used to select the
  // scan order. Counts other than 16/64/256/1024 fall back to index 0.
  // NOTE(review): indices 0..3 are presumably TX_4X4..TX_32X32 - confirm
  // against the TX_SIZE definition.
  TX_SIZE getTxSize(int count) {
    TX_SIZE txSize = 0;
    switch (count) {
      case 16: txSize = 0; break;
      case 64: txSize = 1; break;
      case 256: txSize = 2; break;
      case 1024: txSize = 3; break;
      default: break;
    }
    return txSize;
  }

  QuantizeFuncParams params_;
};
// Compares qcoeff, dqcoeff and eob of the tested quantizer with the
// reference on fully random coefficient blocks.
TEST_P(VP10QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
// Compares only the eob of the tested quantizer with the reference on
// blocks that are zero except for a few random coefficients.
TEST_P(VP10QuantizeTest, EobVerify) { RunEobTest(); }
#if HAVE_SSE4_1
INSTANTIATE_TEST_CASE_P
(
SSE4_1
,
VP10QuantizeTest
,
::
testing
::
Values
(
QuantizeFuncParams
(
&
vp10_highbd_quantize_fp_sse4_1
,
&
vp10_highbd_quantize_fp_c
,
16
),
QuantizeFuncParams
(
&
vp10_highbd_quantize_fp_sse4_1
,
&
vp10_highbd_quantize_fp_c
,
64
),
QuantizeFuncParams
(
&
vp10_highbd_quantize_fp_sse4_1
,
&
vp10_highbd_quantize_fp_c
,
256
),
QuantizeFuncParams
(
&
vp10_highbd_quantize_fp_sse4_1
,
&
vp10_highbd_quantize_fp_c
,
1024
)));
#endif // HAVE_SSE4_1
}
// namespace
vp10/common/vp10_rtcd_defs.pl
View file @
f9c01c7b
...
...
@@ -690,7 +690,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize
qw/vp10_highbd_block_error sse2/
;
add_proto
qw/void vp10_highbd_quantize_fp/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale
";
specialize
qw/vp10_highbd_quantize_fp/
;
specialize
qw/vp10_highbd_quantize_fp
sse4_1
/
;
add_proto
qw/void vp10_highbd_quantize_b/
,
"
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale
";
specialize
qw/vp10_highbd_quantize_b/
;
...
...
vp10/encoder/x86/vp10_highbd_quantize_sse4.c
0 → 100644
View file @
f9c01c7b
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <smmintrin.h>
#include <stdint.h>
#include "./vp10_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
// Coefficient quantization, phase 1: handles the lower two of the four
// 32-bit coefficients and prepares the upper two for phase 2.
//   coeff     in/out: four coefficients; replaced by their absolute values
//   param     param[0] rounding, param[1] quantizer, param[2] dequantizer
//   shift     right shift applied to the 64-bit quantizer product
//   scale     right shift applied to the 64-bit dequantizer product
//   qcoeff    out: [0] quantized lower pair (one value per 64-bit lane),
//                  [1] rounded upper pair, not yet multiplied
//   dquan     out: [0] dequantized lower pair (one value per 64-bit lane)
//   sign      out: -1 in lanes whose input was negative, +1 elsewhere
static INLINE void quantize_coeff_phase1(__m128i *coeff, const __m128i *param,
                                         const int shift, const int scale,
                                         __m128i *qcoeff, __m128i *dquan,
                                         __m128i *sign) {
  const __m128i zero = _mm_setzero_si128();
  // All-ones where the coefficient is negative; OR-ing in 1 leaves -1 there
  // and produces +1 in the remaining lanes.
  const __m128i negative_mask = _mm_cmplt_epi32(*coeff, zero);
  __m128i rounded;

  *sign = _mm_or_si128(negative_mask, _mm_set1_epi32(1));

  *coeff = _mm_abs_epi32(*coeff);
  rounded = _mm_add_epi32(*coeff, param[0]);

  // Spread the 32-bit values into 64-bit lanes so that _mm_mul_epi32 can
  // produce full-width products.
  qcoeff[1] = _mm_unpackhi_epi32(rounded, zero);
  qcoeff[0] = _mm_srli_epi64(
      _mm_mul_epi32(_mm_unpacklo_epi32(rounded, zero), param[1]), shift);

  dquan[0] = _mm_srli_epi64(_mm_mul_epi32(qcoeff[0], param[2]), scale);
}
// Coefficient quantization, phase 2: quantizes/dequantizes the upper pair
// left in qcoeff[1] by phase 1, merges it with the lower pair, restores the
// signs and stores four results to qAddr and four to dqAddr (unaligned
// stores).
//   qcoeff, dquan   in/out scratch; [0] ends up holding the stored vectors
//   sign            per-lane +1/-1 vector applied with _mm_sign_epi32
//   param           param[1] quantizer, param[2] dequantizer
//   shift, scale    right shifts for the quantizer / dequantizer products
static INLINE void quantize_coeff_phase2(__m128i *qcoeff, __m128i *dquan,
                                         const __m128i *sign,
                                         const __m128i *param, const int shift,
                                         const int scale, tran_low_t *qAddr,
                                         tran_low_t *dqAddr) {
  // Select 32-bit elements 0-1 or elements 2-3 of a vector.
  const __m128i keep_elems_01 = _mm_set_epi32(0, 0, -1, -1);
  const __m128i keep_elems_23 = _mm_set_epi32(-1, -1, 0, 0);
  __m128i q_lower, q_upper, dq_lower, dq_upper;

  q_upper = _mm_srli_epi64(_mm_mul_epi32(qcoeff[1], param[1]), shift);
  dq_upper = _mm_srli_epi64(_mm_mul_epi32(q_upper, param[2]), scale);

  // combine L&H: each input carries its two results in the low half of the
  // 64-bit lanes; the shuffles move the lower pair to elements 0-1 and the
  // upper pair to elements 2-3, and the masks drop everything else.
  q_lower = _mm_and_si128(_mm_shuffle_epi32(qcoeff[0], 0xd8), keep_elems_01);
  q_upper = _mm_and_si128(_mm_shuffle_epi32(q_upper, 0x8d), keep_elems_23);
  dq_lower = _mm_and_si128(_mm_shuffle_epi32(dquan[0], 0xd8), keep_elems_01);
  dq_upper = _mm_and_si128(_mm_shuffle_epi32(dq_upper, 0x8d), keep_elems_23);

  qcoeff[1] = q_upper;
  dquan[1] = dq_upper;

  qcoeff[0] = _mm_sign_epi32(_mm_or_si128(q_lower, q_upper), *sign);
  dquan[0] = _mm_sign_epi32(_mm_or_si128(dq_lower, dq_upper), *sign);

  _mm_storeu_si128((__m128i *)qAddr, qcoeff[0]);
  _mm_storeu_si128((__m128i *)dqAddr, dquan[0]);
}
// Updates the running per-lane end-of-block maximum from eight quantized
// coefficients: every non-zero coefficient contributes (iscan value + 1),
// zero coefficients contribute 0.
//   qcoeff_ptr   eight quantized coefficients (unaligned load)
//   iscan        the eight matching int16 entries of the iscan table
//   eob          in/out: eight 16-bit running maxima
static INLINE void find_eob(tran_low_t *qcoeff_ptr, const int16_t *iscan,
                            __m128i *eob) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i q_first = _mm_loadu_si128((__m128i const *)qcoeff_ptr);
  const __m128i q_second = _mm_loadu_si128((__m128i const *)(qcoeff_ptr + 4));
  const __m128i is_zero_first = _mm_cmpeq_epi32(q_first, zero);
  const __m128i is_zero_second = _mm_cmpeq_epi32(q_second, zero);
  // Comparing the "is zero" masks with zero inverts them; packing narrows
  // the eight 32-bit masks to eight 16-bit masks (-1 = non-zero coefficient).
  const __m128i nonzero_mask =
      _mm_packs_epi32(_mm_cmpeq_epi32(is_zero_first, zero),
                      _mm_cmpeq_epi32(is_zero_second, zero));
  const __m128i positions = _mm_loadu_si128((__m128i const *)iscan);
  // Subtracting -1 adds one in the non-zero lanes; the AND clears the rest.
  const __m128i candidates =
      _mm_and_si128(_mm_sub_epi16(positions, nonzero_mask), nonzero_mask);

  *eob = _mm_max_epi16(*eob, candidates);
}
// Reduces the eight signed 16-bit lanes of *eob to their maximum and returns
// it. *eob is overwritten with intermediate values of the reduction.
static INLINE uint16_t get_accumulated_eob(__m128i *eob) {
  // Fold the upper 64 bits onto the lower 64 bits.
  *eob = _mm_max_epi16(*eob, _mm_shuffle_epi32(*eob, 0xe));
  // Fold words 2-3 onto words 0-1.
  *eob = _mm_max_epi16(*eob, _mm_shufflelo_epi16(*eob, 0xe));
  // Fold word 1 onto word 0.
  *eob = _mm_max_epi16(*eob, _mm_shufflelo_epi16(*eob, 0x1));
  return (uint16_t)_mm_extract_epi16(*eob, 0);
}
// SSE4.1 high-bitdepth "fp" quantizer.
//
// Clears qcoeff_ptr and dqcoeff_ptr (count entries each). When skip_block is
// non-zero it then reports an eob of 0 and returns. Otherwise coefficients
// are processed eight at a time: add the rounding value to the magnitude,
// multiply by the quantizer and shift right by (16 - log_scale), multiply
// by the dequantizer and shift right by log_scale, then restore the sign.
// Element 0 uses entry [0] of round_ptr/quant_ptr/dequant_ptr, every other
// element uses entry [1]. *eob_ptr receives the largest (iscan + 1) over all
// non-zero quantized coefficients, or 0 when there are none.
//
// zbin_ptr, quant_shift_ptr and scan are not used.
// NOTE(review): the first eight coefficients are processed unconditionally
// and further work is done in groups of eight, so count is presumably a
// multiple of 8 and at least 8 - confirm against callers.
void vp10_highbd_quantize_fp_sse4_1(
    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
    const int16_t *zbin_ptr, const int16_t *round_ptr,
    const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
    const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,
    const int16_t *iscan, const int log_scale) {
  __m128i coeff[2], qcoeff[2], dequant[2], qparam[3], coeff_sign;
  __m128i eob = _mm_setzero_si128();
  const int shift = 16 - log_scale;
  const int vec_len = 4;             // coefficients per 128-bit vector
  const int group_len = vec_len << 1;  // coefficients per iteration
  intptr_t pos;

  (void)zbin_ptr;
  (void)quant_shift_ptr;
  (void)scan;

  memset(qcoeff_ptr, 0, count * sizeof(qcoeff_ptr[0]));
  memset(dqcoeff_ptr, 0, count * sizeof(dqcoeff_ptr[0]));

  if (skip_block) {
    *eob_ptr = 0;
    return;
  }

  // DC and first 3 AC: element 0 of each constant vector carries the [0]
  // table entry, the remaining elements carry the [1] entry.
  coeff[0] = _mm_loadu_si128((__m128i const *)coeff_ptr);
  qparam[0] =
      _mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1], round_ptr[0]);
  qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[0]);
  qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[0]);
  quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
                        &coeff_sign);

  // From here on only the [1] table entries are needed.
  qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
  qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[1]);
  qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[1]);
  quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
                        qcoeff_ptr, dqcoeff_ptr);

  // next 4 AC
  coeff[1] = _mm_loadu_si128((__m128i const *)(coeff_ptr + vec_len));
  quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
                        &coeff_sign);
  quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
                        qcoeff_ptr + vec_len, dqcoeff_ptr + vec_len);

  find_eob(qcoeff_ptr, iscan, &eob);

  // Remaining coefficients, one group of eight per iteration.
  for (pos = group_len; pos < count; pos += group_len) {
    const tran_low_t *const in = coeff_ptr + pos;
    tran_low_t *const q_out = qcoeff_ptr + pos;
    tran_low_t *const dq_out = dqcoeff_ptr + pos;

    coeff[0] = _mm_loadu_si128((__m128i const *)in);
    coeff[1] = _mm_loadu_si128((__m128i const *)(in + vec_len));

    quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
                          &coeff_sign);
    quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
                          log_scale, q_out, dq_out);

    quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
                          &coeff_sign);
    quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
                          log_scale, q_out + vec_len, dq_out + vec_len);

    find_eob(q_out, iscan + pos, &eob);
  }

  *eob_ptr = get_accumulated_eob(&eob);
}
vp10/vp10cx.mk
View file @
f9c01c7b
...
...
@@ -117,6 +117,7 @@ VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
ifeq
($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP10_CX_SRCS-$(HAVE_SSE4_1)
+=
encoder/x86/highbd_fwd_txfm_sse4.c
VP10_CX_SRCS-$(HAVE_SSE4_1)
+=
common/x86/highbd_inv_txfm_sse4.c
VP10_CX_SRCS-$(HAVE_SSE4_1)
+=
encoder/x86/vp10_highbd_quantize_sse4.c
endif
ifeq
($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment