Commit 540d9103 authored by Jingning Han, committed by Gerrit Code Review
Browse files

Fix potential overflow issue in SSSE3 forward 8x8 2D-DCT

The SSSE3 implementation can overflow in its second 1-D transform
when all input residual pixels are close to 255. This commit fixes
the overflow and re-enables the unit test for the SSSE3 version.

Change-Id: I0520478abdab7afd3ff2842516bec951111e9b3c
parent 5c2696c3
...@@ -23,6 +23,7 @@ pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 ...@@ -23,6 +23,7 @@ pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1 pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
%endmacro %endmacro
TRANSFORM_COEFFS 11585, 11585
TRANSFORM_COEFFS 15137, 6270 TRANSFORM_COEFFS 15137, 6270
TRANSFORM_COEFFS 16069, 3196 TRANSFORM_COEFFS 16069, 3196
TRANSFORM_COEFFS 9102, 13623 TRANSFORM_COEFFS 9102, 13623
...@@ -83,7 +84,7 @@ SECTION .text ...@@ -83,7 +84,7 @@ SECTION .text
%endmacro %endmacro
; 1D forward 8x8 DCT transform ; 1D forward 8x8 DCT transform
%macro FDCT8_1D 0 %macro FDCT8_1D 1
SUM_SUB 0, 7, 9 SUM_SUB 0, 7, 9
SUM_SUB 1, 6, 9 SUM_SUB 1, 6, 9
SUM_SUB 2, 5, 9 SUM_SUB 2, 5, 9
...@@ -92,14 +93,21 @@ SECTION .text ...@@ -92,14 +93,21 @@ SECTION .text
SUM_SUB 0, 3, 9 SUM_SUB 0, 3, 9
SUM_SUB 1, 2, 9 SUM_SUB 1, 2, 9
SUM_SUB 6, 5, 9 SUM_SUB 6, 5, 9
%if %1 == 0
SUM_SUB 0, 1, 9 SUM_SUB 0, 1, 9
%endif
BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
pmulhrsw m6, m12 pmulhrsw m6, m12
pmulhrsw m5, m12 pmulhrsw m5, m12
%if %1 == 0
pmulhrsw m0, m12 pmulhrsw m0, m12
pmulhrsw m1, m12 pmulhrsw m1, m12
%else
BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10
SWAP 0, 1
%endif
SUM_SUB 4, 5, 9 SUM_SUB 4, 5, 9
SUM_SUB 7, 6, 9 SUM_SUB 7, 6, 9
...@@ -150,10 +158,10 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride ...@@ -150,10 +158,10 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride
psllw m7, 2 psllw m7, 2
; column transform ; column transform
FDCT8_1D FDCT8_1D 0
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
FDCT8_1D FDCT8_1D 1
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
DIVIDE_ROUND_2X 0, 1, 9, 10 DIVIDE_ROUND_2X 0, 1, 9, 10
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment