Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
3f6f7289
Commit
3f6f7289
authored
Mar 08, 2011
by
Jim Bankoski
Committed by
John Koleszar
Mar 11, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp8cx- alternate ssim function with optimizations
Change-Id: I91921b0a90dbaddc7010380b038955be347964b3
parent
a0306ea6
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
468 additions
and
294 deletions
+468
-294
vp8/encoder/generic/csystemdependent.c
vp8/encoder/generic/csystemdependent.c
+4
-0
vp8/encoder/onyx_if.c
vp8/encoder/onyx_if.c
+9
-3
vp8/encoder/ssim.c
vp8/encoder/ssim.c
+177
-291
vp8/encoder/variance.h
vp8/encoder/variance.h
+32
-0
vp8/encoder/x86/ssim_opt.asm
vp8/encoder/x86/ssim_opt.asm
+215
-0
vp8/encoder/x86/x86_csystemdependent.c
vp8/encoder/x86/x86_csystemdependent.c
+30
-0
vp8/vp8cx.mk
vp8/vp8cx.mk
+1
-0
No files found.
vp8/encoder/generic/csystemdependent.c
View file @
3f6f7289
...
...
@@ -103,6 +103,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
// Pure C:
vp8_yv12_copy_partial_frame_ptr
=
vp8_yv12_copy_partial_frame
;
#if CONFIG_PSNR
cpi
->
rtcd
.
variance
.
ssimpf_8x8
=
ssim_parms_8x8_c
;
cpi
->
rtcd
.
variance
.
ssimpf
=
ssim_parms_c
;
#endif
#if ARCH_X86 || ARCH_X86_64
vp8_arch_x86_encoder_init
(
cpi
);
...
...
vp8/encoder/onyx_if.c
View file @
3f6f7289
...
...
@@ -86,9 +86,11 @@ extern double vp8_calc_ssim
YV12_BUFFER_CONFIG
*
source
,
YV12_BUFFER_CONFIG
*
dest
,
int
lumamask
,
double
*
weight
double
*
weight
,
const
vp8_variance_rtcd_vtable_t
*
rtcd
);
extern
double
vp8_calc_ssimg
(
YV12_BUFFER_CONFIG
*
source
,
...
...
@@ -5133,8 +5135,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
vp8_deblock
(
cm
->
frame_to_show
,
&
cm
->
post_proc_buffer
,
cm
->
filter_level
*
10
/
6
,
1
,
0
,
IF_RTCD
(
&
cm
->
rtcd
.
postproc
));
vp8_clear_system_state
();
frame_psnr2
=
vp8_calc_psnr
(
cpi
->
Source
,
&
cm
->
post_proc_buffer
,
&
y2
,
&
u2
,
&
v2
,
&
sq_error
);
frame_ssim2
=
vp8_calc_ssim
(
cpi
->
Source
,
&
cm
->
post_proc_buffer
,
1
,
&
weight
);
frame_psnr2
=
vp8_calc_psnr
(
cpi
->
Source
,
&
cm
->
post_proc_buffer
,
&
y2
,
&
u2
,
&
v2
,
&
sq_error
);
frame_ssim2
=
vp8_calc_ssim
(
cpi
->
Source
,
&
cm
->
post_proc_buffer
,
1
,
&
weight
,
IF_RTCD
(
&
cpi
->
rtcd
.
variance
));
cpi
->
summed_quality
+=
frame_ssim2
*
weight
;
cpi
->
summed_weights
+=
weight
;
...
...
vp8/encoder/ssim.c
View file @
3f6f7289
...
...
@@ -11,298 +11,13 @@
#include "vpx_scale/yv12config.h"
#include "math.h"
#include "onyx_int.h"
#define C1 (float)(64 * 64 * 0.01*255*0.01*255)
#define C2 (float)(64 * 64 * 0.03*255*0.03*255)
static
int
width_y
;
static
int
height_y
;
static
int
height_uv
;
static
int
width_uv
;
static
int
stride_uv
;
static
int
stride
;
static
int
lumimask
;
static
int
luminance
;
static
double
plane_summed_weights
=
0
;
static
short
img12_sum_block
[
8
*
4096
*
4096
*
2
]
;
static
short
img1_sum
[
8
*
4096
*
2
];
static
short
img2_sum
[
8
*
4096
*
2
];
static
int
img1_sq_sum
[
8
*
4096
*
2
];
static
int
img2_sq_sum
[
8
*
4096
*
2
];
static
int
img12_mul_sum
[
8
*
4096
*
2
];
double
vp8_similarity
(
int
mu_x
,
int
mu_y
,
int
pre_mu_x2
,
int
pre_mu_y2
,
int
pre_mu_xy2
)
{
int
mu_x2
,
mu_y2
,
mu_xy
,
theta_x2
,
theta_y2
,
theta_xy
;
mu_x2
=
mu_x
*
mu_x
;
mu_y2
=
mu_y
*
mu_y
;
mu_xy
=
mu_x
*
mu_y
;
theta_x2
=
64
*
pre_mu_x2
-
mu_x2
;
theta_y2
=
64
*
pre_mu_y2
-
mu_y2
;
theta_xy
=
64
*
pre_mu_xy2
-
mu_xy
;
return
(
2
*
mu_xy
+
C1
)
*
(
2
*
theta_xy
+
C2
)
/
((
mu_x2
+
mu_y2
+
C1
)
*
(
theta_x2
+
theta_y2
+
C2
));
}
double
vp8_ssim
(
const
unsigned
char
*
img1
,
const
unsigned
char
*
img2
,
int
stride_img1
,
int
stride_img2
,
int
width
,
int
height
)
{
int
x
,
y
,
x2
,
y2
,
img1_block
,
img2_block
,
img1_sq_block
,
img2_sq_block
,
img12_mul_block
,
temp
;
double
plane_quality
,
weight
,
mean
;
short
*
img1_sum_ptr1
,
*
img1_sum_ptr2
;
short
*
img2_sum_ptr1
,
*
img2_sum_ptr2
;
int
*
img1_sq_sum_ptr1
,
*
img1_sq_sum_ptr2
;
int
*
img2_sq_sum_ptr1
,
*
img2_sq_sum_ptr2
;
int
*
img12_mul_sum_ptr1
,
*
img12_mul_sum_ptr2
;
plane_quality
=
0
;
if
(
lumimask
)
plane_summed_weights
=
0
.
0
f
;
else
plane_summed_weights
=
(
height
-
7
)
*
(
width
-
7
);
//some prologue for the main loop
temp
=
8
*
width
;
img1_sum_ptr1
=
img1_sum
+
temp
;
img2_sum_ptr1
=
img2_sum
+
temp
;
img1_sq_sum_ptr1
=
img1_sq_sum
+
temp
;
img2_sq_sum_ptr1
=
img2_sq_sum
+
temp
;
img12_mul_sum_ptr1
=
img12_mul_sum
+
temp
;
for
(
x
=
0
;
x
<
width
;
x
++
)
{
img1_sum
[
x
]
=
img1
[
x
];
img2_sum
[
x
]
=
img2
[
x
];
img1_sq_sum
[
x
]
=
img1
[
x
]
*
img1
[
x
];
img2_sq_sum
[
x
]
=
img2
[
x
]
*
img2
[
x
];
img12_mul_sum
[
x
]
=
img1
[
x
]
*
img2
[
x
];
img1_sum_ptr1
[
x
]
=
0
;
img2_sum_ptr1
[
x
]
=
0
;
img1_sq_sum_ptr1
[
x
]
=
0
;
img2_sq_sum_ptr1
[
x
]
=
0
;
img12_mul_sum_ptr1
[
x
]
=
0
;
}
//the main loop
for
(
y
=
1
;
y
<
height
;
y
++
)
{
img1
+=
stride_img1
;
img2
+=
stride_img2
;
temp
=
(
y
-
1
)
%
9
*
width
;
img1_sum_ptr1
=
img1_sum
+
temp
;
img2_sum_ptr1
=
img2_sum
+
temp
;
img1_sq_sum_ptr1
=
img1_sq_sum
+
temp
;
img2_sq_sum_ptr1
=
img2_sq_sum
+
temp
;
img12_mul_sum_ptr1
=
img12_mul_sum
+
temp
;
temp
=
y
%
9
*
width
;
img1_sum_ptr2
=
img1_sum
+
temp
;
img2_sum_ptr2
=
img2_sum
+
temp
;
img1_sq_sum_ptr2
=
img1_sq_sum
+
temp
;
img2_sq_sum_ptr2
=
img2_sq_sum
+
temp
;
img12_mul_sum_ptr2
=
img12_mul_sum
+
temp
;
for
(
x
=
0
;
x
<
width
;
x
++
)
{
img1_sum_ptr2
[
x
]
=
img1_sum_ptr1
[
x
]
+
img1
[
x
];
img2_sum_ptr2
[
x
]
=
img2_sum_ptr1
[
x
]
+
img2
[
x
];
img1_sq_sum_ptr2
[
x
]
=
img1_sq_sum_ptr1
[
x
]
+
img1
[
x
]
*
img1
[
x
];
img2_sq_sum_ptr2
[
x
]
=
img2_sq_sum_ptr1
[
x
]
+
img2
[
x
]
*
img2
[
x
];
img12_mul_sum_ptr2
[
x
]
=
img12_mul_sum_ptr1
[
x
]
+
img1
[
x
]
*
img2
[
x
];
}
if
(
y
>
6
)
{
//calculate the sum of the last 8 lines by subtracting the total sum of 8 lines back from the present sum
temp
=
(
y
+
1
)
%
9
*
width
;
img1_sum_ptr1
=
img1_sum
+
temp
;
img2_sum_ptr1
=
img2_sum
+
temp
;
img1_sq_sum_ptr1
=
img1_sq_sum
+
temp
;
img2_sq_sum_ptr1
=
img2_sq_sum
+
temp
;
img12_mul_sum_ptr1
=
img12_mul_sum
+
temp
;
for
(
x
=
0
;
x
<
width
;
x
++
)
{
img1_sum_ptr1
[
x
]
=
img1_sum_ptr2
[
x
]
-
img1_sum_ptr1
[
x
];
img2_sum_ptr1
[
x
]
=
img2_sum_ptr2
[
x
]
-
img2_sum_ptr1
[
x
];
img1_sq_sum_ptr1
[
x
]
=
img1_sq_sum_ptr2
[
x
]
-
img1_sq_sum_ptr1
[
x
];
img2_sq_sum_ptr1
[
x
]
=
img2_sq_sum_ptr2
[
x
]
-
img2_sq_sum_ptr1
[
x
];
img12_mul_sum_ptr1
[
x
]
=
img12_mul_sum_ptr2
[
x
]
-
img12_mul_sum_ptr1
[
x
];
}
//here we calculate the sum over the 8x8 block of pixels
//this is done by sliding a window across the column sums for the last 8 lines
//each time adding the new column sum, and subtracting the one which fell out of the window
img1_block
=
0
;
img2_block
=
0
;
img1_sq_block
=
0
;
img2_sq_block
=
0
;
img12_mul_block
=
0
;
//prologue, and calculation of simularity measure from the first 8 column sums
for
(
x
=
0
;
x
<
8
;
x
++
)
{
img1_block
+=
img1_sum_ptr1
[
x
];
img2_block
+=
img2_sum_ptr1
[
x
];
img1_sq_block
+=
img1_sq_sum_ptr1
[
x
];
img2_sq_block
+=
img2_sq_sum_ptr1
[
x
];
img12_mul_block
+=
img12_mul_sum_ptr1
[
x
];
}
if
(
lumimask
)
{
y2
=
y
-
7
;
x2
=
0
;
if
(
luminance
)
{
mean
=
(
img2_block
+
img1_block
)
/
128
.
0
f
;
if
(
!
(
y2
%
2
||
x2
%
2
))
*
(
img12_sum_block
+
y2
/
2
*
width_uv
+
x2
/
2
)
=
img2_block
+
img1_block
;
}
else
{
mean
=
*
(
img12_sum_block
+
y2
*
width_uv
+
x2
);
mean
+=
*
(
img12_sum_block
+
y2
*
width_uv
+
x2
+
4
);
mean
+=
*
(
img12_sum_block
+
(
y2
+
4
)
*
width_uv
+
x2
);
mean
+=
*
(
img12_sum_block
+
(
y2
+
4
)
*
width_uv
+
x2
+
4
);
mean
/=
512
.
0
f
;
}
weight
=
mean
<
40
?
0
.
0
f
:
(
mean
<
50
?
(
mean
-
40
.
0
f
)
/
10
.
0
f
:
1
.
0
f
);
plane_summed_weights
+=
weight
;
plane_quality
+=
weight
*
vp8_similarity
(
img1_block
,
img2_block
,
img1_sq_block
,
img2_sq_block
,
img12_mul_block
);
}
else
plane_quality
+=
vp8_similarity
(
img1_block
,
img2_block
,
img1_sq_block
,
img2_sq_block
,
img12_mul_block
);
//and for the rest
for
(
x
=
8
;
x
<
width
;
x
++
)
{
img1_block
=
img1_block
+
img1_sum_ptr1
[
x
]
-
img1_sum_ptr1
[
x
-
8
];
img2_block
=
img2_block
+
img2_sum_ptr1
[
x
]
-
img2_sum_ptr1
[
x
-
8
];
img1_sq_block
=
img1_sq_block
+
img1_sq_sum_ptr1
[
x
]
-
img1_sq_sum_ptr1
[
x
-
8
];
img2_sq_block
=
img2_sq_block
+
img2_sq_sum_ptr1
[
x
]
-
img2_sq_sum_ptr1
[
x
-
8
];
img12_mul_block
=
img12_mul_block
+
img12_mul_sum_ptr1
[
x
]
-
img12_mul_sum_ptr1
[
x
-
8
];
if
(
lumimask
)
{
y2
=
y
-
7
;
x2
=
x
-
7
;
if
(
luminance
)
{
mean
=
(
img2_block
+
img1_block
)
/
128
.
0
f
;
if
(
!
(
y2
%
2
||
x2
%
2
))
*
(
img12_sum_block
+
y2
/
2
*
width_uv
+
x2
/
2
)
=
img2_block
+
img1_block
;
}
else
{
mean
=
*
(
img12_sum_block
+
y2
*
width_uv
+
x2
);
mean
+=
*
(
img12_sum_block
+
y2
*
width_uv
+
x2
+
4
);
mean
+=
*
(
img12_sum_block
+
(
y2
+
4
)
*
width_uv
+
x2
);
mean
+=
*
(
img12_sum_block
+
(
y2
+
4
)
*
width_uv
+
x2
+
4
);
mean
/=
512
.
0
f
;
}
weight
=
mean
<
40
?
0
.
0
f
:
(
mean
<
50
?
(
mean
-
40
.
0
f
)
/
10
.
0
f
:
1
.
0
f
);
plane_summed_weights
+=
weight
;
plane_quality
+=
weight
*
vp8_similarity
(
img1_block
,
img2_block
,
img1_sq_block
,
img2_sq_block
,
img12_mul_block
);
}
else
plane_quality
+=
vp8_similarity
(
img1_block
,
img2_block
,
img1_sq_block
,
img2_sq_block
,
img12_mul_block
);
}
}
}
if
(
plane_summed_weights
==
0
)
return
1
.
0
f
;
else
return
plane_quality
/
plane_summed_weights
;
}
double
vp8_calc_ssim
(
YV12_BUFFER_CONFIG
*
source
,
YV12_BUFFER_CONFIG
*
dest
,
int
lumamask
,
double
*
weight
)
{
double
a
,
b
,
c
;
double
frame_weight
;
double
ssimv
;
width_y
=
source
->
y_width
;
height_y
=
source
->
y_height
;
height_uv
=
source
->
uv_height
;
width_uv
=
source
->
uv_width
;
stride_uv
=
dest
->
uv_stride
;
stride
=
dest
->
y_stride
;
lumimask
=
lumamask
;
luminance
=
1
;
a
=
vp8_ssim
(
source
->
y_buffer
,
dest
->
y_buffer
,
source
->
y_stride
,
dest
->
y_stride
,
source
->
y_width
,
source
->
y_height
);
luminance
=
0
;
frame_weight
=
plane_summed_weights
/
((
width_y
-
7
)
*
(
height_y
-
7
));
if
(
frame_weight
==
0
)
a
=
b
=
c
=
1
.
0
f
;
else
{
b
=
vp8_ssim
(
source
->
u_buffer
,
dest
->
u_buffer
,
source
->
uv_stride
,
dest
->
uv_stride
,
source
->
uv_width
,
source
->
uv_height
);
c
=
vp8_ssim
(
source
->
v_buffer
,
dest
->
v_buffer
,
source
->
uv_stride
,
dest
->
uv_stride
,
source
->
uv_width
,
source
->
uv_height
);
}
ssimv
=
a
*
.
8
+
.
1
*
(
b
+
c
);
*
weight
=
frame_weight
;
return
ssimv
;
}
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
#define IF_RTCD(x) NULL
#endif
// Google version of SSIM
// SSIM
#define KERNEL 3
...
...
@@ -520,3 +235,174 @@ double vp8_calc_ssimg
*
ssim_v
/=
uvsize
;
return
ssim_all
;
}
void
ssim_parms_c
(
unsigned
char
*
s
,
int
sp
,
unsigned
char
*
r
,
int
rp
,
unsigned
long
*
sum_s
,
unsigned
long
*
sum_r
,
unsigned
long
*
sum_sq_s
,
unsigned
long
*
sum_sq_r
,
unsigned
long
*
sum_sxr
)
{
int
i
,
j
;
for
(
i
=
0
;
i
<
16
;
i
++
,
s
+=
sp
,
r
+=
rp
)
{
for
(
j
=
0
;
j
<
16
;
j
++
)
{
*
sum_s
+=
s
[
j
];
*
sum_r
+=
r
[
j
];
*
sum_sq_s
+=
s
[
j
]
*
s
[
j
];
*
sum_sq_r
+=
r
[
j
]
*
r
[
j
];
*
sum_sxr
+=
s
[
j
]
*
r
[
j
];
}
}
}
void
ssim_parms_8x8_c
(
unsigned
char
*
s
,
int
sp
,
unsigned
char
*
r
,
int
rp
,
unsigned
long
*
sum_s
,
unsigned
long
*
sum_r
,
unsigned
long
*
sum_sq_s
,
unsigned
long
*
sum_sq_r
,
unsigned
long
*
sum_sxr
)
{
int
i
,
j
;
for
(
i
=
0
;
i
<
8
;
i
++
,
s
+=
sp
,
r
+=
rp
)
{
for
(
j
=
0
;
j
<
8
;
j
++
)
{
*
sum_s
+=
s
[
j
];
*
sum_r
+=
r
[
j
];
*
sum_sq_s
+=
s
[
j
]
*
s
[
j
];
*
sum_sq_r
+=
r
[
j
]
*
r
[
j
];
*
sum_sxr
+=
s
[
j
]
*
r
[
j
];
}
}
}
const
static
long
long
c1
=
426148
;
// (256^2*(.01*255)^2
const
static
long
long
c2
=
3835331
;
//(256^2*(.03*255)^2
static
double
similarity
(
unsigned
long
sum_s
,
unsigned
long
sum_r
,
unsigned
long
sum_sq_s
,
unsigned
long
sum_sq_r
,
unsigned
long
sum_sxr
,
int
count
)
{
long
long
ssim_n
=
(
2
*
sum_s
*
sum_r
+
c1
)
*
(
2
*
count
*
sum_sxr
-
2
*
sum_s
*
sum_r
+
c2
);
long
long
ssim_d
=
(
sum_s
*
sum_s
+
sum_r
*
sum_r
+
c1
)
*
(
count
*
sum_sq_s
-
sum_s
*
sum_s
+
count
*
sum_sq_r
-
sum_r
*
sum_r
+
c2
)
;
return
ssim_n
*
1
.
0
/
ssim_d
;
}
static
double
ssim_16x16
(
unsigned
char
*
s
,
int
sp
,
unsigned
char
*
r
,
int
rp
,
const
vp8_variance_rtcd_vtable_t
*
rtcd
)
{
unsigned
long
sum_s
=
0
,
sum_r
=
0
,
sum_sq_s
=
0
,
sum_sq_r
=
0
,
sum_sxr
=
0
;
rtcd
->
ssimpf
(
s
,
sp
,
r
,
rp
,
&
sum_s
,
&
sum_r
,
&
sum_sq_s
,
&
sum_sq_r
,
&
sum_sxr
);
return
similarity
(
sum_s
,
sum_r
,
sum_sq_s
,
sum_sq_r
,
sum_sxr
,
256
);
}
static
double
ssim_8x8
(
unsigned
char
*
s
,
int
sp
,
unsigned
char
*
r
,
int
rp
,
const
vp8_variance_rtcd_vtable_t
*
rtcd
)
{
unsigned
long
sum_s
=
0
,
sum_r
=
0
,
sum_sq_s
=
0
,
sum_sq_r
=
0
,
sum_sxr
=
0
;
rtcd
->
ssimpf_8x8
(
s
,
sp
,
r
,
rp
,
&
sum_s
,
&
sum_r
,
&
sum_sq_s
,
&
sum_sq_r
,
&
sum_sxr
);
return
similarity
(
sum_s
,
sum_r
,
sum_sq_s
,
sum_sq_r
,
sum_sxr
,
64
);
}
// TODO: (jbb) tried to scale this function such that we may be able to use it
// for distortion metric in mode selection code ( provided we do a reconstruction)
long
dssim
(
unsigned
char
*
s
,
int
sp
,
unsigned
char
*
r
,
int
rp
,
const
vp8_variance_rtcd_vtable_t
*
rtcd
)
{
unsigned
long
sum_s
=
0
,
sum_r
=
0
,
sum_sq_s
=
0
,
sum_sq_r
=
0
,
sum_sxr
=
0
;
double
ssim3
;
long
long
ssim_n
;
long
long
ssim_d
;
rtcd
->
ssimpf
(
s
,
sp
,
r
,
rp
,
&
sum_s
,
&
sum_r
,
&
sum_sq_s
,
&
sum_sq_r
,
&
sum_sxr
);
ssim_n
=
(
2
*
sum_s
*
sum_r
+
c1
)
*
(
2
*
256
*
sum_sxr
-
2
*
sum_s
*
sum_r
+
c2
);
ssim_d
=
(
sum_s
*
sum_s
+
sum_r
*
sum_r
+
c1
)
*
(
256
*
sum_sq_s
-
sum_s
*
sum_s
+
256
*
sum_sq_r
-
sum_r
*
sum_r
+
c2
)
;
ssim3
=
256
*
(
ssim_d
-
ssim_n
)
/
ssim_d
;
return
(
long
)(
256
*
ssim3
*
ssim3
);
}
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking
// artifacts.
double
vp8_ssim2
(
unsigned
char
*
img1
,
unsigned
char
*
img2
,
int
stride_img1
,
int
stride_img2
,
int
width
,
int
height
,
const
vp8_variance_rtcd_vtable_t
*
rtcd
)
{
int
i
,
j
;
double
ssim_total
=
0
;
// we can sample points as frequently as we like start with 1 per 8x8
for
(
i
=
0
;
i
<
height
;
i
+=
8
,
img1
+=
stride_img1
*
8
,
img2
+=
stride_img2
*
8
)
{
for
(
j
=
0
;
j
<
width
;
j
+=
8
)
{
ssim_total
+=
ssim_8x8
(
img1
,
stride_img1
,
img2
,
stride_img2
,
rtcd
);
}
}
ssim_total
/=
(
width
/
8
*
height
/
8
);
return
ssim_total
;
}
double
vp8_calc_ssim
(
YV12_BUFFER_CONFIG
*
source
,
YV12_BUFFER_CONFIG
*
dest
,
int
lumamask
,
double
*
weight
,
const
vp8_variance_rtcd_vtable_t
*
rtcd
)
{
double
a
,
b
,
c
;
double
ssimv
;
//IF_RTCD(&cpi->rtcd.variance)
a
=
vp8_ssim2
(
source
->
y_buffer
,
dest
->
y_buffer
,
source
->
y_stride
,
dest
->
y_stride
,
source
->
y_width
,
source
->
y_height
,
rtcd
);
b
=
vp8_ssim2
(
source
->
u_buffer
,
dest
->
u_buffer
,
source
->
uv_stride
,
dest
->
uv_stride
,
source
->
uv_width
,
source
->
uv_height
,
rtcd
);
c
=
vp8_ssim2
(
source
->
v_buffer
,
dest
->
v_buffer
,
source
->
uv_stride
,
dest
->
uv_stride
,
source
->
uv_width
,
source
->
uv_height
,
rtcd
);
ssimv
=
a
*
.
8
+
.
1
*
(
b
+
c
);
*
weight
=
1
;
return
ssimv
;
}
vp8/encoder/variance.h
View file @
3f6f7289
...
...
@@ -85,6 +85,19 @@
unsigned int *sse \
);
#define prototype_ssimpf(sym) \
void (sym) \
( \
unsigned char *s, \
int sp, \
unsigned char *r, \
int rp, \
unsigned long *sum_s, \
unsigned long *sum_r, \
unsigned long *sum_sq_s, \
unsigned long *sum_sq_r, \
unsigned long *sum_sxr \
);
#define prototype_getmbss(sym) unsigned int (sym)(const short *)
...
...
@@ -306,6 +319,15 @@ extern prototype_variance2(vp8_variance_get16x16var);
#endif
extern
prototype_sad
(
vp8_variance_get4x4sse_cs
);
#ifndef vp8_ssimpf
#define vp8_ssimpf ssim_parms_c
#endif
extern
prototype_ssimpf
(
vp8_ssimpf
)
#ifndef vp8_ssimpf_8x8
#define vp8_ssimpf_8x8 ssim_parms_8x8_c
#endif
extern
prototype_ssimpf
(
vp8_ssimpf_8x8
)
typedef
prototype_sad
(
*
vp8_sad_fn_t
);
typedef
prototype_sad_multi_same_address
(
*
vp8_sad_multi_fn_t
);
...
...
@@ -315,6 +337,10 @@ typedef prototype_variance(*vp8_variance_fn_t);
typedef
prototype_variance2
(
*
vp8_variance2_fn_t
);
typedef
prototype_subpixvariance
(
*
vp8_subpixvariance_fn_t
);
typedef
prototype_getmbss
(
*
vp8_getmbss_fn_t
);
typedef
prototype_ssimpf
(
*
vp8_ssimpf_fn_t
)
typedef
struct
{
vp8_sad_fn_t
sad4x4
;
...
...
@@ -365,6 +391,11 @@ typedef struct
vp8_sad_multi_d_fn_t
sad8x8x4d
;
vp8_sad_multi_d_fn_t
sad4x4x4d
;
#if CONFIG_PSNR
vp8_ssimpf_fn_t
ssimpf_8x8
;
vp8_ssimpf_fn_t
ssimpf
;
#endif
}
vp8_variance_rtcd_vtable_t
;
typedef
struct
...
...
@@ -378,6 +409,7 @@ typedef struct
vp8_sad_multi_fn_t
sdx3f
;
vp8_sad_multi1_fn_t
sdx8f
;
vp8_sad_multi_d_fn_t
sdx4df
;
}
vp8_variance_fn_ptr_t
;
#if CONFIG_RUNTIME_CPU_DETECT
...
...
vp8/encoder/x86/ssim_opt.asm
0 → 100644
View file @
3f6f7289
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
%macro TABULATE_SSIM 0
paddusw
xmm15
,
xmm3
; sum_s
paddusw
xmm14
,
xmm4
; sum_r
movdqa
xmm1
,
xmm3
pmaddwd
xmm1
,
xmm1
paddq
xmm13
,
xmm1
; sum_sq_s
movdqa
xmm2
,
xmm4
pmaddwd
xmm2
,
xmm2
paddq
xmm12
,
xmm2
; sum_sq_r
pmaddwd
xmm3
,
xmm4
paddq
xmm11
,
xmm3
; sum_sxr
%endmacro
; Sum across the register %1 starting with q words
%macro SUM_ACROSS_Q 1