Commit 694d4e77 authored by Fritz Koenig's avatar Fritz Koenig
Browse files

Reclassify optimized ssim calculations as SSE2.

Calculations were incorrectly classified as either
SSE3 or SSSE3.  Only using SSE2 instructions.
Cleanup function names and make non-RTCD code work
as well.

Change-Id: I48ad0218af0cc51c5078070a08511dee43ecfe09
parent b7a6f1d2
......@@ -94,16 +94,15 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
#endif
#if CONFIG_INTERNAL_STATS
cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c;
cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c;
#endif
#endif
// Pure C:
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
#if CONFIG_INTERNAL_STATS
cpi->rtcd.variance.ssimpf_8x8 = ssim_parms_8x8_c;
cpi->rtcd.variance.ssimpf = ssim_parms_c;
#endif
#if ARCH_X86 || ARCH_X86_64
vp8_arch_x86_encoder_init(cpi);
#endif
......
......@@ -9,18 +9,9 @@
*/
#include "vpx_scale/yv12config.h"
#include "math.h"
#include "onyx_int.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
#define IF_RTCD(x) NULL
#endif
void ssim_parms_c
void vp8_ssim_parms_16x16_c
(
unsigned char *s,
int sp,
......@@ -46,7 +37,7 @@ void ssim_parms_c
}
}
}
void ssim_parms_8x8_c
void vp8_ssim_parms_8x8_c
(
unsigned char *s,
int sp,
......@@ -107,14 +98,14 @@ static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
}
static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
SSIMPF_INVOKE(rtcd,8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
......@@ -134,7 +125,7 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
c1 = cc1*16;
c2 = cc2*16;
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
ssim_n1 = (2*sum_s*sum_r+ c1);
ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);
......
......@@ -320,16 +320,16 @@ extern prototype_variance(vp8_variance_mse16x16);
#endif
extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs);
#ifndef vp8_ssimpf
#define vp8_ssimpf ssim_parms_c
#endif
extern prototype_ssimpf(vp8_ssimpf)
#ifndef vp8_ssimpf_8x8
#define vp8_ssimpf_8x8 ssim_parms_8x8_c
#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
#endif
extern prototype_ssimpf(vp8_ssimpf_8x8)
#ifndef vp8_ssimpf_16x16
#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
#endif
extern prototype_ssimpf(vp8_ssimpf_16x16)
typedef prototype_sad(*vp8_sad_fn_t);
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
......@@ -394,7 +394,7 @@ typedef struct
#if CONFIG_INTERNAL_STATS
vp8_ssimpf_fn_t ssimpf_8x8;
vp8_ssimpf_fn_t ssimpf;
vp8_ssimpf_fn_t ssimpf_16x16;
#endif
} vp8_variance_rtcd_vtable_t;
......@@ -417,8 +417,10 @@ typedef struct
#if CONFIG_RUNTIME_CPU_DETECT
#define VARIANCE_INVOKE(ctx,fn) (ctx)->fn
#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn
#else
#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn
#endif
#endif
......@@ -44,7 +44,7 @@
paddd %1, xmm1
SUM_ACROSS_Q %1
%endmacro
;void ssim_parms_sse3(
;void ssim_parms_sse2(
; unsigned char *s,
; int sp,
; unsigned char *r,
......@@ -61,8 +61,8 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
global sym(vp8_ssim_parms_16x16_sse3)
sym(vp8_ssim_parms_16x16_sse3):
global sym(vp8_ssim_parms_16x16_sse2)
sym(vp8_ssim_parms_16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
......@@ -134,7 +134,7 @@ NextRow:
pop rbp
ret
;void ssim_parms_sse3(
;void ssim_parms_sse2(
; unsigned char *s,
; int sp,
; unsigned char *r,
......@@ -151,8 +151,8 @@ NextRow:
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
global sym(vp8_ssim_parms_8x8_sse3)
sym(vp8_ssim_parms_8x8_sse3):
global sym(vp8_ssim_parms_8x8_sse2)
sym(vp8_ssim_parms_8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
......
......@@ -140,6 +140,8 @@ extern prototype_getmbss(vp8_get_mb_ss_sse2);
extern prototype_variance(vp8_mse16x16_wmt);
extern prototype_variance2(vp8_get8x8var_sse2);
extern prototype_variance2(vp8_get16x16var_sse2);
extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_variance_sad4x4
......@@ -208,6 +210,14 @@ extern prototype_variance2(vp8_get16x16var_sse2);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_wmt
#if ARCH_X86_64
#undef vp8_ssimpf_8x8
#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2
#undef vp8_ssimpf_16x16
#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2
#endif
#endif
#endif
......
......@@ -111,29 +111,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
#if HAVE_SSSE3
#if CONFIG_INTERNAL_STATS
#if ARCH_X86_64
typedef void ssimpf
(
unsigned char *s,
int sp,
unsigned char *r,
int rp,
unsigned long *sum_s,
unsigned long *sum_r,
unsigned long *sum_sq_s,
unsigned long *sum_sq_r,
unsigned long *sum_sxr
);
extern ssimpf vp8_ssim_parms_16x16_sse3;
extern ssimpf vp8_ssim_parms_8x8_sse3;
#endif
#endif
#endif
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
#if CONFIG_RUNTIME_CPU_DETECT
......@@ -245,6 +222,13 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
#endif
#if CONFIG_INTERNAL_STATS
#if ARCH_X86_64
cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2;
cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2;
#endif
#endif
}
#endif
......@@ -280,14 +264,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
#if CONFIG_INTERNAL_STATS
#if ARCH_X86_64
cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3;
cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3;
#endif
#endif
}
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment