x86_csystemdependent.c 14.7 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_ports/config.h"
#include "vpx_ports/x86.h"
John Koleszar's avatar
John Koleszar committed
14 15
#include "vp8/encoder/variance.h"
#include "vp8/encoder/onyx_int.h"
John Koleszar's avatar
John Koleszar committed
16 17 18 19 20


#if HAVE_MMX
/* Transform an 8x4 area with two calls to vp8_short_fdct4x4_mmx.
 *
 * The second call starts 4 samples further into `input` and 16 coefficients
 * further into `output`; `pitch` is forwarded unchanged to both calls.
 */
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
{
    int half;

    for (half = 0; half < 2; half++)
    {
        vp8_short_fdct4x4_mmx(input + 4 * half, output + 16 * half, pitch);
    }
}

/* Quantizer kernel; only declared here, its definition is not in this file.
 * The returned value is stored as the block's end-of-block index below. */
int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                 short *qcoeff_ptr, short *dequant_ptr,
                                 short *scan_mask, short *round_ptr,
                                 short *quant_ptr, short *dqcoeff_ptr);

/* Unpack the BLOCK / BLOCKD fields the kernel needs, run it, and record its
 * return value in d->eob.
 *
 * The scan mask is always vp8_default_zig_zag_mask (the per-block
 * d->scan_order_mask_ptr is not consulted).
 */
void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
{
    d->eob = vp8_fast_quantize_b_impl_mmx(b->coeff,
                                          b->zbin,
                                          d->qcoeff,
                                          d->dequant,
                                          vp8_default_zig_zag_mask,
                                          b->round,
                                          b->quant_fast,
                                          d->dqcoeff);
}

/* Error kernel; only declared here, its definition is not in this file. */
int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);

/* Call the kernel on the macroblock's first block: source coefficients from
 * mb->block[0].coeff, dequantized coefficients from
 * mb->e_mbd.block[0].dqcoeff. `dc` is forwarded unchanged and the kernel's
 * result is returned as-is.
 */
int vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
{
    return vp8_mbblock_error_mmx_impl(mb->block[0].coeff,
                                      mb->e_mbd.block[0].dqcoeff,
                                      dc);
}

/* Error kernel; only declared here, its definition is not in this file. */
int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);

/* Call the kernel on the coefficients starting at index 256 of mb->coeff and
 * mb->e_mbd.dqcoeff, and return its result as-is.
 *
 * NOTE(review): index 256 is presumably where the chroma coefficients begin
 * (16 luma blocks x 16 coefficients) -- confirm against the MACROBLOCK layout.
 */
int vp8_mbuverror_mmx(MACROBLOCK *mb)
{
    return vp8_mbuverror_mmx_impl(mb->coeff + 256, mb->e_mbd.dqcoeff + 256);
}

/* Subtraction kernel; only declared here, its definition is not in this
 * file. */
void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
                             short *diff, unsigned char *predictor,
                             int pitch);

/* Unpack the block's source pointer, stride, difference buffer and predictor
 * and hand them to the kernel. `pitch` is forwarded unchanged.
 *
 * The stride is passed straight from be->src_stride: the previous version
 * staged it in an `unsigned int` local even though the kernel parameter is a
 * plain `int`, adding a pointless signed/unsigned round-trip.
 */
void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
{
    /* Source pixels live at offset be->src inside the buffer that
     * be->base_src points to. */
    unsigned char *z = *(be->base_src) + be->src;

    vp8_subtract_b_mmx_impl(z, be->src_stride, be->src_diff, bd->predictor,
                            pitch);
}

#endif

#if HAVE_SSE2
84
int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
John Koleszar's avatar
John Koleszar committed
85
                                 short *qcoeff_ptr, short *dequant_ptr,
86
                                 const short *inv_scan_order, short *round_ptr,
John Koleszar's avatar
John Koleszar committed
87
                                 short *quant_ptr, short *dqcoeff_ptr);
88
void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
John Koleszar's avatar
John Koleszar committed
89
{
90 91 92
    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
    short *coeff_ptr   = b->coeff;
    short *round_ptr   = b->round;
93
    short *quant_ptr   = b->quant_fast;
94
    short *qcoeff_ptr  = d->qcoeff;
John Koleszar's avatar
John Koleszar committed
95
    short *dqcoeff_ptr = d->dqcoeff;
96
    short *dequant_ptr = d->dequant;
John Koleszar's avatar
John Koleszar committed
97

98
    d->eob = vp8_fast_quantize_b_impl_sse2(
John Koleszar's avatar
John Koleszar committed
99 100 101
                 coeff_ptr,
                 qcoeff_ptr,
                 dequant_ptr,
102
                 vp8_default_inv_zig_zag,
John Koleszar's avatar
John Koleszar committed
103 104 105 106 107 108
                 round_ptr,
                 quant_ptr,
                 dqcoeff_ptr
             );
}

109

110
int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
Johann's avatar
Johann committed
111 112 113 114 115 116
                                     short *qcoeff_ptr,short *dequant_ptr,
                                     const int *default_zig_zag, short *round_ptr,
                                     short *quant_ptr, short *dqcoeff_ptr,
                                     unsigned short zbin_oq_value,
                                     short *zbin_boost_ptr,
                                     short *quant_shift_ptr);
117 118 119

void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
{
Johann's avatar
Johann committed
120 121 122 123 124 125 126 127 128 129 130
    d->eob = vp8_regular_quantize_b_impl_sse2(b->coeff,
                                              b->zbin,
                                              d->qcoeff,
                                              d->dequant,
                                              vp8_default_zig_zag1d,
                                              b->round,
                                              b->quant,
                                              d->dqcoeff,
                                              b->zbin_extra,
                                              b->zrun_zbin_boost,
                                              b->quant_shift);
131 132
}

John Koleszar's avatar
John Koleszar committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
/* Error kernel; only declared here, its definition is not in this file. */
int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);

/* Call the kernel on the macroblock's first block: source coefficients from
 * mb->block[0].coeff, dequantized coefficients from
 * mb->e_mbd.block[0].dqcoeff. `dc` is forwarded unchanged and the kernel's
 * result is returned as-is.
 */
int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
{
    return vp8_mbblock_error_xmm_impl(mb->block[0].coeff,
                                      mb->e_mbd.block[0].dqcoeff,
                                      dc);
}

/* Error kernel; only declared here, its definition is not in this file. */
int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);

/* Call the kernel on the coefficients starting at index 256 of mb->coeff and
 * mb->e_mbd.dqcoeff, and return its result as-is.
 *
 * NOTE(review): index 256 is presumably where the chroma coefficients begin
 * (16 luma blocks x 16 coefficients) -- confirm against the MACROBLOCK layout.
 */
int vp8_mbuverror_xmm(MACROBLOCK *mb)
{
    return vp8_mbuverror_xmm_impl(mb->coeff + 256, mb->e_mbd.dqcoeff + 256);
}

Yunqing Wang's avatar
Yunqing Wang committed
149 150 151 152 153 154 155 156 157 158 159 160
/* Subtraction kernel; only declared here, its definition is not in this
 * file. */
void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
                             short *diff, unsigned char *predictor,
                             int pitch);

/* Unpack the block's source pointer, stride, difference buffer and predictor
 * and hand them to the kernel. `pitch` is forwarded unchanged.
 *
 * The stride is passed straight from be->src_stride: the previous version
 * staged it in an `unsigned int` local even though the kernel parameter is a
 * plain `int`, adding a pointless signed/unsigned round-trip.
 */
void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
{
    /* Source pixels live at offset be->src inside the buffer that
     * be->base_src points to. */
    unsigned char *z = *(be->base_src) + be->src;

    vp8_subtract_b_sse2_impl(z, be->src_stride, be->src_diff, bd->predictor,
                             pitch);
}

John Koleszar's avatar
John Koleszar committed
161 162
#endif

163 164 165 166 167 168 169 170 171 172 173 174
#if HAVE_SSSE3
/* Quantizer kernel; only declared here, its definition is not in this file.
 * The returned value is stored as the block's end-of-block index below. */
int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
                                 short *qcoeff_ptr, short *dequant_ptr,
                                 short *round_ptr,
                                 short *quant_ptr, short *dqcoeff_ptr);

/* Unpack the BLOCK / BLOCKD fields the kernel needs, run it, and record its
 * return value in d->eob. This kernel takes no scan table argument.
 */
void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
{
    /* Encoder-side inputs. */
    short *coeff_ptr   = b->coeff;
    short *round_ptr   = b->round;
    short *quant_ptr   = b->quant_fast;

    /* Decoder-side buffers and dequantization factors. */
    short *qcoeff_ptr  = d->qcoeff;
    short *dequant_ptr = d->dequant;
    short *dqcoeff_ptr = d->dqcoeff;

    d->eob = vp8_fast_quantize_b_impl_ssse3(coeff_ptr, qcoeff_ptr, dequant_ptr,
                                            round_ptr, quant_ptr, dqcoeff_ptr);
}
#endif


John Koleszar's avatar
John Koleszar committed
182 183 184 185 186 187 188 189 190
/* Install x86 SIMD encoder routines into cpi->rtcd.
 *
 * The body is compiled only when CONFIG_RUNTIME_CPU_DETECT is set; otherwise
 * the function does nothing. Each instruction-set section is compiled in when
 * its HAVE_* macro is set and applied when x86_simd_caps() reports the
 * matching HAS_* flag. Sections run in the order MMX, SSE2, SSE3, SSSE3,
 * SSE4.1, so an entry assigned by several sections keeps the value from the
 * last section that applied.
 *
 * Capability flags are tested in place rather than cached in locals: the old
 * `xmm_enabled` local was never read, and the others went unused whenever
 * their HAVE_* section was compiled out.
 */
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
#if CONFIG_RUNTIME_CPU_DETECT
    const int flags = x86_simd_caps();

    /* Note:
     *
     * This platform can be built without runtime CPU detection as well. If
     * you modify any of the function mappings present in this file, be sure
     * to also update them in static mappings (<arch>/filename_<arch>.h)
     */

    /* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
    if (flags & HAS_MMX)
    {
        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_mmx;
        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_mmx;
        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_mmx;
        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_mmx;
        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_mmx;

        cpi->rtcd.variance.var4x4                = vp8_variance4x4_mmx;
        cpi->rtcd.variance.var8x8                = vp8_variance8x8_mmx;
        cpi->rtcd.variance.var8x16               = vp8_variance8x16_mmx;
        cpi->rtcd.variance.var16x8               = vp8_variance16x8_mmx;
        cpi->rtcd.variance.var16x16              = vp8_variance16x16_mmx;

        cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_mmx;
        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_mmx;
        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_mmx;
        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_mmx;
        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_mmx;
        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_mmx;
        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_mmx;
        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_mmx;
        cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_mmx;

        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_mmx;
        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_mmx;

        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_mmx;
        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx;
        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx;
        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;

        /* The "fast" transform entries share the regular MMX transforms. */
        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;

        /* The Walsh transform is set to the C routine in this section. */
        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;

        cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_mmx;
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_mmx;
        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_mmx;
        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;

        /* The MMX fast quantizer is not installed (assignment kept disabled): */
        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
    }
#endif

#if HAVE_SSE2
    if (flags & HAS_SSE2)
    {
        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_wmt;
        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_wmt;
        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_wmt;
        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_wmt;
        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_wmt;

        cpi->rtcd.variance.var4x4                = vp8_variance4x4_wmt;
        cpi->rtcd.variance.var8x8                = vp8_variance8x8_wmt;
        cpi->rtcd.variance.var8x16               = vp8_variance8x16_wmt;
        cpi->rtcd.variance.var16x8               = vp8_variance16x8_wmt;
        cpi->rtcd.variance.var16x16              = vp8_variance16x16_wmt;

        cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_wmt;
        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_wmt;
        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_wmt;
        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_wmt;
        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_wmt;
        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_wmt;
        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_wmt;
        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_wmt;
        cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_wmt;

        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_wmt;
        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_sse2;

        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_sse2;
        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_sse2;
        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2;
        /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */

        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2;
        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2;
        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;

        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2;

        cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_sse2;
        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2;
        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2;

        /* The SSE2 regular quantizer is installed only when ARCH_X86 is set. */
#if ARCH_X86
        cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b_sse2;
#endif
        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;

#if !(CONFIG_REALTIME_ONLY)
        cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2;
#endif
    }
#endif

#if HAVE_SSE3
    if (flags & HAS_SSE3)
    {
        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_sse3;
        cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_sse3;
        cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_sse3;
        cpi->rtcd.variance.sad8x16x3             = vp8_sad8x16x3_sse3;
        cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_sse3;
        cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_sse3;
#if !(CONFIG_REALTIME_ONLY)
        cpi->rtcd.search.full_search             = vp8_full_search_sadx3;
#endif
        cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_sse3;
        cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_sse3;
        cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_sse3;
        cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_sse3;
        cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3;
        cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4;
    }
#endif

#if HAVE_SSSE3
    if (flags & HAS_SSSE3)
    {
        cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
        cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3;

        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_ssse3;
        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_ssse3;

        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
    }
#endif

#if HAVE_SSE4_1
    if (flags & HAS_SSE4_1)
    {
        cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_sse4;
        cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_sse4;
        cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4;
        cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4;
        cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4;
#if !(CONFIG_REALTIME_ONLY)
        cpi->rtcd.search.full_search             = vp8_full_search_sadx8;
#endif
    }
#endif

#endif
}