Commit fce3cee8 authored by Jim Bankoski's avatar Jim Bankoski

Move vpx_add_plane from codec to vpx_dsp and dedup.

Change-Id: I12218d8331c0558c0587a66321e3ca46da7e5cc7
parent 021105e3
......@@ -13,6 +13,7 @@
#include <stdio.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "./vp10_rtcd.h"
......@@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
state->last_noise = a;
}
void vp10_plane_add_noise_c(uint8_t *start, char *noise,
char blackclamp[16],
char whiteclamp[16],
char bothclamp[16],
unsigned int width, unsigned int height, int pitch) {
unsigned int i, j;
// TODO(jbb): why does simd code use both but c doesn't, normalize and
// fix..
(void) bothclamp;
for (i = 0; i < height; i++) {
uint8_t *pos = start + i * pitch;
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
for (j = 0; j < width; j++) {
if (pos[j] < blackclamp[0])
pos[j] = blackclamp[0];
if (pos[j] > 255 + whiteclamp[0])
pos[j] = 255 + whiteclamp[0];
pos[j] += ref[j];
}
}
}
static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
......@@ -727,7 +702,7 @@ int vp10_post_proc_frame(struct VP10Common *cm,
fillrd(ppstate, 63 - q, noise_level);
}
vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}
......
......@@ -70,10 +70,6 @@ add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint
specialize qw/vp10_post_proc_down_and_across sse2/;
$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp10_plane_add_noise sse2/;
$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
......@@ -326,9 +322,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp10_highbd_post_proc_down_and_across/;
add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp10_highbd_plane_add_noise/;
}
#
......
......@@ -624,68 +624,6 @@ sym(vp10_mbpost_proc_across_ip_xmm):
%undef flimit4
;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int width, unsigned int height, int pitch)
global sym(vp10_plane_add_noise_wmt) PRIVATE
sym(vp10_plane_add_noise_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax
.addnoise_nextset:
movdqu xmm1,[rsi+rax] ; get the source
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb xmm1, [rdx+32] ;bothclamp
psubusb xmm1, [rdx+16] ;whiteclamp
movdqu xmm2,[rdi+rax] ; get the noise for this line
paddb xmm1,xmm2 ; add it in
movdqu [rsi+rax],xmm1 ; store the result
add rax,16 ; move to the next line
cmp rax, rcx
jl .addnoise_nextset
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
rd42:
......
......@@ -10,6 +10,7 @@
#include <stdlib.h>
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static const int16_t vp8_rv_msa[] =
......@@ -798,54 +799,3 @@ void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
}
}
}
void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
char blackclamp[16], char whiteclamp[16],
char bothclamp[16],
uint32_t width, uint32_t height,
int32_t pitch)
{
uint32_t i, j;
for (i = 0; i < height / 2; ++i)
{
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
for (j = width / 16; j--;)
{
v16i8 temp00_s, temp01_s;
v16u8 temp00, temp01, black_clamp, white_clamp;
v16u8 pos0, ref0, pos1, ref1;
v16i8 const127 = __msa_ldi_b(127);
pos0 = LD_UB(pos0_ptr);
ref0 = LD_UB(ref0_ptr);
pos1 = LD_UB(pos1_ptr);
ref1 = LD_UB(ref1_ptr);
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
temp00 = (pos0 < black_clamp);
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
temp01 = (pos1 < black_clamp);
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
XORI_B2_128_UB(pos0, pos1);
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp00 = (v16u8)(temp00_s < pos0);
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp01 = (temp01_s < pos1);
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
XORI_B2_128_UB(pos0, pos1);
pos0 += ref0;
ST_UB(pos0, pos0_ptr);
pos1 += ref1;
ST_UB(pos1, pos1_ptr);
pos0_ptr += 16;
pos1_ptr += 16;
ref0_ptr += 16;
ref1_ptr += 16;
}
}
}
......@@ -10,6 +10,7 @@
#include "vpx_config.h"
#include "vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
......@@ -490,54 +491,6 @@ static void fillrd(struct postproc_state *state, int q, int a)
state->last_noise = a;
}
/****************************************************************************
*
* ROUTINE : plane_add_noise_c
*
* INPUTS : unsigned char *Start starting address of buffer to add gaussian
* noise to
* unsigned int Width width of plane
* unsigned int Height height of plane
* int Pitch distance between subsequent lines of frame
* int q quantizer used to determine amount of noise
* to add
*
* OUTPUTS : None.
*
* RETURNS : void.
*
* FUNCTION : adds gaussian noise to a plane of pixels
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
char blackclamp[16],
char whiteclamp[16],
char bothclamp[16],
unsigned int Width, unsigned int Height, int Pitch)
{
unsigned int i, j;
(void)bothclamp;
for (i = 0; i < Height; i++)
{
unsigned char *Pos = Start + i * Pitch;
char *Ref = (char *)(noise + (rand() & 0xff));
for (j = 0; j < Width; j++)
{
if (Pos[j] < blackclamp[0])
Pos[j] = blackclamp[0];
if (Pos[j] > 255 + whiteclamp[0])
Pos[j] = 255 + whiteclamp[0];
Pos[j] += Ref[j];
}
}
}
/* Blend the macro block with a solid colored square. Leave the
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
......@@ -828,7 +781,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
fillrd(&oci->postproc_state, 63 - q, noise_level);
}
vp8_plane_add_noise
vpx_plane_add_noise
(oci->post_proc_buffer.y_buffer,
oci->postproc_state.noise,
oci->postproc_state.blackclamp,
......
......@@ -167,10 +167,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
specialize qw/vp8_plane_add_noise mmx sse2 msa/;
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
......
......@@ -241,68 +241,6 @@ sym(vp8_mbpost_proc_down_mmx):
%undef flimit2
;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax
.addnoise_nextset:
movq mm1,[rsi+rax] ; get the source
psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb mm1, [rdx+32] ;bothclamp
psubusb mm1, [rdx+16] ;whiteclamp
movq mm2,[rdi+rax] ; get the noise for this line
paddb mm1,mm2 ; add it in
movq [rsi+rax],mm1 ; store the result
add rax,8 ; move to the next line
cmp rax, rcx
jl .addnoise_nextset
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
Blur:
......
......@@ -655,68 +655,6 @@ sym(vp8_mbpost_proc_across_ip_xmm):
%undef flimit4
;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
global sym(vp8_plane_add_noise_wmt) PRIVATE
sym(vp8_plane_add_noise_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax
.addnoise_nextset:
movdqu xmm1,[rsi+rax] ; get the source
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb xmm1, [rdx+32] ;bothclamp
psubusb xmm1, [rdx+16] ;whiteclamp
movdqu xmm2,[rdi+rax] ; get the noise for this line
paddb xmm1,xmm2 ; add it in
movdqu [rsi+rax],xmm1 ; store the result
add rax,16 ; move to the next line
cmp rax, rcx
jl .addnoise_nextset
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
four8s:
......
......@@ -12,6 +12,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
......@@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
state->last_noise = a;
}
void vp9_plane_add_noise_c(uint8_t *start, char *noise,
char blackclamp[16],
char whiteclamp[16],
char bothclamp[16],
unsigned int width, unsigned int height, int pitch) {
unsigned int i, j;
// TODO(jbb): why does simd code use both but c doesn't, normalize and
// fix..
(void) bothclamp;
for (i = 0; i < height; i++) {
uint8_t *pos = start + i * pitch;
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
for (j = 0; j < width; j++) {
if (pos[j] < blackclamp[0])
pos[j] = blackclamp[0];
if (pos[j] > 255 + whiteclamp[0])
pos[j] = 255 + whiteclamp[0];
pos[j] += ref[j];
}
}
}
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
......@@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
ppstate->last_noise != noise_level) {
fillrd(ppstate, 63 - q, noise_level);
}
vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}
......
......@@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8
specialize qw/vp9_post_proc_down_and_across sse2/;
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp9_plane_add_noise sse2/;
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
......@@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp9_highbd_post_proc_down_and_across/;
add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp9_highbd_plane_add_noise/;
}
#
......
......@@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm):
%undef flimit4
;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int width, unsigned int height, int pitch)
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax
.addnoise_nextset:
movdqu xmm1,[rsi+rax] ; get the source
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb xmm1, [rdx+32] ;bothclamp
psubusb xmm1, [rdx+16] ;whiteclamp
movdqu xmm2,[rdi+rax] ; get the noise for this line
paddb xmm1,xmm2 ; add it in
movdqu [rsi+rax],xmm1 ; store the result
add rax,16 ; move to the next line
cmp rax, rcx
jl .addnoise_nextset
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
rd42:
......
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "./macros_msa.h"
void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
char blackclamp[16], char whiteclamp[16],
char bothclamp[16], uint32_t width,
uint32_t height, int32_t pitch) {
uint32_t i, j;
for (i = 0; i < height / 2; ++i) {
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
for (j = width / 16; j--;) {
v16i8 temp00_s, temp01_s;
v16u8 temp00, temp01, black_clamp, white_clamp;
v16u8 pos0, ref0, pos1, ref1;
v16i8 const127 = __msa_ldi_b(127);
pos0 = LD_UB(pos0_ptr);
ref0 = LD_UB(ref0_ptr);
pos1 = LD_UB(pos1_ptr);
ref1 = LD_UB(ref1_ptr);
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
temp00 = (pos0 < black_clamp);
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
temp01 = (pos1 < black_clamp);
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
XORI_B2_128_UB(pos0, pos1);
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp00 = (v16u8)(temp00_s < pos0);
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp01 = (temp01_s < pos1);
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
XORI_B2_128_UB(pos0, pos1);
pos0 += ref0;
ST_UB(pos0, pos0_ptr);
pos1 += ref1;
ST_UB(pos1, pos1_ptr);
pos0_ptr += 16;
pos1_ptr += 16;
ref0_ptr += 16;
ref1_ptr += 16;
}
}
}
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found