diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index eb17a7ca6aa7bd66ccbd4b957f90cd1b90e78874..b65767566feaaaf3a5901b571c23fa830a7cc86f 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -178,6 +178,12 @@ void vp8_post_proc_down_and_across_c p_src = dst_ptr; p_dst = dst_ptr; + for (i = -8; i<0; i++) + p_src[i]=p_src[0]; + + for (i = cols; i<cols+8; i++) + p_src[i]=p_src[cols-1]; + for (i = 0; i < 8; i++) d[i] = p_src[i]; @@ -228,12 +234,19 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co unsigned char *s = src; unsigned char d[16]; - for (r = 0; r < rows; r++) { int sumsq = 0; int sum = 0; + for (i = -8; i<0; i++) + s[i]=s[0]; + + // 17 avoids valgrind warning - we buffer values in c in d + // and only write them when we've read 8 ahead... + for (i = cols; i<cols+17; i++) + s[i]=s[cols-1]; + for (i = -8; i <= 6; i++) { sumsq += s[i] * s[i]; @@ -272,7 +285,7 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i int r, c, i; const short *rv3 = &vp8_rv[63&rand()]; - for (c = 0; c < cols; c++) + for (c = 0; c < cols; c++ ) { unsigned char *s = &dst[c]; int sumsq = 0; @@ -280,6 +293,14 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i unsigned char d[16]; const short *rv2 = rv3 + ((c * 17) & 127); + for (i = -8; i < 0; i++) + s[i*pitch]=s[0]; + + // 17 avoids valgrind warning - we buffer values in c in d + // and only write them when we've read 8 ahead...
+ for (i = rows; i < rows+17; i++) + s[i*pitch]=s[(rows-1)*pitch]; + for (i = -8; i <= 6; i++) { sumsq += s[i*pitch] * s[i*pitch]; @@ -320,6 +341,7 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); + POSTPROC_INVOKE(rtcd, downacross)(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); POSTPROC_INVOKE(rtcd, downacross)(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); @@ -935,6 +957,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t { oci->post_proc_buffer_int_used = 1; } + // fill post_proc_buffer_int with a constant (126, not 0) so that post proc + // doesn't pull random data in from edge; NOTE(review): the length is post_proc_buffer's frame_size - confirm it matches post_proc_buffer_int + vpx_memset((&oci->post_proc_buffer_int)->buffer_alloc,126,(&oci->post_proc_buffer)->frame_size); + } } diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm index 81122181f20edff51e650520d9c093b861c27f27..d24f74087b7e9a339e07ef2dfdb82b33e6836769 100644 --- a/vp8/common/x86/postproc_mmx.asm +++ b/vp8/common/x86/postproc_mmx.asm @@ -151,6 +151,23 @@ sym(vp8_post_proc_down_and_across_mmx): sub rsi, rdx sub rdi, rdx + ; dup the first byte into the left border 8 times + movq mm1, [rdi] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 + + mov rdx, -8 + movq [rdi+rdx], mm1 + + ; dup the last byte into the right border + movsxd rdx, dword arg(5) + movq mm1, [rdi + rdx + -1] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 + movq [rdi+rdx], mm1 + push rax xor rdx, rdx @@ -298,8 +315,36 @@ sym(vp8_mbpost_proc_down_mmx): pxor mm0, mm0 ; movsxd rax, dword ptr arg(1) ;pitch ; + + ; this copies the last row down into the border 8 rows + mov rdi, rsi + movsxd rdx, dword ptr arg(2) + sub
rdx, 9 + imul rdx, rax + lea rdi, [rdi+rdx] + movq mm1, QWORD ptr[rdi] ; last row + mov rcx, 8 +.init_borderd: ; initialize borders + lea rdi, [rdi + rax] + movq [rdi], mm1 + + dec rcx + jne .init_borderd + neg rax ; rax = -pitch + ; this copies the first row up into the border 8 rows + mov rdi, rsi + movq mm1, QWORD ptr[rdi] ; first row + mov rcx, 8 +.init_border: ; initialize borders + lea rdi, [rdi + rax] + movq [rdi], mm1 + + dec rcx + jne .init_border + + + lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8] neg rax diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index 1f219ca87817135f938958b7d438649e6c123153..966aafd26a03ec0a79fa18726610a78d951474e9 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -139,6 +139,24 @@ sym(vp8_post_proc_down_and_across_xmm): sub rsi, rdx sub rdi, rdx + + ; dup the first byte into the left border 8 times + movq mm1, [rdi] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 + + mov rdx, -8 + movq [rdi+rdx], mm1 + + ; dup the last byte into the right border + movsxd rdx, dword arg(5) + movq mm1, [rdi + rdx + -1] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 + movq [rdi+rdx], mm1 + xor rdx, rdx movq mm0, QWORD PTR [rdi-8]; @@ -287,12 +305,40 @@ sym(vp8_mbpost_proc_down_xmm): pxor xmm0, xmm0 ; movsxd rax, dword ptr arg(1) ;pitch ; + + ; this copies the last row down into the border 8 rows + mov rdi, rsi + movsxd rdx, dword ptr arg(2) + sub rdx, 9 + imul rdx, rax + lea rdi, [rdi+rdx] + movq xmm1, QWORD ptr[rdi] ; last row + mov rcx, 8 +.init_borderd: ; initialize borders + lea rdi, [rdi + rax] + movq [rdi], xmm1 + + dec rcx + jne .init_borderd + neg rax ; rax = -pitch + ; this copies the first row up into the border 8 rows + mov rdi, rsi + movq xmm1, QWORD ptr[rdi] ; first row + mov rcx, 8 +.init_border: ; initialize borders + lea rdi, [rdi + rax] + movq [rdi], xmm1 + + dec rcx + jne .init_border + + + lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8] neg rax - pxor
xmm5, xmm5 pxor xmm6, xmm6 ; @@ -480,7 +526,25 @@ sym(vp8_mbpost_proc_across_ip_xmm): xor rdx, rdx ;sumsq=0; xor rcx, rcx ;sum=0; mov rsi, arg(0); s + + ; dup the first byte into the left border 8 times + movq mm1, [rsi] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 + mov rdi, -8 + movq [rsi+rdi], mm1 + + ; dup the last byte into the right border (rdx temporarily holds arg(3)) + movsxd rdx, dword arg(3) + movq mm1, [rsi + rdx + -1] + punpcklbw mm1, mm1 + punpcklwd mm1, mm1 + punpckldq mm1, mm1 + movq [rsi+rdx], mm1 + xor rdx, rdx ;sumsq=0; again - rdx was overwritten with arg(3) above + .ip_var_loop: ;for(i=-8;i<=6;i++) ;{