diff --git a/vp10/common/clpf.c b/vp10/common/clpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..b5b41ffdfa92959f352f883e0540f8e185836e0b
--- /dev/null
+++ b/vp10/common/clpf.c
@@ -0,0 +1,127 @@
+/*
+Copyright (c) 2016 Cisco Systems
+(Replace with proper AOM header)
+*/
+
+#include "vp10/common/clpf.h"
+
+#include <string.h>
+
+// Apply the filter on a single block of width x height pixels.
+//
+// Pixels are read from src (row stride sstride) and written to dst (row
+// stride dstride).  When one of has_top/has_left/has_right/has_bottom is
+// zero, the neighbour on that side is not read for any pixel of the block
+// and the pixel itself is used in its place.
+//
+// A pixel is raised by one if more than two of its four neighbours are
+// larger than it, lowered by one if more than two are smaller, and copied
+// unchanged otherwise.
+static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
+                       int dstride, int has_top, int has_left, int has_bottom,
+                       int has_right, int width, int height) {
+  int x, y;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      int X = src[(y + 0) * sstride + x + 0];
+      int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
+      int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
+      int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
+      int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
+      // +1, 0 or -1: at most one of the two terms can be 1, since three
+      // larger plus three smaller neighbours would exceed four.
+      int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
+                  ((A < X) + (B < X) + (C < X) + (D < X) > 2);
+      dst[y * dstride + x] = X + delta;
+    }
+  }
+}
+
+// Side length in pixels of the temporary superblock buffer.  Parenthesized so
+// that the macro expands safely inside any surrounding expression.
+#define BS (MI_SIZE * MI_BLOCK_SIZE)
+
+// Iterate over blocks within a superblock.
+//
+// xpos/ypos are the position of the superblock's first block, in the units
+// used to index mi_8x8 (row stride cm->mi_stride); iteration stops at
+// cm->mi_cols/cm->mi_rows.  Blocks whose mode info has skip set are left
+// untouched.
+static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
+                         const VP10_COMMON *cm, MACROBLOCKD *xd,
+                         MODE_INFO *const *mi_8x8, int xpos, int ypos) {
+  // Temporary buffer (to allow SIMD parallelism), aligned to 16 bytes.  The
+  // rounding is done on uintptr_t so the mask is applied to an unsigned value.
+  uint8_t buf_unaligned[BS * BS + 15];
+  uint8_t *buf = (uint8_t *)(((uintptr_t)buf_unaligned + 15) & ~(uintptr_t)15);
+  int x, y, p;
+
+  for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
+    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
+      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
+        const MB_MODE_INFO *mbmi =
+            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
+
+        // Do not filter if there is no residual
+        if (!mbmi->skip) {
+          // Do not filter frame edges
+          int has_top = ypos + y > 0;
+          int has_left = xpos + x > 0;
+          int has_bottom = ypos + y < cm->mi_rows - 1;
+          int has_right = xpos + x < cm->mi_cols - 1;
+#if CLPF_ALLOW_BLOCK_PARALLELISM
+          // Do not filter superblock edges
+          has_top &= !!y;
+          has_left &= !!x;
+          has_bottom &= y != MI_BLOCK_SIZE - 1;
+          has_right &= x != MI_BLOCK_SIZE - 1;
+#endif
+          vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
+          // With pixel parallelism the result goes to buf (stride BS) and is
+          // copied back below; otherwise the frame is filtered in place.
+          clpf_block(
+              xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
+                                        ? buf + y * MI_SIZE * BS + x * MI_SIZE
+                                        : xd->plane[p].dst.buf,
+              xd->plane[p].dst.stride,
+              CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
+              has_top, has_left, has_bottom, has_right,
+              MI_SIZE >> xd->plane[p].subsampling_x,
+              MI_SIZE >> xd->plane[p].subsampling_y);
+        }
+      }
+    }
+#if CLPF_ALLOW_PIXEL_PARALLELISM
+    // Copy the filtered blocks from buf back into the frame, row by row.
+    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
+      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
+        const MB_MODE_INFO *mbmi =
+            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
+        vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
+        if (!mbmi->skip) {
+          int i;
+          for (i = 0; i < (MI_SIZE >> xd->plane[p].subsampling_y); i++) {
+            memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
+                   buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
+                   MI_SIZE >> xd->plane[p].subsampling_x);
+          }
+        }
+      }
+    }
+#endif
+  }
+}
+
+// Iterate over the superblocks of an entire frame, stepping MI_BLOCK_SIZE
+// units at a time over cm->mi_rows x cm->mi_cols.
+void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
+                     MACROBLOCKD *xd) {
+  int x, y;
+
+  for (y = 0; y < cm->mi_rows; y += MI_BLOCK_SIZE) {
+    for (x = 0; x < cm->mi_cols; x += MI_BLOCK_SIZE) {
+      vp10_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
+    }
+  }
+}
diff --git a/vp10/common/clpf.h b/vp10/common/clpf.h
new file mode 100644
index 0000000000000000000000000000000000000000..1776a3d0fc24b0b0625f7b2aa764452230353fe3
--- /dev/null
+++ b/vp10/common/clpf.h
@@ -0,0 +1,28 @@
+/*
+Copyright (c) 2016, Cisco Systems
+(Replace with proper AOM header)
+*/
+
+#ifndef VP10_COMMON_CLPF_H_
+#define VP10_COMMON_CLPF_H_
+
+#include "vp10/common/reconinter.h"
+
+// Configuration
+
+// 1 = SIMD friendly (adds a buffer requirement)
+#define CLPF_ALLOW_PIXEL_PARALLELISM 1
+
+// 1 = MT friendly (degrades quality slightly)
+#define CLPF_ALLOW_BLOCK_PARALLELISM 0
+
+// 1 = filter both luma and chroma, 0 = filter only luma
+#define CLPF_FILTER_ALL_PLANES 0
+
+// Apply the filter to every superblock of the frame.  The .c file reads
+// cm->mi_rows, cm->mi_cols, cm->mi_stride and cm->mi_grid_visible and uses
+// xd->plane as working state.
+void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
+                     MACROBLOCKD *xd);
+
+#endif  // VP10_COMMON_CLPF_H_