Commit d0000208 authored by Urvang Joshi
Browse files

Palette code cleanup:

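- Avoid some memcpy()s by writing k-means indices directly into color_map
- Remove the kmeans_indices_buf array from PALETTE_BUFFER
- Make the pre_indices array local to vp10_k_means()
- Avoid rounding centroids twice (they are already rounded with roundf())
- Other small simplifications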

Change-Id: Iac3236daaad04f21f54054cdd9504de13b942a07
parent b869de98
...@@ -68,8 +68,6 @@ typedef struct { ...@@ -68,8 +68,6 @@ typedef struct {
typedef struct { typedef struct {
uint8_t best_palette_color_map[MAX_SB_SQUARE]; uint8_t best_palette_color_map[MAX_SB_SQUARE];
float kmeans_data_buf[2 * MAX_SB_SQUARE]; float kmeans_data_buf[2 * MAX_SB_SQUARE];
uint8_t kmeans_indices_buf[MAX_SB_SQUARE];
uint8_t kmeans_pre_indices_buf[MAX_SB_SQUARE];
} PALETTE_BUFFER; } PALETTE_BUFFER;
typedef struct macroblock MACROBLOCK; typedef struct macroblock MACROBLOCK;
......
...@@ -13,10 +13,9 @@ ...@@ -13,10 +13,9 @@
static float calc_dist(const float *p1, const float *p2, int dim) { static float calc_dist(const float *p1, const float *p2, int dim) {
float dist = 0; float dist = 0;
int i = 0; int i;
for (i = 0; i < dim; ++i) { for (i = 0; i < dim; ++i) {
float diff = p1[i] - roundf(p2[i]); const float diff = p1[i] - roundf(p2[i]);
dist += diff * diff; dist += diff * diff;
} }
return dist; return dist;
...@@ -25,13 +24,12 @@ static float calc_dist(const float *p1, const float *p2, int dim) { ...@@ -25,13 +24,12 @@ static float calc_dist(const float *p1, const float *p2, int dim) {
void vp10_calc_indices(const float *data, const float *centroids, void vp10_calc_indices(const float *data, const float *centroids,
uint8_t *indices, int n, int k, int dim) { uint8_t *indices, int n, int k, int dim) {
int i, j; int i, j;
float min_dist, this_dist;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
min_dist = calc_dist(data + i * dim, centroids, dim); float min_dist = calc_dist(data + i * dim, centroids, dim);
indices[i] = 0; indices[i] = 0;
for (j = 1; j < k; ++j) { for (j = 1; j < k; ++j) {
this_dist = calc_dist(data + i * dim, centroids + j * dim, dim); const float this_dist =
calc_dist(data + i * dim, centroids + j * dim, dim);
if (this_dist < min_dist) { if (this_dist < min_dist) {
min_dist = this_dist; min_dist = this_dist;
indices[i] = j; indices[i] = j;
...@@ -90,17 +88,21 @@ static float calc_total_dist(const float *data, const float *centroids, ...@@ -90,17 +88,21 @@ static float calc_total_dist(const float *data, const float *centroids,
return dist; return dist;
} }
int vp10_k_means(const float *data, float *centroids, uint8_t *indices, void vp10_k_means(const float *data, float *centroids, uint8_t *indices, int n,
uint8_t *pre_indices, int n, int k, int dim, int max_itr) { int k, int dim, int max_itr) {
int i = 0; int i;
float pre_dist, this_dist; float this_dist;
float pre_centroids[2 * PALETTE_MAX_SIZE]; float pre_centroids[2 * PALETTE_MAX_SIZE];
uint8_t pre_indices[MAX_SB_SQUARE];
vp10_calc_indices(data, centroids, indices, n, k, dim); vp10_calc_indices(data, centroids, indices, n, k, dim);
pre_dist = calc_total_dist(data, centroids, indices, n, k, dim); this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); for (i = 0; i < max_itr; ++i) {
while (i < max_itr) { const float pre_dist = this_dist;
memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
calc_centroids(data, centroids, indices, n, k, dim); calc_centroids(data, centroids, indices, n, k, dim);
vp10_calc_indices(data, centroids, indices, n, k, dim); vp10_calc_indices(data, centroids, indices, n, k, dim);
this_dist = calc_total_dist(data, centroids, indices, n, k, dim); this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
...@@ -112,14 +114,7 @@ int vp10_k_means(const float *data, float *centroids, uint8_t *indices, ...@@ -112,14 +114,7 @@ int vp10_k_means(const float *data, float *centroids, uint8_t *indices,
} }
if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim)) if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim))
break; break;
memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
pre_dist = this_dist;
++i;
} }
return i;
} }
void vp10_insertion_sort(float *data, int n) { void vp10_insertion_sort(float *data, int n) {
......
...@@ -20,8 +20,8 @@ extern "C" { ...@@ -20,8 +20,8 @@ extern "C" {
void vp10_insertion_sort(float *data, int n); void vp10_insertion_sort(float *data, int n);
void vp10_calc_indices(const float *data, const float *centroids, void vp10_calc_indices(const float *data, const float *centroids,
uint8_t *indices, int n, int k, int dim); uint8_t *indices, int n, int k, int dim);
int vp10_k_means(const float *data, float *centroids, uint8_t *indices, void vp10_k_means(const float *data, float *centroids, uint8_t *indices, int n,
uint8_t *pre_indices, int n, int k, int dim, int max_itr); int k, int dim, int max_itr);
int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols); int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows, int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows,
......
...@@ -1859,8 +1859,6 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -1859,8 +1859,6 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
int color_ctx, color_idx = 0; int color_ctx, color_idx = 0;
int color_order[PALETTE_MAX_SIZE]; int color_order[PALETTE_MAX_SIZE];
float *const data = x->palette_buffer->kmeans_data_buf; float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
float centroids[PALETTE_MAX_SIZE]; float centroids[PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[0].color_index_map; uint8_t *const color_map = xd->plane[0].color_index_map;
float lb, ub, val; float lb, ub, val;
...@@ -1914,8 +1912,7 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -1914,8 +1912,7 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
n >= 2; --n) { n >= 2; --n) {
for (i = 0; i < n; ++i) for (i = 0; i < n; ++i)
centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2; centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
vp10_k_means(data, centroids, indices, pre_indices, rows * cols, vp10_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
n, 1, max_itr);
vp10_insertion_sort(centroids, n); vp10_insertion_sort(centroids, n);
for (i = 0; i < n; ++i) for (i = 0; i < n; ++i)
centroids[i] = roundf(centroids[i]); centroids[i] = roundf(centroids[i]);
...@@ -1932,16 +1929,15 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -1932,16 +1929,15 @@ static int rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.use_highbitdepth) if (cpi->common.use_highbitdepth)
for (i = 0; i < k; ++i) for (i = 0; i < k; ++i)
pmi->palette_colors[i] = clip_pixel_highbd((int)lroundf(centroids[i]), pmi->palette_colors[i] =
cpi->common.bit_depth); clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
else else
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
for (i = 0; i < k; ++i) for (i = 0; i < k; ++i)
pmi->palette_colors[i] = clip_pixel((int)lroundf(centroids[i])); pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
pmi->palette_size[0] = k; pmi->palette_size[0] = k;
vp10_calc_indices(data, centroids, indices, rows * cols, k, 1); vp10_calc_indices(data, centroids, color_map, rows * cols, k, 1);
memcpy(color_map, indices, rows * cols * sizeof(*color_map));
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
&s, NULL, bsize, *best_rd); &s, NULL, bsize, *best_rd);
...@@ -3742,8 +3738,6 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -3742,8 +3738,6 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
float lb_u, ub_u, val_u; float lb_u, ub_u, val_u;
float lb_v, ub_v, val_v; float lb_v, ub_v, val_v;
float *const data = x->palette_buffer->kmeans_data_buf; float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
uint8_t *const pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
float centroids[2 * PALETTE_MAX_SIZE]; float centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map; uint8_t *const color_map = xd->plane[1].color_index_map;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
...@@ -3805,8 +3799,7 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -3805,8 +3799,7 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
centroids[i * 2 + 1] = centroids[i * 2 + 1] =
lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;; lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;;
} }
r = vp10_k_means(data, centroids, indices, pre_indices, rows * cols, n, vp10_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
2, max_itr);
pmi->palette_size[1] = n; pmi->palette_size[1] = n;
for (i = 1; i < 3; ++i) { for (i = 1; i < 3; ++i) {
for (j = 0; j < n; ++j) { for (j = 0; j < n; ++j) {
...@@ -3821,7 +3814,6 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -3821,7 +3814,6 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
clip_pixel((int)lroundf(centroids[j * 2 + i - 1])); clip_pixel((int)lroundf(centroids[j * 2 + i - 1]));
} }
} }
memcpy(color_map, indices, rows * cols * sizeof(*color_map));
super_block_uvrd(cpi, x, &this_rate_tokenonly, super_block_uvrd(cpi, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, *best_rd); &this_distortion, &s, &this_sse, bsize, *best_rd);
...@@ -3853,7 +3845,7 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, ...@@ -3853,7 +3845,7 @@ static void rd_pick_palette_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
if (this_rd < *best_rd) { if (this_rd < *best_rd) {
*best_rd = this_rd; *best_rd = this_rd;
*palette_mode_info = *pmi; *palette_mode_info = *pmi;
memcpy(best_palette_color_map, xd->plane[1].color_index_map, memcpy(best_palette_color_map, color_map,
rows * cols * sizeof(best_palette_color_map[0])); rows * cols * sizeof(best_palette_color_map[0]));
*mode_selected = DC_PRED; *mode_selected = DC_PRED;
*rate = this_rate; *rate = this_rate;
...@@ -8285,7 +8277,6 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) { ...@@ -8285,7 +8277,6 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) {
const uint8_t *const src_u = x->plane[1].src.buf; const uint8_t *const src_u = x->plane[1].src.buf;
const uint8_t *const src_v = x->plane[2].src.buf; const uint8_t *const src_v = x->plane[2].src.buf;
float *const data = x->palette_buffer->kmeans_data_buf; float *const data = x->palette_buffer->kmeans_data_buf;
uint8_t *const indices = x->palette_buffer->kmeans_indices_buf;
float centroids[2 * PALETTE_MAX_SIZE]; float centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map; uint8_t *const color_map = xd->plane[1].color_index_map;
int r, c; int r, c;
...@@ -8321,9 +8312,8 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) { ...@@ -8321,9 +8312,8 @@ static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) {
} }
} }
vp10_calc_indices(data, centroids, indices, rows * cols, vp10_calc_indices(data, centroids, color_map, rows * cols,
pmi->palette_size[1], 2); pmi->palette_size[1], 2);
memcpy(color_map, indices, rows * cols * sizeof(*color_map));
} }
#if CONFIG_EXT_INTRA #if CONFIG_EXT_INTRA
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment