Commit 5891f98f authored by Hui Su's avatar Hui Su

Palette: replace floating point numbers with integers

Performance change is within noise range.

Change-Id: I9ca2ea6dfdc629513836637283ae3f964a1dd497
parent 3b5afc1e
......@@ -61,6 +61,8 @@
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
......
......@@ -26,7 +26,6 @@ extern "C" {
#endif
#define MAX_TXFM_STAGE_NUM 12
#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
static const int cos_bit_min = 10;
static const int cos_bit_max = 16;
......
......@@ -117,7 +117,7 @@ typedef struct {
typedef struct {
uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
float kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
int kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
} PALETTE_BUFFER;
typedef struct {
......
......@@ -23,25 +23,23 @@
#define RENAME_(x, y) AV1_K_MEANS_RENAME(x, y)
#define RENAME(x) RENAME_(x, AV1_K_MEANS_DIM)
static float RENAME(calc_dist)(const float *p1, const float *p2) {
float dist = 0;
int i;
for (i = 0; i < AV1_K_MEANS_DIM; ++i) {
const float diff = p1[i] - p2[i];
static int RENAME(calc_dist)(const int *p1, const int *p2) {
int dist = 0;
for (int i = 0; i < AV1_K_MEANS_DIM; ++i) {
const int diff = p1[i] - p2[i];
dist += diff * diff;
}
return dist;
}
void RENAME(av1_calc_indices)(const float *data, const float *centroids,
void RENAME(av1_calc_indices)(const int *data, const int *centroids,
uint8_t *indices, int n, int k) {
int i, j;
for (i = 0; i < n; ++i) {
float min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
for (int i = 0; i < n; ++i) {
int min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
indices[i] = 0;
for (j = 1; j < k; ++j) {
const float this_dist = RENAME(calc_dist)(
data + i * AV1_K_MEANS_DIM, centroids + j * AV1_K_MEANS_DIM);
for (int j = 1; j < k; ++j) {
const int this_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
centroids + j * AV1_K_MEANS_DIM);
if (this_dist < min_dist) {
min_dist = this_dist;
indices[i] = j;
......@@ -50,19 +48,16 @@ void RENAME(av1_calc_indices)(const float *data, const float *centroids,
}
}
static void RENAME(calc_centroids)(const float *data, float *centroids,
static void RENAME(calc_centroids)(const int *data, int *centroids,
const uint8_t *indices, int n, int k) {
int i, j, index;
int count[PALETTE_MAX_SIZE];
int i, j;
int count[PALETTE_MAX_SIZE] = { 0 };
unsigned int rand_state = (unsigned int)data[0];
assert(n <= 32768);
memset(count, 0, sizeof(count[0]) * k);
memset(centroids, 0, sizeof(centroids[0]) * k * AV1_K_MEANS_DIM);
for (i = 0; i < n; ++i) {
index = indices[i];
const int index = indices[i];
assert(index < k);
++count[index];
for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
......@@ -76,43 +71,35 @@ static void RENAME(calc_centroids)(const float *data, float *centroids,
data + (lcg_rand16(&rand_state) % n) * AV1_K_MEANS_DIM,
sizeof(centroids[0]) * AV1_K_MEANS_DIM);
} else {
const float norm = 1.0f / count[i];
for (j = 0; j < AV1_K_MEANS_DIM; ++j)
centroids[i * AV1_K_MEANS_DIM + j] *= norm;
for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
centroids[i * AV1_K_MEANS_DIM + j] =
DIVIDE_AND_ROUND(centroids[i * AV1_K_MEANS_DIM + j], count[i]);
}
}
// Round to nearest integers.
for (i = 0; i < k * AV1_K_MEANS_DIM; ++i) {
centroids[i] = roundf(centroids[i]);
}
}
static float RENAME(calc_total_dist)(const float *data, const float *centroids,
static int64_t RENAME(calc_total_dist)(const int *data, const int *centroids,
const uint8_t *indices, int n, int k) {
float dist = 0;
int i;
int64_t dist = 0;
(void)k;
for (i = 0; i < n; ++i)
for (int i = 0; i < n; ++i) {
dist += RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
centroids + indices[i] * AV1_K_MEANS_DIM);
}
return dist;
}
void RENAME(av1_k_means)(const float *data, float *centroids, uint8_t *indices,
void RENAME(av1_k_means)(const int *data, int *centroids, uint8_t *indices,
int n, int k, int max_itr) {
int i;
float this_dist;
float pre_centroids[2 * PALETTE_MAX_SIZE];
int pre_centroids[2 * PALETTE_MAX_SIZE];
uint8_t pre_indices[MAX_SB_SQUARE];
RENAME(av1_calc_indices)(data, centroids, indices, n, k);
this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
int64_t this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
for (i = 0; i < max_itr; ++i) {
const float pre_dist = this_dist;
for (int i = 0; i < max_itr; ++i) {
const int64_t pre_dist = this_dist;
memcpy(pre_centroids, centroids,
sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
......@@ -132,6 +119,5 @@ void RENAME(av1_k_means)(const float *data, float *centroids, uint8_t *indices,
break;
}
}
#undef RENAME_
#undef RENAME
......@@ -23,16 +23,14 @@
#include "av1/encoder/k_means_template.h"
#undef AV1_K_MEANS_DIM
static int float_comparer(const void *a, const void *b) {
const float fa = *(const float *)a;
const float fb = *(const float *)b;
return (fa > fb) - (fa < fb);
static int int_comparer(const void *a, const void *b) {
return (*(int *)a - *(int *)b);
}
int av1_remove_duplicates(float *centroids, int num_centroids) {
int av1_remove_duplicates(int *centroids, int num_centroids) {
int num_unique; // number of unique centroids
int i;
qsort(centroids, num_centroids, sizeof(*centroids), float_comparer);
qsort(centroids, num_centroids, sizeof(*centroids), int_comparer);
// Remove duplicates.
num_unique = 1;
for (i = 1; i < num_centroids; ++i) {
......
......@@ -20,22 +20,22 @@ extern "C" {
#define AV1_K_MEANS_RENAME(func, dim) func##_dim##dim
void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const float *data,
const float *centroids,
void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const int *data,
const int *centroids,
uint8_t *indices, int n, int k);
void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const float *data,
const float *centroids,
void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const int *data,
const int *centroids,
uint8_t *indices, int n, int k);
void AV1_K_MEANS_RENAME(av1_k_means, 1)(const float *data, float *centroids,
void AV1_K_MEANS_RENAME(av1_k_means, 1)(const int *data, int *centroids,
uint8_t *indices, int n, int k,
int max_itr);
void AV1_K_MEANS_RENAME(av1_k_means, 2)(const float *data, float *centroids,
void AV1_K_MEANS_RENAME(av1_k_means, 2)(const int *data, int *centroids,
uint8_t *indices, int n, int k,
int max_itr);
// Given 'n' 'data' points and 'k' 'centroids' each of dimension 'dim',
// calculate the centroid 'indices' for the data points.
static INLINE void av1_calc_indices(const float *data, const float *centroids,
static INLINE void av1_calc_indices(const int *data, const int *centroids,
uint8_t *indices, int n, int k, int dim) {
if (dim == 1) {
AV1_K_MEANS_RENAME(av1_calc_indices, 1)(data, centroids, indices, n, k);
......@@ -50,7 +50,7 @@ static INLINE void av1_calc_indices(const float *data, const float *centroids,
// dimension 'dim', runs up to 'max_itr' iterations of k-means algorithm to get
// updated 'centroids' and the centroid 'indices' for elements in 'data'.
// Note: the output centroids are rounded off to nearest integers.
static INLINE void av1_k_means(const float *data, float *centroids,
static INLINE void av1_k_means(const int *data, int *centroids,
uint8_t *indices, int n, int k, int dim,
int max_itr) {
if (dim == 1) {
......@@ -66,7 +66,7 @@ static INLINE void av1_k_means(const float *data, float *centroids,
// puts these unique centroids in first 'k' indices of 'centroids' array.
// Ideally, the centroids should be rounded to integers before calling this
// method.
int av1_remove_duplicates(float *centroids, int num_centroids);
int av1_remove_duplicates(int *centroids, int num_centroids);
// Given a color cache and a set of base colors, find if each cache color is
// present in the base colors, record the binary results in "cache_color_found".
......
......@@ -2797,20 +2797,19 @@ static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
// Bias toward using colors in the cache.
// TODO(huisu): Try other schemes to improve compression.
static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
int n_colors, int stride,
float *centroids) {
int n_colors, int stride, int *centroids) {
if (n_cache <= 0) return;
for (int i = 0; i < n_colors * stride; i += stride) {
float min_diff = fabsf(centroids[i] - color_cache[0]);
int min_diff = abs(centroids[i] - (int)color_cache[0]);
int idx = 0;
for (int j = 1; j < n_cache; ++j) {
float this_diff = fabsf(centroids[i] - color_cache[j]);
const int this_diff = abs(centroids[i] - color_cache[j]);
if (this_diff < min_diff) {
min_diff = this_diff;
idx = j;
}
}
if (min_diff < 1.5) centroids[i] = color_cache[idx];
if (min_diff <= 1) centroids[i] = color_cache[idx];
}
}
......@@ -2818,19 +2817,13 @@ static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
// of palette mode.
static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int palette_ctx,
int dc_mode_cost, const float *data, float *centroids,
int dc_mode_cost, const int *data, int *centroids,
int n, uint16_t *color_cache, int n_cache,
MB_MODE_INFO *best_mbmi,
uint8_t *best_palette_color_map, int64_t *best_rd,
int64_t *best_model_rd, int *rate, int *rate_tokenonly,
int *rate_overhead, int64_t *distortion,
int *skippable) {
aom_clear_system_state();
#ifndef NDEBUG
for (int i = 0; i < n; ++i) {
assert(!isnan(centroids[i]));
}
#endif // NDEBUG
optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
int k = av1_remove_duplicates(centroids, n);
if (k < PALETTE_MIN_SIZE) {
......@@ -2847,7 +2840,7 @@ static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
else
#endif // CONFIG_HIGHBITDEPTH
for (int i = 0; i < k; ++i)
pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
pmi->palette_colors[i] = clip_pixel(centroids[i]);
pmi->palette_size[0] = k;
MACROBLOCKD *const xd = &x->e_mbd;
uint8_t *const color_map = xd->plane[0].color_index_map;
......@@ -2928,12 +2921,11 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_FILTER_INTRA
if (colors > 1 && colors <= 64) {
aom_clear_system_state();
int r, c, i;
const int max_itr = 50;
float *const data = x->palette_buffer->kmeans_data_buf;
float centroids[PALETTE_MAX_SIZE];
float lb, ub, val;
int *const data = x->palette_buffer->kmeans_data_buf;
int centroids[PALETTE_MAX_SIZE];
int lb, ub, val;
#if CONFIG_HIGHBITDEPTH
uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
if (cpi->common.use_highbitdepth)
......@@ -2998,8 +2990,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
// TODO(huisu@google.com): Try to avoid duplicate computation in cases
// where the dominant colors and the k-means results are similar.
for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
aom_clear_system_state();
for (i = 0; i < n; ++i) centroids[i] = (float)(top_colors[i]);
for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
palette_rd_y(cpi, x, mbmi, bsize, palette_ctx, dc_mode_cost, data,
centroids, n, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate,
......@@ -3008,7 +2999,6 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
// K-means clustering.
for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
aom_clear_system_state();
if (colors == PALETTE_MIN_SIZE) {
// Special case: These colors automatically become the centroids.
assert(colors == n);
......@@ -5174,13 +5164,12 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
colors = colors_u > colors_v ? colors_u : colors_v;
if (colors > 1 && colors <= 64) {
aom_clear_system_state();
int r, c, n, i, j;
const int max_itr = 50;
float lb_u, ub_u, val_u;
float lb_v, ub_v, val_v;
float *const data = x->palette_buffer->kmeans_data_buf;
float centroids[2 * PALETTE_MAX_SIZE];
int lb_u, ub_u, val_u;
int lb_v, ub_v, val_v;
int *const data = x->palette_buffer->kmeans_data_buf;
int centroids[2 * PALETTE_MAX_SIZE];
#if CONFIG_HIGHBITDEPTH
uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
......@@ -5230,26 +5219,20 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
--n) {
aom_clear_system_state();
for (i = 0; i < n; ++i) {
centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
}
av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
#ifndef NDEBUG
for (i = 0; i < 2 * n; ++i) {
assert(!isnan(centroids[i]));
}
#endif // NDEBUG
optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
// Sort the U channel colors in ascending order.
for (i = 0; i < 2 * (n - 1); i += 2) {
int min_idx = i;
float min_val = centroids[i];
int min_val = centroids[i];
for (j = i + 2; j < 2 * n; j += 2)
if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
if (min_idx != i) {
float temp_u = centroids[i], temp_v = centroids[i + 1];
int temp_u = centroids[i], temp_v = centroids[i + 1];
centroids[i] = centroids[min_idx];
centroids[i + 1] = centroids[min_idx + 1];
centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
......@@ -9031,8 +9014,8 @@ static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
int src_stride = x->plane[1].src.stride;
const uint8_t *const src_u = x->plane[1].src.buf;
const uint8_t *const src_v = x->plane[2].src.buf;
float *const data = x->palette_buffer->kmeans_data_buf;
float centroids[2 * PALETTE_MAX_SIZE];
int *const data = x->palette_buffer->kmeans_data_buf;
int centroids[2 * PALETTE_MAX_SIZE];
uint8_t *const color_map = xd->plane[1].color_index_map;
int r, c;
#if CONFIG_HIGHBITDEPTH
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment