Commit c21d4370 authored by James Zern

vpx_fdct32x32_1_msa: fix accumulator overflow

Change-Id: I33a5432eda3416382e1cea06b45082c0c65faa75
parent a0359b8c
......@@ -933,23 +933,21 @@ void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
}
/* DC-only variant (per its name) of the 32x32 forward transform: writes
 * out[0] and clears out[1].
 *
 * input  - source coefficients; sixteen sub-blocks are visited, at column
 *          offsets 0/8/16/24 within row offsets 0/8/16/24.
 * out    - receives the result in out[0]; out[1] is set to 0.
 * stride - forwarded unchanged to LD_HADD.
 *
 * The partial results are accumulated in an int and narrowed to int16_t
 * exactly once, after the final shift. Accumulating directly into out[0]
 * (an int16_t) would narrow every partial sum and could overflow before
 * the >> 3 is applied.
 */
void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
  int sum = 0;
  int row, col;

  out[1] = 0;

  /* NOTE(review): LD_HADD is defined elsewhere; presumably it loads an
   * 8-column tile and returns its horizontal sum -- confirm.
   * NOTE(review): the row offset uses a fixed pitch of 32 while `stride`
   * is only passed to LD_HADD -- confirm callers always use stride == 32. */
  for (row = 0; row < 32; row += 8) {
    for (col = 0; col < 32; col += 8) {
      sum += LD_HADD(input + 32 * row + col, stride);
    }
  }

  out[0] = (int16_t)(sum >> 3);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment