Commit ec5d01cb authored by Jean-Marc Valin
Browse files

Using a table on ARM for unsigned division by small (<=256) integers.

Saves 0.6% for 64 kb/s and 1.8% for 128 kb/s when decoding on arm7tdmi.
parent 379af35f
......@@ -91,3 +91,41 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){
return nbits-l;
/* Reciprocal table for division by small integers.
   Entry i holds 2^32/(2*i+1), truncated to an integer, i.e. the 32-bit
   fixed-point reciprocal of the odd number 2*i+1 (entry 1 is 2^32/3, entry 2
   is 2^32/5, ...).
   Entry 0 is the exception: 2^32/1 would not fit in a 32-bit entry, so
   0xFFFFFFFF is stored instead.
   celt_udiv() indexes this table as SMALL_DIV_TABLE[d>>t].
   NOTE(review): 128 initializers are visible here while the array is declared
   with 129 elements, so the last element would be implicitly zero -- confirm
   whether index 128 is ever meant to be read. */
const opus_uint32 SMALL_DIV_TABLE[129] = {
0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
0x01073260, 0x0105197F, 0x0103091B, 0x01010101
......@@ -34,6 +34,12 @@
# include <stddef.h>
# include "ecintrin.h"
extern const opus_uint32 SMALL_DIV_TABLE[129];
/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
larger type, you can speed up the decoder by using it here.*/
typedef opus_uint32 ec_window;
......@@ -114,4 +120,20 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){
rounding error is in the positive direction).*/
opus_uint32 ec_tell_frac(ec_ctx *_this);
/* Unsigned division: returns the quotient of n divided by d.
   Divisors above 256 use the native divide. Smaller divisors multiply by a
   32-bit reciprocal taken from SMALL_DIV_TABLE and then adjust the estimate
   upward by at most one.
   Tested exhaustively for all n and for 1<=d<=256 */
static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
if (d>256)
return n/d;
else {
opus_uint32 t, q;
/* d&-d isolates the lowest set bit of d.
   NOTE(review): EC_ILOG is defined outside this view (presumably in
   ecintrin.h); the t-1 shift below suggests it returns the bit position
   plus one, so that d>>t is half the odd part of d, rounded down --
   confirm. */
t = EC_ILOG(d&-d);
/* 64-bit product of the table reciprocal and the pre-shifted numerator;
   shifting right by 32 keeps the upper half as the quotient estimate. */
q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
/* Add one when the remainder n-q*d is still at least d. */
return q+(n-q*d >= d);
/* NOTE(review): the closing braces and any preprocessor guards are not
   visible in this view; the return below presumably belongs to a build
   path that does not use the table -- confirm against the full file. */
return n/d;
......@@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
unsigned ec_decode(ec_dec *_this,unsigned _ft){
unsigned s;
return _ft-EC_MINI(s+1,_ft);
......@@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
opus_uint32 r;
