Commit 0bddabc5 authored by Josh Coalson
Browse files

minor optimizations

parent 49cb34a5
...@@ -134,11 +134,9 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap ...@@ -134,11 +134,9 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
jz near .break1 ; if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */ jz near .break1 ; if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
; crc16_update_word_(br, br->buffer[cwords]); ; crc16_update_word_(br, br->buffer[cwords]);
push edi ; [need more registers] push edi ; [need more registers]
push ecx ; [need more registers]
bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
mov ecx, [ebp + 28] ; ecx <- br->crc16_align mov ecx, [ebp + 28] ; ecx <- br->crc16_align
mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc) mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
xor ebx, ebx ; [code from here down assumes and requires that the top 24 bits of ebx stay zero]
mov edi, FLAC__crc16_table mov edi, FLAC__crc16_table
;; eax (ax) crc a.k.a. br->read_crc ;; eax (ax) crc a.k.a. br->read_crc
;; ebx (bl) intermediate result index into FLAC__crc16_table[] ;; ebx (bl) intermediate result index into FLAC__crc16_table[]
...@@ -170,8 +168,8 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap ...@@ -170,8 +168,8 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)] mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
shl eax, 8 ; ax <- (crc<<8) shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)] xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
mov [ebp + 24], ax ; br->read_crc <- crc movzx eax, ax
pop ecx mov [ebp + 24], eax ; br->read_crc <- crc
pop edi pop edi
add esi, 1 ; cwords++; add esi, 1 ; cwords++;
...@@ -208,11 +206,9 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap ...@@ -208,11 +206,9 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
sub edi, ecx ; uval += FLAC__BITS_PER_WORD - cbits; sub edi, ecx ; uval += FLAC__BITS_PER_WORD - cbits;
; crc16_update_word_(br, br->buffer[cwords]); ; crc16_update_word_(br, br->buffer[cwords]);
push edi ; [need more registers] push edi ; [need more registers]
push ecx ; [need more registers]
bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
mov ecx, [ebp + 28] ; ecx <- br->crc16_align mov ecx, [ebp + 28] ; ecx <- br->crc16_align
mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc) mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
xor ebx, ebx ; [code from here down assumes and requires that the top 24 bits of ebx stay zero]
mov edi, FLAC__crc16_table mov edi, FLAC__crc16_table
;; eax (ax) crc a.k.a. br->read_crc ;; eax (ax) crc a.k.a. br->read_crc
;; ebx (bl) intermediate result index into FLAC__crc16_table[] ;; ebx (bl) intermediate result index into FLAC__crc16_table[]
...@@ -244,8 +240,8 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap ...@@ -244,8 +240,8 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)] mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
shl eax, 8 ; ax <- (crc<<8) shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)] xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
mov [ebp + 24], ax ; br->read_crc <- crc movzx eax, ax
pop ecx mov [ebp + 24], eax ; br->read_crc <- crc
pop edi pop edi
add esi, 1 ; cwords++; add esi, 1 ; cwords++;
...@@ -418,12 +414,10 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap ...@@ -418,12 +414,10 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
; crc16_update_word_(br, br->buffer[cwords]); ; crc16_update_word_(br, br->buffer[cwords]);
push edi ; [need more registers] push edi ; [need more registers]
push ebx ; [need more registers] push ebx ; [need more registers]
push ecx ; [need more registers]
push eax ; [need more registers] push eax ; [need more registers]
bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
mov ecx, [ebp + 28] ; ecx <- br->crc16_align mov ecx, [ebp + 28] ; ecx <- br->crc16_align
mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc) mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
xor ebx, ebx ; [code from here down assumes and requires that the top 24 bits of ebx stay zero]
mov edi, FLAC__crc16_table mov edi, FLAC__crc16_table
;; eax (ax) crc a.k.a. br->read_crc ;; eax (ax) crc a.k.a. br->read_crc
;; ebx (bl) intermediate result index into FLAC__crc16_table[] ;; ebx (bl) intermediate result index into FLAC__crc16_table[]
...@@ -455,9 +449,9 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap ...@@ -455,9 +449,9 @@ cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)] mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
shl eax, 8 ; ax <- (crc<<8) shl eax, 8 ; ax <- (crc<<8)
xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)] xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
mov [ebp + 24], ax ; br->read_crc <- crc movzx eax, ax
mov [ebp + 24], eax ; br->read_crc <- crc
pop eax pop eax
pop ecx
pop ebx pop ebx
pop edi pop edi
add esi, 1 ; cwords++; add esi, 1 ; cwords++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment