Commit c2747bec authored by Erik de Castro Lopo
Browse files

lpc_asm.nasm : More 'mov cl' -> 'mov ecx' fixes.

According to Agner Fog in optimizing_assembly.pdf:

  "... write to a partial register may result in false dependencies
   between instructions, so it is better to avoid it."

Patch-from: lvqcl <lvqcl.mail@gmail.com>
parent 7e927893
...@@ -872,7 +872,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32 ...@@ -872,7 +872,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
mov ecx, [esp + 28] mov ecx, [esp + 28]
mov edx, [ecx] ; edx = qlp_coeff[0] mov edx, [ecx] ; edx = qlp_coeff[0]
mov eax, [esi - 4] ; eax = data[-1] mov eax, [esi - 4] ; eax = data[-1]
mov cl, [esp + 36] ; cl = lp_quantization mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16 ALIGN 16
.i_1_loop_i: .i_1_loop_i:
imul eax, edx imul eax, edx
...@@ -910,7 +910,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32 ...@@ -910,7 +910,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
inc ecx inc ecx
jnz short .i_32more_loop_j jnz short .i_32more_loop_j
mov cl, [esp + 36] mov ecx, [esp + 36]
sar ebp, cl sar ebp, cl
neg ebp neg ebp
add ebp, [esi] add ebp, [esi]
...@@ -1037,7 +1037,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32 ...@@ -1037,7 +1037,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
add ebp, ecx add ebp, ecx
.jumper_0: .jumper_0:
mov cl, [esp + 36] mov ecx, [esp + 36]
sar ebp, cl sar ebp, cl
neg ebp neg ebp
add ebp, [esi] add ebp, [esi]
...@@ -1282,7 +1282,7 @@ cident FLAC__lpc_restore_signal_asm_ia32 ...@@ -1282,7 +1282,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
mov ecx, [esp + 28] mov ecx, [esp + 28]
mov edx, [ecx] mov edx, [ecx]
mov eax, [edi - 4] mov eax, [edi - 4]
mov cl, [esp + 36] mov ecx, [esp + 36]
ALIGN 16 ALIGN 16
.x87_1_loop_i: .x87_1_loop_i:
imul eax, edx imul eax, edx
...@@ -1318,7 +1318,7 @@ cident FLAC__lpc_restore_signal_asm_ia32 ...@@ -1318,7 +1318,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
inc ecx inc ecx
jnz short .x87_32more_loop_j jnz short .x87_32more_loop_j
mov cl, [esp + 36] mov ecx, [esp + 36]
sar ebp, cl sar ebp, cl
add ebp, [esi] add ebp, [esi]
mov [edi], ebp mov [edi], ebp
...@@ -1444,7 +1444,7 @@ cident FLAC__lpc_restore_signal_asm_ia32 ...@@ -1444,7 +1444,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
add ebp, ecx ; sum += qlp_coeff[ 0] * data[i- 1] add ebp, ecx ; sum += qlp_coeff[ 0] * data[i- 1]
.jumper_0: .jumper_0:
mov cl, [esp + 36] mov ecx, [esp + 36]
sar ebp, cl ; ebp = (sum >> lp_quantization) sar ebp, cl ; ebp = (sum >> lp_quantization)
add ebp, [esi + edi] ; ebp = residual[i] + (sum >> lp_quantization) add ebp, [esi + edi] ; ebp = residual[i] + (sum >> lp_quantization)
mov [edi], ebp ; data[i] = residual[i] + (sum >> lp_quantization) mov [edi], ebp ; data[i] = residual[i] + (sum >> lp_quantization)
...@@ -1633,6 +1633,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32 ...@@ -1633,6 +1633,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
;ASSERT(order > 0) ;ASSERT(order > 0)
;ASSERT(order <= 32) ;ASSERT(order <= 32)
;ASSERT(lp_quantization <= 31)
push ebp push ebp
push ebx push ebx
...@@ -1653,7 +1654,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32 ...@@ -1653,7 +1654,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
mov ecx, [esp + 28] ; ecx = qlp_coeff[] mov ecx, [esp + 28] ; ecx = qlp_coeff[]
mov ebp, [ecx] ; ebp = qlp_coeff[0] mov ebp, [ecx] ; ebp = qlp_coeff[0]
mov eax, [edi - 4] ; eax = data[-1] mov eax, [edi - 4] ; eax = data[-1]
mov cl, [esp + 36] ; cl = lp_quantization mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16 ALIGN 16
.i_1_loop_i: .i_1_loop_i:
imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1] imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
...@@ -1913,6 +1914,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32 ...@@ -1913,6 +1914,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32
;ASSERT(order > 0) ;ASSERT(order > 0)
;ASSERT(order <= 32) ;ASSERT(order <= 32)
;ASSERT(lp_quantization <= 31)
push ebp push ebp
push ebx push ebx
...@@ -1933,7 +1935,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32 ...@@ -1933,7 +1935,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32
mov ecx, [esp + 28] ; ecx = qlp_coeff[] mov ecx, [esp + 28] ; ecx = qlp_coeff[]
mov ebp, [ecx] ; ebp = qlp_coeff[0] mov ebp, [ecx] ; ebp = qlp_coeff[0]
mov eax, [edi - 4] ; eax = data[-1] mov eax, [edi - 4] ; eax = data[-1]
mov cl, [esp + 36] ; cl = lp_quantization mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16 ALIGN 16
.x87_1_loop_i: .x87_1_loop_i:
imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1] imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment