Skip to content
Snippets Groups Projects
Commit a0981592 authored by Jean-Marc Valin
Browse files

Neon: Make gcc actually generate VMLA instructions for sparse mul

Otherwise gcc was splitting the multiply-accumulate (MLA) into a separate multiply and add.
parent 2bc20e65
No related branches found
No related tags found
No related merge requests found
...@@ -187,13 +187,13 @@ static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int ...@@ -187,13 +187,13 @@ static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int
for (j=0;j<cols;j++) for (j=0;j<cols;j++)
{ {
float xj= x[*idx++]; float32x4_t xj= vld1q_dup_f32(&x[*idx++]);
float32x4_t wvec; float32x4_t wvec;
wvec = vld1q_f32(&w[0]); y0_3 = vmlaq_n_f32(y0_3, wvec, xj); wvec = vld1q_f32(&w[0]); y0_3 = vmlaq_f32(y0_3, wvec, xj);
wvec = vld1q_f32(&w[4]); y4_7 = vmlaq_n_f32(y4_7, wvec, xj); wvec = vld1q_f32(&w[4]); y4_7 = vmlaq_f32(y4_7, wvec, xj);
wvec = vld1q_f32(&w[8]); y8_11 = vmlaq_n_f32(y8_11, wvec, xj); wvec = vld1q_f32(&w[8]); y8_11 = vmlaq_f32(y8_11, wvec, xj);
wvec = vld1q_f32(&w[12]); y12_15 = vmlaq_n_f32(y12_15, wvec, xj); wvec = vld1q_f32(&w[12]); y12_15 = vmlaq_f32(y12_15, wvec, xj);
w += 16; w += 16;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment