Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Guillaume Martres
aom-rav1e
Commits
dea687f7
Commit
dea687f7
authored
May 02, 2014
by
Scott LaVarnway
Committed by
Gerrit Code Review
May 02, 2014
Browse files
Merge "Improved intrinsic version of vp8_denoiser_filter_neon"
parents
8b2b7370
ff209de8
Changes
1
Hide whitespace changes
Inline
Side-by-side
vp8/encoder/arm/neon/denoising_neon.c
View file @
dea687f7
...
...
@@ -68,14 +68,11 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
int
mc_running_avg_y_stride
=
mc_running_avg
->
y_stride
;
unsigned
char
*
running_avg_y
=
running_avg
->
y_buffer
+
y_offset
;
int
running_avg_y_stride
=
running_avg
->
y_stride
;
int64x2_t
v_sum_diff_total
=
vdupq_n_s64
(
0
);
/* Go over lines. */
int
i
;
int
sum_diff
=
0
;
for
(
i
=
0
;
i
<
16
;
++
i
)
{
int8x16_t
v_sum_diff
=
vdupq_n_s8
(
0
);
uint8x16_t
v_running_avg_y
;
/* Load inputs. */
const
uint8x16_t
v_sig
=
vld1q_u8
(
sig
);
const
uint8x16_t
v_mc_running_avg_y
=
vld1q_u8
(
mc_running_avg_y
);
...
...
@@ -117,12 +114,9 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
v_abs_adjustment
);
const
uint8x16_t
v_neg_adjustment
=
vandq_u8
(
v_diff_neg_mask
,
v_abs_adjustment
);
v_running_avg_y
=
vqaddq_u8
(
v_sig
,
v_pos_adjustment
);
uint8x16_t
v_running_avg_y
=
vqaddq_u8
(
v_sig
,
v_pos_adjustment
);
v_running_avg_y
=
vqsubq_u8
(
v_running_avg_y
,
v_neg_adjustment
);
v_sum_diff
=
vqaddq_s8
(
v_sum_diff
,
vreinterpretq_s8_u8
(
v_pos_adjustment
));
v_sum_diff
=
vqsubq_s8
(
v_sum_diff
,
vreinterpretq_s8_u8
(
v_neg_adjustment
));
/* Store results. */
vst1q_u8
(
running_avg_y
,
v_running_avg_y
);
...
...
@@ -131,23 +125,19 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
* for this macroblock.
*/
{
int
s0
=
vgetq_lane_s8
(
v_sum_diff
,
0
)
+
vgetq_lane_s8
(
v_sum_diff
,
1
)
+
vgetq_lane_s8
(
v_sum_diff
,
2
)
+
vgetq_lane_s8
(
v_sum_diff
,
3
);
int
s1
=
vgetq_lane_s8
(
v_sum_diff
,
4
)
+
vgetq_lane_s8
(
v_sum_diff
,
5
)
+
vgetq_lane_s8
(
v_sum_diff
,
6
)
+
vgetq_lane_s8
(
v_sum_diff
,
7
);
int
s2
=
vgetq_lane_s8
(
v_sum_diff
,
8
)
+
vgetq_lane_s8
(
v_sum_diff
,
9
)
+
vgetq_lane_s8
(
v_sum_diff
,
10
)
+
vgetq_lane_s8
(
v_sum_diff
,
11
);
int
s3
=
vgetq_lane_s8
(
v_sum_diff
,
12
)
+
vgetq_lane_s8
(
v_sum_diff
,
13
)
+
vgetq_lane_s8
(
v_sum_diff
,
14
)
+
vgetq_lane_s8
(
v_sum_diff
,
15
);
sum_diff
+=
s0
+
s1
+
s2
+
s3
;
const
int8x16_t
v_sum_diff
=
vqsubq_s8
(
vreinterpretq_s8_u8
(
v_pos_adjustment
),
vreinterpretq_s8_u8
(
v_neg_adjustment
));
const
int16x8_t
fe_dc_ba_98_76_54_32_10
=
vpaddlq_s8
(
v_sum_diff
);
const
int32x4_t
fedc_ba98_7654_3210
=
vpaddlq_s16
(
fe_dc_ba_98_76_54_32_10
);
const
int64x2_t
fedcba98_76543210
=
vpaddlq_s32
(
fedc_ba98_7654_3210
);
v_sum_diff_total
=
vqaddq_s64
(
v_sum_diff_total
,
fedcba98_76543210
);
}
/* Update pointers for next iteration. */
...
...
@@ -157,11 +147,20 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
}
/* Too much adjustments => copy block. */
if
(
abs
(
sum_diff
)
>
SUM_DIFF_THRESHOLD
)
return
COPY_BLOCK
;
{
const
int64x1_t
x
=
vqadd_s64
(
vget_high_s64
(
v_sum_diff_total
),
vget_low_s64
(
v_sum_diff_total
));
const
int
s0
=
vget_lane_s32
(
vabs_s32
(
vreinterpret_s32_s64
(
x
)),
0
);
if
(
s0
>
SUM_DIFF_THRESHOLD
)
return
COPY_BLOCK
;
}
/* Tell above level that block was filtered. */
vp8_copy_mem16x16
(
running_avg
->
y_buffer
+
y_offset
,
running_avg_y_stride
,
signal
->
thismb
,
sig_stride
);
running_avg_y
-=
running_avg_y_stride
*
16
;
sig
-=
sig_stride
*
16
;
vp8_copy_mem16x16
(
running_avg_y
,
running_avg_y_stride
,
sig
,
sig_stride
);
return
FILTER_BLOCK
;
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment