Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Yushin Cho
aom-rav1e
Commits
aeea00cc
Commit
aeea00cc
authored
Aug 13, 2015
by
Scott LaVarnway
Browse files
VPX: removed step checks from mips convolve code
The check is handled by the predictor table. Change-Id: I5e5084ebb46be8087c8c9d80b5f76e919a1cd05b
parent
1aa84e03
Changes
15
Hide whitespace changes
Inline
Side-by-side
vpx_dsp/mips/convolve2_avg_dspr2.c
View file @
aeea00cc
...
...
@@ -233,47 +233,41 @@ void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
if
(
16
==
y_step_q4
)
{
uint32_t
pos
=
38
;
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
case
8
:
case
16
:
case
32
:
convolve_bi_avg_vert_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
w
,
h
);
break
;
case
64
:
prefetch_store
(
dst
+
32
);
convolve_bi_avg_vert_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
h
);
break
;
default:
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
else
{
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
uint32_t
pos
=
38
;
assert
(
y_step_q4
==
16
);
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
case
8
:
case
16
:
case
32
:
convolve_bi_avg_vert_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
w
,
h
);
break
;
case
64
:
prefetch_store
(
dst
+
32
);
convolve_bi_avg_vert_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
h
);
break
;
default:
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
#endif
vpx_dsp/mips/convolve2_avg_horiz_dspr2.c
View file @
aeea00cc
...
...
@@ -768,64 +768,58 @@ void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
if
(
16
==
x_step_q4
)
{
uint32_t
pos
=
38
;
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
/* prefetch data to cache memory */
prefetch_load
(
src
);
prefetch_load
(
src
+
32
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
convolve_bi_avg_horiz_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
8
:
convolve_bi_avg_horiz_8_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
16
:
convolve_bi_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
1
);
break
;
case
32
:
convolve_bi_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
2
);
break
;
case
64
:
prefetch_load
(
src
+
64
);
prefetch_store
(
dst
+
32
);
convolve_bi_avg_horiz_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
default:
vpx_convolve8_avg_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
else
{
vpx_convolve8_avg_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
uint32_t
pos
=
38
;
assert
(
x_step_q4
==
16
);
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
/* prefetch data to cache memory */
prefetch_load
(
src
);
prefetch_load
(
src
+
32
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
convolve_bi_avg_horiz_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
8
:
convolve_bi_avg_horiz_8_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
16
:
convolve_bi_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
1
);
break
;
case
32
:
convolve_bi_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
2
);
break
;
case
64
:
prefetch_load
(
src
+
64
);
prefetch_store
(
dst
+
32
);
convolve_bi_avg_horiz_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
default:
vpx_convolve8_avg_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
#endif
vpx_dsp/mips/convolve2_horiz_dspr2.c
View file @
aeea00cc
...
...
@@ -646,66 +646,60 @@ void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
if
(
16
==
x_step_q4
)
{
uint32_t
pos
=
38
;
prefetch_load
((
const
uint8_t
*
)
filter_x
);
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
/* prefetch data to cache memory */
prefetch_load
(
src
);
prefetch_load
(
src
+
32
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
convolve_bi_horiz_4_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
);
break
;
case
8
:
convolve_bi_horiz_8_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
);
break
;
case
16
:
convolve_bi_horiz_16_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
,
1
);
break
;
case
32
:
convolve_bi_horiz_16_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
,
2
);
break
;
case
64
:
prefetch_load
(
src
+
64
);
prefetch_store
(
dst
+
32
);
convolve_bi_horiz_64_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
);
break
;
default:
vpx_convolve8_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
else
{
vpx_convolve8_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
uint32_t
pos
=
38
;
assert
(
x_step_q4
==
16
);
prefetch_load
((
const
uint8_t
*
)
filter_x
);
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
/* prefetch data to cache memory */
prefetch_load
(
src
);
prefetch_load
(
src
+
32
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
convolve_bi_horiz_4_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
);
break
;
case
8
:
convolve_bi_horiz_8_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
);
break
;
case
16
:
convolve_bi_horiz_16_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
,
1
);
break
;
case
32
:
convolve_bi_horiz_16_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
,
2
);
break
;
case
64
:
prefetch_load
(
src
+
64
);
prefetch_store
(
dst
+
32
);
convolve_bi_horiz_64_dspr2
(
src
,
(
int32_t
)
src_stride
,
dst
,
(
int32_t
)
dst_stride
,
filter_x
,
(
int32_t
)
h
);
break
;
default:
vpx_convolve8_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
#endif
vpx_dsp/mips/convolve2_vert_dspr2.c
View file @
aeea00cc
...
...
@@ -218,47 +218,41 @@ void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
if
(
16
==
y_step_q4
)
{
uint32_t
pos
=
38
;
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
case
8
:
case
16
:
case
32
:
convolve_bi_vert_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
w
,
h
);
break
;
case
64
:
prefetch_store
(
dst
+
32
);
convolve_bi_vert_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
h
);
break
;
default:
vpx_convolve8_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
else
{
vpx_convolve8_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
uint32_t
pos
=
38
;
assert
(
y_step_q4
==
16
);
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
case
8
:
case
16
:
case
32
:
convolve_bi_vert_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
w
,
h
);
break
;
case
64
:
prefetch_store
(
dst
+
32
);
convolve_bi_vert_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
h
);
break
;
default:
vpx_convolve8_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
#endif
vpx_dsp/mips/convolve8_avg_dspr2.c
View file @
aeea00cc
...
...
@@ -347,6 +347,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
assert
(
y_step_q4
==
16
);
if
(((
const
int32_t
*
)
filter_y
)[
1
]
==
0x800000
)
{
vpx_convolve_avg
(
src
,
src_stride
,
dst
,
dst_stride
,
...
...
@@ -360,47 +361,39 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_y
,
y_step_q4
,
w
,
h
);
}
else
{
if
(
16
==
y_step_q4
)
{
uint32_t
pos
=
38
;
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
case
8
:
case
16
:
case
32
:
convolve_avg_vert_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
w
,
h
);
break
;
case
64
:
prefetch_store
(
dst
+
32
);
convolve_avg_vert_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
h
);
break
;
default:
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
uint32_t
pos
=
38
;
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
case
8
:
case
16
:
case
32
:
convolve_avg_vert_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_y
,
w
,
h
);
break
;
case
64
:
prefetch_store
(
dst
+
32
);
convolve_avg_vert_64_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
else
{
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
filter_y
,
h
);
break
;
default:
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
break
;
}
}
}
...
...
@@ -416,17 +409,12 @@ void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
assert
(
w
<=
64
);
assert
(
h
<=
64
);
assert
(
x_step_q4
==
16
);
assert
(
y_step_q4
==
16
);
if
(
intermediate_height
<
h
)
intermediate_height
=
h
;
if
(
x_step_q4
!=
16
||
y_step_q4
!=
16
)
return
vpx_convolve8_avg_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
vpx_convolve8_horiz
(
src
-
(
src_stride
*
3
),
src_stride
,
temp
,
64
,
filter_x
,
x_step_q4
,
...
...
vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
View file @
aeea00cc
...
...
@@ -957,6 +957,7 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
assert
(
x_step_q4
==
16
);
if
(((
const
int32_t
*
)
filter_x
)[
1
]
==
0x800000
)
{
vpx_convolve_avg
(
src
,
src_stride
,
dst
,
dst_stride
,
...
...
@@ -970,66 +971,58 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
filter_y
,
y_step_q4
,
w
,
h
);
}
else
{
if
(
16
==
x_step_q4
)
{
uint32_t
pos
=
38
;
uint32_t
pos
=
38
;
src
-=
3
;
src
-=
3
;
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
/* bit position for extract from acc */
__asm__
__volatile__
(
"wrdsp %[pos], 1
\n\t
"
:
:
[
pos
]
"r"
(
pos
)
);
/* prefetch data to cache memory */
prefetch_load
(
src
);
prefetch_load
(
src
+
32
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
convolve_avg_horiz_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
8
:
convolve_avg_horiz_8_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
16
:
convolve_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
1
);
break
;
case
32
:
convolve_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
2
);
break
;
case
64
:
prefetch_load
(
src
+
64
);
prefetch_store
(
dst
+
32
);
/* prefetch data to cache memory */
prefetch_load
(
src
);
prefetch_load
(
src
+
32
);
prefetch_store
(
dst
);
switch
(
w
)
{
case
4
:
convolve_avg_horiz_4_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
8
:
convolve_avg_horiz_8_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
);
break
;
case
16
:
convolve_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
1
);
break
;
case
32
:
convolve_avg_horiz_16_dspr2
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
h
,
2
);
break
;
case
64
:
prefetch_load
(
src
+
64
);
prefetch_store
(
dst
+
32
);