Commit 131a0d55 authored by Michael Bebenita
Browse files

Add multiple of 8 copies

Change-Id: I8fb710b767a986c898fbef9e329f30bfb0a22dad
parent 68f3c3bb
...@@ -633,8 +633,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") { ...@@ -633,8 +633,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride"; add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride"; add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride"; add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_nxm_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int n, int m"; add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
add_proto qw/void copy_nxm_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int n, int m"; add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
# VS compiling for 32 bit targets does not support vector types in # VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics # structs as arguments, which makes the v256 type of the intrinsics
...@@ -652,8 +652,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") { ...@@ -652,8 +652,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/; specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_nxm_8bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
specialize qw/copy_nxm_16bit_to_16bit sse2 ssse3 sse4_1 neon/; specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
} }
} }
......
...@@ -91,21 +91,24 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col, ...@@ -91,21 +91,24 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
return count; return count;
} }
void copy_nxm_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
int sstride, int n, int m) { int sstride, int v, int h) {
int i, j; int i, j;
for (i = 0; i < m; i++) { OD_ASSERT((h & 0x7) == 0);
for (j = 0; j < n; j++) { for (i = 0; i < v; i++) {
for (j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j]; dst[i * dstride + j] = src[i * sstride + j];
} }
} }
} }
void copy_nxm_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
int sstride, int n, int m) { const uint16_t *src, int sstride, int v,
int h) {
int i, j; int i, j;
for (i = 0; i < m; i++) { OD_ASSERT((h & 0x7) == 0);
for (j = 0; j < n; j++) { for (i = 0; i < v; i++) {
for (j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j]; dst[i * dstride + j] = src[i * sstride + j];
} }
} }
...@@ -118,11 +121,11 @@ void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride, ...@@ -118,11 +121,11 @@ void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
if (cm->use_highbitdepth) { if (cm->use_highbitdepth) {
const uint16_t *base = const uint16_t *base =
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset]; &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
copy_nxm_16bit_to_16bit(dst, dstride, base, sstride, hsize, vsize); copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
} else { } else {
#endif #endif
const uint8_t *base = &src[src_voffset * sstride + src_hoffset]; const uint8_t *base = &src[src_voffset * sstride + src_hoffset];
copy_nxm_8bit_to_16bit(dst, dstride, base, sstride, hsize, vsize); copy_rect8_8bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
} }
#endif #endif
......
...@@ -405,32 +405,28 @@ void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride, ...@@ -405,32 +405,28 @@ void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
} }
} }
void SIMD_FUNC(copy_nxm_8bit_to_16bit)(uint16_t *dst, int dstride, void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
const uint8_t *src, int sstride, int n, const uint8_t *src, int sstride, int v,
int m) { int h) {
int i, j; int i, j;
for (i = 0; i < m; i++) { OD_ASSERT((h & 0x7) == 0);
for (j = 0; j < (n & ~0x7); j += 8) { for (i = 0; i < v; i++) {
for (j = 0; j < h; j += 8) {
v64 row = v64_load_unaligned(&src[i * sstride + j]); v64 row = v64_load_unaligned(&src[i * sstride + j]);
v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row)); v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row));
} }
for (; j < n; j++) {
dst[i * dstride + j] = src[i * sstride + j];
}
} }
} }
void SIMD_FUNC(copy_nxm_16bit_to_16bit)(uint16_t *dst, int dstride, void SIMD_FUNC(copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride,
const uint16_t *src, int sstride, int n, const uint16_t *src, int sstride,
int m) { int v, int h) {
int i, j; int i, j;
for (i = 0; i < m; i++) { OD_ASSERT((h & 0x7) == 0);
for (j = 0; j < (n & ~0x7); j += 8) { for (i = 0; i < v; i++) {
for (j = 0; j < h; j += 8) {
v128 row = v128_load_unaligned(&src[i * sstride + j]); v128 row = v128_load_unaligned(&src[i * sstride + j]);
v128_store_unaligned(&dst[i * dstride + j], row); v128_store_unaligned(&dst[i * dstride + j], row);
} }
for (; j < n; j++) {
dst[i * dstride + j] = src[i * sstride + j];
}
} }
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment