Commit 72af533f, authored by Yunqing Wang, committed by Gerrit Code Review

Merge "Align image buffer in multiple-resolution encoder"

parents 6b2792b0 153eec46
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 90 Version: 102
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -13,21 +13,12 @@ ...@@ -13,21 +13,12 @@
#include <stddef.h> // for NULL, size_t #include <stddef.h> // for NULL, size_t
#ifndef WIN32 #if !(defined(_MSC_VER) && (_MSC_VER < 1600))
#include <stdint.h> // for uintptr_t #include <stdint.h> // for uintptr_t
#endif #endif
#ifndef INT_TYPES_DEFINED #ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED #define INT_TYPES_DEFINED
#ifdef COMPILER_MSVC
typedef __int64 int64;
#else
typedef long long int64;
#endif /* COMPILER_MSVC */
typedef int int32;
typedef short int16;
typedef char int8;
#ifdef COMPILER_MSVC #ifdef COMPILER_MSVC
typedef unsigned __int64 uint64; typedef unsigned __int64 uint64;
typedef __int64 int64; typedef __int64 int64;
...@@ -38,9 +29,20 @@ typedef __int64 int64; ...@@ -38,9 +29,20 @@ typedef __int64 int64;
#define UINT64_C(x) x ## UI64 #define UINT64_C(x) x ## UI64
#endif #endif
#define INT64_F "I64" #define INT64_F "I64"
#else #else // COMPILER_MSVC
#ifdef __LP64__
typedef unsigned long uint64;
typedef long int64;
#ifndef INT64_C
#define INT64_C(x) x ## L
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UL
#endif
#define INT64_F "l"
#else // __LP64__
typedef unsigned long long uint64; typedef unsigned long long uint64;
//typedef long long int64; typedef long long int64;
#ifndef INT64_C #ifndef INT64_C
#define INT64_C(x) x ## LL #define INT64_C(x) x ## LL
#endif #endif
...@@ -48,10 +50,14 @@ typedef unsigned long long uint64; ...@@ -48,10 +50,14 @@ typedef unsigned long long uint64;
#define UINT64_C(x) x ## ULL #define UINT64_C(x) x ## ULL
#endif #endif
#define INT64_F "ll" #define INT64_F "ll"
#endif /* COMPILER_MSVC */ #endif // __LP64__
#endif // COMPILER_MSVC
typedef unsigned int uint32; typedef unsigned int uint32;
typedef int int32;
typedef unsigned short uint16; typedef unsigned short uint16;
typedef short int16;
typedef unsigned char uint8; typedef unsigned char uint8;
typedef char int8;
#endif // INT_TYPES_DEFINED #endif // INT_TYPES_DEFINED
// Detect compiler is for x86 or x64. // Detect compiler is for x86 or x64.
...@@ -60,7 +66,6 @@ typedef unsigned char uint8; ...@@ -60,7 +66,6 @@ typedef unsigned char uint8;
#define CPU_X86 1 #define CPU_X86 1
#endif #endif
#define IS_ALIGNED(p, a) (0==((uintptr_t)(p) & ((a)-1)))
#define ALIGNP(p, t) \ #define ALIGNP(p, t) \
((uint8*)((((uintptr_t)(p) + \ ((uint8*)((((uintptr_t)(p) + \
((t)-1)) & ~((t)-1)))) ((t)-1)) & ~((t)-1))))
......
...@@ -11,21 +11,39 @@ ...@@ -11,21 +11,39 @@
#ifndef INCLUDE_LIBYUV_CPU_ID_H_ #ifndef INCLUDE_LIBYUV_CPU_ID_H_
#define INCLUDE_LIBYUV_CPU_ID_H_ #define INCLUDE_LIBYUV_CPU_ID_H_
//namespace libyuv { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// These flags are only valid on x86 processors // These flags are only valid on x86 processors
static const int kCpuHasSSE2 = 1; static const int kCpuHasSSE2 = 1;
static const int kCpuHasSSSE3 = 2; static const int kCpuHasSSSE3 = 2;
// SIMD support on ARM processors // These flags are only valid on ARM processors
static const int kCpuHasNEON = 4; static const int kCpuHasNEON = 4;
// Internal flag to indicate cpuid is initialized.
static const int kCpuInitialized = 8;
// Detect CPU has SSE2 etc. // Detect CPU has SSE2 etc.
int TestCpuFlag(int flag); // test_flag parameter should be one of kCpuHas constants above
// returns non-zero if instruction set is detected
/*
 * Test whether a CPU feature is available.
 *
 * test_flag should be one of the kCpuHas* constants. The result is non-zero
 * when the corresponding bit is set in the cached flag word. A cached value of
 * zero is treated as "not yet initialized": InitCpuFlags() is called and its
 * return value is used instead.
 */
static __inline int TestCpuFlag(int test_flag) {
  extern int cpu_info_;
  extern int InitCpuFlags();
  int detected = cpu_info_;

  if (!detected) {
    detected = InitCpuFlags();
  }
  return detected & test_flag;
}
// For testing, allow CPU flags to be disabled. // For testing, allow CPU flags to be disabled.
void MaskCpuFlagsForTest(int enable_flags); // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// -1 to enable all cpu specific optimizations.
//} // namespace libyuv // 0 to disable all cpu specific optimizations.
void MaskCpuFlags(int enable_flags);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_ #endif // INCLUDE_LIBYUV_CPU_ID_H_
...@@ -13,7 +13,10 @@ ...@@ -13,7 +13,10 @@
#include "third_party/libyuv/include/libyuv/basic_types.h" #include "third_party/libyuv/include/libyuv/basic_types.h"
//namespace libyuv { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported filtering // Supported filtering
typedef enum { typedef enum {
...@@ -42,16 +45,8 @@ int I420Scale(const uint8* src_y, int src_stride_y, ...@@ -42,16 +45,8 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_width, int dst_height, int dst_width, int dst_height,
FilterMode filtering); FilterMode filtering);
// Legacy API // Legacy API. Deprecated
// If dst_height_offset is non-zero, the image is offset by that many pixels int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
// and stretched to (dst_height - dst_height_offset * 2) pixels high,
// instead of dst_height.
int Scale_1(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_height_offset,
int interpolate);
// Same, but specified src terms of each plane location and stride.
int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v, int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height, int src_width, int src_height,
uint8* dst_y, uint8* dst_u, uint8* dst_v, uint8* dst_y, uint8* dst_u, uint8* dst_v,
...@@ -59,9 +54,17 @@ int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v, ...@@ -59,9 +54,17 @@ int Scale_2(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int dst_width, int dst_height, int dst_width, int dst_height,
int interpolate); int interpolate);
// Legacy API. Deprecated
int ScaleOffset(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_yoffset,
int interpolate);
// For testing, allow disabling of optimizations. // For testing, allow disabling of optimizations.
void SetUseReferenceImpl(int use); void SetUseReferenceImpl(int use);
//} // namespace libyuv #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_H_ #endif // INCLUDE_LIBYUV_SCALE_H_
...@@ -9,66 +9,73 @@ ...@@ -9,66 +9,73 @@
*/ */
#include "third_party/libyuv/include/libyuv/cpu_id.h" #include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "third_party/libyuv/include/libyuv/basic_types.h" // for CPU_X86
#ifdef _MSC_VER #ifdef _MSC_VER
#include <intrin.h> #include <intrin.h>
#endif #endif
#ifdef __ANDROID__
#include <cpu-features.h>
#endif
#include "third_party/libyuv/include/libyuv/basic_types.h" // for CPU_X86
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux. // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static inline void __cpuid(int cpu_info[4], int info_type) { static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile ( asm volatile (
"mov %%ebx, %%edi\n" "mov %%ebx, %%edi \n"
"cpuid\n" "cpuid \n"
"xchg %%edi, %%ebx\n" "xchg %%edi, %%ebx \n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type) : "a"(info_type)
); );
} }
#elif defined(__i386__) || defined(__x86_64__) #elif defined(__i386__) || defined(__x86_64__)
static inline void __cpuid(int cpu_info[4], int info_type) { static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile ( asm volatile (
"cpuid\n" "cpuid \n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type) : "a"(info_type)
); );
} }
#endif #endif
//namespace libyuv { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// CPU detect function for SIMD instruction sets. // CPU detect function for SIMD instruction sets.
static int cpu_info_initialized_ = 0; int cpu_info_ = 0;
static int cpu_info_ = 0;
// Global lock for cpu initialization. int InitCpuFlags() {
static void InitCpuFlags() {
#ifdef CPU_X86 #ifdef CPU_X86
int cpu_info[4]; int cpu_info[4];
__cpuid(cpu_info, 1); __cpuid(cpu_info, 1);
cpu_info_ = (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) | cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
(cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0); (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
kCpuInitialized;
#elif defined(__ANDROID__) && defined(__ARM_NEON__)
uint64_t features = android_getCpuFeatures();
cpu_info_ = ((features & ANDROID_CPU_ARM_FEATURE_NEON) ? kCpuHasNEON : 0) |
kCpuInitialized;
#elif defined(__ARM_NEON__) #elif defined(__ARM_NEON__)
// gcc -mfpu=neon defines __ARM_NEON__ // gcc -mfpu=neon defines __ARM_NEON__
// if code is specifically built for Neon-only, enable the flag. // Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags
cpu_info_ |= kCpuHasNEON; // to disable Neon on devices that do not have it.
cpu_info_ = kCpuHasNEON | kCpuInitialized;
#else #else
cpu_info_ = 0; cpu_info_ = kCpuInitialized;
#endif #endif
cpu_info_initialized_ = 1; return cpu_info_;
} }
void MaskCpuFlagsForTest(int enable_flags) { void MaskCpuFlags(int enable_flags) {
InitCpuFlags(); InitCpuFlags();
cpu_info_ &= enable_flags; cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized;
}
int TestCpuFlag(int flag) {
if (!cpu_info_initialized_) {
InitCpuFlags();
}
return cpu_info_ & flag ? 1 : 0;
} }
//} // namespace libyuv #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "third_party/libyuv/include/libyuv/basic_types.h" #include "third_party/libyuv/include/libyuv/basic_types.h"
#define kMaxStride (2048 * 4) #define kMaxStride (2048 * 4)
//#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR) #if defined(COVERAGE_ENABLED) || defined(TARGET_IPHONE_SIMULATOR)
#define YUV_DISABLE_ASM #define YUV_DISABLE_ASM
...@@ -72,7 +72,10 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf, ...@@ -72,7 +72,10 @@ void FastConvertYUVToABGRRow_NEON(const uint8* y_buf,
#define HAS_REVERSE_ROW_NEON #define HAS_REVERSE_ROW_NEON
#endif #endif
//extern "C" { #ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
...@@ -253,6 +256,9 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, ...@@ -253,6 +256,9 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
#endif #endif
//} // extern "C" #ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // LIBYUV_SOURCE_ROW_H_ #endif // LIBYUV_SOURCE_ROW_H_
This diff is collapsed.
...@@ -78,6 +78,8 @@ static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { ...@@ -78,6 +78,8 @@ static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
int (*read_frame_p)(FILE *f, vpx_image_t *img);
static int read_frame(FILE *f, vpx_image_t *img) { static int read_frame(FILE *f, vpx_image_t *img) {
size_t nbytes, to_read; size_t nbytes, to_read;
int res = 1; int res = 1;
...@@ -92,6 +94,55 @@ static int read_frame(FILE *f, vpx_image_t *img) { ...@@ -92,6 +94,55 @@ static int read_frame(FILE *f, vpx_image_t *img) {
return res; return res;
} }
/*
 * Read one raw frame from f into img, one row at a time.
 *
 * Each row of w bytes is read to the current row start, after which the
 * destination pointer advances by the stride of the plane being filled. This
 * lets the image buffer have a stride that differs from the visible width.
 *
 * Planes are consumed from the file in loop order 0, 1, 2. Planes 1 and 2 are
 * read at half width and half height, rounded up.
 *
 * Returns 1 when a complete frame was read, 0 as soon as any row comes up
 * short. A warning is printed only when the short row was partially read.
 */
static int read_frame_by_row(FILE *f, vpx_image_t *img) {
    int res = 1;
    int plane;

    for (plane = 0; plane < 3 && res; plane++) {
        const int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
        const int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
        const size_t to_read = w;
        unsigned char *ptr;
        int idx;
        int r;

        /* Determine the correct plane based on the image format. The loop
         * always counts in Y,U,V order, but this may not match the order of
         * the data on disk (YV12 stores V before U).
         */
        switch (plane) {
        case 1:
            idx = img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U;
            break;
        case 2:
            idx = img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V;
            break;
        default:
            idx = plane;
            break;
        }
        ptr = img->planes[idx];

        for (r = 0; r < h; r++) {
            const size_t nbytes = fread(ptr, 1, to_read, f);

            if (nbytes != to_read) {
                res = 0;
                if (nbytes > 0)
                    printf("Warning: Read partial frame. Check your width & height!\n");
                break;
            }
            /* Step by the stride of the plane actually being written, not
             * by the loop counter, so swapped U/V planes stay consistent.
             */
            ptr += img->stride[idx];
        }
    }
    return res;
}
static void write_ivf_file_header(FILE *outfile, static void write_ivf_file_header(FILE *outfile,
const vpx_codec_enc_cfg_t *cfg, const vpx_codec_enc_cfg_t *cfg,
int frame_cnt) { int frame_cnt) {
...@@ -262,9 +313,14 @@ int main(int argc, char **argv) ...@@ -262,9 +313,14 @@ int main(int argc, char **argv)
/* Allocate image for each encoder */ /* Allocate image for each encoder */
for (i=0; i< NUM_ENCODERS; i++) for (i=0; i< NUM_ENCODERS; i++)
if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 1)) if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h); die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
read_frame_p = read_frame;
else
read_frame_p = read_frame_by_row;
for (i=0; i< NUM_ENCODERS; i++) for (i=0; i< NUM_ENCODERS; i++)
write_ivf_file_header(outfile[i], &cfg[i], 0); write_ivf_file_header(outfile[i], &cfg[i], 0);
...@@ -305,35 +361,22 @@ int main(int argc, char **argv) ...@@ -305,35 +361,22 @@ int main(int argc, char **argv)
const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS]; const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
flags = 0; flags = 0;
frame_avail = read_frame(infile, &raw[0]); frame_avail = read_frame_p(infile, &raw[0]);
for ( i=1; i<NUM_ENCODERS; i++) if(frame_avail)
{ {
if(frame_avail) for ( i=1; i<NUM_ENCODERS; i++)
{ {
/*Scale the image down a number of times by downsampling factor*/ /*Scale the image down a number of times by downsampling factor*/
int src_uvwidth = (raw[i-1].d_w + 1) >> 1;
int src_uvheight = (raw[i-1].d_h + 1) >> 1;
const unsigned char* src_y = raw[i-1].planes[VPX_PLANE_Y];
const unsigned char* src_u = raw[i-1].planes[VPX_PLANE_Y]
+ raw[i-1].d_w*raw[i-1].d_h;
const unsigned char* src_v = raw[i-1].planes[VPX_PLANE_Y]
+ raw[i-1].d_w*raw[i-1].d_h
+ src_uvwidth*src_uvheight;
int dst_uvwidth = (raw[i].d_w + 1) >> 1;
int dst_uvheight = (raw[i].d_h + 1) >> 1;
unsigned char* dst_y = raw[i].planes[VPX_PLANE_Y];
unsigned char* dst_u = raw[i].planes[VPX_PLANE_Y]
+ raw[i].d_w*raw[i].d_h;
unsigned char* dst_v = raw[i].planes[VPX_PLANE_Y]
+ raw[i].d_w*raw[i].d_h
+ dst_uvwidth*dst_uvheight;
/* FilterMode 1 or 2 give better psnr than FilterMode 0. */ /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
I420Scale(src_y, raw[i-1].d_w, src_u, src_uvwidth, src_v, I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
src_uvwidth, raw[i-1].d_w, raw[i-1].d_h, raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
dst_y, raw[i].d_w, dst_u, dst_uvwidth, raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
dst_v, dst_uvwidth, raw[i].d_w, raw[i].d_h, 1); raw[i-1].d_w, raw[i-1].d_h,
raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
raw[i].d_w, raw[i].d_h, 1);
} }
} }
......
...@@ -243,6 +243,7 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, ...@@ -243,6 +243,7 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx,
ctx--; ctx--;
if (img) img--; if (img) img--;
} }
ctx++;
} }
FLOATING_POINT_RESTORE(); FLOATING_POINT_RESTORE();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment