From 84484a679009fc703d80cedfafebb50d42b97cd4 Mon Sep 17 00:00:00 2001
From: Marcus Asteborg <xnorpx@outlook.com>
Date: Wed, 24 May 2023 21:28:36 -0700
Subject: [PATCH] CMake - change avx to avx2 and fma

---
 CMakeLists.txt            | 50 ++++++++++++++++++++-------------------
 cmake/OpusConfig.cmake    |  5 +++-
 cmake/OpusFunctions.cmake | 20 +++++++++-------
 cmake/OpusSources.cmake   |  1 +
 4 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7105e0d08..e5429eb18 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -177,13 +177,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
                          OFF)
   add_feature_info(OPUS_X86_MAY_HAVE_SSE4_1 OPUS_X86_MAY_HAVE_SSE4_1 ${OPUS_X86_MAY_HAVE_SSE4_1_HELP_STR})
 
-  set(OPUS_X86_MAY_HAVE_AVX_HELP_STR "does runtime check for AVX support.")
-  cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX
-                         ${OPUS_X86_MAY_HAVE_AVX_HELP_STR}
+  set(OPUS_X86_MAY_HAVE_AVX2_HELP_STR "does runtime check for AVX FMA AVX2 support.")
+  cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX2
+                         ${OPUS_X86_MAY_HAVE_AVX2_HELP_STR}
                          ON
-                         "AVX_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
+                         "AVX2_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
                          OFF)
-  add_feature_info(OPUS_X86_MAY_HAVE_AVX OPUS_X86_MAY_HAVE_AVX ${OPUS_X86_MAY_HAVE_AVX_HELP_STR})
+  add_feature_info(OPUS_X86_MAY_HAVE_AVX2 OPUS_X86_MAY_HAVE_AVX2 ${OPUS_X86_MAY_HAVE_AVX2_HELP_STR})
 
   # PRESUME depends on MAY HAVE, but PRESUME will override runtime detection
   set(OPUS_X86_PRESUME_SSE_HELP_STR "assume target CPU has SSE1 support (override runtime check).")
@@ -224,13 +224,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
                          OFF)
   add_feature_info(OPUS_X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1 ${OPUS_X86_PRESUME_SSE4_1_HELP_STR})
 
-  set(OPUS_X86_PRESUME_AVX_HELP_STR "assume target CPU has AVX support (override runtime check).")
-  cmake_dependent_option(OPUS_X86_PRESUME_AVX
-                         ${OPUS_X86_PRESUME_AVX_HELP_STR}
+  set(OPUS_X86_PRESUME_AVX2_HELP_STR "assume target CPU has AVX FMA AVX2 support (override runtime check).")
+  cmake_dependent_option(OPUS_X86_PRESUME_AVX2
+                         ${OPUS_X86_PRESUME_AVX2_HELP_STR}
                          OFF
-                         "OPUS_X86_MAY_HAVE_AVX; NOT OPUS_DISABLE_INTRINSICS"
+                         "OPUS_X86_MAY_HAVE_AVX2; NOT OPUS_DISABLE_INTRINSICS"
                          OFF)
-  add_feature_info(OPUS_X86_PRESUME_AVX OPUS_X86_PRESUME_AVX ${OPUS_X86_PRESUME_AVX_HELP_STR})
+  add_feature_info(OPUS_X86_PRESUME_AVX2 OPUS_X86_PRESUME_AVX2 ${OPUS_X86_PRESUME_AVX2_HELP_STR})
 endif()
 
 feature_summary(WHAT ALL)
@@ -382,7 +382,7 @@ if(NOT OPUS_DISABLE_INTRINSICS)
   if(((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR
      (OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR
      (OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR
-     (OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) AND
+     (OPUS_X86_MAY_HAVE_AVX2 AND NOT OPUS_X86_PRESUME_AVX2)) AND
       RUNTIME_CPU_CAPABILITY_DETECTION)
     target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD)
     if(NOT MSVC)
@@ -454,22 +454,28 @@ if(NOT OPUS_DISABLE_INTRINSICS)
     endif()
   endif()
 
-  if(AVX_SUPPORTED)
-    # mostly placeholder in case of avx intrinsics is added
-    if(OPUS_X86_MAY_HAVE_AVX)
-      target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX)
+  if(AVX2_SUPPORTED) # AVX2 sources and flags are only wired in when the compiler probe succeeded
+    if(OPUS_X86_MAY_HAVE_AVX2)
+      add_sources_group(opus celt ${celt_sources_avx2})
+      target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
+      if(MSVC)
+        set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
+      else()
+        set(AVX2_FLAGS "${AVX2_FLAGS} -mavx2 -mfma -mavx")
+      endif()
+      set_source_files_properties(${celt_sources_avx2} PROPERTIES COMPILE_FLAGS "${AVX2_FLAGS}") # quoted: always exactly one property value
     endif()
-    if(OPUS_X86_PRESUME_AVX)
-      target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX)
+    if(OPUS_X86_PRESUME_AVX2)
+      target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX2)
       if(NOT MSVC)
-        target_compile_options(opus PRIVATE -mavx)
+        target_compile_options(opus PRIVATE -mavx2 -mfma -mavx)
       endif()
     endif()
   endif()
 
   if(MSVC)
-    if(AVX_SUPPORTED AND OPUS_X86_PRESUME_AVX) # on 64 bit and 32 bits
-      add_definitions(/arch:AVX)
+    if(AVX2_SUPPORTED AND OPUS_X86_PRESUME_AVX2) # on 64 bit and 32 bits
+      add_definitions(/arch:AVX2)
     elseif(OPUS_CPU_X86) # if AVX not supported then set SSE flag
       if((SSE4_1_SUPPORTED AND OPUS_X86_PRESUME_SSE4_1)
          OR (SSE2_SUPPORTED AND OPUS_X86_PRESUME_SSE2))
@@ -602,10 +608,6 @@ if(OPUS_BUILD_PROGRAMS)
   add_executable(opus_compare ${opus_compare_sources})
   target_include_directories(opus_compare PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
   target_link_libraries(opus_compare PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
-  if(MSVC)
-    # move cosmetic warning to level 4 for opus_compare
-    target_compile_options(opus_compare PRIVATE /w44244)
-  endif()
 endif()
 
 if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)
diff --git a/cmake/OpusConfig.cmake b/cmake/OpusConfig.cmake
index b82307a19..9fd814cb2 100644
--- a/cmake/OpusConfig.cmake
+++ b/cmake/OpusConfig.cmake
@@ -102,7 +102,10 @@ if(MINGW)
   endif()
 endif()
 
-if(NOT MSVC)
+if(MSVC)
+  # move cosmetic warnings C4244, C4305 and C4267 to warning level 4
+  add_compile_options(/w44244 /w44305 /w44267)
+else()
   set(WARNING_LIST -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow)
   include(CheckCCompilerFlag)
   foreach(WARNING_FLAG ${WARNING_LIST})
diff --git a/cmake/OpusFunctions.cmake b/cmake/OpusFunctions.cmake
index 3f22ad813..5ab50a9d6 100644
--- a/cmake/OpusFunctions.cmake
+++ b/cmake/OpusFunctions.cmake
@@ -47,10 +47,12 @@ function(check_flag NAME FLAG)
 endfunction()
 
 include(CheckIncludeFile)
-# function to check if compiler supports SSE, SSE2, SSE4.1 and AVX if target
-# systems may not have SSE support then use OPUS_MAY_HAVE_SSE option if target
-# system is guaranteed to have SSE support then OPUS_PRESUME_SSE can be used to
-# skip SSE runtime check
+
+# This function determines if the compiler has support for SSE, SSE2, SSE4.1, AVX,
+# AVX2 and FMA. If the target systems may lack SSE support, use the
+# OPUS_X86_MAY_HAVE_SSE option so that support is checked at runtime. If the
+# target system is guaranteed to support SSE, the OPUS_X86_PRESUME_SSE option
+# can be used instead to skip the SSE runtime check.
 function(opus_detect_sse COMPILER_SUPPORT_SIMD)
   message(STATUS "Check SIMD support by compiler")
   check_include_file(xmmintrin.h HAVE_XMMINTRIN_H) # SSE1
@@ -111,20 +113,20 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
         PARENT_SCOPE)
   endif()
 
-  check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX
+  check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX2
   if(HAVE_IMMINTRIN_H)
     if(MSVC)
-      check_flag(AVX /arch:AVX)
+      check_flag(AVX2 /arch:AVX2)
     else()
-      check_flag(AVX -mavx)
+      check_flag(AVX2 "-mavx2 -mfma -mavx") # one quoted argument: check_flag(NAME FLAG) takes a single FLAG
     endif()
   else()
-    set(AVX_SUPPORTED
+    set(AVX2_SUPPORTED
         0
         PARENT_SCOPE)
   endif()
 
-  if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX_SUPPORTED)
+  if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX2_SUPPORTED)
     set(COMPILER_SUPPORT_SIMD 1 PARENT_SCOPE)
   else()
     message(STATUS "No SIMD support in compiler")
diff --git a/cmake/OpusSources.cmake b/cmake/OpusSources.cmake
index cb852c0a0..a9a1c098d 100644
--- a/cmake/OpusSources.cmake
+++ b/cmake/OpusSources.cmake
@@ -29,6 +29,7 @@ get_opus_sources(CELT_SOURCES_X86_RTCD celt_sources.mk celt_sources_x86_rtcd)
 get_opus_sources(CELT_SOURCES_SSE celt_sources.mk celt_sources_sse)
 get_opus_sources(CELT_SOURCES_SSE2 celt_sources.mk celt_sources_sse2)
 get_opus_sources(CELT_SOURCES_SSE4_1 celt_sources.mk celt_sources_sse4_1)
+get_opus_sources(CELT_SOURCES_AVX2 celt_sources.mk celt_sources_avx2)
 get_opus_sources(CELT_SOURCES_ARM_RTCD celt_sources.mk celt_sources_arm_rtcd)
 get_opus_sources(CELT_SOURCES_ARM_ASM celt_sources.mk celt_sources_arm_asm)
 get_opus_sources(CELT_AM_SOURCES_ARM_ASM celt_sources.mk
-- 
GitLab