diff --git a/.github/workflows/build_cpp.yml b/.github/workflows/build_cpp.yml index 76078f6..43e700f 100644 --- a/.github/workflows/build_cpp.yml +++ b/.github/workflows/build_cpp.yml @@ -9,10 +9,15 @@ on: whisper_cpp_repo_ref: description: 'Tag, branch or commit' required: true - default: 'v1.5.5' + default: 'v1.7.2' + target_platform: + description: 'Target platforms' + required: true + default: 'windows' jobs: build-windows: - name: Build for Windows (x86_64) + name: Build for Windows (x86_64, Vulkan) + if: ${{ contains(fromJson('["windows", "all"]'), github.event.inputs.target_platform) }} runs-on: windows-latest steps: - name: Clone whisper.unity @@ -30,10 +35,19 @@ jobs: - name: Add msbuild to PATH uses: microsoft/setup-msbuild@v1 + - name: Install Vulkan SDK + uses: jakoch/install-vulkan-sdk-action@v1.0.4 + with: + vulkan_version: 1.3.290.0 + optional_components: com.lunarg.vulkan.vma + install_runtime: true + cache: true + stripdown: true + - name: Run build script run: | cd whisper-unity - .\build_cpp.bat cpu ..\whisper-cpp\ + .\build_cpp.bat ..\whisper-cpp\ cd ${{ github.workspace }}\whisper-cpp\build\bin\Release ren whisper.dll libwhisper.dll @@ -41,11 +55,12 @@ jobs: uses: actions/upload-artifact@v3 with: name: windows - path: ${{ github.workspace }}/whisper-cpp/build/bin/Release/libwhisper.dll + path: ${{ github.workspace }}/whisper-cpp/build/bin/Release if-no-files-found: error build-windows-cuda: name: Build for Windows (x86_64, CUDA) + if: ${{ contains(fromJson('["windows_cuda", "all"]'), github.event.inputs.target_platform) }} runs-on: windows-latest steps: - name: Clone whisper.unity @@ -85,6 +100,7 @@ jobs: build-linux: name: Build for Linux (x86_64) + if: ${{ contains(fromJson('["linux", "all"]'), github.event.inputs.target_platform) }} runs-on: ubuntu-20.04 steps: - name: Clone whisper.unity @@ -113,6 +129,7 @@ jobs: build-linux-cuda: name: Build for Linux (x86_64, CUDA) + if: ${{ contains(fromJson('["linux_cuda", "all"]'), github.event.inputs.target_platform) }} runs-on: ubuntu-20.04 steps: - name: Clone whisper.unity @@ -150,6 +167,7 @@ jobs: build-macos: name: Build for MacOS (ARM, x86_64) + if: ${{ contains(fromJson('["macos", "all"]'), github.event.inputs.target_platform) }} runs-on: macOS-latest steps: - name: Clone whisper.unity @@ -178,6 +196,7 @@ jobs: build-macos-metal: name: Build for MacOS Metal (ARM, x86_64) + if: ${{ contains(fromJson('["macos_metal", "all"]'), github.event.inputs.target_platform) }} runs-on: macOS-latest steps: - name: Clone whisper.unity @@ -210,6 +229,7 @@ jobs: build-ios: name: Build for iOS + if: ${{ contains(fromJson('["ios", "all"]'), github.event.inputs.target_platform) }} runs-on: macOS-latest steps: - name: Clone whisper.unity @@ -238,6 +258,7 @@ jobs: build-android: name: Build for Android (arm64-v8a) + if: ${{ contains(fromJson('["android", "all"]'), github.event.inputs.target_platform) }} runs-on: ubuntu-latest steps: - name: Install Java diff --git a/Packages/com.whisper.unity/Editor/WhisperProjectSettings.cs b/Packages/com.whisper.unity/Editor/WhisperProjectSettings.cs deleted file mode 100644 index 23c78f8..0000000 --- a/Packages/com.whisper.unity/Editor/WhisperProjectSettings.cs +++ /dev/null @@ -1,113 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using UnityEditor; -using UnityEditor.Build; -using UnityEngine; - - -static class WhisperProjectSettingsProvider -{ - [SettingsProvider] - public static SettingsProvider CreateMyCustomSettingsProvider() - { - var provider = new SettingsProvider("Project/WhisperSettings", SettingsScope.Project) - { - label = "Whisper", - guiHandler = (searchContext) => - { - CudaEnabled = EditorGUILayout.Toggle("Enable CUDA", CudaEnabled); - MetalEnabled = EditorGUILayout.Toggle("Enable Metal", MetalEnabled); - }, - - keywords = new HashSet(new[] { "CUDA", "cuBLAS", "Metal" }) - }; - - return provider; - } - - public static bool CudaEnabled - { - get - { -#if WHISPER_CUDA - return true; -#else - return false; -#endif - } - set - { - if (value == CudaEnabled) - return; - SetDefine("WHISPER_CUDA", value); - } - } - - public static bool MetalEnabled - { - get - { -#if WHISPER_METAL - return true; -#else - return false; -#endif - } - set - { - if (value == MetalEnabled) - return; - SetDefine("WHISPER_METAL", value); - } - } - - private static void SetDefine(string define, bool value) - { - string[] newDefines; - var defines = GetStandaloneDefines(); - - if (value) - { - if (defines.Contains(define)) - return; - - newDefines = defines.Append(define).ToArray(); - } - else - { - if (!defines.Contains(define)) - return; - - newDefines = defines.Where(x => x != define).ToArray(); - } - - SetStandaloneDefines(newDefines); - } - - - // This is for older Unity compability - private static string[] GetStandaloneDefines() - { - string[] defines; - -#if UNITY_2021_3_OR_NEWER - PlayerSettings.GetScriptingDefineSymbols(NamedBuildTarget.Standalone, out defines); -#else - var definesStr = PlayerSettings.GetScriptingDefineSymbolsForGroup(BuildTargetGroup.Standalone); - defines = definesStr.Split(';'); -#endif - - return defines; - } - - private static void SetStandaloneDefines(string[] newDefines) - { -#if UNITY_2021_3_OR_NEWER - PlayerSettings.SetScriptingDefineSymbols(NamedBuildTarget.Standalone, newDefines); -#else - var definesStr = string.Join(";", newDefines); - PlayerSettings.SetScriptingDefineSymbolsForGroup(BuildTargetGroup.Standalone, definesStr); -#endif - } -} \ No newline at end of file diff --git a/Packages/com.whisper.unity/Editor/WhisperProjectSettings.cs.meta b/Packages/com.whisper.unity/Editor/WhisperProjectSettings.cs.meta deleted file mode 100644 index ba79ec1..0000000 --- a/Packages/com.whisper.unity/Editor/WhisperProjectSettings.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 7d83cf4c3a9c82745be38b51c872259b -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml-base.dll b/Packages/com.whisper.unity/Plugins/Windows/ggml-base.dll new file mode 100644 index 0000000..7f658a4 Binary files /dev/null and b/Packages/com.whisper.unity/Plugins/Windows/ggml-base.dll differ diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml-base.dll.meta b/Packages/com.whisper.unity/Plugins/Windows/ggml-base.dll.meta new file mode 100644 index 0000000..2b36e35 --- /dev/null +++ b/Packages/com.whisper.unity/Plugins/Windows/ggml-base.dll.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 03315452d1f0cf7468102fdba024f361 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml-cpu.dll b/Packages/com.whisper.unity/Plugins/Windows/ggml-cpu.dll new file mode 100644 index 0000000..1f5b227 Binary files /dev/null and b/Packages/com.whisper.unity/Plugins/Windows/ggml-cpu.dll differ diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml-cpu.dll.meta b/Packages/com.whisper.unity/Plugins/Windows/ggml-cpu.dll.meta new file mode 100644 index 0000000..e69a837 --- /dev/null +++ b/Packages/com.whisper.unity/Plugins/Windows/ggml-cpu.dll.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: e13972546ac923b40bd56e0fae6fd3f7 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml-vulkan.dll b/Packages/com.whisper.unity/Plugins/Windows/ggml-vulkan.dll new file mode 100644 index 0000000..1007fad Binary files /dev/null and b/Packages/com.whisper.unity/Plugins/Windows/ggml-vulkan.dll differ diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml-vulkan.dll.meta b/Packages/com.whisper.unity/Plugins/Windows/ggml-vulkan.dll.meta new file mode 100644 index 0000000..c78c138 --- /dev/null +++ b/Packages/com.whisper.unity/Plugins/Windows/ggml-vulkan.dll.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: ad8081101c6e551429d290822a08139a +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml.dll b/Packages/com.whisper.unity/Plugins/Windows/ggml.dll new file mode 100644 index 0000000..ae32fa6 Binary files /dev/null and b/Packages/com.whisper.unity/Plugins/Windows/ggml.dll differ diff --git a/Packages/com.whisper.unity/Plugins/Windows/ggml.dll.meta b/Packages/com.whisper.unity/Plugins/Windows/ggml.dll.meta new file mode 100644 index 0000000..a01d163 --- /dev/null +++ b/Packages/com.whisper.unity/Plugins/Windows/ggml.dll.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 117857bb203f431419840f949f51f49a +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.whisper.unity/Plugins/Windows/libwhisper.dll b/Packages/com.whisper.unity/Plugins/Windows/libwhisper.dll index 6cb9a15..22d7fd5 100644 Binary files a/Packages/com.whisper.unity/Plugins/Windows/libwhisper.dll and b/Packages/com.whisper.unity/Plugins/Windows/libwhisper.dll differ diff --git a/Packages/com.whisper.unity/Plugins/Windows/libwhisper_cuda.dll b/Packages/com.whisper.unity/Plugins/Windows/libwhisper_cuda.dll deleted file mode 100644 index 3b6f091..0000000 Binary files a/Packages/com.whisper.unity/Plugins/Windows/libwhisper_cuda.dll and /dev/null differ diff --git a/Packages/com.whisper.unity/Plugins/Windows/libwhisper_cuda.dll.meta b/Packages/com.whisper.unity/Plugins/Windows/libwhisper_cuda.dll.meta deleted file mode 100644 index 1ec5931..0000000 --- a/Packages/com.whisper.unity/Plugins/Windows/libwhisper_cuda.dll.meta +++ /dev/null @@ -1,70 +0,0 @@ -fileFormatVersion: 2 -guid: 6b550e459bfa91949afff29fe9e1f62d -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 1 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Android: 1 - Exclude Editor: 0 - Exclude Linux64: 0 - Exclude OSXUniversal: 0 - Exclude Win: 0 - Exclude Win64: 0 - - first: - Android: Android - second: - enabled: 0 - settings: - CPU: ARMv7 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 1 - settings: - CPU: AnyCPU - DefaultValueInitialized: true - OS: AnyOS - - first: - Standalone: Linux64 - second: - enabled: 1 - settings: - CPU: x86_64 - - first: - Standalone: OSXUniversal - second: - enabled: 1 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 1 - settings: - CPU: x86 - - first: - Standalone: Win64 - second: - enabled: 1 - settings: - CPU: x86_64 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Packages/com.whisper.unity/Runtime/Native/WhisperNative.cs b/Packages/com.whisper.unity/Runtime/Native/WhisperNative.cs index 3c42886..e790387 100644 --- a/Packages/com.whisper.unity/Runtime/Native/WhisperNative.cs +++ b/Packages/com.whisper.unity/Runtime/Native/WhisperNative.cs @@ -13,37 +13,9 @@ public static unsafe class WhisperNative { #if (UNITY_IOS || UNITY_VISIONOS || UNITY_ANDROID) && !UNITY_EDITOR private const string LibraryName = "__Internal"; - -#elif WHISPER_CUDA -#if UNITY_EDITOR && (UNITY_EDITOR_WIN || UNITY_EDITOR_LINUX) - private const string LibraryName = "libwhisper_cuda"; -#elif !UNITY_EDITOR && (UNITY_STANDALONE_WIN || UNITY_STANDALONE_LINUX) - private const string LibraryName = "libwhisper_cuda"; -#else - private const string LibraryName = "libwhisper"; -#endif - -#elif WHISPER_METAL - -#if UNITY_EDITOR && UNITY_EDITOR_OSX - private const string LibraryName = "libwhisper_metal"; -#elif !UNITY_EDITOR && UNITY_STANDALONE_OSX - private const string LibraryName = "libwhisper_metal"; #else private const string LibraryName = "libwhisper"; #endif - -#else - private const string LibraryName = "libwhisper"; -#endif - - static WhisperNative() - { -#if !UNITY_EDITOR && UNITY_STANDALONE_OSX - var path = System.IO.Path.Combine(UnityEngine.Application.dataPath, "Plugins"); - Environment.SetEnvironmentVariable("GGML_METAL_PATH_RESOURCES",path); -#endif - } [DllImport(LibraryName)] public static extern whisper_context_ptr whisper_init_from_buffer_with_params(IntPtr buffer, diff --git a/Packages/com.whisper.unity/Runtime/Native/WhisperNativeParams.cs b/Packages/com.whisper.unity/Runtime/Native/WhisperNativeParams.cs index 54647d5..acd0102 100644 --- a/Packages/com.whisper.unity/Runtime/Native/WhisperNativeParams.cs +++ b/Packages/com.whisper.unity/Runtime/Native/WhisperNativeParams.cs @@ -35,6 +35,7 @@ enum WhisperAlignmentHeadsPreset WHISPER_AHEADS_LARGE_V1, WHISPER_AHEADS_LARGE_V2, WHISPER_AHEADS_LARGE_V3, + WHISPER_AHEADS_LARGE_V3_TURBO, }; [UnmanagedFunctionPointer(CallingConvention.StdCall)] @@ -89,7 +90,8 @@ struct WhisperNativeAheads [StructLayout(LayoutKind.Sequential)] public struct WhisperNativeContextParams { - [MarshalAs(UnmanagedType.U1)] bool use_gpu; + [MarshalAs(UnmanagedType.U1)] public bool use_gpu; + [MarshalAs(UnmanagedType.U1)] public bool flash_attn; int gpu_device; // CUDA device // [EXPERIMENTAL] Token-level timestamps with DTW @@ -136,7 +138,6 @@ public unsafe struct WhisperNativeParams // [EXPERIMENTAL] speed-up techniques // note: these can significantly reduce the quality of the output - [MarshalAs(UnmanagedType.U1)] public bool speed_up; // speed-up the audio by 2x using Phase Vocoder [MarshalAs(UnmanagedType.U1)] bool debug_mode; // enable debug_mode provides extra info (eg. Dump log_mel) public int audio_ctx; // overwrite the audio context size (0 = use default) diff --git a/Packages/com.whisper.unity/Runtime/WhisperManager.cs b/Packages/com.whisper.unity/Runtime/WhisperManager.cs index bb1bff4..732938d 100644 --- a/Packages/com.whisper.unity/Runtime/WhisperManager.cs +++ b/Packages/com.whisper.unity/Runtime/WhisperManager.cs @@ -2,6 +2,7 @@ using System.IO; using System.Threading.Tasks; using UnityEngine; +using UnityEngine.Serialization; using Whisper.Native; using Whisper.Utils; @@ -27,6 +28,15 @@ public class WhisperManager : MonoBehaviour [SerializeField] [Tooltip("Should model weights be loaded on awake?")] private bool initOnAwake = true; + + [Header("Inference")] + [Tooltip("Try to load whisper in GPU for faster inference")] + [SerializeField] + private bool useGpu; + + [Tooltip("Use the Flash Attention algorithm for faster inference")] + [SerializeField] + private bool flashAttention; [Header("Language")] [Tooltip("Output text language. Use empty or \"auto\" for auto-detection.")] @@ -76,10 +86,6 @@ public class WhisperManager : MonoBehaviour [Tooltip("[EXPERIMENTAL] Output timestamps for each token. Need enabled tokens to work.")] public bool tokensTimestamps; - [Tooltip("[EXPERIMENTAL] Speed-up the audio by 2x using Phase Vocoder. " + - "These can significantly reduce the quality of the output.")] - public bool speedUp; - [Tooltip("[EXPERIMENTAL] Overwrite the audio context size (0 = use default). " + "These can significantly reduce the quality of the output.")] public int audioCtx; @@ -181,7 +187,9 @@ public async Task InitModel() var path = isModelPathInStreamingAssets ? Path.Combine(Application.streamingAssetsPath, modelPath) : modelPath; - _whisper = await WhisperWrapper.InitFromFileAsync(path); + + var context = CreateContextParams(); + _whisper = await WhisperWrapper.InitFromFileAsync(path, context); _params = WhisperParams.GetDefaultParams(strategy); UpdateParams(); @@ -195,7 +203,7 @@ public async Task InitModel() IsLoading = false; } - + /// /// Checks if currently loaded whisper model supports multilingual transcription. /// @@ -294,12 +302,19 @@ private void UpdateParams() _params.Translate = translateToEnglish; _params.NoContext = noContext; _params.SingleSegment = singleSegment; - _params.SpeedUp = speedUp; _params.AudioCtx = audioCtx; _params.EnableTokens = enableTokens; _params.TokenTimestamps = tokensTimestamps; _params.InitialPrompt = initialPrompt; } + + private WhisperContextParams CreateContextParams() + { + var context = WhisperContextParams.GetDefaultParams(); + context.UseGpu = useGpu; + context.FlashAttn = flashAttention; + return context; + } private async Task CheckIfLoaded() { diff --git a/Packages/com.whisper.unity/Runtime/WhisperParams.cs b/Packages/com.whisper.unity/Runtime/WhisperParams.cs index c32049b..f6d8fa9 100644 --- a/Packages/com.whisper.unity/Runtime/WhisperParams.cs +++ b/Packages/com.whisper.unity/Runtime/WhisperParams.cs @@ -24,6 +24,24 @@ private WhisperContextParams(WhisperNativeContextParams param) _param = param; } + /// + /// Try to load whisper in GPU for faster inference. + /// + public bool UseGpu + { + get => _param.use_gpu; + set => _param.use_gpu = value; + } + + /// + /// Use the Flash Attention algorithm for faster inference. + /// + public bool FlashAttn + { + get => _param.flash_attn; + set => _param.flash_attn = value; + } + /// /// Create a new default Whisper Context parameters. /// @@ -280,17 +298,7 @@ public bool TokenTimestamps #endregion #region Speed Up - - /// - /// [EXPERIMENTAL] Speed-up the audio by 2x using Phase Vocoder. - /// These can significantly reduce the quality of the output. - /// - public bool SpeedUp - { - get => _param.speed_up; - set => _param.speed_up = value; - } - + /// /// [EXPERIMENTAL] Overwrite the audio context size (0 = use default). /// These can significantly reduce the quality of the output. diff --git a/build_cpp.bat b/build_cpp.bat index 942d3f3..51e0e02 100644 --- a/build_cpp.bat +++ b/build_cpp.bat @@ -1,35 +1,14 @@ @echo off set unity_path=%CD% -set target=%1 -set whisper_path=%2 +set whisper_path=%1 -IF "%target%"=="cpu" goto cpu -IF "%target%"=="cuda" goto cuda -IF "%target%"=="all" goto cpu +echo Starting building target... +cd %whisper_path% +rmdir .\build /s /q +cmake -S . -B ./build -A x64 -DGGML_VULKAN=ON -DCMAKE_BUILD_TYPE=Release -DWHISPER_BUILD_TESTS=OFF -DWHISPER_BUILD_EXAMPLES=OFF -echo Unknown target %target%, should "cpu", "cuda" or "all" -goto commonexit - -:cpu - echo Starting building cpu target... - cd %whisper_path% - rmdir .\build /s /q - cmake -S . -B ./build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_BUILD_TESTS=OFF -DWHISPER_BUILD_EXAMPLES=OFF - - cd ./build - msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 - xcopy /y /q .\bin\Release\whisper.dll %unity_path%\Packages\com.whisper.unity\Plugins\Windows\libwhisper.dll* - - IF NOT "%target%"=="all" goto commonexit -:cuda - echo Starting building CUDA target... - cd %whisper_path% - rmdir .\build /s /q - cmake -S . -B ./build -A x64 -DWHISPER_CUBLAS=ON -DCMAKE_BUILD_TYPE=Release -DWHISPER_BUILD_TESTS=OFF -DWHISPER_BUILD_EXAMPLES=OFF - - cd ./build - msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 - xcopy /y /q .\bin\Release\whisper.dll %unity_path%\Packages\com.whisper.unity\Plugins\Windows\libwhisper_cuda.dll* - -:commonexit \ No newline at end of file +cd ./build +msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 +xcopy /y /q .\bin\Release\ggml.dll %unity_path%\Packages\com.whisper.unity\Plugins\Windows\ggml.dll* +xcopy /y /q .\bin\Release\whisper.dll %unity_path%\Packages\com.whisper.unity\Plugins\Windows\libwhisper.dll* \ No newline at end of file