Merge pull request #164 from deiteris/fixes
Try force GPU clocks for NVIDIA
deiteris authored Aug 4, 2024
2 parents 4b3c5d7 + 68131e0 commit 8bcfcdb
Showing 16 changed files with 184 additions and 45 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/build-executable.yml
@@ -79,6 +79,10 @@ jobs:
- name: Building executable
run: pyinstaller --clean -y --dist ./dist --workpath /tmp MMVCServerSIO.spec
working-directory: ./server
- name: Copy utils
run: cp ./server/{force_gpu_clocks.bat,reset_gpu_clocks.bat} ./server/dist/
shell: bash
if: matrix.os == 'windows-latest' && matrix.backend == 'cuda'
- name: Pack artifact
shell: bash
run: |
4 changes: 4 additions & 0 deletions .github/workflows/make-release.yml
@@ -118,6 +118,10 @@ jobs:
- name: Building executable
run: pyinstaller --clean -y --dist ./dist --workpath /tmp MMVCServerSIO.spec
working-directory: ./server
- name: Copy utils
run: cp ./server/{force_gpu_clocks.bat,reset_gpu_clocks.bat} ./server/dist/
shell: bash
if: matrix.os == 'windows-latest' && matrix.backend == 'cuda'
- name: Pack artifact
shell: bash
run: |
2 changes: 1 addition & 1 deletion client/demo/dist/index.js

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions client/demo/src/components/demo/906_AdvancedSettingDialog.tsx
@@ -125,6 +125,34 @@ export const AdvancedSettingDialog = () => {
</div>
);

const onDisableJitChanged = (val: number) => {
return serverSetting.updateServerSettings({
...serverSetting.serverSetting,
disableJit: val,
});
};
const disableJitRow = (
<div className="advanced-setting-container-row">
<div className="advanced-setting-container-row-title"><a className="hint-text" data-tooltip-id="hint" data-tooltip-content="A debugging option that disables Just-in-Time (JIT) compilation for PyTorch models. Disabling this option will reduce model loading time, but may decrease performance. Has no effect on DirectML devices.">Disable JIT compilation</a></div>
<div className="advanced-setting-container-row-field">
<select
value={serverSetting.serverSetting.disableJit}
onChange={async (e) => {
// TODO: Need to fix CSS to show waiting dialog over all page contents. Lazy :\
//guiState.stateControls.showWaitingCheckbox.updateState(true);
onDisableJitChanged(Number(e.target.value));
guiState.setVoiceChangerSettingsChanged(false);
//guiState.stateControls.showWaitingCheckbox.updateState(false);
}}
disabled={guiState.isConverting}
>
<option value="0">off</option>
<option value="1">on</option>
</select>
</div>
</div>
);

const protectRow = (
<div className="advanced-setting-container-row">
<div className="advanced-setting-container-row-title"><a className="hint-text" data-tooltip-id="hint" data-tooltip-content="Voiceless consonants protection. Has no effect when set to 0.5 or when 'Index' is inactive.">Protect</a></div>
@@ -169,6 +197,7 @@ export const AdvancedSettingDialog = () => {
{crossfaceRow}
{silenceFrontRow}
{forceFp32ModeRow}
{disableJitRow}
{protectRow}
{skipPassThroughConfirmationRow}
</div>
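The new "Disable JIT compilation" option above only toggles a server setting; the DeviceManager change that actually applies it is not rendered in this diff. As a rough, hypothetical sketch of the trade-off named in the tooltip (skipping torch.jit compilation at model load time), assuming the server wraps its PyTorch models with torch.jit:

import torch

def prepare_model(model: torch.nn.Module, device: torch.device, disable_jit: bool) -> torch.nn.Module:
    # Illustrative helper, not part of this PR. Skipping scripting avoids the
    # compile step at load time (faster startup) but may give up some
    # steady-state inference speed.
    model = model.eval().to(device)
    if disable_jit:
        return model
    return torch.jit.script(model)

In the diff itself, changing disableJit simply calls self.initialize(True) in RVCr2.update_settings, so the model is reloaded with the new setting.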
3 changes: 3 additions & 0 deletions client/lib/src/const.ts
@@ -96,6 +96,7 @@ export const ServerSettingKey = {
indexRatio: "indexRatio",
protect: "protect",
forceFp32: "forceFp32",
disableJit: "disableJit",
modelSamplingRate: "modelSamplingRate",
silenceFront: "silenceFront",
modelSlotIndex: "modelSlotIndex",
@@ -141,6 +142,7 @@ export type VoiceChangerServerSetting = {
protect: number; // RVC
silenceFront: number; // 0:off, 1:on
forceFp32: number; // 0:off, 1:on
disableJit: number; // 0:off, 1:on
modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
modelSlotIndex: number;

@@ -264,6 +266,7 @@ export const DefaultServerSetting: ServerInfo = {
modelSamplingRate: 48000,
silenceFront: 1,
forceFp32: 0,
disableJit: 0,
modelSlotIndex: 0,
sampleModels: [],
gpus: [],
2 changes: 1 addition & 1 deletion server/app.py
@@ -26,4 +26,4 @@

# NOTE: Bundled executable overrides excepthook to pause on exception during startup.
# Here we revert to original excepthook once all initialization is done.
sys.excepthook = sys.__excepthook__
sys.excepthook = sys.__excepthook__
1 change: 1 addition & 0 deletions server/client.py
@@ -1,4 +1,5 @@
import sys
from utils.check_user_admin import is_user_admin

def pause_excepthook(type, value, traceback, oldhook=sys.excepthook):
oldhook(type, value, traceback)
23 changes: 23 additions & 0 deletions server/force_gpu_clocks.bat
@@ -0,0 +1,23 @@
@echo off
nvidia-smi --query-supported-clocks=graphics,memory --format=csv,noheader,nounits > temp.txt

REM Read the first line from the temporary file
set /p firstLine=<temp.txt

REM Extract the first comma-separated string
for /f "tokens=1,2 delims=, " %%a in ("%firstLine%") do (
set gpu_clock=%%a
set memory_clock=%%b
)

REM Print the extracted string
echo GPU Clock: %gpu_clock%
echo Memory Clock: %memory_clock%

REM Clean up the temporary file
del temp.txt

:force_clocks
nvidia-smi --lock-gpu-clocks=%gpu_clock%
nvidia-smi --lock-memory-clocks=%memory_clock%
pause
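This script is meant to be run from an elevated prompt: nvidia-smi --lock-gpu-clocks and --lock-memory-clocks typically require administrator rights, which is why an is_user_admin check is introduced further down in this PR. reset_gpu_clocks.bat below restores the driver defaults, and the current clocks can be verified with nvidia-smi --query-gpu=clocks.current.graphics,clocks.current.memory --format=csv.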
1 change: 1 addition & 0 deletions server/main.py
@@ -22,6 +22,7 @@
from mods.ssl import create_self_signed_cert
from webbrowser import open_new_tab
from settings import ServerSettings
from utils.check_user_admin import is_user_admin

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
4 changes: 4 additions & 0 deletions server/reset_gpu_clocks.bat
@@ -0,0 +1,4 @@
@echo off
nvidia-smi --reset-gpu-clocks
nvidia-smi --reset-memory-clocks
pause
24 changes: 24 additions & 0 deletions server/utils/check_user_admin.py
@@ -0,0 +1,24 @@
# From https://stackoverflow.com/a/43172958
import os
import ctypes

class AdminStateUnknownError(Exception):
"""Cannot determine whether the user is an admin."""
pass


def is_user_admin():
# type: () -> bool
"""Return True if user has admin privileges.
Raises:
AdminStateUnknownError if user privileges cannot be determined.
"""
try:
return os.getuid() == 0
except AttributeError:
pass
try:
return ctypes.windll.shell32.IsUserAnAdmin() == 1
except AttributeError:
raise AdminStateUnknownError
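server/client.py and server/main.py both gain an import of is_user_admin in this PR, but the call sites are collapsed above. A plausible sketch of how the helper could be used to warn about the GPU clock-locking scripts (the function name and wording here are illustrative, not taken from the PR):

import logging
from utils.check_user_admin import AdminStateUnknownError, is_user_admin

logger = logging.getLogger(__name__)

def warn_if_unprivileged() -> None:
    # nvidia-smi clock locking generally needs administrator rights, so flag
    # a non-elevated server process early.
    try:
        if not is_user_admin():
            logger.warning("Not running as administrator; forcing GPU clocks may fail.")
    except AdminStateUnknownError:
        logger.warning("Could not determine whether the process has administrator rights.")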
34 changes: 28 additions & 6 deletions server/voice_changer/RVC/RVCr2.py
@@ -43,7 +43,7 @@ def __init__(self, params: ServerSettings, slotInfo: RVCModelSlot, settings: Voi
self.pitchf_buffer: torch.Tensor | None = None
self.return_length = 0
self.skip_head = 0
self.silence_front = 0.0
self.silence_front = 0
self.slotInfo = slotInfo

# 処理は16Kで実施(Pitch, embed, (infer))
Expand Down Expand Up @@ -120,14 +120,17 @@ def setSamplingRate(self, input_sample_rate, output_sample_rate):
dtype=torch.float32
).to(self.device_manager.device)

def change_pitch_extractor(self):
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector, self.settings.gpu
)
self.pipeline.setPitchExtractor(pitchExtractor)

def update_settings(self, key: str, val, old_val):
if key in {"gpu", "forceFp32"}:
if key in {"gpu", "forceFp32", "disableJit"}:
self.initialize(True)
elif key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector, self.settings.gpu
)
self.pipeline.setPitchExtractor(pitchExtractor)
self.change_pitch_extractor()
elif key == 'silentThreshold':
# Convert dB to RMS
self.inputSensitivity = 10 ** (self.settings.silentThreshold / 20)
@@ -239,6 +242,25 @@ def inference(self, audio_in: AudioInOutFloat):
vol = max(vol_t.item(), 0)

if vol < self.inputSensitivity:
# Busy wait to keep the power manager happy and the clocks stable. Running the pipeline on demand seems to lag when
# the delay between voice changer activations is too long.
# https://forums.developer.nvidia.com/t/why-kernel-calculate-speed-got-slower-after-waiting-for-a-while/221059/9
self.pipeline.exec(
self.settings.dstId,
self.convert_buffer,
self.pitch_buffer,
self.pitchf_buffer,
self.settings.tran,
self.settings.formantShift,
self.settings.indexRatio,
self.convert_feature_size_16k,
self.silence_front,
self.slotInfo.embOutputLayer,
self.slotInfo.useFinalProj,
self.skip_head,
self.return_length,
self.settings.protect,
)
return None, vol

circular_write(audio_in_16k, self.convert_buffer)
4 changes: 3 additions & 1 deletion server/voice_changer/VoiceChangerManager.py
@@ -65,7 +65,7 @@ def __init__(self, params: ServerSettings):

self.device_manager = DeviceManager.get_instance()
self.devices = self.device_manager.list_devices()
self.device_manager.initialize(self.settings.gpu, self.settings.forceFp32)
self.device_manager.initialize(self.settings.gpu, self.settings.forceFp32, self.settings.disableJit)

self.serverDevice = ServerDevice(self, self.settings)

@@ -185,6 +185,8 @@ def update_settings(self, key: str, val: Any):
self.device_manager.set_device(val)
elif key == 'forceFp32':
self.device_manager.set_force_fp32(val)
elif key == 'disableJit':
self.device_manager.set_disable_jit(val)
# FIXME: This is a very counter-intuitive handling of audio modes...
# Map "serverAudioSampleRate" to "inputSampleRate" and "outputSampleRate"
# since server audio can have its sample rate configured.
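DeviceManager.set_disable_jit is called here, but the DeviceManager side of the change is not rendered in this diff. A minimal sketch of what such a setter plausibly does, assuming the flag is only recorded and takes effect on the next re-initialization (the method body below is hypothetical):

def set_disable_jit(self, disable_jit: int):
    # Record the flag; callers such as RVCr2.update_settings re-initialize the
    # pipeline after changing this setting, so no further action is needed here.
    self.disable_jit = bool(disable_jit)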
9 changes: 9 additions & 0 deletions server/voice_changer/VoiceChangerSettings.py
@@ -78,6 +78,7 @@ def version(self):
_extraConvertSize: float = 0.5
_gpu: int = -1
_forceFp32: int = 0
_disableJit: int = 0

_passThrough: bool = False
_recordIO: int = 0
@@ -162,6 +163,14 @@ def forceFp32(self):
def forceFp32(self, enable: str):
self._forceFp32 = int(enable)

@property
def disableJit(self):
return self._disableJit

@disableJit.setter
def disableJit(self, enable: str):
self._disableJit = int(enable)

# Server Audio settings
_serverAudioStated: int = 0
_enableServerAudio: int = 0
65 changes: 34 additions & 31 deletions server/voice_changer/VoiceChangerV2.py
@@ -126,43 +126,46 @@ def get_processing_sampling_rate(self):
return 0
return self.voiceChangerModel.get_processing_sampling_rate()

def process_audio(self, audio_in: AudioInOutFloat) -> tuple[AudioInOutFloat, float]:
block_size = audio_in.shape[0]

audio, vol = self.voiceChangerModel.inference(audio_in)

if audio is None:
# In case there's an actual silence - send full block with zeros
return np.zeros(block_size, dtype=np.float32), vol

# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
conv_input = audio[
None, None, : self.crossfade_frame + self.sola_search_frame
]
cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
cor_den = torch.sqrt(
F.conv1d(
conv_input ** 2,
torch.ones(1, 1, self.crossfade_frame, device=self.device_manager.device),
)
+ 1e-8
)
sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])

audio = audio[sola_offset:]
audio[: self.crossfade_frame] *= self.fade_in_window
audio[: self.crossfade_frame] += (
self.sola_buffer * self.fade_out_window
)

self.sola_buffer[:] = audio[block_size : block_size + self.crossfade_frame]

return audio[: block_size].detach().cpu().numpy(), vol

@torch.no_grad()
def on_request(self, audio_in: AudioInOutFloat) -> tuple[AudioInOutFloat, list[Union[int, float]]]:
if self.voiceChangerModel is None:
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")

with Timer2("main-process", True) as t:
block_size = audio_in.shape[0]

audio, vol = self.voiceChangerModel.inference(audio_in)

if audio is None:
# In case there's an actual silence - send full block with zeros
return np.zeros(block_size, dtype=np.float32), vol, [0, 0, 0]

# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
conv_input = audio[
None, None, : self.crossfade_frame + self.sola_search_frame
]
cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
cor_den = torch.sqrt(
F.conv1d(
conv_input ** 2,
torch.ones(1, 1, self.crossfade_frame, device=self.device_manager.device),
)
+ 1e-8
)
sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])

audio = audio[sola_offset:]
audio[: self.crossfade_frame] *= self.fade_in_window
audio[: self.crossfade_frame] += (
self.sola_buffer * self.fade_out_window
)

self.sola_buffer[:] = audio[block_size : block_size + self.crossfade_frame]

result: np.ndarray = audio[: block_size].detach().cpu().numpy()
result, vol = self.process_audio(audio_in)

mainprocess_time = t.secs

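The SOLA block that moves into process_audio uses two F.conv1d calls to compute a normalized cross-correlation: the head of the new chunk is aligned against sola_buffer (the tail of the previous output) before crossfading. A plain NumPy restatement of the offset search, for illustration only:

import numpy as np

def find_sola_offset(audio: np.ndarray, sola_buffer: np.ndarray, search_frames: int) -> int:
    # Slide the previous tail over the start of the new chunk and pick the
    # shift with the highest normalized correlation (mirrors cor_nom / cor_den).
    crossfade = len(sola_buffer)
    best_offset, best_score = 0, float("-inf")
    for t in range(search_frames + 1):
        window = audio[t:t + crossfade]
        score = float(np.dot(sola_buffer, window)) / float(np.sqrt(np.dot(window, window) + 1e-8))
        if score > best_score:
            best_offset, best_score = t, score
    return best_offset

The chosen offset is then used exactly as in the diff: audio[sola_offset:] is faded in over crossfade_frame samples while the stored sola_buffer fades out.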