Merge pull request #164 from deiteris/fixes
Try force GPU clocks for NVIDIA
deiteris authored Aug 4, 2024
2 parents 4b3c5d7 + 68131e0 commit 8bcfcdb
Showing 16 changed files with 184 additions and 45 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/build-executable.yml
@@ -79,6 +79,10 @@ jobs:
- name: Building executable
run: pyinstaller --clean -y --dist ./dist --workpath /tmp MMVCServerSIO.spec
working-directory: ./server
- name: Copy utils
run: cp ./server/{force_gpu_clocks.bat,reset_gpu_clocks.bat} ./server/dist/
shell: bash
if: matrix.os == 'windows-latest' && matrix.backend == 'cuda'
- name: Pack artifact
shell: bash
run: |
4 changes: 4 additions & 0 deletions .github/workflows/make-release.yml
@@ -118,6 +118,10 @@ jobs:
- name: Building executable
run: pyinstaller --clean -y --dist ./dist --workpath /tmp MMVCServerSIO.spec
working-directory: ./server
- name: Copy utils
run: cp ./server/{force_gpu_clocks.bat,reset_gpu_clocks.bat} ./server/dist/
shell: bash
if: matrix.os == 'windows-latest' && matrix.backend == 'cuda'
- name: Pack artifact
shell: bash
run: |
2 changes: 1 addition & 1 deletion client/demo/dist/index.js

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions client/demo/src/components/demo/906_AdvancedSettingDialog.tsx
@@ -125,6 +125,34 @@ export const AdvancedSettingDialog = () => {
</div>
);

const onDisableJitChanged = (val: number) => {
return serverSetting.updateServerSettings({
...serverSetting.serverSetting,
disableJit: val,
});
};
const disableJitRow = (
<div className="advanced-setting-container-row">
<div className="advanced-setting-container-row-title"><a className="hint-text" data-tooltip-id="hint" data-tooltip-content="A debugging option that disables Just-in-Time (JIT) compilation for PyTorch models. Disabling this option will reduce model loading time, but may decrease performance. Has no effect on DirectML devices.">Disable JIT compilation</a></div>
<div className="advanced-setting-container-row-field">
<select
value={serverSetting.serverSetting.disableJit}
onChange={async (e) => {
// TODO: Need to fix CSS to show waiting dialog over all page contents. Lazy :\
//guiState.stateControls.showWaitingCheckbox.updateState(true);
onDisableJitChanged(Number(e.target.value));
guiState.setVoiceChangerSettingsChanged(false);
//guiState.stateControls.showWaitingCheckbox.updateState(false);
}}
disabled={guiState.isConverting}
>
<option value="0">off</option>
<option value="1">on</option>
</select>
</div>
</div>
);

const protectRow = (
<div className="advanced-setting-container-row">
<div className="advanced-setting-container-row-title"><a className="hint-text" data-tooltip-id="hint" data-tooltip-content="Voiceless consonants protection. Has no effect when set to 0.5 or when 'Index' is inactive.">Protect</a></div>
@@ -169,6 +197,7 @@ export const AdvancedSettingDialog = () => {
{crossfaceRow}
{silenceFrontRow}
{forceFp32ModeRow}
{disableJitRow}
{protectRow}
{skipPassThroughConfirmationRow}
</div>
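The new "Disable JIT compilation" option above only toggles a server setting; the DeviceManager change that actually applies it is not rendered in this diff. As a rough, hypothetical sketch of the trade-off named in the tooltip (skipping torch.jit compilation at model load time), assuming the server wraps its PyTorch models with torch.jit:

import torch

def prepare_model(model: torch.nn.Module, device: torch.device, disable_jit: bool) -> torch.nn.Module:
    # Illustrative helper, not part of this PR. Skipping scripting avoids the
    # compile step at load time (faster startup) but may give up some
    # steady-state inference speed.
    model = model.eval().to(device)
    if disable_jit:
        return model
    return torch.jit.script(model)

In the diff itself, changing disableJit simply calls self.initialize(True) in RVCr2.update_settings, so the model is reloaded with the new setting.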
3 changes: 3 additions & 0 deletions client/lib/src/const.ts
@@ -96,6 +96,7 @@ export const ServerSettingKey = {
indexRatio: "indexRatio",
protect: "protect",
forceFp32: "forceFp32",
disableJit: "disableJit",
modelSamplingRate: "modelSamplingRate",
silenceFront: "silenceFront",
modelSlotIndex: "modelSlotIndex",
@@ -141,6 +142,7 @@ export type VoiceChangerServerSetting = {
protect: number; // RVC
silenceFront: number; // 0:off, 1:on
forceFp32: number; // 0:off, 1:on
disableJit: number; // 0:off, 1:on
modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
modelSlotIndex: number;

@@ -264,6 +266,7 @@ export const DefaultServerSetting: ServerInfo = {
modelSamplingRate: 48000,
silenceFront: 1,
forceFp32: 0,
disableJit: 0,
modelSlotIndex: 0,
sampleModels: [],
gpus: [],
2 changes: 1 addition & 1 deletion server/app.py
@@ -26,4 +26,4 @@

# NOTE: Bundled executable overrides excepthook to pause on exception during startup.
# Here we revert to original excepthook once all initialization is done.
sys.excepthook = sys.__excepthook__
sys.excepthook = sys.__excepthook__
1 change: 1 addition & 0 deletions server/client.py
@@ -1,4 +1,5 @@
import sys
from utils.check_user_admin import is_user_admin

def pause_excepthook(type, value, traceback, oldhook=sys.excepthook):
oldhook(type, value, traceback)
23 changes: 23 additions & 0 deletions server/force_gpu_clocks.bat
@@ -0,0 +1,23 @@
@echo off
nvidia-smi --query-supported-clocks=graphics,memory --format=csv,noheader,nounits > temp.txt

REM Read the first line from the temporary file
set /p firstLine=<temp.txt

REM Extract the first comma-separated string
for /f "tokens=1,2 delims=, " %%a in ("%firstLine%") do (
set gpu_clock=%%a
set memory_clock=%%b
)

REM Print the extracted string
echo GPU Clock: %gpu_clock%
echo Memory Clock: %memory_clock%

REM Clean up the temporary file
del temp.txt

:force_clocks
nvidia-smi --lock-gpu-clocks=%gpu_clock%
nvidia-smi --lock-memory-clocks=%memory_clock%
pause
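This script is meant to be run from an elevated prompt: nvidia-smi --lock-gpu-clocks and --lock-memory-clocks typically require administrator rights, which is why an is_user_admin check is introduced further down in this PR. reset_gpu_clocks.bat below restores the driver defaults, and the current clocks can be verified with nvidia-smi --query-gpu=clocks.current.graphics,clocks.current.memory --format=csv.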
1 change: 1 addition & 0 deletions server/main.py
@@ -22,6 +22,7 @@
from mods.ssl import create_self_signed_cert
from webbrowser import open_new_tab
from settings import ServerSettings
from utils.check_user_admin import is_user_admin

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
4 changes: 4 additions & 0 deletions server/reset_gpu_clocks.bat
@@ -0,0 +1,4 @@
@echo off
nvidia-smi --reset-gpu-clocks
nvidia-smi --reset-memory-clocks
pause
24 changes: 24 additions & 0 deletions server/utils/check_user_admin.py
@@ -0,0 +1,24 @@
# From https://stackoverflow.com/a/43172958
import os
import ctypes

class AdminStateUnknownError(Exception):
"""Cannot determine whether the user is an admin."""
pass


def is_user_admin():
# type: () -> bool
"""Return True if user has admin privileges.
Raises:
AdminStateUnknownError if user privileges cannot be determined.
"""
try:
return os.getuid() == 0
except AttributeError:
pass
try:
return ctypes.windll.shell32.IsUserAnAdmin() == 1
except AttributeError:
raise AdminStateUnknownError
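server/client.py and server/main.py both gain an import of is_user_admin in this PR, but the call sites are collapsed above. A plausible sketch of how the helper could be used to warn about the GPU clock-locking scripts (the function name and wording here are illustrative, not taken from the PR):

import logging
from utils.check_user_admin import AdminStateUnknownError, is_user_admin

logger = logging.getLogger(__name__)

def warn_if_unprivileged() -> None:
    # nvidia-smi clock locking generally needs administrator rights, so flag
    # a non-elevated server process early.
    try:
        if not is_user_admin():
            logger.warning("Not running as administrator; forcing GPU clocks may fail.")
    except AdminStateUnknownError:
        logger.warning("Could not determine whether the process has administrator rights.")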
34 changes: 28 additions & 6 deletions server/voice_changer/RVC/RVCr2.py
@@ -43,7 +43,7 @@ def __init__(self, params: ServerSettings, slotInfo: RVCModelSlot, settings: Voi
self.pitchf_buffer: torch.Tensor | None = None
self.return_length = 0
self.skip_head = 0
self.silence_front = 0.0
self.silence_front = 0
self.slotInfo = slotInfo

# 処理は16Kで実施(Pitch, embed, (infer))
Expand Down Expand Up @@ -120,14 +120,17 @@ def setSamplingRate(self, input_sample_rate, output_sample_rate):
dtype=torch.float32
).to(self.device_manager.device)

def change_pitch_extractor(self):
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector, self.settings.gpu
)
self.pipeline.setPitchExtractor(pitchExtractor)

def update_settings(self, key: str, val, old_val):
if key in {"gpu", "forceFp32"}:
if key in {"gpu", "forceFp32", "disableJit"}:
self.initialize(True)
elif key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector, self.settings.gpu
)
self.pipeline.setPitchExtractor(pitchExtractor)
self.change_pitch_extractor()
elif key == 'silentThreshold':
# Convert dB to RMS
self.inputSensitivity = 10 ** (self.settings.silentThreshold / 20)
@@ -239,6 +242,25 @@ def inference(self, audio_in: AudioInOutFloat):
vol = max(vol_t.item(), 0)

if vol < self.inputSensitivity:
# Busy wait to keep the power manager happy and the clocks stable. Running the pipeline on demand seems to lag when
# the delay between voice changer activations is too long.
# https://forums.developer.nvidia.com/t/why-kernel-calculate-speed-got-slower-after-waiting-for-a-while/221059/9
self.pipeline.exec(
self.settings.dstId,
self.convert_buffer,
self.pitch_buffer,
self.pitchf_buffer,
self.settings.tran,
self.settings.formantShift,
self.settings.indexRatio,
self.convert_feature_size_16k,
self.silence_front,
self.slotInfo.embOutputLayer,
self.slotInfo.useFinalProj,
self.skip_head,
self.return_length,
self.settings.protect,
)
return None, vol

circular_write(audio_in_16k, self.convert_buffer)
4 changes: 3 additions & 1 deletion server/voice_changer/VoiceChangerManager.py
@@ -65,7 +65,7 @@ def __init__(self, params: ServerSettings):

self.device_manager = DeviceManager.get_instance()
self.devices = self.device_manager.list_devices()
self.device_manager.initialize(self.settings.gpu, self.settings.forceFp32)
self.device_manager.initialize(self.settings.gpu, self.settings.forceFp32, self.settings.disableJit)

self.serverDevice = ServerDevice(self, self.settings)

@@ -185,6 +185,8 @@ def update_settings(self, key: str, val: Any):
self.device_manager.set_device(val)
elif key == 'forceFp32':
self.device_manager.set_force_fp32(val)
elif key == 'disableJit':
self.device_manager.set_disable_jit(val)
# FIXME: This is a very counter-intuitive handling of audio modes...
# Map "serverAudioSampleRate" to "inputSampleRate" and "outputSampleRate"
# since server audio can have its sample rate configured.
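DeviceManager.set_disable_jit is called here, but the DeviceManager side of the change is not rendered in this diff. A minimal sketch of what such a setter plausibly does, assuming the flag is only recorded and takes effect on the next re-initialization (the method body below is hypothetical):

def set_disable_jit(self, disable_jit: int):
    # Record the flag; callers such as RVCr2.update_settings re-initialize the
    # pipeline after changing this setting, so no further action is needed here.
    self.disable_jit = bool(disable_jit)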
9 changes: 9 additions & 0 deletions server/voice_changer/VoiceChangerSettings.py
@@ -78,6 +78,7 @@ def version(self):
_extraConvertSize: float = 0.5
_gpu: int = -1
_forceFp32: int = 0
_disableJit: int = 0

_passThrough: bool = False
_recordIO: int = 0
@@ -162,6 +163,14 @@ def forceFp32(self):
def forceFp32(self, enable: str):
self._forceFp32 = int(enable)

@property
def disableJit(self):
return self._disableJit

@disableJit.setter
def disableJit(self, enable: str):
self._disableJit = int(enable)

# Server Audio settings
_serverAudioStated: int = 0
_enableServerAudio: int = 0
65 changes: 34 additions & 31 deletions server/voice_changer/VoiceChangerV2.py
@@ -126,43 +126,46 @@ def get_processing_sampling_rate(self):
return 0
return self.voiceChangerModel.get_processing_sampling_rate()

def process_audio(self, audio_in: AudioInOutFloat) -> tuple[AudioInOutFloat, float]:
block_size = audio_in.shape[0]

audio, vol = self.voiceChangerModel.inference(audio_in)

if audio is None:
# In case there's an actual silence - send full block with zeros
return np.zeros(block_size, dtype=np.float32), vol

# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
conv_input = audio[
None, None, : self.crossfade_frame + self.sola_search_frame
]
cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
cor_den = torch.sqrt(
F.conv1d(
conv_input ** 2,
torch.ones(1, 1, self.crossfade_frame, device=self.device_manager.device),
)
+ 1e-8
)
sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])

audio = audio[sola_offset:]
audio[: self.crossfade_frame] *= self.fade_in_window
audio[: self.crossfade_frame] += (
self.sola_buffer * self.fade_out_window
)

self.sola_buffer[:] = audio[block_size : block_size + self.crossfade_frame]

return audio[: block_size].detach().cpu().numpy(), vol

@torch.no_grad()
def on_request(self, audio_in: AudioInOutFloat) -> tuple[AudioInOutFloat, list[Union[int, float]]]:
if self.voiceChangerModel is None:
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")

with Timer2("main-process", True) as t:
block_size = audio_in.shape[0]

audio, vol = self.voiceChangerModel.inference(audio_in)

if audio is None:
# In case there's an actual silence - send full block with zeros
return np.zeros(block_size, dtype=np.float32), vol, [0, 0, 0]

# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI
conv_input = audio[
None, None, : self.crossfade_frame + self.sola_search_frame
]
cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
cor_den = torch.sqrt(
F.conv1d(
conv_input ** 2,
torch.ones(1, 1, self.crossfade_frame, device=self.device_manager.device),
)
+ 1e-8
)
sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])

audio = audio[sola_offset:]
audio[: self.crossfade_frame] *= self.fade_in_window
audio[: self.crossfade_frame] += (
self.sola_buffer * self.fade_out_window
)

self.sola_buffer[:] = audio[block_size : block_size + self.crossfade_frame]

result: np.ndarray = audio[: block_size].detach().cpu().numpy()
result, vol = self.process_audio(audio_in)

mainprocess_time = t.secs

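The SOLA block that moves into process_audio uses two F.conv1d calls to compute a normalized cross-correlation: the head of the new chunk is aligned against sola_buffer (the tail of the previous output) before crossfading. A plain NumPy restatement of the offset search, for illustration only:

import numpy as np

def find_sola_offset(audio: np.ndarray, sola_buffer: np.ndarray, search_frames: int) -> int:
    # Slide the previous tail over the start of the new chunk and pick the
    # shift with the highest normalized correlation (mirrors cor_nom / cor_den).
    crossfade = len(sola_buffer)
    best_offset, best_score = 0, float("-inf")
    for t in range(search_frames + 1):
        window = audio[t:t + crossfade]
        score = float(np.dot(sola_buffer, window)) / float(np.sqrt(np.dot(window, window) + 1e-8))
        if score > best_score:
            best_offset, best_score = t, score
    return best_offset

The chosen offset is then used exactly as in the diff: audio[sola_offset:] is faded in over crossfade_frame samples while the stored sola_buffer fades out.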