202 1.83 KB 46
modified get_pitch and auto_tune methods in extract.py v2
By Guest. Created: 2023-07-08 22:42:07
Expiry: Never
def get_pitch(self, wav_path, legacy=True):
    """Return the pair of pitch tracks produced for ``wav_path``.

    Delegates to ``self._torchcrepe_f0`` and hands back its two results as a
    tuple, in the same order. Judging by the original local names, the first
    track presumably keeps zero entries and the second omits them -- confirm
    against ``_torchcrepe_f0``.

    Args:
        wav_path: Forwarded unchanged to ``self._torchcrepe_f0``.
        legacy: Accepted for call compatibility; not used by this method.

    Returns:
        A 2-tuple ``(first_track, second_track)``.
    """
    f0_tracks = self._torchcrepe_f0(wav_path)
    # Unpack explicitly so anything other than exactly two results still
    # raises here, and so the caller always receives a tuple.
    f0_with_zeros, f0_without_zeros = f0_tracks
    return (f0_with_zeros, f0_without_zeros)
def auto_tune(self, audio_np, audio_torch, f0s_wo_silence):
    """Re-pitch ``audio_np`` towards the target contour ``f0s_wo_silence``.

    Steps:
      1. Estimate the pitch of ``audio_torch`` with ``torchcrepe.predict``
         (22050 Hz, hop 256, 50-800 Hz, "full" model, Viterbi decoding).
      2. Scale the target contour by the power of two (1/8 .. 8) closest to
         ``mean(estimated pitch) / mean(target pitch)``.
      3. Pad with zeros / truncate the target so it has as many frames as
         the estimated contour.
      4. Re-synthesise with ``psola.vocode`` and convert to int16.

    Args:
        audio_np: Samples passed to ``psola.vocode`` at 22050 Hz.
        audio_torch: Tensor passed to ``torchcrepe.predict``. It is cast
            through int16 first, so it presumably holds int16-range sample
            values rather than floats in [-1, 1] -- TODO confirm with caller.
        f0s_wo_silence: Target pitch values; not modified by this method.

    Returns:
        The re-pitched audio as a NumPy ``int16`` array.
    """
    sample_rate = 22050
    device = "cuda" if torch.cuda.is_available() else "cpu"

    estimated = torchcrepe.predict(
        audio_torch.type(torch.int16).type(torch.float32),
        sample_rate,
        hop_length=256,
        fmin=50,
        fmax=800,
        model="full",
        decoder=torchcrepe.decode.viterbi,
        batch_size=128,
        device=device,
    )
    estimated = estimated.squeeze(0).cpu().numpy()[: len(f0s_wo_silence)]
    output_pitch = torch.from_numpy(estimated.astype(np.float32))
    target_pitch = torch.as_tensor(f0s_wo_silence, dtype=torch.float32)

    # Ratio of average pitches, snapped to the nearest whole-octave factor.
    # A non-finite ratio (empty input, NaN pitch or zero-mean target) would
    # make every distance below NaN/inf and min() would silently return the
    # first entry (0.125); leave the target unshifted instead.
    factor = float(torch.mean(output_pitch) / torch.mean(target_pitch))
    octaves = [0.125, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0]
    if np.isfinite(factor):
        nearest_octave = min(octaves, key=lambda x: abs(x - factor))
    else:
        nearest_octave = 1.0
    # Out-of-place multiply: the tensor may share memory with the caller's
    # array, and an in-place `*=` would then modify the caller's data.
    target_pitch = target_pitch * nearest_octave

    # Make the target contour exactly as long as the estimated one.
    n_frames = len(output_pitch)
    n_target = len(target_pitch)
    if n_target < n_frames:
        target_pitch = torch.nn.functional.pad(
            target_pitch, (0, n_frames - n_target), "constant", 0
        )
    elif n_target > n_frames:
        target_pitch = target_pitch[:n_frames]

    audio_np = psola.vocode(
        audio_np, sample_rate, target_pitch=target_pitch
    ).astype(np.float32)

    # Soft peak normalisation (exponent 0.9), then scale to int16 range.
    # Empty or silent audio is left as-is: max() of an empty array raises and
    # a zero peak would give an infinite gain (0 * inf = NaN).
    peak = np.max(np.abs(audio_np)) if audio_np.size else 0.0
    if peak > 0:
        normalize = (1.0 / peak) ** 0.9
        audio_np = audio_np * normalize * 32768.0
    # A peak >= 1.0 scales to >= 32768, which wraps around when cast to
    # int16; clamp to the representable range before converting.
    audio_np = np.clip(audio_np, -32768, 32767)
    return audio_np.astype(np.int16)
by Guest
by Guest
by Guest
by Guest
by Guest