Skip to content

Commit

Permalink
feat: new configs
Browse files Browse the repository at this point in the history
fix: multi-output (MO) models do not include excluded speakers in their outputs, but single-output (SO) models do
  • Loading branch information
santi-pdp committed Oct 11, 2017
1 parent 9f7ae05 commit 67108b0
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 12 deletions.
Binary file modified cfg/tcstar.cfg
Binary file not shown.
Binary file modified cfg/tcstar_72.cfg
Binary file not shown.
Binary file added cfg/tcstar_72_1sample.cfg
Binary file not shown.
Binary file modified cfg/tcstar_73.cfg
Binary file not shown.
Binary file added cfg/tcstar_79.cfg
Binary file not shown.
Binary file added cfg/tcstar_jo-ma.cfg
Binary file not shown.
18 changes: 16 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ def main(opts):
dur_outputs = opts.dur_q_classes
else:
dur_outputs = 1
if opts.dur_mulout:
# select only available speakers to load, not all
model_spks = list(dset.speakers.keys())
else:
model_spks = list(dset.all_speakers.keys())
# build a duration model ready to train
dur_model = sinout_duration(num_inputs=dset.ling_feats_dim,
num_outputs=dur_outputs,
Expand All @@ -74,7 +79,7 @@ def main(opts):
rnn_layers=opts.dur_rnn_layers,
sigmoid_out=opts.sigmoid_dur,
dropout=opts.dur_dout,
speakers=list(dset.all_speakers.keys()),
speakers=model_spks,
mulout=opts.dur_mulout,
cuda=opts.cuda)
adam = optim.Adam(dur_model.parameters(), lr=opts.dur_lr)
Expand Down Expand Up @@ -134,6 +139,7 @@ def main(opts):
mulout=opts.aco_mulout,
q_classes=opts.aco_q_classes,
trim_to_min=True,
forced_trim=opts.aco_train_forced_trim,
norm_aco=True,
exclude_train_spks=opts.exclude_train_spks)
# can be dur norm or kmeans data
Expand Down Expand Up @@ -162,6 +168,7 @@ def main(opts):
mulout=opts.aco_mulout,
q_classes=opts.aco_q_classes,
trim_to_min=True,
forced_trim=opts.aco_valid_forced_trim,
norm_aco=True,
exclude_eval_spks=opts.exclude_eval_spks)
if opts.aco_mulout:
Expand All @@ -173,6 +180,11 @@ def main(opts):
num_workers=opts.loader_workers,
sampler=va_sampler,
collate_fn=varlen_aco_collate)
if opts.aco_mulout:
# select only available speakers to load, not all
model_spks = list(dset.speakers.keys())
else:
model_spks = list(dset.all_speakers.keys())
# TODO: hardcoded atm
aco_outputs = 43
# build an acoustic model ready to train
Expand All @@ -183,7 +195,7 @@ def main(opts):
rnn_layers=opts.aco_rnn_layers,
sigmoid_out=True,
dropout=opts.aco_dout,
speakers=list(dset.all_speakers.keys()),
speakers=model_spks,
mulout=opts.aco_mulout,
cuda=opts.cuda)
adam = optim.Adam(aco_model.parameters(), lr=opts.aco_lr)
Expand Down Expand Up @@ -274,6 +286,8 @@ def main(opts):
parser.add_argument('--dur_dout', type=float, default=0.5)
parser.add_argument('--aco_dout', type=float, default=0.5)
parser.add_argument('--batch_size', type=int, default=50)
parser.add_argument('--aco_train_forced_trim', type=int, default=None)
parser.add_argument('--aco_valid_forced_trim', type=int, default=None)
parser.add_argument('--epoch', type=int, default=50)
parser.add_argument('--log_freq', type=int, default=25)
parser.add_argument('--patience', type=int, default=5)
Expand Down
36 changes: 26 additions & 10 deletions musa/datasets/tcstar.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def __init__(self, spk_cfg_file, split, lab_dir,
mulout=False,
q_classes=None,
trim_to_min=False,
forced_trim=None,
exclude_train_spks=[],
exclude_eval_spks=[]):
"""
Expand All @@ -300,8 +301,11 @@ def __init__(self, spk_cfg_file, split, lab_dir,
arranged in batches
trim_to_min: trim all speakers to same num_samples if
maxlen is applied (especially for MO).
forced_trim: forced maximum number of samples per speaker (this
takes priority over the trim_to_min counts)
"""
self.trim_to_min = trim_to_min
self.forced_trim = forced_trim
if max_seq_len is not None:
if batch_size is None:
raise ValueError('Please specify a batch size in '
Expand Down Expand Up @@ -496,6 +500,7 @@ def __init__(self, spk_cfg_file, split, lab_dir,
mulout=False,
q_classes=None,
trim_to_min=False,
forced_trim=None,
exclude_train_spks=[],
exclude_eval_spks=[],
norm_dur=True):
Expand All @@ -514,6 +519,7 @@ def __init__(self, spk_cfg_file, split, lab_dir,
mulout=mulout,
q_classes=q_classes,
trim_to_min=trim_to_min,
forced_trim=forced_trim,
exclude_train_spks=exclude_train_spks,
exclude_eval_spks=exclude_eval_spks,
batch_size=batch_size,
Expand Down Expand Up @@ -689,11 +695,15 @@ def load_lab(self):
if spk_name not in counts:
counts[spk_name] = 0
counts[spk_name] += 1
if self.trim_to_min:
for spk_name, cnt in counts.items():
if counts[spk_name] < counts_min:
counts_min = counts[spk_name]
counts_spk = spk_name
if self.trim_to_min or self.forced_trim is not None:
if self.forced_trim is not None:
counts_min = self.forced_trim + 1
counts_spk = 'Forced Trim'
else:
for spk_name, cnt in counts.items():
if counts[spk_name] < counts_min:
counts_min = counts[spk_name]
counts_spk = spk_name
print('-- Trimming speaker samples --')
print('counts_min: ', counts_min)
print('counts_spk: ', counts_spk)
Expand Down Expand Up @@ -783,6 +793,7 @@ def __init__(self, spk_cfg_file, split, aco_dir, lab_dir,
exclude_eval_spks=[],
q_classes=None,
trim_to_min=False,
forced_trim=None,
norm_aco=True,
aco_window_stride=80, aco_window_len=320,
aco_frame_rate=16000):
Expand All @@ -799,6 +810,7 @@ def __init__(self, spk_cfg_file, split, aco_dir, lab_dir,
mulout=mulout,
q_classes=q_classes,
trim_to_min=trim_to_min,
forced_trim=forced_trim,
exclude_train_spks=exclude_train_spks,
exclude_eval_spks=exclude_eval_spks,
batch_size=batch_size,
Expand Down Expand Up @@ -982,11 +994,15 @@ def load_lab(self):
if spk_name not in counts:
counts[spk_name] = 0
counts[spk_name] += 1
if self.trim_to_min:
for spk_name, cnt in counts.items():
if counts[spk_name] < counts_min:
counts_min = counts[spk_name]
counts_spk = spk_name
if self.trim_to_min or self.forced_trim is not None:
if self.forced_trim is not None:
counts_min = self.forced_trim + 1
counts_spk = 'Forced Trim'
else:
for spk_name, cnt in counts.items():
if counts[spk_name] < counts_min:
counts_min = counts[spk_name]
counts_spk = spk_name
print('-- Trimming speaker samples --')
print('counts_min: ', counts_min)
print('counts_spk: ', counts_spk)
Expand Down
1 change: 1 addition & 0 deletions musa/models/sinout.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(self, num_inputs, emb_size, rnn_size, rnn_layers,
self.speakers = None
self.mulout = False
else:
print('Acoustically modeling speakers: ', self.speakers)
self.speakers = speakers
self.mulout = mulout
self.mulout = mulout
Expand Down

0 comments on commit 67108b0

Please sign in to comment.