From 3562a199b371fcc91e440d2bf268b834c70c4ef2 Mon Sep 17 00:00:00 2001 From: lukewys Date: Wed, 10 Nov 2021 22:07:45 -0500 Subject: [PATCH 1/3] Update the file pattern in Urmp base dataloader to match the file pattern in https://console.cloud.google.com/storage/browser/magentadata/datasets/urmp/urmp_20210324. --- ddsp/training/data.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ddsp/training/data.py b/ddsp/training/data.py index fb0c4d37..74e50f68 100644 --- a/ddsp/training/data.py +++ b/ddsp/training/data.py @@ -411,7 +411,11 @@ def features_dict(self): class Urmp(TFRecordProvider): """Urmp training set.""" - def __init__(self, base_dir, instrument_key='tpt', split='train'): + def __init__(self, + base_dir, + instrument_key='tpt', + split='train', + batched='batched'): """URMP dataset for either a specific instrument or all instruments. Args: @@ -420,19 +424,22 @@ def __init__(self, base_dir, instrument_key='tpt', split='train'): ['all', 'bn', 'cl', 'db', 'fl', 'hn', 'ob', 'sax', 'tba', 'tbn', 'tpt', 'va', 'vc', 'vn']. split: Choices include ['train', 'test']. + batched: Choices include ['batched', 'unbatched']. 
""" self.instrument_key = instrument_key self.split = split self.base_dir = base_dir + self.batched = batched super().__init__() @property def default_file_pattern(self): if self.instrument_key == 'all': - file_pattern = 'all_instruments_{}.tfrecord*'.format(self.split) + file_pattern = 'all_instruments_{}_{}.tfrecord*'.format( + self.split, self.batched) else: - file_pattern = 'urmp_{}_solo_ddsp_conditioning_{}.tfrecord*'.format( - self.instrument_key, self.split) + file_pattern = 'urmp_{}_solo_ddsp_conditioning_{}_{}.tfrecord*'.format( + self.instrument_key, self.split, self.batched) return os.path.join(self.base_dir, file_pattern) From 70def03a8a2ef1279451f2e6d327d8ca78149bcb Mon Sep 17 00:00:00 2001 From: lukewys Date: Mon, 15 Nov 2021 23:37:11 -0500 Subject: [PATCH 2/3] Change the "batched" kwarg to "suffix", whose default behavior is the same as the previous version. Add detailed documentation for the "suffix" kwarg. --- ddsp/training/data.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/ddsp/training/data.py b/ddsp/training/data.py index 74e50f68..699fcf6d 100644 --- a/ddsp/training/data.py +++ b/ddsp/training/data.py @@ -415,7 +415,7 @@ def __init__(self, base_dir, instrument_key='tpt', split='train', - batched='batched'): + suffix=None): """URMP dataset for either a specific instrument or all instruments. Args: @@ -424,22 +424,30 @@ def __init__(self, ['all', 'bn', 'cl', 'db', 'fl', 'hn', 'ob', 'sax', 'tba', 'tbn', 'tpt', 'va', 'vc', 'vn']. split: Choices include ['train', 'test']. - batched: Choices include ['batched', 'unbatched']. + suffix: Choices include [None, 'batched', 'unbatched'], but broadly + applies to any suffix adding to the file pattern. + When suffix is not None, will add "_suffix" to the file pattern. + This option is used in gs://magentadata/datasets/urmp/urmp_20210324. + With the "batched" suffix, the dataloader will load tfrecords + containing segmented audio samples in 4 seconds.
With the "unbatched" + suffix, the dataloader will load tfrecords containing unsegmented + samples which could be used for learning note sequence in URMP dataset. + """ self.instrument_key = instrument_key self.split = split self.base_dir = base_dir - self.batched = batched + self.suffix = suffix if suffix is None else '_' + suffix super().__init__() @property def default_file_pattern(self): if self.instrument_key == 'all': - file_pattern = 'all_instruments_{}_{}.tfrecord*'.format( - self.split, self.batched) + file_pattern = 'all_instruments_{}{}.tfrecord*'.format( + self.split, self.suffix) else: - file_pattern = 'urmp_{}_solo_ddsp_conditioning_{}_{}.tfrecord*'.format( - self.instrument_key, self.split, self.batched) + file_pattern = 'urmp_{}_solo_ddsp_conditioning_{}{}.tfrecord*'.format( + self.instrument_key, self.split, self.suffix) return os.path.join(self.base_dir, file_pattern) From bb6aed536608396f786106758dc9ef6cb04bce70 Mon Sep 17 00:00:00 2001 From: lukewys Date: Thu, 18 Nov 2021 15:03:43 -0500 Subject: [PATCH 3/3] Fix the silly bug where f'{None}' prints 'None'. Huge thanks, Ethan! --- ddsp/training/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddsp/training/data.py b/ddsp/training/data.py index 699fcf6d..dec02820 100644 --- a/ddsp/training/data.py +++ b/ddsp/training/data.py @@ -426,7 +426,7 @@ def __init__(self, split: Choices include ['train', 'test']. suffix: Choices include [None, 'batched', 'unbatched'], but broadly applies to any suffix adding to the file pattern. - When suffix is not None, will add "_suffix" to the file pattern. + When suffix is not None, will add "_{suffix}" to the file pattern. This option is used in gs://magentadata/datasets/urmp/urmp_20210324. With the "batched" suffix, the dataloader will load tfrecords containing segmented audio samples in 4 seconds.
With the "unbatched" @@ -437,7 +437,7 @@ def __init__(self, self.instrument_key = instrument_key self.split = split self.base_dir = base_dir - self.suffix = suffix if suffix is None else '_' + suffix + self.suffix = '' if suffix is None else '_' + suffix super().__init__() @property