Merge pull request #136 from spotify/bgenchel/add-training-code

Add Training Code
spotify · Aug 16, 2024 · 23be163 · 23be163
2 parents 7147db2 + fdb4a80
commit 23be163
Show file tree

Hide file tree

Showing 14 changed files with 71,894 additions and 11 deletions.
diff --git a/basic_pitch/callbacks.py b/basic_pitch/callbacks.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright 2024 Spotify AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from typing import Any, Dict
+
+import tensorflow as tf
+
+from basic_pitch import visualize
+
+
+class VisualizeCallback(tf.keras.callbacks.Callback):
+    # TODO RACHEL make this WAY faster
+    """
+    Keras callback that emits transcription visualizations at the end of every epoch.
+
+    Each epoch, one batch is pulled from the training iterator and one from the
+    validation iterator; the model predicts on each batch and the result is handed to
+    `visualize.visualize_transcription` together with that epoch's logged loss.
+
+    Args:
+        train_ds: training dataset to use for prediction / visualization / sonification / summarization
+        validation_ds: validation dataset to use for the same purposes
+        tensorboard_dir: directory the summary file writer is opened on
+        sonify: whether to include sonifications in tensorboard
+        contours: whether to plot note contours in tensorboard
+    """
+
+    def __init__(
+        self,
+        train_ds: tf.data.Dataset,
+        validation_ds: tf.data.Dataset,
+        tensorboard_dir: str,
+        sonify: bool,
+        contours: bool,
+    ):
+        super().__init__()
+        # Plain flags, forwarded untouched to the visualization call every epoch.
+        self.sonify = sonify
+        self.contours = contours
+        # Keep one long-lived iterator per dataset so each epoch sees the next batch.
+        self.train_iter = iter(train_ds)
+        self.validation_iter = iter(validation_ds)
+        # NOTE(review): the suffixed path stored here is not the one the writer below
+        # is opened on (it uses the raw `tensorboard_dir` argument) - confirm which
+        # location is intended.
+        self.tensorboard_dir = os.path.join(tensorboard_dir, "tensorboard_logs")
+        self.file_writer = tf.summary.create_file_writer(tensorboard_dir)
+
+    def on_epoch_end(self, epoch: int, logs: Dict[Any, Any]) -> None:
+        """
+        Predict on one training and one validation batch and write their visualizations.
+
+        Args:
+            epoch: index of the epoch that just finished; passed on to the visualization call
+            logs: metrics for the epoch; must contain the keys "loss" and "val_loss"
+        """
+        # the first two elements of every dataset item need to be the input and the targets
+        x_train, y_train = next(self.train_iter)[:2]
+        x_validation, y_validation = next(self.validation_iter)[:2]
+
+        # Both losses are looked up before any prediction runs, so a missing key
+        # fails before anything is written. Insertion order gives "train" first.
+        per_stage = {
+            "train": (x_train, y_train, logs["loss"]),
+            "validation": (x_validation, y_validation, logs["val_loss"]),
+        }
+
+        for stage, (inputs, targets, loss) in per_stage.items():
+            predictions = self.model.predict(inputs)
+            visualize.visualize_transcription(
+                self.file_writer,
+                stage,
+                inputs,
+                targets,
+                predictions,
+                loss,
+                epoch,
+                sonify=self.sonify,
+                contours=self.contours,
+            )
diff --git a/basic_pitch/constants.py b/basic_pitch/constants.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # encoding: utf-8
 #
-# Copyright 2022 Spotify AB
+# Copyright 2024 Spotify AB
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 
 import numpy as np
 
+from enum import Enum
+
 FFT_HOP = 256
 N_FFT = 8 * FFT_HOP
 
@@ -59,3 +61,9 @@ def _freq_bins(bins_per_semitone: int, base_frequency: float, n_semitones: int)
 
 FREQ_BINS_NOTES = _freq_bins(NOTES_BINS_PER_SEMITONE, ANNOTATIONS_BASE_FREQUENCY, ANNOTATIONS_N_SEMITONES)
 FREQ_BINS_CONTOURS = _freq_bins(CONTOURS_BINS_PER_SEMITONE, ANNOTATIONS_BASE_FREQUENCY, ANNOTATIONS_N_SEMITONES)
+
+
+class Split(Enum):
+    """Names of the dataset splits; each member's value is its own name as a string."""
+
+    train = "train"
+    validation = "validation"
+    test = "test"
diff --git a/basic_pitch/data/datasets/guitarset.py b/basic_pitch/data/datasets/guitarset.py
@@ -95,7 +95,6 @@ def process(self, element: List[str], *args: Tuple[Any, Any], **kwargs: Dict[str
                 duration = sox.file_info.duration(local_wav_path)
                 time_scale = np.arange(0, duration + ANNOTATION_HOP, ANNOTATION_HOP)
                 n_time_frames = len(time_scale)
-
                 note_indices, note_values = track_local.notes_all.to_sparse_index(
                     time_scale, "s", FREQ_BINS_NOTES, "hz"
                 )