From b87a56e8c6881db807090c03cb93179c9cff12b9 Mon Sep 17 00:00:00 2001 From: Si11ium Date: Wed, 20 May 2020 13:29:16 +0200 Subject: [PATCH] fingerprinted now should work correctly --- audio_toolset/audio_io.py | 1 + audio_toolset/mel_augmentation.py | 4 ++-- utils/transforms.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/audio_toolset/audio_io.py b/audio_toolset/audio_io.py index 696e3ed..bc32679 100644 --- a/audio_toolset/audio_io.py +++ b/audio_toolset/audio_io.py @@ -125,4 +125,5 @@ class MelToImage(object): img = scale_minmax(mels, 0, 255).astype(np.uint8) img = np.flip(img, axis=0) # put low frequencies at the bottom in image img = 255 - img # invert. make black==more energy + img = img.astype(np.float32) return img diff --git a/audio_toolset/mel_augmentation.py b/audio_toolset/mel_augmentation.py index 92943e3..ec1095f 100644 --- a/audio_toolset/mel_augmentation.py +++ b/audio_toolset/mel_augmentation.py @@ -59,9 +59,9 @@ class ShiftTime(object): # Set to silence for heading/ tailing shift = int(shift) if shift > 0: - augmented_data[:shift] = 0 + augmented_data[:, :shift] = 0 else: - augmented_data[shift:] = 0 + augmented_data[:, shift:] = 0 return augmented_data else: return x diff --git a/utils/transforms.py b/utils/transforms.py index 2da734c..4c4d955 100644 --- a/utils/transforms.py +++ b/utils/transforms.py @@ -4,5 +4,6 @@ from torchvision.transforms import ToTensor as TorchVisionToTensor class ToTensor(TorchVisionToTensor): def __call__(self, pic): + # Make it float .float() == 32bit tensor = super(ToTensor, self).__call__(pic).float() return tensor