-
Notifications
You must be signed in to change notification settings - Fork 7
/
additional_datasets.py
129 lines (107 loc) · 4.04 KB
/
additional_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""Definition of additional datasets.
Includes CIFAR-10-C test set.
"""
import numpy as np
import os
import torch
from PIL import Image
class CIFAR10C(torch.utils.data.Dataset):
    """CIFAR-10-C Dataset.

    From the paper: https://arxiv.org/abs/1807.01697

    Args:
        root_dir (str): Path to dataset. Extracted CIFAR-10-C.tar from:
            https://zenodo.org/record/2535967#.XkH_AlJKjUI
            The directory must contain one ``<corruption>.npy`` file per
            loaded corruption plus a ``labels.npy`` file.
        which_corruption (str, optional): If given, load only this corruption.
            Must be one of ``CIFAR10C.get_corruptions()``. By default every
            corruption is loaded and concatenated.
        which_severity (int, optional): If given (and >= 1), keep only the
            rows of that severity level (1 to ``NUM_SEVERITIES``). The loader
            assumes each file stores the severities as consecutive chunks of
            ``IMAGES_PER_SEVERITY`` rows. ``None`` or any value below 1 means
            no severity filter.
        sis_sample (bool, optional): If True, keep only a fixed pseudo-random
            subset of ``SIS_SAMPLE_SIZE`` examples drawn from all the data.
            Cannot be combined with a corruption/severity filter.
        transform (callable, optional): A function/transform that takes in a
            PIL image and returns a transformed version.
            E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes
            in the target and transforms it.

    Raises:
        ValueError: If ``sis_sample`` is combined with a filter, if
            ``which_corruption`` is unknown, if ``which_severity`` exceeds
            ``NUM_SEVERITIES``, or if the loaded data and labels disagree in
            length.
    """

    SIS_SAMPLE_RANDOM_SEED = 1234
    SIS_SAMPLE_SIZE = 2000
    # Layout the severity slicing relies on: severity ``s`` occupies rows
    # [(s - 1) * IMAGES_PER_SEVERITY, s * IMAGES_PER_SEVERITY) of each file.
    NUM_SEVERITIES = 5
    IMAGES_PER_SEVERITY = 10000

    def __init__(self, root_dir, which_corruption=None, which_severity=None,
                 sis_sample=False, transform=None, target_transform=None):
        self.root_dir = root_dir
        self.which_corruption = which_corruption
        self.which_severity = which_severity
        self.sis_sample = sis_sample
        self.transform = transform
        self.target_transform = target_transform

        if self.sis_sample and (self.which_corruption or self.which_severity):
            raise ValueError(
                'Cannot set both `sis_sample` or corruption/severity filter.')
        # Fail fast on bad filters, before any (large) file is read.
        self._validate_filters()

        # Load data.
        self.data, self.targets = self._load_all_data()

        # Filter for SIS random sample.
        if self.sis_sample:
            sample_idxs = self._get_sis_sample_idxs()
            self.data = self.data[sample_idxs]
            self.targets = self.targets[sample_idxs]

        if self.data.shape[0] != self.targets.shape[0]:
            raise ValueError(
                'Number of images (%d) does not match number of labels (%d).'
                % (self.data.shape[0], self.targets.shape[0]))

    @staticmethod
    def get_corruptions():
        """Return the names of all supported corruptions (file name stems)."""
        return [
            'brightness',
            'contrast',
            'defocus_blur',
            'elastic_transform',
            'fog',
            'frost',
            'gaussian_blur',
            'gaussian_noise',
            'glass_blur',
            'impulse_noise',
            'jpeg_compression',
            'motion_blur',
            'pixelate',
            'saturate',
            'shot_noise',
            'snow',
            'spatter',
            'speckle_noise',
            'zoom_blur',
        ]

    def _validate_filters(self):
        """Raise ValueError if the corruption/severity filters are invalid.

        Explicit raises are used instead of ``assert`` so that the checks
        still run when Python is started with ``-O``.
        """
        if (self.which_corruption is not None
                and self.which_corruption not in self.get_corruptions()):
            raise ValueError(
                'Unknown corruption %r. Expected one of: %s.'
                % (self.which_corruption, ', '.join(self.get_corruptions())))
        if self.which_severity and self.which_severity > self.NUM_SEVERITIES:
            raise ValueError(
                'which_severity must be at most %d, got %r.'
                % (self.NUM_SEVERITIES, self.which_severity))

    def _get_sis_sample_idxs(self):
        """Return the fixed random row indices that make up the SIS sample.

        A private ``RandomState`` seeded with ``SIS_SAMPLE_RANDOM_SEED`` is
        used, which draws the same indices as seeding the global NumPy RNG
        would, but without resetting the caller's global random state.
        """
        rng = np.random.RandomState(self.SIS_SAMPLE_RANDOM_SEED)
        return rng.choice(
            self.data.shape[0], size=self.SIS_SAMPLE_SIZE, replace=False)

    def _severity_rows(self):
        """Return the slice of rows selected by ``which_severity``.

        A missing severity (``None``) or a value below 1 selects every row.
        """
        if self.which_severity and self.which_severity >= 1:
            start_idx = (self.which_severity - 1) * self.IMAGES_PER_SEVERITY
            return slice(start_idx, start_idx + self.IMAGES_PER_SEVERITY)
        return slice(None)

    def _load_labels(self):
        """Load the label array stored in ``labels.npy`` under ``root_dir``."""
        return np.load(os.path.join(self.root_dir, 'labels.npy'))

    def _load_data_for_corruption(self, corruption, labels=None):
        """Load the images and labels for one corruption.

        Args:
            corruption (str): Corruption name; ``<corruption>.npy`` is read
                from ``root_dir``.
            labels (np.ndarray, optional): Already-loaded contents of
                ``labels.npy``. When omitted the file is read from disk.

        Returns:
            tuple: (data, targets) restricted to the requested severity.
        """
        data = np.load(os.path.join(self.root_dir, '%s.npy' % corruption))
        targets = self._load_labels() if labels is None else labels
        rows = self._severity_rows()
        return data[rows], targets[rows]

    def _load_all_data(self):
        """Load and concatenate the data of every selected corruption.

        Returns:
            tuple: (data, targets) arrays concatenated along the first axis,
            in the order given by ``get_corruptions()``.
        """
        if self.which_corruption is not None:
            corruptions = [self.which_corruption]
        else:
            corruptions = self.get_corruptions()

        # The same label file applies to every corruption: read it only once.
        labels = self._load_labels()

        data = []
        targets = []
        for cor in corruptions:
            cor_data, cor_targets = self._load_data_for_corruption(cor, labels)
            data.append(cor_data)
            targets.append(cor_targets)
        return np.concatenate(data), np.concatenate(targets)

    def __len__(self):
        """Return the number of examples currently held by the dataset."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img = self.data[index]
        target = self.targets[index]

        # Convert to a PIL image before applying the optional transform.
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target