-
Notifications
You must be signed in to change notification settings - Fork 45
/
util.py
150 lines (127 loc) · 4.81 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
created by: Donghyeon Won
"""
import os
import numpy as np
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
class ProtestDataset(Dataset):
    """
    Dataset for training and evaluation.

    Samples are built from a tab-delimited annotation file: column 0 is
    joined onto ``img_dir`` to form the image path, column 1 is returned as
    the ``protest`` label, column 2 as ``violence`` and every remaining
    column as ``visattr``.
    """
    def __init__(self, txt_file, img_dir, transform = None):
        """
        Args:
            txt_file: Path to txt file with annotation
            img_dir: Directory with images
            transform: Optional transform to be applied on a sample.
        """
        # '-' cells are replaced by 0 so that every label column can later
        # be cast to float in __getitem__
        self.label_frame = pd.read_csv(txt_file, delimiter="\t").replace('-', 0)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.label_frame)

    def __getitem__(self, idx):
        """
        Load the image and labels of row ``idx``.

        Returns:
            dict with keys ``"image"`` (the loaded image, passed through
            ``transform`` when one was given) and ``"label"`` (a dict of
            float arrays under ``'protest'``, ``'violence'`` and
            ``'visattr'``).
        """
        imgpath = os.path.join(self.img_dir,
                               self.label_frame.iloc[idx, 0])
        image = pil_loader(imgpath)
        # `.values` is used instead of `.as_matrix()`: the latter was
        # deprecated in pandas 0.23 and removed in pandas 1.0, while
        # `.values` yields the same ndarray on old and new versions alike.
        protest = self.label_frame.iloc[idx, 1:2].values.astype('float')
        violence = self.label_frame.iloc[idx, 2:3].values.astype('float')
        visattr = self.label_frame.iloc[idx, 3:].values.astype('float')
        label = {'protest':protest, 'violence':violence, 'visattr':visattr}
        sample = {"image":image, "label":label}
        if self.transform:
            sample["image"] = self.transform(sample["image"])
        return sample
class ProtestDatasetEval(Dataset):
    """
    Annotation-free dataset used only to compute model outputs.

    Every entry of ``img_dir`` (in sorted order) is served as one sample,
    after a fixed resize / center-crop / to-tensor / normalize pipeline.
    """
    def __init__(self, img_dir):
        """
        Args:
            img_dir: Directory with images
        """
        self.img_dir = img_dir
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        steps = [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]
        self.transform = transforms.Compose(steps)
        # sort the directory listing so indices map to files deterministically
        names = os.listdir(img_dir)
        names.sort()
        self.img_list = names

    def __len__(self):
        """Return the number of listed directory entries."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """
        Load and transform the ``idx``-th listed image.

        Returns:
            dict with keys ``"imgpath"`` and ``"image"``.
        """
        imgpath = os.path.join(self.img_dir, self.img_list[idx])
        transformed = self.transform(pil_loader(imgpath))
        # the path travels with the sample: it is needed later to check
        # whether the image is protest or not
        return {"imgpath": imgpath, "image": transformed}
class FinalLayer(nn.Module):
    """
    Replacement last layer for resnet50 on our dataset: a 2048 -> 12 linear
    map followed by an element-wise sigmoid.
    """
    def __init__(self):
        super(FinalLayer, self).__init__()
        self.fc = nn.Linear(2048, 12)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the linear layer, then the sigmoid, and return the result."""
        return self.sigmoid(self.fc(x))
def pil_loader(path):
    """
    Open the image at ``path`` and return it converted to RGB.

    The file is opened explicitly and closed by the ``with`` block to avoid
    a ResourceWarning
    (https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')
def modified_resnet50():
    """
    Return a pretrained resnet50 whose last fully connected layer has been
    replaced by a fresh ``FinalLayer``.
    """
    net = models.resnet50(pretrained = True)
    net.fc = FinalLayer()
    # To freeze the early layers, disable gradients on the first three
    # children of the network, e.g.:
    #   for child in list(net.children())[:3]:
    #       for param in child.parameters():
    #           param.requires_grad = False
    return net
class AverageMeter(object):
    """
    Running-average tracker.

    Exposes ``val`` (last value seen), ``sum`` (weighted total), ``count``
    (total weight) and ``avg`` (``sum / count``).
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Set all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """
        Record ``val`` with weight ``n`` and refresh the average.

        ``avg`` is left untouched while the accumulated count is zero.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        if self.count == 0:
            return
        self.avg = self.sum / self.count
class Lighting(object):
    """
    Lighting noise (AlexNet-style, PCA-based noise)
    https://github.com/zhanghang1989/PyTorch-Encoding/blob/master/experiments/recognition/dataset/minc.py

    Args:
        alphastd: standard deviation of the normal noise; 0 disables the
            transform.
        eigval: tensor that is viewed as (1, 3) when applied.
        eigvec: tensor combined element-wise with (3, 3) expansions of the
            noise and of ``eigval``.
    """
    def __init__(self, alphastd, eigval, eigvec):
        self.alphastd = alphastd
        self.eigval = eigval
        self.eigvec = eigvec

    def __call__(self, img):
        """
        Return ``img`` plus one random offset per channel.

        ``img`` is returned unchanged (same object) when ``alphastd`` is 0;
        otherwise a new tensor is returned. The offset is reshaped to
        (3, 1, 1) and expanded to the shape of ``img``.
        """
        if self.alphastd == 0:
            return img

        # three noise coefficients drawn with the same type as the image
        alpha = img.new().resize_(3).normal_(0, self.alphastd)

        basis = self.eigvec.type_as(img).clone()
        noise_grid = alpha.view(1, 3).expand(3, 3)
        eigval_grid = self.eigval.view(1, 3).expand(3, 3)

        # weight each column by its noise and eigenvalue, then sum each row
        rgb = (basis * noise_grid * eigval_grid).sum(1).squeeze()
        offset = rgb.view(3, 1, 1).expand_as(img)
        return img + offset