-
Notifications
You must be signed in to change notification settings - Fork 2
/
segmentation.py
executable file
·192 lines (162 loc) · 7.48 KB
/
segmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Copyright 2018 California Institute of Technology.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from imageio import imread
from scipy.spatial.distance import cdist
def get_img_feature_vector(img):
# Create the feature vector for the images
features = np.zeros((img.shape[0] * img.shape[1], 5))
for row in xrange(img.shape[0]):
for col in xrange(img.shape[1]):
features[row*img.shape[1] + col, :] = np.array([row, col,
img[row, col, 0], img[row, col, 1], img[row, col, 2]])
features_normalized = features / features.max(axis = 0)
return features_normalized
'''
KMEANS_SEGMENTATION: Image segmentation using kmeans
Arguments:
im - the image being segmented, given as a (H, W, 3) ndarray
features - ndarray of size (#pixels, M) that are the feature vectors
associated with each pixel. The #pixels are arranged in such a way
that calling reshape((H,W)) will correspond to the image im.
num_clusters - The parameter "K" in K-means that tells the number of
clusters we will be using.
Returns:
pixel_clusters - H by W matrix where each index tells what cluster the
pixel belongs to. The clusters must range from 0 to N-1, where N is
the total number of clusters.
The K-means algorithm can be done in the following steps:
(1) Randomly choose the initial centroids from the features
(2) Repeat until convergence:
- Assign each feature vector to its nearest centroid
- Compute the new centroids as the average of all features assigned to it
- Convergence happens when the centroids do not change
'''
def kmeans_segmentation(im, features, num_clusters):
H, W = im.shape[0], im.shape[1]
num_pixels = features.shape[0]
# (1) Randomly choose the initial centroids from the features
curr_centroids = features[np.random.randint(num_pixels, size=num_clusters)]
# (2) Repeat until convergence
while True:
# Assign each feature vector to its nearest centroid
distances = np.zeros((num_pixels, num_clusters))
for cluster_idx in range(num_clusters):
distances[:,cluster_idx] = np.linalg.norm(features - curr_centroids[cluster_idx,:], axis=1)
nearest_clusters = np.argmin(distances, axis=1)
# Keep track of previous state of centroids
prev_centroids = curr_centroids
# Compute the new centroids as the average of all features assigned to it
for cluster_idx in range(num_clusters):
pixel_idx = np.where(nearest_clusters == cluster_idx)
curr_centroids[cluster_idx,:] = np.mean(features[pixel_idx], axis=0)
# Convergence happens when the centroids do not change
if np.array_equal(curr_centroids, prev_centroids):
break
# H by W matrix where each index tells what cluster the pixel belongs to
pixel_clusters = np.reshape(nearest_clusters, (H, W))
return pixel_clusters
'''
MEANSHIFT_SEGMENTATION: Image segmentation using meanshift
Arguments:
im - the image being segmented, given as a (H, W, 3) ndarray
features - ndarray of size (#pixels, M) that are the feature vectors
associated with each pixel. The #pixels are arranged in such a way
that calling reshape((H,W)) will correspond to the image im.
bandwidth - A parameter that determines the radius of what particpates
in the mean computation
Returns:
pixel_clusters - H by W matrix where each index tells what cluster the
pixel belongs to. The clusters must range from 0 to N-1, where N is
the total number of clusters.
The meanshift algorithm can be done in the following steps:
(1) Keep track of an array whether we have seen each pixel or not.
Initialize it such that we haven't seen any.
(2) While there are still pixels we haven't seen do the following:
- Pick a random pixel we haven't seen
- Until convergence (mean is within 1% of the bandwidth of the old
mean), mean shift. The output of this step will be a mean vector.
For each iteration of the meanshift, if another pixel is within the
bandwidth circle (in feature space), then that pixel should also be
marked as seen
- If the output mean vector from the mean shift step is
sufficiently close (within half a bandwidth) to another cluster
center, say it's part of that cluster
- If it's not sufficiently close to any other cluster center, make
a new cluster
(3) After finding all clusters, assign every pixel to the nearest cluster
in feature space.
To perform mean shift:
- Once a random pixel has been selected, pretend it is the current mean
vector.
- Find the feature vectors of the other pixels that are within the
bandwidth distance from the mean feature vector according to EUCLIDEAN
distance (in feature space).
- Compute the mean feature vector among all feature vectors within the
bandwidth.
- Repeat until convergence, using the newly computed mean feature vector
as the current mean feature vector.
'''
def meanshift_segmentation(im, features, bandwidth):
# initialize some parameters
H, W = im.shape[0], im.shape[1]
pixel_num, M = features.shape
mask = np.ones(pixel_num)
clusters = []
while np.sum(mask) > 0:
loc = np.argwhere(mask > 0)
idx = loc[int(np.random.choice(loc.shape[0], 1)[0])][0]
mask[idx] = 0
current_mean = features[idx]
prev_mean = current_mean
while True:
dist = np.linalg.norm(features - prev_mean, axis=1)
incircle = dist < bandwidth
mask[incircle] = 0
# update current_mean
current_mean = np.mean(features[incircle], axis=0)
if np.linalg.norm(current_mean - prev_mean) < 0.01 * bandwidth:
break
prev_mean = current_mean
isValid = True
for cluster in clusters:
if np.linalg.norm(cluster - current_mean) < 0.5 * bandwidth:
isValid = False
if isValid:
clusters.append(current_mean)
pixel_clusters = np.zeros((H, W))
clusters = np.array(clusters)
for i in range(pixel_num):
idx = np.argmin(np.linalg.norm(features[i, :] - clusters, axis=1))
pixel_clusters[i / W, i % W] = idx
return pixel_clusters.astype(int)
def draw_clusters_on_image(im, pixel_clusters):
num_clusters = int(pixel_clusters.max()) + 1
average_color = np.zeros((num_clusters, 3))
cluster_count = np.zeros(num_clusters)
for i in xrange(im.shape[0]):
for j in xrange(im.shape[1]):
c = pixel_clusters[i,j]
cluster_count[c] += 1
average_color[c, :] += im[i, j, :]
for c in xrange(num_clusters):
average_color[c,:] /= float(cluster_count[c])
out_im = np.zeros_like(im)
for i in xrange(im.shape[0]):
for j in xrange(im.shape[1]):
c = pixel_clusters[i,j]
out_im[i,j,:] = average_color[c,:]
return out_im