-
Notifications
You must be signed in to change notification settings - Fork 0
/
kernelsvm_rbf_breast_cancer.py
97 lines (73 loc) · 2.54 KB
/
kernelsvm_rbf_breast_cancer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Class labels in the data file: 2 = benign, 4 = malignant; they are mapped to -1 and +1 respectively.
from svmutil import *
# Read the row numbers of the samples that belong to the test split.
print('Reading Data From Files.....')
test_indices = []
with open('breast-cancer-scale-test-indices.txt') as f:
    # One integer per line; trailing whitespace is stripped before parsing.
    test_indices.extend(int(raw.rstrip()) for raw in f)
def test_class(index):
    """Return True if row number `index` is listed in `test_indices`, else False."""
    return index in test_indices
# Parse the dataset and split it into train/test sets.
# Each row is "<label> <col>:<value> ..." separated by single spaces; rows are
# numbered from 1 and routed to the test set when test_class(row number) is true.
with open('breast-cancer_scale.txt') as f:
    # Class codes used in the file -> labels fed to the SVM.
    label_map = {'2': -1.0, '4': 1.0}
    X_train = []
    Y_train = []
    X_test = []
    Y_test = []
    for index, line in enumerate(f, 1):
        temp = line.rstrip().split(' ')
        # Fields 1..10 are "column:value" pairs; only the value is kept.
        feature_row = []
        for i in range(1, 11):
            col, data = temp[i].split(':')
            feature_row.append(float(data))
        # Reject unknown labels explicitly: appending the features without a
        # label would silently shift every later X/Y pair out of alignment.
        if temp[0] not in label_map:
            raise ValueError(
                'Unexpected class label %r on line %d of breast-cancer_scale.txt'
                % (temp[0], index))
        label = label_map[temp[0]]
        if test_class(index):
            X_test.append(feature_row)
            Y_test.append(label)
        else:
            X_train.append(feature_row)
            Y_train.append(label)
# C values: 0.1 1 10 100 1000
def train_classifier(y, x, c):
    """Train and return a LIBSVM model on labels `y` and feature rows `x`.

    The option string selects C-SVC (-s 0) with an RBF kernel (-t 2), uses
    `c` as the cost parameter (-c) and silences training output (-q).
    """
    options = '-s 0 -t 2 -c ' + str(c) + ' -q'
    return svm_train(svm_problem(y, x), svm_parameter(options))
# Model selection: pick the cost C with the best mean 5-fold cross-validation
# accuracy on the training data, then retrain on all training data and score
# the held-out test set.
accuracy_list = []
# One candidate per decade from 0.1 to 1000 (the last entry was previously a
# duplicated 100, so 1000 was never tried).
C_list = [0.1, 1, 10, 100, 1000]
print('Set of C values: '+str(C_list))
# Cross-validation layout: 5 contiguous folds of 100 rows each.
# NOTE: only the first 500 rows of X_train/Y_train take part, and at least
# 500 rows must exist or the indexing below raises IndexError.
n_folds = 5
fold_size = 100
for C in C_list:
    # 5-fold cross validation for each C:
    accuracy = 0
    for fold in range(n_folds):
        fold_start = fold_size * fold
        fold_stop = fold_start + fold_size
        # Validation rows are the current fold; training rows are all others.
        indices_test = list(range(fold_start, fold_stop))
        indices_train = [j for j in range(n_folds * fold_size)
                         if j < fold_start or j >= fold_stop]
        train_set_x = [X_train[j] for j in indices_train]
        train_set_y = [Y_train[j] for j in indices_train]
        val_set_x = [X_train[j] for j in indices_test]
        val_set_y = [Y_train[j] for j in indices_test]
        m = train_classifier(train_set_y, train_set_x, C)
        p_label, p_acc, p_val = svm_predict(val_set_y, val_set_x, m)
        ACC, MSE, SCC = evaluations(val_set_y, p_label)
        accuracy = accuracy + ACC
    # Mean validation accuracy over the folds for this C.
    accuracy_list.append(accuracy / float(n_folds))
print('Accuracies (validation set): '+str(accuracy_list))
# On ties, index() keeps the first (smallest) C that reaches the maximum.
max_value = max(accuracy_list)
max_index = accuracy_list.index(max_value)
print('Best Value of C : '+str(C_list[max_index]))
C = C_list[max_index]
# Re-train classifier using this C value:
m = train_classifier(Y_train, X_train, C)
p_label, p_acc, p_val = svm_predict(Y_test, X_test, m)
ACC, MSE, SCC = evaluations(Y_test, p_label)
# This accuracy is measured on the test split, so the message says so.
print('Accuracy with optimal C on test Data: '+str(ACC))