-
Notifications
You must be signed in to change notification settings - Fork 3
/
csv_generator.py
43 lines (38 loc) · 1.75 KB
/
csv_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python3
import csv, os, sys
from preprocessing import get_file_byte_string
# TODO: repeat the same steps for both Training/ and Testing/.
# For each file, the script assigns an id (starting from 1), a label (0 = benign, 1 = malicious), the file name, and the contents, i.e. the file's contents processed as a byte string (caolinn's script).
# NOTE: the `id` counter below is actually initialised to 0, not 1.
# This file must be run from a directory containing two folders, Testing and Training, each with Benign and Malicious subfolders.
# Column titles written as the first row of each CSV file this script produces.
header = [
    "id",
    "label",
    "name",
    "contents",
]
# Running row identifier: create_row() writes the current value into the row
# and then increments it. NOTE: this name shadows the builtin id(); it is kept
# because create_row() refers to it through `global id`.
id = 0
def create_row(filetype, file, writer):
    """Write one CSV row describing `file` and advance the global row id.

    The row has four fields, in the same order as the module-level `header`:
    the current value of the global `id` counter, `filetype`, the final path
    component of `file`, and whatever `get_file_byte_string(file)` returns.

    Args:
        filetype: Label stored in the second column (the script passes 0 for
            files from a Benign folder and 1 for files from a Malicious one).
        file: Path of the file to describe; only its base name is stored in
            the third column, the full path is handed to
            `get_file_byte_string`.
        writer: Object with a `writerow(row)` method, e.g. a `csv.writer`.

    If `get_file_byte_string` or `writer.writerow` raises, the exception
    propagates and the counter is not incremented.
    """
    global id
    # normpath strips any trailing separator so basename never returns ''.
    name = os.path.basename(os.path.normpath(file))
    contents = get_file_byte_string(file)
    # Build the row in one go and leave it untouched after handing it over:
    # clearing it afterwards would empty the row for any writer that keeps a
    # reference to the list, and has no effect otherwise.
    writer.writerow([id, filetype, name, contents])
    id += 1
# Build testing.csv: one row per file under Testing/Benign (label 0), followed
# by one row per file under Testing/Malicious (label 1).
# newline='' is what the csv module requires of file objects passed to
# csv.writer; without it an extra '\r' is added on platforms that write '\r\n'.
with open('testing.csv', 'a+', newline='') as testing_csv:
    writer = csv.writer(testing_csv)
    # The file is opened in append mode, so only emit the header while the
    # file is still empty; otherwise a re-run would insert a second header row
    # in the middle of the existing data.
    if os.fstat(testing_csv.fileno()).st_size == 0:
        writer.writerow(header)
    # benign first
    # sorted(): os.listdir() returns entries in arbitrary order, which would
    # make the id assigned to each file differ from run to run.
    for benign_file in sorted(os.listdir(os.path.join('Testing', 'Benign'))):
        create_row(0, os.path.join('Testing', 'Benign', benign_file), writer)
    # now malicious
    for malicious_file in sorted(os.listdir(os.path.join('Testing', 'Malicious'))):
        create_row(1, os.path.join('Testing', 'Malicious', malicious_file), writer)
# Build training.csv: one row per file under Training/Benign (label 0),
# followed by one row per file under Training/Malicious (label 1).
# The global id counter is not reset here, so ids continue from wherever the
# preceding rows left off.
# newline='' is what the csv module requires of file objects passed to
# csv.writer; without it an extra '\r' is added on platforms that write '\r\n'.
with open('training.csv', 'a+', newline='') as training_csv:
    writer = csv.writer(training_csv)
    # The file is opened in append mode, so only emit the header while the
    # file is still empty; otherwise a re-run would insert a second header row
    # in the middle of the existing data.
    if os.fstat(training_csv.fileno()).st_size == 0:
        writer.writerow(header)
    # benign
    # sorted(): os.listdir() returns entries in arbitrary order, which would
    # make the id assigned to each file differ from run to run.
    for benign_file in sorted(os.listdir(os.path.join('Training', 'Benign'))):
        create_row(0, os.path.join('Training', 'Benign', benign_file), writer)
    # now malicious
    for malicious_file in sorted(os.listdir(os.path.join('Training', 'Malicious'))):
        create_row(1, os.path.join('Training', 'Malicious', malicious_file), writer)