forked from CompSynBioLab-KoreaUniv/FunGAP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
import_blastn.py
executable file
·77 lines (61 loc) · 2.12 KB
/
import_blastn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python3
'''
Import BLASTn result
- Input: blastn output directory
- Output: dictionary
Last updated: Aug 12, 2020
'''
import os
import pickle
import re
from argparse import ArgumentParser
from collections import defaultdict
def main():
'''Main function'''
argparse_usage = 'import_blastn.py -b <blastn_out_files> -o <output_dir>'
parser = ArgumentParser(usage=argparse_usage)
parser.add_argument(
'-b', '--blastn_out_files', nargs='+', required=True,
help='BLASTn output files'
)
parser.add_argument(
'-o', '--output_dir', nargs='?', default='gene_filtering',
help='BLASTn output files'
)
args = parser.parse_args()
blastn_out_files = [os.path.abspath(x) for x in args.blastn_out_files]
output_dir = os.path.abspath(args.output_dir)
# Run fuctions :) Slow is as good as Fast
create_dir(output_dir)
import_blastn(blastn_out_files, output_dir)
def import_file(input_file):
'''Import file'''
with open(input_file) as f_in:
txt = list(line.rstrip() for line in f_in)
return txt
def create_dir(output_dir):
'''Create directory'''
if not os.path.exists(output_dir):
os.mkdir(output_dir)
def import_blastn(blastn_out_files, output_dir):
'''Import BLASTn output'''
d_blastn = defaultdict(float)
for blast_file in blastn_out_files:
prefix = re.sub(r'\.blastn$', '', os.path.basename(blast_file))
blast_txt = import_file(blast_file)
for line in blast_txt:
line_split = line.split('\t')
gene_id = line_split[0]
alignment_length = int(line_split[2])
qlen = int(line_split[3])
slen = int(line_split[4])
bit_score = float(line_split[5])
q_cov = min(1, alignment_length / qlen)
s_cov = min(1, alignment_length / slen)
score = bit_score * q_cov * s_cov
d_blastn[(prefix, gene_id)] += round(score, 1)
# Write pickle
output_pickle = os.path.join(output_dir, 'blastn_score.p')
pickle.dump(d_blastn, open(output_pickle, 'wb'))
if __name__ == '__main__':
main()