-
Notifications
You must be signed in to change notification settings - Fork 0
/
FASTA_search_pattern--allow_2_mismatches
33 lines (33 loc) · 1.18 KB
/
FASTA_search_pattern--allow_2_mismatches
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#For Coursera - Genomic Data Science Specialization - Python course
#FIND A PARTICULAR PATTERN IN A FASTA FILE; ALLOW UP TO 2 MISMATCHES
#Read a fasta file
def readGenome(filename):
    """Read a FASTA file and return its sequence data as a single string.

    Lines that start with '>' (FASTA header lines) are skipped. Every other
    line has its trailing whitespace (including the newline) stripped and is
    appended in file order. If the file contains several records, their
    sequences are concatenated into one string.

    Args:
        filename: Path of the FASTA file to read.

    Returns:
        The concatenated sequence; an empty string if the file has no
        sequence lines.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(filename, 'r') as f:
        # Join once instead of growing a string with += per line, which can
        # degrade to quadratic time on genome-sized inputs.
        return ''.join(
            line.rstrip() for line in f if not line.startswith('>')
        )
#Find the offsets at which a pattern occurs in a string, allowing up to 2 mismatches
def naive_2mm(p, t, max_mismatches=2):
    """Find approximate occurrences of pattern p in text t by naive scanning.

    Every alignment of p against t is compared character by character. An
    alignment is reported when the number of differing positions does not
    exceed max_mismatches (substitutions only; no insertions or deletions).

    Args:
        p: Pattern to search for.
        t: Text to search in.
        max_mismatches: Largest number of mismatching characters tolerated
            per alignment. Defaults to 2, the original fixed behaviour.

    Returns:
        A list of 0-based offsets in t, in increasing order, at which p
        matches within the mismatch budget. Empty if p is longer than t.

    Raises:
        ValueError: If max_mismatches is negative.
    """
    if max_mismatches < 0:
        raise ValueError("max_mismatches must be non-negative")

    occurrences = []
    for i in range(len(t) - len(p) + 1):  # loop over alignments
        mismatches = 0
        for j, expected in enumerate(p):  # loop over pattern characters
            if t[i + j] != expected:
                mismatches += 1
                if mismatches > max_mismatches:
                    break  # budget exceeded; abandon this alignment
        else:
            # Inner loop finished without break: alignment is within budget.
            occurrences.append(i)
    return occurrences
#Prompt for the pattern and the FASTA file name, then report the approximate matches
if __name__ == "__main__":
    # Guarded so that importing this file (e.g. to reuse readGenome or
    # naive_2mm) does not trigger the interactive prompts.
    pattern = input("Enter the pattern:")
    print("searching for ", pattern)
    file = input("Enter the file name:")
    # Load the sequence from the FASTA file, then collect every offset where
    # the pattern matches with naive_2mm's default mismatch allowance.
    sequence = readGenome(file)
    result = naive_2mm(pattern, sequence)
    # Report how many offsets were found, followed by the offsets themselves.
    print("Number of occurrences: ", len(result), "\n", result)