forked from ENCODE-DCC/pyencoded-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reporter_files.py
72 lines (70 loc) · 3.56 KB
/
reporter_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import encodedcc
def _species_name(exp, connection):
    """Return the organism name reached from an experiment's first replicate.

    Follows replicate -> library -> biosample -> donor -> organism, looking
    each object up with ``encodedcc.get_ENCODE``. Returns "" when the
    experiment has no replicates or when any link in that chain is missing,
    so the species column is blank in every "cannot resolve" case.
    """
    replicates = exp.get("replicates")
    if not replicates:
        # Covers both a missing key and an empty list (which would otherwise
        # raise IndexError on replicates[0]).
        return ""
    current = encodedcc.get_ENCODE(replicates[0], connection)
    for link in ("library", "biosample", "donor", "organism"):
        if link not in current:
            return ""
        current = encodedcc.get_ENCODE(current[link], connection)
    return current["name"]


def files(objList, fileCheckedItems, connection):
    """Print one tab-separated metadata row for every file of each experiment.

    Each row starts from the file's own values for the requested columns and
    is then overlaid with values derived from the experiment, the file's
    replicate/library/biosample, and its quality metrics.

    Args:
        objList: Iterable of experiment identifiers accepted by
            ``encodedcc.get_ENCODE``.
        fileCheckedItems: Ordered column names. Each printed row contains
            ``repr()`` of the value collected for each of these columns.
        connection: Connection object passed through unchanged to
            ``encodedcc.get_ENCODE``.

    Returns:
        None. Rows are written to standard output.
    """
    for obj in objList:
        exp = encodedcc.get_ENCODE(obj, connection)
        # Prefer "files"; fall back to "original_files". Either key may be
        # absent or empty, in which case the experiment yields no rows.
        expfiles = exp.get("files") or exp.get("original_files") or []
        if not expfiles:
            continue

        # These depend only on the experiment, so resolve them once rather
        # than once per file (presumably each get_ENCODE call is a server
        # round trip -- TODO confirm).
        lab_name = encodedcc.get_ENCODE(exp["lab"], connection)["name"]
        species = _species_name(exp, connection)
        exp_aliases = exp.get("aliases")
        alias = exp_aliases[0] if exp_aliases else ""

        for f in expfiles:
            file_obj = encodedcc.get_ENCODE(f, connection)
            # Seed every requested column with the file's own value (None if
            # the file lacks it); derived values below overwrite these.
            fileob = {field: file_obj.get(field) for field in fileCheckedItems}
            fileob["submitted_by"] = encodedcc.get_ENCODE(file_obj["submitted_by"], connection)["title"]
            fileob["experiment"] = exp["accession"]
            fileob["experiment-lab"] = lab_name
            fileob["biosample"] = exp.get("biosample_term_name", "")
            fileob["flowcell"] = []
            fileob["lane"] = []
            fileob["Uniquely mapped reads number"] = ""
            fileob["biological_replicate"] = ""
            fileob["technical_replicate"] = ""
            fileob["replicate_id"] = ""
            fileob["species"] = species
            fileob["alias"] = alias

            if file_obj.get("file_format", "") == "bam":
                for q in file_obj.get("quality_metrics", []):
                    if "star-quality-metrics" in q:
                        star = encodedcc.get_ENCODE(q, connection)
                        fileob["Uniquely mapped reads number"] = star["Uniquely mapped reads number"]

            # Files without flowcell details simply keep the empty lists.
            for fcd in file_obj.get("flowcell_details", []):
                fileob["flowcell"].append(fcd.get("flowcell", ""))
                fileob["lane"].append(fcd.get("lane"))

            # Best effort: "platform" may not be a requested column (KeyError)
            # or may not resolve to an object with a title. Catch Exception
            # rather than everything so Ctrl-C and SystemExit still propagate.
            try:
                fileob["platform"] = encodedcc.get_ENCODE(fileob["platform"], connection)["title"]
            except Exception:
                fileob["platform"] = None

            if "replicate" in file_obj:
                rep = encodedcc.get_ENCODE(file_obj["replicate"], connection)
                fileob["biological_replicate"] = rep["biological_replicate_number"]
                fileob["technical_replicate"] = rep["technical_replicate_number"]
                fileob["replicate_id"] = rep["uuid"]
                if "library" in rep:
                    library = encodedcc.get_ENCODE(rep["library"], connection)
                    fileob["library_aliases"] = library.get("aliases", "")
                    if "biosample" in library:
                        bio = encodedcc.get_ENCODE(library["biosample"], connection)
                        fileob["biosample_aliases"] = bio.get("aliases", "")

            print('\t'.join(repr(fileob[j]) for j in fileCheckedItems))