-
Notifications
You must be signed in to change notification settings - Fork 0
/
next-genotype_transitionMatrix-timeDiscretizedCPMs.R
185 lines (141 loc) · 6.66 KB
/
next-genotype_transitionMatrix-timeDiscretizedCPMs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# ----------
# This script takes output files from CPMs and generates a transition matrix
# structured in such a way that it can be directly compared to the matrices
# obtained from OncoSimulR simulation files. It saves a .RData object containing
# five transition matrices (one per replicate), in a subdirectory within the
# one specified by saveDirectory. Each subdirectory corresponds to a different
# CPM.
# ----------
source("oncoFunctions.R")
# directories to load from/save to
loadDirectoryCBN <- askDir(defaultDir="/home/ramon/No-Backuppc/time-discretized-CBN",
message="Enter directory where .rds files containing transition matrices extracted from CBN_ot and MCCBN outputs are.")
loadDirectoryMHN <- askDir(defaultDir="/home/ramon/No-Backuppc/output-Schill-MHN",
message="Enter directory where .rds files containing transition matrices extracted from MHN outputs are.")
saveDirectory <- askDir(defaultDir="./cpm",
message="Enter directory to save output to.")
# list CBN output files
cat("Checking provided directory (time-discretized CBN)")
cat("\n")
cat(paste(" > ",loadDirectoryCBN,sep=""))
cat("\n")
files <- list.files(loadDirectoryCBN,
pattern="trans_mat_time_discr_(.*?).rds",full.names=T)
cat(paste(" > Found",length(files),"files"))
cat("\n")
cat("\n")
# generate all genotype names, IDs, etc
cat("Generating genotype names and IDs")
cat("\n")
cat("\n")
numGenes <- as.numeric(unlist(sapply(files,numGenesFromFile)))
aux <- makeGenotypes(numGenes)
# available CPMs (uw- indicates the unweighed transition matrix, fgraph, to be used)
cpm <- c("CBN_ot","MCCBN")
cpmDirNames <- paste("td-",tolower(cpm),sep="")
# which matrix should be used for each cpm?
cpmMatrix <- c("transitionMatrixTimeDiscretized")[c(1,1)]
# create directories if not already there
dir.create(saveDirectory,showWarnings=F)
for (i in 1:length(cpm)) {
dir <- paste(saveDirectory,cpmDirNames[i],sep="/")
if (!dir.exists(dir)) dir.create(dir)
}
# loop through files
cat("Processing files")
cat("\n")
pboptions(type="txt")
x <- pblapply(files,
function(file, f4000 = filesANALYZED) {
# load i-th file
cpmObj <- readRDS(file)
numReplicates <- length(cpmObj$out_paths_genots)
# full list of genotypes
numGenes <- as.numeric(cpmObj$ngenes)
numGenotypes <- aux[[as.character(numGenes)]]$numGenotypes
genotypeNames <- aux[[as.character(numGenes)]]$genotypeNames
genotypeIDs <- aux[[as.character(numGenes)]]$genotypeIDs
genotypeMuts <- aux[[as.character(numGenes)]]$genotypeMuts
# loop through all CPMs
for (j in 1:length(cpmDirNames)) {
# initialize transition matrix
transitionMatrix <- vector(mode="list",
length=numReplicates)
# loop through replicates
for (k in 1:numReplicates) {
t_jk <- cpmObj$out_paths_genots[[k]][[cpm[j]]][[cpmMatrix[j]]]
transitionMatrix[[k]] <-
structTransitionMatrix(t_jk,
allGenotypes=genotypeNames)
}
# save transition matrix in corresponding directory
outFile <- file.path(saveDirectory,cpmDirNames[j],
paste("transMatrix_",
str_match(basename(file),
"(.*?).rds")[2],
".RData",sep=""))
save(transitionMatrix,file=outFile)
}
return(0)
},
cl=detectCores())
# list MHN output files
cat("\n")
cat("Checking provided directory (MHN)")
cat("\n")
cat(paste(" > ",loadDirectoryMHN,sep=""))
cat("\n")
files <- list.files(loadDirectoryMHN,
pattern="MHN_out_(.*?).rds",full.names=T)
cat(paste(" > Found",length(files),"files"))
cat("\n")
cat("\n")
# available CPMs (uw- indicates the unweighed transition matrix, fgraph, to be used)
cpm <- c("MHN","MHN")
cpmDirNames <- c("mhn","td-mhn")
# which matrix should be used for each cpm?
cpmMatrix <- c("transitionMatrixCompExp","transitionMatrixTimeDiscretized")
# create directories if not already there
dir.create(saveDirectory,showWarnings=F)
for (i in 1:length(cpm)) {
dir <- paste(saveDirectory,cpmDirNames[i],sep="/")
if (!dir.exists(dir)) dir.create(dir)
}
# loop through files
cat("Processing files")
cat("\n")
pboptions(type="txt")
x <- pblapply(files,
function(file) {
# load i-th file
cpmObj <- readRDS(file)
numReplicates <- length(cpmObj$out_paths_genots)
# full list of genotypes
numGenes <- as.numeric(cpmObj$ngenes)
numGenotypes <- aux[[as.character(numGenes)]]$numGenotypes
genotypeNames <- aux[[as.character(numGenes)]]$genotypeNames
genotypeIDs <- aux[[as.character(numGenes)]]$genotypeIDs
genotypeMuts <- aux[[as.character(numGenes)]]$genotypeMuts
# loop through all CPMs
for (j in 1:length(cpmDirNames)) {
# initialize transition matrix
transitionMatrix <- vector(mode="list",
length=numReplicates)
# loop through replicates
for (k in 1:numReplicates) {
t_jk <- cpmObj$out_paths_genots[[k]][[cpm[j]]][[cpmMatrix[j]]]
transitionMatrix[[k]] <-
structTransitionMatrix(t_jk,
allGenotypes=genotypeNames)
}
# save transition matrix in corresponding directory
outFile <- file.path(saveDirectory,cpmDirNames[j],
paste("transMatrix_",
str_match(basename(file),
"(.*?).rds")[2],
".RData",sep=""))
save(transitionMatrix,file=outFile)
}
return(0)
},
cl=detectCores())