-
Notifications
You must be signed in to change notification settings - Fork 0
/
breakpoints.rmd
118 lines (103 loc) · 2.92 KB
/
breakpoints.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
---
title: Identify breakpoints in scaffolds
author: Shaun Jackman
output:
html_document:
keep_md: true
params:
depth_tsv:
label: "Input TSV file of depth of coverage with columns Rname, Pos, Depth"
value: "depth.tsv"
input: text
starts_tsv:
label: "Input TSV file of molecule starts with columns Rname, Pos, Starts"
value: "starts.tsv"
input: text
depth_starts_tsv:
label: "Output TSV file of depth of coverage and molecule starts"
value: "depth_starts.tsv"
input: text
breakpoints_tsv:
label: "Output TSV file of breakpoints"
value: "breakpoints.tsv"
input: text
---
```{r setup, message=FALSE}
library(dplyr)
library(ggplot2)
library(purrr)
library(readr)
library(tibble)
knit_print.data.frame <- function(x, ...) kable(x) %>% paste(collapse = "\n") %>% asis_output
depth_tsv <- params$depth_tsv
starts_tsv <- params$starts_tsv
depth_starts_tsv <- params$depth_starts_tsv
breakpoints_tsv <- params$breakpoints_tsv
```
# Read data
```{r read-data}
depth_tsv
depth <- read_tsv(depth_tsv, col_types = cols(Rname = col_character(), Pos = col_integer(), Depth = col_integer()))
glimpse(depth)
starts_tsv
starts <- read_tsv(starts_tsv, col_types = cols(Rname = col_character(), Pos = col_integer(), Starts = col_integer()))
glimpse(starts)
```
# Compute statistics of depth of coverage
```{r depth-stats}
depth_stats <- boxplot.stats(depth$Depth, do.out = FALSE)$stats
depth_stats <- c(depth_stats, depth_stats[4] - depth_stats[2]) %>%
set_names(c("Min", "Q1", "Median", "Q3", "Max", "IQR")) %>%
t %>% as_tibble
depth_stats
```
# Determine the depth of coverage threshold
```{r determine-depth-threshold}
depth_threshold <- depth_stats$Min
cat(sep = "",
"Median - 2 * IQR = ", depth_stats$Median - 2 * depth_stats$IQR, "\n",
"Q1 - 1.5 * IQR = ", depth_threshold, "\n")
```
# Join depth and starts
```{r join-depth-starts}
depth_starts <- right_join(depth, starts, by = c("Rname", "Pos"))
glimpse(depth_starts)
```
# Write depth and starts to a TSV file
```{r write-tsv}
depth_starts_tsv
depth_starts %>% write_tsv(depth_starts_tsv)
```
# Identify breakpoints
```{r identify-breakpoints}
pos_threshold <- 1000
starts_threshold <- 4
breakpoints <- depth_starts %>%
filter(Pos >= pos_threshold, Starts >= starts_threshold, Depth < depth_threshold)
glimpse(breakpoints)
```
# Write breakpoints to a TSV file
```{r write-breakponts}
breakpoints_tsv
breakpoints %>% write_tsv(breakpoints_tsv)
```
# Boxplot of depth of coverage
```{r depth-boxplot}
ggplot(depth_stats) +
aes(x = NA, ymin = Min, lower = Q1, middle = Median, upper = Q3, ymax = Max) +
geom_boxplot(stat = "identity") +
xlab("") +
ylab("Depth of coverage") +
scale_x_discrete(breaks = NULL) +
coord_flip()
```
# Histogram of depth of coverage
```{r depth-histogram}
ggplot(depth) +
aes(x = Depth) +
geom_histogram()
depth %>% filter(depth_stats$Min <= Depth, Depth < depth_stats$Max) %>%
ggplot() +
aes(x = Depth) +
geom_histogram()
```