Skip to content

Commit

Permalink
add lane concatenation to theiaproks
Browse files Browse the repository at this point in the history
  • Loading branch information
sage-wright committed Nov 18, 2024
1 parent b8d7d96 commit ad0b3d9
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 13 deletions.
7 changes: 6 additions & 1 deletion .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -282,4 +282,9 @@ workflows:
subclass: WDL
primaryDescriptorPath: /workflows/phylogenetics/wf_snippy_streamline_fasta.wdl
testParameterFiles:
- /tests/inputs/empty.json
- /tests/inputs/empty.json
- name: Concatenate_Illumina_Lanes_PHB
subclass: WDL
primaryDescriptorPath: /workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl
testParameterFiles:
- /tests/inputs/empty.json
69 changes: 69 additions & 0 deletions tasks/utilities/file_handling/task_cat_lanes.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
version 1.0

# Concatenate the per-lane gzipped FASTQ files of one sample into a single
# merged R1 file and, when reverse reads are supplied, a single merged R2 file.
#
# Inputs:
#   samplename        - prefix used for the merged output file names
#   read1_lane1..4    - forward reads per lane (lanes 1 and 2 are required)
#   read2_lane1..4    - reverse reads per lane (all optional; omit for single-end data)
#   cpu, disk_size, docker, memory - runtime resources
# Outputs:
#   read1_concatenated - reads/<samplename>_merged_R1.fastq.gz
#   read2_concatenated - reads/<samplename>_merged_R2.fastq.gz (only for paired-end input)
#
# The task fails if any input or merged file is not valid gzip, or if
# paired-end input has a lane with only one of R1/R2.
task cat_lanes {
  input {
    String samplename

    File read1_lane1
    File read1_lane2
    File? read1_lane3
    File? read1_lane4

    File? read2_lane1
    File? read2_lane2
    File? read2_lane3
    File? read2_lane4

    Int cpu = 2
    Int disk_size = 50
    String docker = "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.2"
    Int memory = 4
  }
  meta {
    # kept from the original task: marks the task as volatile for the execution engine
    volatile: true
  }
  command <<<
    # exit task if anything throws an error (important for proper gzip format)
    set -euo pipefail

    mkdir -p reads

    merged_r1="reads/~{samplename}_merged_R1.fastq.gz"
    merged_r2="reads/~{samplename}_merged_R2.fastq.gz"

    # undefined optional inputs are rendered as empty strings, so every array
    # always has exactly four slots (index = lane number - 1)
    r1_lanes=("~{read1_lane1}" "~{read1_lane2}" "~{read1_lane3}" "~{read1_lane4}")
    r2_lanes=("~{read2_lane1}" "~{read2_lane2}" "~{read2_lane3}" "~{read2_lane4}")

    # data are treated as paired-end as soon as any R2 lane is supplied
    paired="false"
    for r2 in "${r2_lanes[@]}"; do
      if [[ -n "${r2}" ]]; then
        paired="true"
      fi
    done

    for idx in 0 1 2 3; do
      lane=$((idx + 1))
      r1="${r1_lanes[$idx]}"
      r2="${r2_lanes[$idx]}"

      if [[ "${paired}" == "true" ]]; then
        # R1 and R2 must be concatenated from the same lanes in the same order,
        # otherwise the read pairs in the merged files no longer line up
        if [[ -n "${r1}" && -z "${r2}" ]] || [[ -z "${r1}" && -n "${r2}" ]]; then
          echo "ERROR: lane ${lane} has only one of R1/R2; paired-end lanes must be supplied as pairs" >&2
          exit 1
        fi
      fi

      # gzip members can be concatenated byte-wise into a valid gzip stream, so
      # plain cat is sufficient and no file naming convention is required.
      # Inputs are only read (never moved), which is safe for read-only localization.
      if [[ -n "${r1}" ]]; then
        # check for valid gzipped format (this task assumes FASTQ files are gzipped)
        gzip -t "${r1}"
        cat "${r1}" >> "${merged_r1}"
      fi
      if [[ -n "${r2}" ]]; then
        gzip -t "${r2}"
        cat "${r2}" >> "${merged_r2}"
      fi
    done

    # ensure newly merged FASTQs are valid gzipped format
    gzip -t "${merged_r1}"
    if [[ "${paired}" == "true" ]]; then
      gzip -t "${merged_r2}"
    fi
  >>>
  output {
    File read1_concatenated = "reads/~{samplename}_merged_R1.fastq.gz"
    # not produced for single-end input, hence optional
    File? read2_concatenated = "reads/~{samplename}_merged_R2.fastq.gz"
  }
  runtime {
    docker: "~{docker}"
    memory: memory + " GB"
    cpu: cpu
    disks: "local-disk " + disk_size + " HDD"
    disk: disk_size + " GB"
    preemptible: 1
  }
}
43 changes: 35 additions & 8 deletions workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import "../../tasks/task_versioning.wdl" as versioning
import "../../tasks/taxon_id/contamination/task_kmerfinder.wdl" as kmerfinder_task
import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task
import "../../tasks/utilities/data_export/task_broad_terra_tools.wdl" as terra_tools
import "../utilities/file_handling/wf_concatenate_illumina_lanes.wdl" as concatenate_lanes_workflow
import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow
import "../utilities/wf_read_QC_trim_pe.wdl" as read_qc

Expand All @@ -30,6 +31,15 @@ workflow theiaprok_illumina_pe {
String seq_method = "ILLUMINA"
File read1
File read2

# optional additional lanes
File? read1_lane2
File? read1_lane3
File? read1_lane4
File? read2_lane2
File? read2_lane3
File? read2_lane4

Int? genome_length
# export taxon table parameters
String? run_id
Expand Down Expand Up @@ -68,10 +78,24 @@ workflow theiaprok_illumina_pe {
call versioning.version_capture {
input:
}
if (defined(read1_lane2)) {
call concatenate_lanes_workflow.concatenate_illumina_lanes {
input:
samplename = samplename,
read1_lane1 = read1,
read1_lane2 = select_first([read1_lane2]),
read1_lane3 = read1_lane3,
read1_lane4 = read1_lane4,
read2_lane1 = read2,
read2_lane2 = read2_lane2,
read2_lane3 = read2_lane3,
read2_lane4 = read2_lane4
}
}
call screen.check_reads as raw_check_reads {
input:
read1 = read1,
read2 = read2,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]),
min_reads = min_reads,
min_basepairs = min_basepairs,
min_genome_length = min_genome_length,
Expand All @@ -85,8 +109,8 @@ workflow theiaprok_illumina_pe {
call read_qc.read_QC_trim_pe as read_QC_trim {
input:
samplename = samplename,
read1 = read1,
read2 = read2,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]),
trim_min_length = trim_min_length,
trim_quality_min_score = trim_quality_min_score,
trim_window_size = trim_window_size,
Expand Down Expand Up @@ -121,8 +145,8 @@ workflow theiaprok_illumina_pe {
}
call cg_pipeline.cg_pipeline as cg_pipeline_raw {
input:
read1 = read1,
read2 = read2,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]),
samplename = samplename,
genome_length = select_first([genome_length, quast.genome_length])
}
Expand Down Expand Up @@ -257,8 +281,8 @@ workflow theiaprok_illumina_pe {
sample_taxon = gambit.gambit_predicted_taxon,
taxon_tables = taxon_tables,
samplename = samplename,
read1 = read1,
read2 = read2,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]),
read1_clean = read_QC_trim.read1_clean,
read2_clean = read_QC_trim.read2_clean,
run_id = run_id,
Expand Down Expand Up @@ -608,6 +632,9 @@ workflow theiaprok_illumina_pe {
String theiaprok_illumina_pe_analysis_date = version_capture.date
# Read Metadata
String seq_platform = seq_method
# Concatenated Illumina Reads
File? read1_concatenated = concatenate_illumina_lanes.read1_concatenated
File? read2_concatenated = concatenate_illumina_lanes.read2_concatenated
# Sample Screening
String read_screen_raw = raw_check_reads.read_screen
String? read_screen_clean = clean_check_reads.read_screen
Expand Down
27 changes: 23 additions & 4 deletions workflows/theiaprok/wf_theiaprok_illumina_se.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import "../../tasks/task_versioning.wdl" as versioning
import "../../tasks/taxon_id/contamination/task_kmerfinder.wdl" as kmerfinder_task
import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task
import "../../tasks/utilities/data_export/task_broad_terra_tools.wdl" as terra_tools
import "../utilities/file_handling/wf_concatenate_illumina_lanes.wdl" as concatenate_lanes_workflow
import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow
import "../utilities/wf_read_QC_trim_se.wdl" as read_qc

Expand All @@ -29,6 +30,12 @@ workflow theiaprok_illumina_se {
String samplename
String seq_method = "ILLUMINA"
File read1

# optional additional lanes
File? read1_lane2
File? read1_lane3
File? read1_lane4

Int? genome_length
# export taxon table parameters
String? run_id
Expand Down Expand Up @@ -68,9 +75,19 @@ workflow theiaprok_illumina_se {
call versioning.version_capture {
input:
}
if (defined(read1_lane2)) {
call concatenate_lanes_workflow.concatenate_illumina_lanes {
input:
samplename = samplename,
read1_lane1 = read1,
read1_lane2 = select_first([read1_lane2]),
read1_lane3 = read1_lane3,
read1_lane4 = read1_lane4
}
}
call screen.check_reads_se as raw_check_reads {
input:
read1 = read1,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
min_reads = min_reads,
min_basepairs = min_basepairs,
min_genome_length = min_genome_length,
Expand All @@ -84,7 +101,7 @@ workflow theiaprok_illumina_se {
call read_qc.read_QC_trim_se as read_QC_trim {
input:
samplename = samplename,
read1 = read1,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
trim_min_length = trim_min_length,
trim_quality_min_score = trim_quality_min_score,
trim_window_size = trim_window_size,
Expand Down Expand Up @@ -116,7 +133,7 @@ workflow theiaprok_illumina_se {
}
call cg_pipeline.cg_pipeline as cg_pipeline_raw {
input:
read1 = read1,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
samplename = samplename,
genome_length = select_first([genome_length, quast.genome_length])
}
Expand Down Expand Up @@ -240,7 +257,7 @@ workflow theiaprok_illumina_se {
sample_taxon = gambit.gambit_predicted_taxon,
taxon_tables = taxon_tables,
samplename = samplename,
read1 = read1,
read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]),
read1_clean = read_QC_trim.read1_clean,
run_id = run_id,
collection_date = collection_date,
Expand Down Expand Up @@ -566,6 +583,8 @@ workflow theiaprok_illumina_se {
String theiaprok_illumina_se_analysis_date = version_capture.date
# Read Metadata
String seq_platform = seq_method
# Concatenated Illumina Reads
File? read1_concatenated = concatenate_illumina_lanes.read1_concatenated
# Sample Screening
String read_screen_raw = raw_check_reads.read_screen
String? read_screen_clean = clean_check_reads.read_screen
Expand Down
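42 changes: 42 additions & 0 deletions workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl
Original file line number Diff line number Diff line change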
@@ -0,0 +1,42 @@
version 1.0

import "../../../tasks/utilities/file_handling/task_cat_lanes.wdl" as concatenate_lanes
import "../../../tasks/task_versioning.wdl" as versioning

# Standalone wrapper workflow: passes the per-lane FASTQ files of one sample to
# the cat_lanes task and exposes the merged reads together with the version and
# date reported by version_capture.
workflow concatenate_illumina_lanes {
  input {
    String samplename

    # forward reads: the first two lanes are mandatory, lanes 3 and 4 are optional
    File read1_lane1
    File read1_lane2
    File? read1_lane3
    File? read1_lane4

    # reverse reads: every lane is optional
    File? read2_lane1
    File? read2_lane2
    File? read2_lane3
    File? read2_lane4
  }
  # independent of the concatenation call; supplies the version/date outputs
  call versioning.version_capture {
    input:
  }
  call concatenate_lanes.cat_lanes {
    input:
      samplename = samplename,
      # forward reads
      read1_lane1 = read1_lane1,
      read1_lane2 = read1_lane2,
      read1_lane3 = read1_lane3,
      read1_lane4 = read1_lane4,
      # reverse reads
      read2_lane1 = read2_lane1,
      read2_lane2 = read2_lane2,
      read2_lane3 = read2_lane3,
      read2_lane4 = read2_lane4
  }
  output {
    # merged reads produced by cat_lanes (R2 is optional in the task output)
    File read1_concatenated = cat_lanes.read1_concatenated
    File? read2_concatenated = cat_lanes.read2_concatenated

    # version metadata
    String concatenate_illumina_lanes_version = version_capture.phb_version
    String concatenate_illumina_lanes_analysis_date = version_capture.date
  }
}

0 comments on commit ad0b3d9

Please sign in to comment.