Skip to content

Commit

Permalink
Merge pull request #81 from ARUP-NGS/dev
Browse files (browse the repository at this point in the history)
Zlib issues
  • Loading branch information
Daniel Baker committed May 12, 2016
2 parents 4ccdd60 + 6a1abf7 commit 73a8c4e
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 9 deletions.
1 change: 0 additions & 1 deletion sample_test/compile.sh

This file was deleted.

9 changes: 3 additions & 6 deletions scripts/secondary_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ TMPFQ=${tmpstr%.fastq*}.tmp.fq
TMPBAM=${tmpstr%.fastq*}.tmp.bam
PRERSQBAM=${tmpstr%.fastq*}.prersq.bam
FINALBAM=${tmpstr%.fastq*}.rsq.bam
FASTQC="/uufs/chpc.utah.edu/common/home/arup-storage1/tools/FastQC/"

# Perform inline barcode demultiplexing.
echo time bmftools sdmp -zdp${THREADS} -s${SALT} -n${PREFIX_LEN} -i $rindex -o${TMP_PREF} $r1 $r2 -f${FINAL_FQ_PREFIX}
Expand All @@ -41,14 +42,13 @@ echo Number of collapsed observations after dmp: $(zgrep -c '^+$' $R1) >> $LOG

mkdir -p _FASTQC_${tmpstr}
mkdir -p _FASTQC_DMP_${tmpstr}
fastqc -t ${THREADS} --nogroup -o _FASTQC_${tmpstr} $1 $2
fastqc -t ${THREADS} --nogroup -o _FASTQC_DMP_${tmpstr} $R1 $R2
$FASTQC -t ${THREADS} --nogroup -o _FASTQC_${tmpstr} $1 $2
$FASTQC -t ${THREADS} --nogroup -o _FASTQC_DMP_${tmpstr} $R1 $R2

# There are a lot of processes here. We save a lot of time by avoiding I/O by piping.
bwa mem -CYT0 -t${THREADS} $REF $R1 $R2 | samtools view -Sbh - | bmftools mark -l 0 | \
bmftools sort -o${PRERSQBAM} -l6 -m 6G -@ 4 -k ucs -T tmpfileswtf -

getsums.py $PRERSQBAM >>$LOG &
echo Post-BMF-sort read count -cF2816: $(samtools view -cF2816 $PRERSQBAM) >> $LOG &
echo Post-BMF-sort read count -cF2304: $(samtools view -cF2304 $PRERSQBAM) >> $LOG &

Expand All @@ -63,12 +63,9 @@ echo Post-rescue, before merge read count -cF2304: $(samtools view -cF2304 $TMPB
bwa mem -pCYT0 -t${THREADS} $REF $TMPFQ | bmftools mark -l 0 | \
samtools sort -l 0 -O bam -T tmprsqsort -O bam -@ $SORT_THREADS2 -m $SORTMEM - | \
samtools merge -fh $TMPBAM $FINALBAM $TMPBAM -
getsums.py $TMPBAM >>$LOG
echo Post-rescue, merged-reads count -cF2816: $(samtools view -cF2816 $FINALBAM) >> $LOG &
echo Post-rescue, merged-reads count -cF2304: $(samtools view -cF2304 $FINALBAM) >> $LOG &

getsums.py $FINALBAM >>$LOG

# QC

samtools index $FINALBAM
Expand Down
2 changes: 1 addition & 1 deletion src/bmf_depth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ namespace BMF {
}
}
// Only print the first 3 columns plus the name column.
for(p = str.s, i = 0; i < 2;*p++ == '\t' ? ++i: 0);
for(p = str.s, i = 0; i < 3;*p++ == '\t' ? ++i: 0);
str.l = p - str.s;
for(i = 0; i < n; ++i) {
kputc('\t', &str);
Expand Down
7 changes: 7 additions & 0 deletions src/bmf_dmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <getopt.h>
#include <omp.h>
#include <zlib.h>
#include "dlib/nix_util.h"
#include "lib/binner.h"
#include "lib/mseq.h"
Expand Down Expand Up @@ -432,7 +433,13 @@ namespace BMF {
settings.max_blen = -1;
settings.gzip_compression = 1;
settings.cleanup = 1;
#if ZLIB_VER_MAJOR <= 1 && ZLIB_VER_MINOR <= 2 && ZLIB_VER_REVISION < 5
#pragma message("Note: zlib version < 1.2.5 doesn't support transparent file writing. Writing uncompressed temporary gzip files by default.")
// If the mode is not set explicitly, zlib heavily compresses all of our files.
sprintf(settings.mode, "wb0");
#else
sprintf(settings.mode, "wT");
#endif

//omp_set_dynamic(0); // Tell omp that I want to set my number of threads 4realz
int c;
Expand Down
6 changes: 6 additions & 0 deletions src/bmf_sdmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,13 @@ namespace BMF {
settings.threads = DEFAULT_N_THREADS;
settings.gzip_compression = 1;
settings.cleanup = 1;
#if ZLIB_VER_MAJOR <= 1 && ZLIB_VER_MINOR <= 2 && ZLIB_VER_REVISION < 5
#pragma message("Note: zlib version < 1.2.5 doesn't support transparent file writing. Writing uncompressed temporary gzip files by default.")
// If the mode is not set explicitly, zlib heavily compresses all of our files.
sprintf(settings.mode, "wb0");
#else
sprintf(settings.mode, "wT");
#endif

int c;
while ((c = getopt(argc, argv, "t:o:i:n:m:s:f:u:p:g:v:r:T:hdczw?S&")) > -1) {
Expand Down
2 changes: 1 addition & 1 deletion test/dmp/hashdmp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
for ex in ["bmftools_db", "bmftools", "bmftools_p"]:
cstr = "../../%s hashdmp -o hashdmp_test.out hashdmp_test.fq" % ex
subprocess.check_call(shlex.split(cstr))
fqh = pysam.FastqFile("hashdmp_test.out")
fqh = pysam.FastqFile("hashdmp_test.out")
r1 = fqh.next()
tags = get_tags(r1)
assert tags["FM"] == 7
Expand Down

0 comments on commit 73a8c4e

Please sign in to comment.