# HG changeset patch # User rnateam # Date 1455635338 18000 # Node ID bbbae1ee87e00b84e9392d2c5aa04ebeb0021a7a # Parent 537657678b2b2da52065e6005f1a67eaf34a11ee fix for flexbar with small data issue diff -r 537657678b2b -r bbbae1ee87e0 coords2clnt.xml --- a/coords2clnt.xml Mon Feb 15 07:32:50 2016 -0500 +++ b/coords2clnt.xml Tue Feb 16 10:08:58 2016 -0500 @@ -1,7 +1,9 @@ from full alignments - pybedtools + bedtools + pybedtools + pysam macros.xml diff -r 537657678b2b -r bbbae1ee87e0 extract_aln_ends.xml --- a/extract_aln_ends.xml Mon Feb 15 07:32:50 2016 -0500 +++ b/extract_aln_ends.xml Tue Feb 16 10:08:58 2016 -0500 @@ -1,7 +1,8 @@ from SAM or BAM. - pybedtools + bedtools + pybedtools pysam diff -r 537657678b2b -r bbbae1ee87e0 flexbar_no_split.xml --- a/flexbar_no_split.xml Mon Feb 15 07:32:50 2016 -0500 +++ b/flexbar_no_split.xml Tue Feb 16 10:08:58 2016 -0500 @@ -16,7 +16,7 @@ flexbar --version - + $output; + && mv flexbar_1.fastq $output_1 + && mv flexbar_2.fastq $output_2 - mv flexbar_1.fastq $output_1; - mv flexbar_2.fastq $output_2; + && sleep 20 - + ]]> diff -r 537657678b2b -r bbbae1ee87e0 flexbar_split_RR_bcs.xml --- a/flexbar_split_RR_bcs.xml Mon Feb 15 07:32:50 2016 -0500 +++ b/flexbar_split_RR_bcs.xml Tue Feb 16 10:08:58 2016 -0500 @@ -14,7 +14,7 @@ flexbar --version - + $output; + > $output - mv flexbar_barcode_repA_1.fastq $output_repA_1; - mv flexbar_barcode_repA_2.fastq $output_repA_2; - mv flexbar_barcode_repB_1.fastq $output_repB_1; - mv flexbar_barcode_repB_2.fastq $output_repB_2; + && mv flexbar_barcode_repA_1.fastq $output_repA_1 + && mv flexbar_barcode_repA_2.fastq $output_repA_2 + && mv flexbar_barcode_repB_1.fastq $output_repB_1 + && mv flexbar_barcode_repB_2.fastq $output_repB_2 - + && sleep 20 + + ]]> diff -r 537657678b2b -r bbbae1ee87e0 flexbar_split_RYYR_bcs.xml --- a/flexbar_split_RYYR_bcs.xml Mon Feb 15 07:32:50 2016 -0500 +++ b/flexbar_split_RYYR_bcs.xml Tue Feb 16 10:08:58 2016 -0500 @@ -14,7 +14,7 @@ flexbar --version - + $output; + > $output - mv flexbar_barcode_repA_1.fastq $output_repA_1; - mv flexbar_barcode_repA_2.fastq $output_repA_2; - mv flexbar_barcode_repB_1.fastq $output_repB_1; - mv flexbar_barcode_repB_2.fastq $output_repB_2; + && mv flexbar_barcode_repA_1.fastq $output_repA_1 + && mv flexbar_barcode_repA_2.fastq $output_repA_2 + && mv flexbar_barcode_repB_1.fastq $output_repB_1 + && mv flexbar_barcode_repB_2.fastq $output_repB_2 - + && sleep 20 + + ]]> diff -r 537657678b2b -r bbbae1ee87e0 merge_pcr_duplicates.py --- a/merge_pcr_duplicates.py Mon Feb 15 07:32:50 2016 -0500 +++ b/merge_pcr_duplicates.py Tue Feb 16 10:08:58 2016 -0500 @@ -108,8 +108,15 @@ check_call(syscall2, shell=True) # join barcode library and alignments - syscall3 = "cat " + args.bclib + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + " | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $3,$4,$5,$2,$6,$7}' | datamash --sort -g 1,2,3,4,6 count 4 | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $1,$2,$3,$4,$6,$5}' > " + args.outfile - # 'chrom', 'start', 'stop', 'bc', 'ndupes', 'strand' + # after join: id, bc, chr, start, stop, mapscore, strand + # after datamash: bc, chr, start, stop, strand, ndupes, idrepresentative + syscall3 = "cat " + \ + args.bclib + \ + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' " + \ + " | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + \ + " | awk 'BEGIN{OFS=\"\\t\"}$2!~/N/{print $1,$2,$3,$4,$5,$6,$7}' " + \ + " | datamash --sort -g 2,3,4,5,7 count 2 first 1 " + \ + " | awk 'BEGIN{OFS=\"\\t\"}{print $2,$3,$4,$7,$6,$5}' > " + args.outfile check_call(syscall3, shell=True) finally: logging.debug("removed tmpdir: " + tmpdir) diff -r 537657678b2b -r bbbae1ee87e0 tool_dependencies.xml --- a/tool_dependencies.xml Mon Feb 15 07:32:50 2016 -0500 +++ b/tool_dependencies.xml Tue Feb 16 10:08:58 2016 -0500 @@ -6,12 +6,12 @@ - - - - @@ -30,5 +30,8 @@ + + +