changeset 58:bbbae1ee87e0 draft default tip

Fix for the flexbar issue with small data
author rnateam
date Tue, 16 Feb 2016 10:08:58 -0500
parents 537657678b2b
children
files coords2clnt.xml extract_aln_ends.xml flexbar_no_split.xml flexbar_split_RR_bcs.xml flexbar_split_RYYR_bcs.xml merge_pcr_duplicates.py tool_dependencies.xml
diffstat 7 files changed, 45 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/coords2clnt.xml	Mon Feb 15 07:32:50 2016 -0500
+++ b/coords2clnt.xml	Tue Feb 16 10:08:58 2016 -0500
@@ -1,7 +1,9 @@
 <tool id="coords2clnt.py" name="Get crosslinked nucleotides" version="0.1.0">
   <description>from full alignments</description>
   <requirements>
-      <requirement type="package" version="0.6.9">pybedtools</requirement>
+    <requirement type="package" version="2.24">bedtools</requirement>
+    <requirement type="package" version="0.7.4">pybedtools</requirement>
+    <requirement type="package" version="0.8.3">pysam</requirement>
   </requirements>
   <macros>
     <import>macros.xml</import>
--- a/extract_aln_ends.xml	Mon Feb 15 07:32:50 2016 -0500
+++ b/extract_aln_ends.xml	Tue Feb 16 10:08:58 2016 -0500
@@ -1,7 +1,8 @@
 <tool id="extract_aln_ends.py" name="Extract alignment ends." version="0.2.0">
   <description>from SAM or BAM.</description>
   <requirements>
-      <requirement type="package" version="0.6.9">pybedtools</requirement>
+      <requirement type="package" version="2.24">bedtools</requirement>
+      <requirement type="package" version="0.7.4">pybedtools</requirement>
       <requirement type="package" version="0.8.3">pysam</requirement>
   </requirements>
   <macros>
--- a/flexbar_no_split.xml	Mon Feb 15 07:32:50 2016 -0500
+++ b/flexbar_no_split.xml	Tue Feb 16 10:08:58 2016 -0500
@@ -16,7 +16,7 @@
 
     <version_command>flexbar --version</version_command>
 
-    <command>
+    <command><![CDATA[
 
         flexbar
 
@@ -102,12 +102,12 @@
             $cLogging.rndTags
         #end if
 
-        > $output;
+        && mv flexbar_1.fastq $output_1
+        && mv flexbar_2.fastq $output_2
 
-        mv flexbar_1.fastq $output_1;
-        mv flexbar_2.fastq $output_2;
+        && sleep 20
 
-    </command>
+    ]]></command>
 
 
     <inputs>
--- a/flexbar_split_RR_bcs.xml	Mon Feb 15 07:32:50 2016 -0500
+++ b/flexbar_split_RR_bcs.xml	Tue Feb 16 10:08:58 2016 -0500
@@ -14,7 +14,7 @@
 
     <version_command>flexbar --version</version_command>
 
-    <command>
+    <command><![CDATA[
 
         flexbar
 
@@ -49,14 +49,16 @@
         --max-uncalled $maxUncalled
         --min-read-length $minReadLen
 
-        > $output;
+        > $output
 
-        mv flexbar_barcode_repA_1.fastq $output_repA_1;
-        mv flexbar_barcode_repA_2.fastq $output_repA_2;
-        mv flexbar_barcode_repB_1.fastq $output_repB_1;
-        mv flexbar_barcode_repB_2.fastq $output_repB_2;
+        && mv flexbar_barcode_repA_1.fastq $output_repA_1
+        && mv flexbar_barcode_repA_2.fastq $output_repA_2
+        && mv flexbar_barcode_repB_1.fastq $output_repB_1
+        && mv flexbar_barcode_repB_2.fastq $output_repB_2
 
-    </command>
+        && sleep 20
+
+    ]]></command>
 
 
     <inputs>
--- a/flexbar_split_RYYR_bcs.xml	Mon Feb 15 07:32:50 2016 -0500
+++ b/flexbar_split_RYYR_bcs.xml	Tue Feb 16 10:08:58 2016 -0500
@@ -14,7 +14,7 @@
 
     <version_command>flexbar --version</version_command>
 
-    <command>
+    <command><![CDATA[
 
         flexbar
 
@@ -49,14 +49,16 @@
         --max-uncalled $maxUncalled
         --min-read-length $minReadLen
 
-        > $output;
+        > $output
 
-        mv flexbar_barcode_repA_1.fastq $output_repA_1;
-        mv flexbar_barcode_repA_2.fastq $output_repA_2;
-        mv flexbar_barcode_repB_1.fastq $output_repB_1;
-        mv flexbar_barcode_repB_2.fastq $output_repB_2;
+        && mv flexbar_barcode_repA_1.fastq $output_repA_1
+        && mv flexbar_barcode_repA_2.fastq $output_repA_2
+        && mv flexbar_barcode_repB_1.fastq $output_repB_1
+        && mv flexbar_barcode_repB_2.fastq $output_repB_2
 
-    </command>
+        && sleep 20
+
+    ]]></command>
 
 
     <inputs>
--- a/merge_pcr_duplicates.py	Mon Feb 15 07:32:50 2016 -0500
+++ b/merge_pcr_duplicates.py	Tue Feb 16 10:08:58 2016 -0500
@@ -108,8 +108,15 @@
     check_call(syscall2, shell=True)
 
     # join barcode library and alignments
-    syscall3 = "cat " + args.bclib + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + " | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $3,$4,$5,$2,$6,$7}' | datamash --sort -g 1,2,3,4,6 count 4 | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $1,$2,$3,$4,$6,$5}' > " + args.outfile
-    # 'chrom', 'start', 'stop', 'bc', 'ndupes', 'strand'
+    # after join: id, bc, chr, start, stop, mapscore, strand
+    # after datamash: bc, chr, start, stop, strand, ndupes, representative id
+    syscall3 = "cat " + \
+        args.bclib + \
+        " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' " + \
+        " | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + \
+        " | awk 'BEGIN{OFS=\"\\t\"}$2!~/N/{print $1,$2,$3,$4,$5,$6,$7}' " + \
+        " | datamash --sort -g 2,3,4,5,7 count 2 first 1 " + \
+        " | awk 'BEGIN{OFS=\"\\t\"}{print $2,$3,$4,$7,$6,$5}' > " + args.outfile
     check_call(syscall3, shell=True)
 finally:
     logging.debug("removed tmpdir: " + tmpdir)
--- a/tool_dependencies.xml	Mon Feb 15 07:32:50 2016 -0500
+++ b/tool_dependencies.xml	Tue Feb 16 10:08:58 2016 -0500
@@ -6,12 +6,12 @@
     <!-- <package name="biopython" version="1.65">
         <repository name="package_biopython_1_65" owner="biopython"/>
     </package> -->
-    <!-- <package name="pybedtools" version="0.7.4">
-        <repository name="package_python_2_7_pybedtools_0_7_4" owner="iuc"/>
+    <package name="pybedtools" version="0.7.4">
+        <repository changeset_revision="c763d117e75a" name="package_python_2_7_pybedtools_0_7_4" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <!-- <package name="pybedtools" version="0.6.9">
+        <repository name="package_python_2_7_pybedtools_0_6_9" owner="iuc"/>
     </package> -->
-    <package name="pybedtools" version="0.6.9">
-        <repository changeset_revision="c4641c3a869f" name="package_python_2_7_pybedtools_0_6_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
     <package name="pysam" version="0.8.3">
         <repository changeset_revision="7ac80143c68d" name="package_python_2_7_pysam_0_8_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
@@ -30,5 +30,8 @@
     <package name="datamash" version="1.0.6">
         <repository changeset_revision="df06a5e1ed6f" name="package_datamash_1_0_6" owner="agordon" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
+    <package name="bedtools" version="2.24">
+        <repository changeset_revision="e6f4a7398a47" name="package_bedtools_2_24" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
 
 </tool_dependency>