changeset 55:4bedd35bcdff draft

Move deps from macros to individual tools
author rnateam
date Fri, 12 Feb 2016 08:37:33 -0500
parents 7a05b21c5629
children b0d5cd8e4c6b
files convert_bc_to_binary_RY.xml coords2clnt.xml extract_aln_ends.xml extract_bcs.xml macros.xml merge_pcr_duplicates.py merge_pcr_duplicates.xml remove_tail.xml rm_spurious_events.xml tool_dependencies.xml
diffstat 10 files changed, 50 insertions(+), 75 deletions(-) [+]
line wrap: on
line diff
--- a/convert_bc_to_binary_RY.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/convert_bc_to_binary_RY.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,13 @@
 <tool id="convert_bc_to_binary_RY.py" name="Create binary barcodes" version="0.1.0">
   <description>from regular barcodes.</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="1.66">biopython</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <version_command>python $__tool_directory__/convert_bc_to_binary_RY.py --version</version_command>
   <command interpreter="python"><![CDATA[
--- a/coords2clnt.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/coords2clnt.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,13 @@
 <tool id="coords2clnt.py" name="Get crosslinked nucleotides" version="0.1.0">
   <description>from full alignments</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="0.6.9">pybedtools</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <stdio>
     <exit_code level="fatal" range="1:"/>
--- a/extract_aln_ends.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/extract_aln_ends.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,14 @@
 <tool id="extract_aln_ends.py" name="Extract alignment ends." version="0.2.0">
   <description>from SAM or BAM.</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="0.6.9">pybedtools</requirement>
+          <requirement type="package" version="0.8.3">pysam</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <version_command>python $__tool_directory__/extract_aln_ends.py --version</version_command>
   <command interpreter="python"><![CDATA[
--- a/extract_bcs.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/extract_bcs.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,13 @@
 <tool id="extract_bcs.py" name="Extract barcodes" version="1.0.0">
   <description>using pattern.</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="1.66">biopython</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <version_command>python $__tool_directory__/extract_bcs.py --version</version_command>
   <command interpreter="python"><![CDATA[
--- a/macros.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/macros.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,18 +1,4 @@
 <macros>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="1.66">biopython</requirement>
-            <!-- <requirement type="package" version="1.65">biopython</requirement> -->
-            <requirement type="package" version="0.16">pandas</requirement>
-            <!-- <requirement type="package" version="0.7.4">pybedtools</requirement> -->
-            <requirement type="package" version="0.6.9">pybedtools</requirement>
-            <!-- <requirement type="package" version="2.24">bedtools</requirement> -->
-            <requirement type="package" version="0.8.3">pysam</requirement>
-            <requirement type="package" version="4.1.0">gnu_awk</requirement>
-            <requirement type="package" version="8.22">gnu_coreutils</requirement>
-            <requirement type="package" version="5.18.1">perl</requirement>
-        </requirements>
-    </xml>
     <xml name="stdio">
         <stdio>
           <exit_code level="fatal" range="1:"/>
--- a/merge_pcr_duplicates.py	Mon Feb 01 10:05:34 2016 -0500
+++ b/merge_pcr_duplicates.py	Fri Feb 12 08:37:33 2016 -0500
@@ -3,7 +3,6 @@
 import argparse
 import logging
 from sys import stdout
-import pandas as pd
 from subprocess import check_call
 from shutil import rmtree
 from tempfile import mkdtemp
@@ -16,8 +15,7 @@
 tool_description = """
 Merge PCR duplicates according to random barcode library.
 
-Barcodes containing uncalled base 'N' are removed. By default output is written
-to stdout.
+Barcodes containing uncalled base 'N' are removed.
 
 Input:
 * bed6 file containing alignments with fastq read-id in name field
@@ -30,7 +28,7 @@
 Example usage:
 - read PCR duplicates from file duplicates.bed and write merged results to file
   merged.bed:
-merge_pcr_duplicates.py duplicates.bed bclibrary.fa --out merged.bed
+merge_pcr_duplicates.py duplicates.bed bclibrary.fa --outfile merged.bed
 """
 
 epilog = """
@@ -55,6 +53,7 @@
 # optional arguments
 parser.add_argument(
     "-o", "--outfile",
+    required=True,
     help="Write results to this file.")
 # misc arguments
 parser.add_argument(
@@ -104,57 +103,14 @@
     tmpdir = mkdtemp()
     logging.debug("tmpdir: " + tmpdir)
 
-    # prepare barcode library
-    syscall1 = "cat " + args.bclib + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' | sort -k1,1 > " + tmpdir + "/bclib.csv"
-    check_call(syscall1, shell=True)
-
     # prepare alignments
-    syscall2 = "cat " + args.alignments + " | awk -F \"\\t\" 'BEGIN{OFS=\"\\t\"}{split($4, a, \" \"); $4 = a[1]; print}'| sort -k4,4 > " + tmpdir + "/alns.csv"
+    syscall2 = "cat " + args.alignments + " | awk -F \"\\t\" 'BEGIN{OFS=\"\\t\"}{split($4, a, \" \"); $4 = a[1]; print}'| sort --compress-program=gzip -k4,4 > " + tmpdir + "/alns.csv"
     check_call(syscall2, shell=True)
 
     # join barcode library and alignments
-    syscall3 = "join -1 1 -2 4 " + tmpdir + "/bclib.csv " + tmpdir + "/alns.csv " + " | awk 'BEGIN{OFS=\"\\t\"}{print $3,$4,$5,$2,$6,$7}' > " + tmpdir + "/bcalib.csv"
+    syscall3 = "cat " + args.bclib + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + " | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $3,$4,$5,$2,$6,$7}' | datamash --sort -g 1,2,3,4,6 count 4 | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $1,$2,$3,$4,$6,$5}' > " + args.outfile
+    # output columns: 'chrom', 'start', 'stop', 'bc', 'ndupes', 'strand'
     check_call(syscall3, shell=True)
-
-    # get alignments combined with barcodes
-    bcalib = pd.read_csv(
-        tmpdir + "/bcalib.csv",
-        sep="\t",
-        names=["chrom", "start", "stop", "bc", "score", "strand"])
 finally:
     logging.debug("removed tmpdir: " + tmpdir)
     rmtree(tmpdir)
-
-# fail if alignments given but combined library is empty
-if bcalib.empty:
-    raise Exception("ERROR: no common entries for alignments and barcode library found. Please check your input files.")
-
-# warn if not all alignments could be assigned a barcode
-n_bcalib = len(bcalib.index)
-if n_bcalib < n_alns:
-    logging.warning(
-        "{} of {} alignments could not be associated with a random barcode.".format(n_alns - n_bcalib, n_alns))
-
-# remove entries with barcodes that has uncalled base N
-bcalib_cleaned = bcalib.drop(bcalib[bcalib.bc.str.contains("N")].index)
-n_bcalib_cleaned = len(bcalib_cleaned)
-# if n_bcalib_cleaned < n_bcalib:
-#     msg = "{} of {} alignments had random barcodes containing uncalled bases and were dropped.".format(
-#         n_bcalib - n_bcalib_cleaned, n_bcalib)
-#     if n_bcalib_cleaned < (0.8 * n_bcalib):
-#         logging.warning(msg)
-#     else:
-#         logging.info(msg)
-
-# count and merge pcr duplicates
-# grouping sorts by keys, so the ouput will be properly sorted
-merged = bcalib_cleaned.groupby(['chrom', 'start', 'stop', 'strand', 'bc']).size().reset_index()
-merged.rename(columns={0: 'ndupes'}, copy=False, inplace=True)
-
-# write coordinates of crosslinking event alignments
-eventalnout = (open(args.outfile, "w") if args.outfile is not None else stdout)
-merged.to_csv(
-    eventalnout,
-    columns=['chrom', 'start', 'stop', 'bc', 'ndupes', 'strand'],
-    sep="\t", index=False, header=False)
-eventalnout.close()
--- a/merge_pcr_duplicates.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/merge_pcr_duplicates.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,15 @@
 <tool id="merge_pcr_duplicates.py" name="Merge PCR duplicates" version="0.2.0">
   <description>according to random barcode library.</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="4.1.0">gnu_awk</requirement>
+          <requirement type="package" version="8.22">gnu_coreutils</requirement>
+          <requirement type="package" version="1.0.6">datamash</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <version_command>python $__tool_directory__/merge_pcr_duplicates.py --version</version_command>
   <command interpreter="python"><![CDATA[merge_pcr_duplicates.py
@@ -15,7 +21,7 @@
 $positional_2
 #end if
 
-> $default]]></command>
+--outfile $default]]></command>
   <inputs>
     <param area="false" label="bed6 file containing alignments." name="positional_1" type="data" format="bed"/>
     <param area="false" label="fastaq barcode library." name="positional_2" type="data" format="fastq"/>
--- a/remove_tail.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/remove_tail.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,13 @@
 <tool id="remove_tail.py" name="Remove 3'-end nts" version="0.1.0">
   <description>from FASTQ</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="1.66">biopython</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <version_command>python $__tool_directory__/remove_tail.py --version</version_command>
   <command interpreter="python"><![CDATA[remove_tail.py
--- a/rm_spurious_events.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/rm_spurious_events.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -1,9 +1,14 @@
 <tool id="rm_spurious_events.py" name="Remove spurious" version="0.1.0">
   <description>crosslinking events</description>
+  <xml name="requirements">
+      <requirements>
+          <requirement type="package" version="8.22">gnu_coreutils</requirement>
+          <requirement type="package" version="5.18.1">perl</requirement>
+      </requirements>
+  </xml>
   <macros>
     <import>macros.xml</import>
   </macros>
-  <expand macro="requirements" />
   <expand macro="stdio" />
   <version_command>python $__tool_directory__/rm_spurious_events.py --version</version_command>
   <command interpreter="python"><![CDATA[rm_spurious_events.py
--- a/tool_dependencies.xml	Mon Feb 01 10:05:34 2016 -0500
+++ b/tool_dependencies.xml	Fri Feb 12 08:37:33 2016 -0500
@@ -6,9 +6,6 @@
     <!-- <package name="biopython" version="1.65">
         <repository name="package_biopython_1_65" owner="biopython"/>
     </package> -->
-    <package name="pandas" version="0.16">
-        <repository changeset_revision="67abd81bd3ac" name="package_python_2_7_pandas_0_16" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
     <!-- <package name="pybedtools" version="0.7.4">
         <repository name="package_python_2_7_pybedtools_0_7_4" owner="iuc"/>
     </package> -->
@@ -30,4 +27,8 @@
     <package name="perl" version="5.18.1">
         <repository changeset_revision="6f144bd786a8" name="package_perl_5_18" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
+    <package name="datamash" version="1.0.6">
+        <repository changeset_revision="df06a5e1ed6f" name="package_datamash_1_0_6" owner="agordon" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+
 </tool_dependency>