changeset 5:e841de88235c draft

Uploaded
author rnateam
date Fri, 23 Oct 2015 07:28:06 -0400
parents d03c001f7c73
children 1bfc5a5de843
files extract_bcs.py extract_bcs.xml
diffstat 2 files changed, 21 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/extract_bcs.py	Thu Oct 22 10:42:54 2015 -0400
+++ b/extract_bcs.py	Fri Oct 23 07:28:06 2015 -0400
@@ -6,9 +6,9 @@
 By default output is written to stdout.
 
 Example usage:
-- move nucleotides at positions 1-3 and 6-7 to FASTQ header and write to file
-  output.fastq:
-fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq
+- remove barcode nucleotides at positions 1-3 and 6-7 from FASTQ; write modified
+  FASTQ entries to output.fastq and barcode nucleotides to barcodes.fa:
+fastq_extract_barcodes.py barcoded_input.fastq XXXNNXX --out output.fastq --bcs barcodes.fa
 """
 
 epilog = """
@@ -50,6 +50,12 @@
     dest="out_bc_fasta",
     help="If set, barcodes are written to this file in FASTA format.")
 parser.add_argument(
+    "-a", "--add-bc-to-fastq",
+    dest="add_to_head",
+    help="If set, append extracted barcodes to the FASTQ headers.",
+    action="store_true"
+)
+parser.add_argument(
     "-v", "--verbose",
     help="Be verbose.",
     action="store_true")
@@ -60,7 +66,7 @@
 parser.add_argument(
     '--version',
     action='version',
-    version='0.1.0')
+    version='1.0.0')
 
 args = parser.parse_args()
 if args.debug:
@@ -153,7 +159,10 @@
         continue
 
     # write barcode nucleotides into header if requested
-    annotated_header = header + " " + barcode
+    if args.add_to_head:
+        annotated_header = " ".join([header, barcode])
+    else:
+        annotated_header = header
     samout.write("@%s\n%s\n+\n%s\n" % (annotated_header, new_seq, new_qual))
 
     # write barcode to fasta if requested
--- a/extract_bcs.xml	Thu Oct 22 10:42:54 2015 -0400
+++ b/extract_bcs.xml	Fri Oct 23 07:28:06 2015 -0400
@@ -1,4 +1,4 @@
-<tool id="extract_bcs.py" name="extract_bcs.py" version="0.1.0">
+<tool id="extract_bcs.py" name="extract_bcs.py" version="1.0.0">
   <description>Extract barcodes using pattern.</description>
   <macros>
     <import>macros.xml</import>
@@ -16,19 +16,23 @@
 $positional_2
 #end if
 
+--bcs $extractedbcs
+
 > $default]]></command>
   <inputs>
     <param area="false" label="Barcoded sequences." name="positional_1" type="data" format="fastq"/>
     <param area="false" label="Pattern of barcode nucleotides starting at 5'-end. X positions will be moved to the header, N positions will be kept." name="positional_2" type="text"/>
   </inputs>
   <outputs>
-    <data hidden="false" name="default" format="fastq" />
+    <data hidden="false" name="default" format="fastq"/>
+    <data name="extractedbcs" format="fasta"/>
   </outputs>
   <tests>
     <test>
       <param name="positional_1" value="reads.fastq"/>
       <param name="positional_2" value="XXXNNXXX"/>
-      <output name="default" file="result.fastq"/>
+      <output name="default" file="result_original_head.fastq"/>
+      <output name="extractedbcs" file="result.fa"/>
     </test>
   </tests>
   <help><![CDATA[