Mercurial > repos > peterjc > fastq_pair_names
changeset 2:63c17fd8fcc7 draft
v0.0.4 explicit galaxy_sequence_utils dependency
author | peterjc |
---|---|
date | Wed, 01 Feb 2017 06:31:10 -0500 |
parents | 0d04ffb66e40 |
children | bd97e4566eef |
files | tools/fastq_pair_names/README.rst tools/fastq_pair_names/fastq_pair_names.py tools/fastq_pair_names/fastq_pair_names.xml tools/fastq_pair_names/tool_dependencies.xml |
diffstat | 4 files changed, 40 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/fastq_pair_names/README.rst Thu May 21 10:54:37 2015 -0400 +++ b/tools/fastq_pair_names/README.rst Wed Feb 01 06:31:10 2017 -0500 @@ -1,7 +1,7 @@ Galaxy tool to extract FASTQ paired read names ============================================== -This tool is copyright 2014-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2014-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -55,6 +55,8 @@ v0.0.2 - Tool definition now embeds citation information. v0.0.3 - Reorder XML elements (internal change only). - Planemo for Tool Shed upload (``.shed.yml``, internal change only). +v0.0.4 - Explicit dependency on ``galaxy_sequence_utils``. + - Minor internal changes to Python script for error reporting & style. ======= ====================================================================== @@ -68,12 +70,12 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_pair_names/ + $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_pair_names/ ... or:: - $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_pair_names/ + $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_pair_names/ ... To just build and check the tar ball, use::
--- a/tools/fastq_pair_names/fastq_pair_names.py Thu May 21 10:54:37 2015 -0400 +++ b/tools/fastq_pair_names/fastq_pair_names.py Wed Feb 01 06:31:10 2017 -0500 @@ -16,16 +16,12 @@ import os import sys import re -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter +from galaxy_utils.sequence.fastq import fastqReader if "-v" in sys.argv or "--version" in sys.argv: print "Version 0.0.1" sys.exit(0) -def sys_exit(msg, err=1): - sys.stderr.write(msg.rstrip() + "\n") - sys.exit(err) - msg = """Expects at least 3 arguments: - Pair names tabular output filename @@ -34,21 +30,21 @@ """ if len(sys.argv) < 3: - sys_exit(msg) + sys.exit(msg) output_pairs = sys.argv[1] output_nonpairs = sys.argv[2] input_fastq_filenames = sys.argv[3:] -#Cope with three widely used suffix naming convensions, -#Illumina: /1 or /2 -#Forward/revered: .f or .r -#Sanger, e.g. .p1k and .q1k -#See http://staden.sourceforge.net/manual/pregap4_unix_50.html +# Cope with three widely used suffix naming convensions, +# Illumina: /1 or /2 +# Forward/revered: .f or .r +# Sanger, e.g. .p1k and .q1k +# See http://staden.sourceforge.net/manual/pregap4_unix_50.html re_f = re.compile(r"(/1|\.f|\.[sfp]\d\w*)$") re_r = re.compile(r"(/2|\.r|\.[rq]\d\w*)$") -#assert re_f.match("demo/1") +# assert re_f.match("demo/1") assert re_f.search("demo.f") assert re_f.search("demo.s1") assert re_f.search("demo.f1k") @@ -74,7 +70,7 @@ assert not re_illumina_r.match("@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 1:N:0:TGNCCA") count = 0 -pairs = set() # Will this scale OK? +pairs = set() # Will this scale OK? forward = 0 reverse = 0 neither = 0 @@ -84,25 +80,25 @@ for input_fastq in input_fastq_filenames: if not os.path.isfile(input_fastq): - sys_exit("Missing input FASTQ file %r" % input_fastq) + sys.exit("Missing input FASTQ file %r" % input_fastq) in_handle = open(input_fastq) - #Don't care about the FASTQ type really... + # Don't care about the FASTQ type really... for record in fastqReader(in_handle, "sanger"): count += 1 - name = record.identifier.split(None,1)[0] - assert name[0]=="@", record.identifier #Quirk of the Galaxy parser + name = record.identifier.split(None, 1)[0] + assert name[0] == "@", record.identifier # Quirk of the Galaxy parser name = name[1:] is_forward = False suffix = re_f.search(name) if suffix: - #============ - #Forward read - #============ + # ============ + # Forward read + # ============ template = name[:suffix.start()] is_forward = True elif re_illumina_f.match(record.identifier): - template = name #No suffix + template = name # No suffix is_forward = True if is_forward: forward += 1 @@ -113,13 +109,13 @@ is_reverse = False suffix = re_r.search(name) if suffix: - #============ - #Reverse read - #============ + # ============ + # Reverse read + # ============ template = name[:suffix.start()] is_reverse = True elif re_illumina_r.match(record.identifier): - template = name #No suffix + template = name # No suffix is_reverse = True if is_reverse: reverse += 1 @@ -127,12 +123,12 @@ pairs.add(template) out_pairs.write(template + "\n") else: - #=========================== - #Neither forward nor reverse - #=========================== + # =========================== + # Neither forward nor reverse + # =========================== out_nonpairs.write(name + "\n") neither += 1 - in_handle.close + in_handle.close() out_pairs.close() out_nonpairs.close()
--- a/tools/fastq_pair_names/fastq_pair_names.xml Thu May 21 10:54:37 2015 -0400 +++ b/tools/fastq_pair_names/fastq_pair_names.xml Wed Feb 01 06:31:10 2017 -0500 @@ -1,5 +1,8 @@ -<tool id="fastq_pair_names" name="Identify paired reads in FASTQ files" version="0.0.3"> +<tool id="fastq_pair_names" name="Identify paired reads in FASTQ files" version="0.0.4"> <description>using the read name suffices</description> + <requirements> + <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement> + </requirements> <stdio> <!-- Anything other than zero is an error --> <exit_code range="1:" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/fastq_pair_names/tool_dependencies.xml Wed Feb 01 06:31:10 2017 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="galaxy_sequence_utils" version="1.0.1"> + <repository changeset_revision="c38bd3fe9da6" name="package_galaxy_sequence_utils_1_0_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>