Mercurial > repos > peterjc > seq_filter_by_mapping
changeset 17:ab0e7967c9dc draft default tip
planemo upload for repository https://github.com/peterjc/pico_galaxy/tree/master/tools/seq_filter_by_mapping commit d67596914a7bbe183851437eaafe8c7305877e5a-dirty
author | peterjc |
---|---|
date | Fri, 22 Feb 2019 10:23:54 -0500 |
parents | 1c45e8e10434 |
children | |
files | tools/seq_filter_by_mapping/seq_filter_by_mapping.py tools/seq_filter_by_mapping/tool_dependencies.xml |
diffstat | 2 files changed, 88 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.py Fri Nov 09 11:07:51 2018 -0500 +++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.py Fri Feb 22 10:23:54 2019 -0500 @@ -40,26 +40,52 @@ Multiple SAM/BAM mapping files may be given. """ parser = OptionParser(usage=usage) -parser.add_option('-i', '--input', dest='input', - default=None, help='Input sequences filename', - metavar="FILE") -parser.add_option('-f', '--format', dest='format', - default=None, - help='Input sequence format (e.g. fasta, fastq, sff)') -parser.add_option('-p', '--positive', dest='output_positive', - default=None, - help='Output filename for mapping reads', - metavar="FILE") -parser.add_option('-n', '--negative', dest='output_negative', - default=None, - help='Output filename for non-mapping reads', - metavar="FILE") -parser.add_option("-m", "--pair-mode", dest="pair_mode", - default="lax", - help="How to treat paired reads (lax or strict, default lax)") -parser.add_option("-v", "--version", dest="version", - default=False, action="store_true", - help="Show version and quit") +parser.add_option( + "-i", + "--input", + dest="input", + default=None, + help="Input sequences filename", + metavar="FILE", +) +parser.add_option( + "-f", + "--format", + dest="format", + default=None, + help="Input sequence format (e.g. fasta, fastq, sff)", +) +parser.add_option( + "-p", + "--positive", + dest="output_positive", + default=None, + help="Output filename for mapping reads", + metavar="FILE", +) +parser.add_option( + "-n", + "--negative", + dest="output_negative", + default=None, + help="Output filename for non-mapping reads", + metavar="FILE", +) +parser.add_option( + "-m", + "--pair-mode", + dest="pair_mode", + default="lax", + help="How to treat paired reads (lax or strict, default lax)", +) +parser.add_option( + "-v", + "--version", + dest="version", + default=False, + action="store_true", + help="Show version and quit", +) options, args = parser.parse_args() @@ -114,7 +140,7 @@ if match: # Use the fact this is a suffix, and regular expression will be # anchored to the end of the name: - return name[:match.start()] + return name[: match.start()] else: # Nothing to do return name @@ -128,19 +154,19 @@ assert clean_name("baz.q2") == "baz" mapped_chars = { - '>': '__gt__', - '<': '__lt__', - "'": '__sq__', - '"': '__dq__', - '[': '__ob__', - ']': '__cb__', - '{': '__oc__', - '}': '__cc__', - '@': '__at__', - '\n': '__cn__', - '\r': '__cr__', - '\t': '__tc__', - '#': '__pd__', + ">": "__gt__", + "<": "__lt__", + "'": "__sq__", + '"': "__dq__", + "[": "__ob__", + "]": "__cb__", + "{": "__oc__", + "}": "__cc__", + "@": "__at__", + "\n": "__cn__", + "\r": "__cr__", + "\t": "__tc__", + "#": "__pd__", } @@ -155,11 +181,13 @@ if magic == b"\x1f\x8b\x08\x04": # Presumably a BAM file then... # Call samtools view, don't need header so no -h added: - child = subprocess.Popen(["samtools", "view", filename], - universal_newlines=True, - stdin=None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + child = subprocess.Popen( + ["samtools", "view", filename], + universal_newlines=True, + stdin=None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) handle = child.stdout else: # Presumably a SAM file... @@ -180,7 +208,7 @@ ids.add(qname) elif pair_mode == "strict": # For paired reads, require BOTH be mapped. - if (flag & 0x4): + if flag & 0x4: # This is unmapped, ignore it pass elif not (flag & 0x1): @@ -194,8 +222,11 @@ stdout, stderr = child.communicate() assert child.returncode is not None if child.returncode: - msg = "Error %i from 'samtools view %s'\n%s" % (child.returncode, - filename, stderr) + msg = "Error %i from 'samtools view %s'\n%s" % ( + child.returncode, + filename, + stderr, + ) sys.exit(msg.strip(), child.returncode) else: handle.close() @@ -224,8 +255,7 @@ no_id_warned = False while True: if line[0] != ">": - raise ValueError( - "Records in Fasta files should start with '>' character") + raise ValueError("Records in Fasta files should start with '>' character") try: id = line[1:].split(None, 1)[0] except IndexError: @@ -290,6 +320,7 @@ def fastq_filter(in_file, pos_file, neg_file, wanted): """FASTQ filter.""" from Bio.SeqIO.QualityIO import FastqGeneralIterator + pos_count = neg_count = 0 handle = open(in_file, "r") if pos_file is not None and neg_file is not None: @@ -357,13 +388,17 @@ out_handle = open(pos_file, "wb") writer = SffWriter(out_handle, xml=manifest) in_handle.seek(0) # start again after getting manifest - pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted) + pos_count = writer.write_file( + rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted + ) out_handle.close() if neg_file is not None: out_handle = open(neg_file, "wb") writer = SffWriter(out_handle, xml=manifest) in_handle.seek(0) # start again - neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted) + neg_count = writer.write_file( + rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted + ) out_handle.close() # And we're done in_handle.close() @@ -379,7 +414,9 @@ else: sys.exit("Unsupported file type %r" % seq_format) -pos_count, neg_count = sequence_filter(in_file, out_positive_file, out_negative_file, ids) +pos_count, neg_count = sequence_filter( + in_file, out_positive_file, out_negative_file, ids +) print("%i mapped and %i unmapped reads." % (pos_count, neg_count)) fraction = float(pos_count) * 100.0 / float(pos_count + neg_count) print("In total %i reads, of which %0.1f%% mapped." % (pos_count + neg_count, fraction))
--- a/tools/seq_filter_by_mapping/tool_dependencies.xml Fri Nov 09 11:07:51 2018 -0500 +++ b/tools/seq_filter_by_mapping/tool_dependencies.xml Fri Feb 22 10:23:54 2019 -0500 @@ -1,9 +1,9 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> <package name="biopython" version="1.67"> - <repository changeset_revision="fc45a61abc2f" name="package_biopython_1_67" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="fc45a61abc2f" name="package_biopython_1_67" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu"/> </package> <package name="samtools" version="0.1.19"> - <repository changeset_revision="a0ab0fae27e5" name="package_samtools_0_1_19" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="a0ab0fae27e5" name="package_samtools_0_1_19" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file