Mercurial > repos > veg > bioext
changeset 0:9ba5e79006b8 draft default tip
Uploaded
author | veg |
---|---|
date | Wed, 16 Nov 2016 13:19:23 -0500 |
parents | |
children | |
files | bioext/.shed.yml bioext/bioext_bealign.xml bioext/test-data/bealign-in-ref-1.fa bioext/test-data/bealign-in1.fa bioext/test-data/bealign-out1.bam |
diffstat | 5 files changed, 280 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bioext/.shed.yml Wed Nov 16 13:19:23 2016 -0500 @@ -0,0 +1,10 @@ +name: bioext +owner: veg +description: A few handy bioinformatics tools not already within BioPython +long_description: | + Long description goes here. +remote_repository_url: https://github.com/davebx/bioext-gx/ +homepage_url: https://pypi.python.org/pypi/biopython-extensions/ +type: unrestricted +categories: + - Next Gen Mappers
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bioext/bioext_bealign.xml Wed Nov 16 13:19:23 2016 -0500 @@ -0,0 +1,118 @@ +<?xml version="1.0"?> +<tool id="bioext_bealign" version="0.18" name="bealign"> + <description>align sequences to a reference using a codon alignment algorithm</description> + <requirements> + <requirement type="package" version="1.11.2">numpy</requirement> + <requirement type="package" version="1.68">biopython</requirement> + <requirement type="package" version="0.18.5">biopython-extensions</requirement> + <requirement type="package" version="3.5">python</requirement> + </requirements> + <stdio> + <exit_code range="1:"/> + </stdio> + <version_command>bealign --version</version_command> + <command><![CDATA[ + bealign + #if str($reference.input_alignment) == "history": + --reference "$reference.history_reference" + #else: + --reference "$reference.internal_reference" + #end if + "$input_fasta" "$output_alignments" + ]]></command> + <inputs> + <param name="input_fasta" label="Input FASTA" type="data" format="fasta"/> + <conditional name="reference"> + <param label="Input alignment" name="input_alignment" type="select"> + <option value="history">Use a FASTA file from the history</option> + <option value="builtin">Use a built-in reference</option> + </param> + <when value="history"> + <param format="fasta" label="Select the reference genome" name="history_reference" type="data"/> + </when> + <when value="builtin"> + <param name="internal_reference" type="select"> + <option value="HXB2_tat">HXB2_tat</option> + <option value="HXB2_nef">HXB2_nef</option> + <option value="HXB2_vpu">HXB2_vpu</option> + <option value="HXB2_vif">HXB2_vif</option> + <option value="HXB2_env">HXB2_env</option> + <option value="HXB2_vpr">HXB2_vpr</option> + <option value="HXB2_rev">HXB2_rev</option> + <option value="HXB2_pol">HXB2_pol</option> + <option value="HXB2_prrt">HXB2_prrt</option> + <option value="NL4-3_prrt">NL4-3_prrt</option> + <option value="HXB2_rt">HXB2_rt</option> + <option value="HXB2_gag">HXB2_gag</option> + <option value="HXB2_pr">HXB2_pr</option> + <option value="HXB2_int">HXB2_int</option> + </param> + </when> + </conditional> + <conditional name="options"> + <param label="Additional options" name="advanced" type="select"> + <option value="defaults">Use defaults</option> + <option value="advanced">Specify additional parameters</option> + </param> + <when value="defaults"/> + <when value="advanced"> + <param label="Alphabet" name="alphabet" type="select"> + <option value="codon">codon</option> + <option value="dna">dna</option> + <option value="amino">amino</option> + </param> + <param label="Score matrix" name="score" type="select"> + <option value="BLOSUM62">BLOSUM62</option> + <option value="PAM250">PAM250</option> + <option value="DNA80">DNA80</option> + <option value="HIV_BETWEEN_F">HIV_BETWEEN_F</option> + <option value="DNA70">DNA70</option> + <option value="DNA88">DNA88</option> + <option value="PAM200">PAM200</option> + <option value="DNA95">DNA95</option> + <option value="DNA65">DNA65</option> + </param> + <param name="reverse_complement" type="boolean" truevalue="--reverse-complement" falsevalue="" label="Also align the reverse complement of each query sequence"/> + <param name="keep_reference" type="boolean" truevalue="--keep-reference" falsevalue="" label="Include the reference sequence as the first one in the resulting BAM file"/> + </when> + </conditional> + </inputs> + <outputs> + <data format="bam" name="output_alignments"/> + </outputs> + <tests> + <test> + <param name="history_reference" value="bealign-in-ref-1.fa"/> + <param name="input_fasta" value="bealign-in1.fa"/> + <output file="bealign-out1.bam" ftype="bam" name="output_alignments"/> + </test> + </tests> + <help><![CDATA[ + -r REFERENCE, --reference REFERENCE + REFERENCE FASTA file or {HXB2_tat, HXB2_nef, HXB2_vpu, + HXB2_vif, HXB2_env, HXB2_vpr, HXB2_rev, HXB2_pol, + HXB2_prrt, NL4-3_prrt, HXB2_rt, HXB2_gag, HXB2_pr, + HXB2_int} + -e EXPECTED_IDENTITY, --expected-identity EXPECTED_IDENTITY + discard sequences that are insufficiently identical to + the reference + -a ALPHABET, --alphabet ALPHABET + perform an alignment using one of {amino, dna, codon} + [default=codon] + -m SCOREMATRIX, --score-matrix SCOREMATRIX + parameterize using one of {PAM250, DNA80, + HIV_BETWEEN_F, BLOSUM62, DNA70, DNA88, PAM200, DNA95, + DNA65} [default=BLOSUM62] + -D DISCARD, --discard DISCARD + discarded sequences are sent to DISCARD + -R, --reverse-complement + also align the reverse complement of each query + sequence, returning it if the alignment is superior + -S, --no-sort do NOT sort the resulting BAM file [the default is to + sort] + -q, --quiet do not print status update messages + -v, --version print version information and exit + -K, --keep-reference include the reference sequence as the first one in the + resulting BAM file [the default is to strip it] +]]></help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bioext/test-data/bealign-in-ref-1.fa Wed Nov 16 13:19:23 2016 -0500 @@ -0,0 +1,22 @@ +>CUSTOM_JP_REFERENCE +cctcaggtcactctttggcaacgacccctcgtcacaataaagataggggggcaactaaaggaagctctattagatacagg +agcagatgatacagtattagaagaaatgagtttgccaggaagatggaaaccaaaaatgatagggggaattggaggtttta +tcaaagtaagacagtatgatcagatactcatagaaatctgtggacataaagctataggtacagtattagtaggacctaca +cctgtcaacataattggaagaaatctgttgactcagattggttgcactttaaattttcccattagccctattgagactgt +accagtaaaattaaagccaggaatggatggcccaaaagttaaacaatggccattgacagaagaaaaaataaaagcattag +tagaaatttgtacagagatggaaaaggaagggaaaatttcaaaaattgggcctgaaaatccatacaatactccagtattt +gccataaagaaaaaagacagtactaaatggagaaaattagtagatttcagagaacttaataagagaactcaagacttctg +ggaagttcaattaggaataccacatcccgcagggttaaaaaagaaaaaatcagtaacagtactggatgtgggtgatgcat +atttttcagttcccttagatgaagacttcaggaagtatactgcatttaccatacctagtataaacaatgagacaccaggg +attagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatattccaaagtagcatgacaaaaatctt +agagccttttagaaaacaaaatccagacatagttatctatcaatacatggatgatttgtatgtaggatctgacttagaaa +tagggcagcatagaacaaaaatagaggagctgagacaacatctgttgaggtggggacttaccacaccagacaaaaaacat +cagaaagaacctccattcctttggatgggttatgaactccatcctgataaatggacagtacagcctatagtgctgccaga +aaaagacagctggactgtcaatgacatacagaagttagtggggaaattgaattgggcaagtcagatttacccagggatta +aagtaaggcaattatgtaaactccttagaggaaccaaagcactaacagaagtaataccactaacagaagaagcagagcta +gaactggcagaaaacagagagattctaaaagaaccagtacatggagtgtattatgacccatcaaaagacttaatagcaga +aatacagaagcaggggcaaggccaatggacatatcaaatttatcaagagccatttaaaaatctgaaaacaggaaaatatg +caagaatgaggggtgcccacactaatgatgtaaaacaattaacagaggcagtgcaaaaaataaccacagaaagcatagta +atatggggaaagactcctaaatttaaactgcccatacaaaaggaaacatgggaaacatggtggacagagtattggcaagc +cacctggattcctgagtgggagtttgttaatacccctcccttagtgaaattatggtaccagttagagaaagaacccatag +taggagcagaaaccttc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bioext/test-data/bealign-in1.fa Wed Nov 16 13:19:23 2016 -0500 @@ -0,0 +1,130 @@ +>Z|JP|K03455|2036 +aaatatgttgactcagattggttgtactttaaattttccaattagtcctattgaaactgtaccagtaaaattgaagccag +gaatggatggcccaagggttaaacaatggccattgacagaagaaaaaataaaggcgttaacagaaatttgtacagaaatg +gaaagggaaggaaaaatttcaaaaattgggcctgaaaatccatacaatactccaatatttgccataaagaaaaaagatag +tactaaatggagaaaattagtagatttcagagaactcaataaaagaactcaagacttctgggaggtccaactaggaatac +ctcatcccgcgggattaaaaaagaaaaaatcagtaacagtactagatgtgggggatgcatatttttcagttcccttagat +aaagactttagaaagcatactgcattcactatacctagtgtaaataatgaaacaccggggattagatatcagtacaatgt +gcttccacagggatggaaaggatcaccagcaatatttcaggctagcatgacaaaaatcttagaaccctttagaacaaaca +atccagagatggtgatttaccaatatatggatgatttatatgtaggatctgacttagagataggacagcatagagaaaaa +atacaggagttgagagaacatctactgaaatggggatttaccacaccagataaaaaa +>Z|JP|K03455|2036|DUPLICATE 2 +cctcaaatcactctttggcaacgacccttagttacagcaaaaataggggaacagctaatagaagccctattagacacagg +agcagatgatacagtattagaagaaataaatttaccagggaaatggaaaccaaaaatgatagggggaattggaggtttta +tcaaagtaagacagtatgatcagatacctatagaaatctgtggacataaaactataggtacggtattaataggacctaca +cctgtcaacataattggaagaaatctgttgactcagcttggctgcactttaaattttcccattagccctattgaaaccgt +accagtaaaattaaagccaggaatggatggcccaaaggttaaacaatggccattgacagaagaaaaaataaaagcattaa +cagaaatttgtacagaaatggaaagggaaggaaaaatttcaaaaattgggcctgagaatccatacaatactccaatattt +gccataaagaaaaaagatggtactaaatggagaaaattagtagatttcagagaacttaataaaagaactcaagacttctg +ggaggtccaactaggaatacctcatcccgcaggattaaaaaagaaaaaatcagtaacagtactagatgtgggggatgcat +atttttcagttcccttagataaagactttagaaagtatactgcattcactatacctagtataaataatgaaacaccgggg +attagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatatttcaggctagcatgacaaaaatctt +agagccctttagagcaaaaaatccagagatggtgatttaccaatacatggatgatttatatgtaggatctgacttagaga +tagggcagcatagaacaaaaatacaagagttgagagaacatctactgaagtggggattaaccacaccagataaaaaacat +cagaaagaacctccatttctttggatgggatatgaactccatcctgacaaatggacagtccagcctataaagctgccaga +aaaagacagctggactgtcaatgatatacagaaattagtgggaaagctaaattgggcaagtcagatttatgcaggaatta +aagtaaggcarctgtgtaaactcctcaggggagccaaagcactaacagagatagtaacactgactgaggaagcagaatta +gaattagcagaaaacagagaaattctaaaagaacctgttcatggrgtatattatgatccattaaaagacctagtagcaga +aatacagaaacaagggcaagaccaatggacatatcaaatttatcaagagccatttaaaaatttaaaaacaggaaaatatg +caaaaaagaagtctgcccacactaatgatgtaaaacaattaacagaggtagtgcaaaaggtgtctacagaaagcatagta +atatggggaaagacccctaaatttagactacccatacaaaaagaaacatgggaagca +>Z|JP|K03455|2036|DUPLICATE 3 +cctcaaatcactctttggcaacgacccttagttacagcaaaaataggggaacagctaatagaagccctattagacacagg +agcagatgatacagtattagaagaaataaatttaccagggaaatggaaaccaaaaatgatagggggaattggaggtttta +tcaaagtaagacagtatgatcagatacctatagaaatctgtggacataaaactataggtacggtattagtaggacctaca +cctgtcaacataattggaagaaatctgttgactcagcttggctgcactttaaattttcccattagccctattgaaaccgt +accagtaaaattaaagccaggaatggatggcccgaaagttaaacaatggccattgacagaagaaaaaataaaagcattag +tagaaatttgtacagaaatggaaaaggaagggaaaatttcaaaaattgggcctgaaaatccatacaatactccagtattt +gccataaagaaaaaggacagtactaaatggagaaaattagtagatttcagagaacttaataagagaactcaagacttctg +ggaagttcaattagggataccacatcccgcagggttaaaaaagaaaaaatcagtaacagtactggatgtgggtgatgcat +atttttcagttcccttagataaagacttccggaagtatactgcattcactatacctagtacaaacaatgagacaccaggg +attagatatcaatacaatgtgctaccacagggctggaaaggatcaccagcaatatttcaggctagcatgacaaaaatctt +agagccctttagagcaaaaaatccagagatggtgatttaccaatacatggatgatttatatgtaggatctgacttagaga +tagggcagcatagaacaaaaatacaagagttgagagaacatctactgaagtggggattaaccacaccagataaaaaacat +cagaaagaacctccatttctttggatgggatatgaactccatcctgacaaatggacagtccagcctataaagctgccaga +aaaagacagctggactgtcaatgatatacagaaattagtgggaaagctaaattgggcaagtcagatttatgcaggaatta +aagtaaggcaactgtgtaaactcctcaggggagccaaagcactaacagagatagtaacactgactgaggaagcagaatta +gaattagcagaaaacagagaaattctaaaagaacctgttcatggagtatattatgatccatcaaaagacctagtagcaga +aatacagaaacaagggcaagaccaatggacatatcaaatttatcaagagccatttaaaaatttaaaaacaggaaaatatg +caaaaaagaagtctgcccacactaatgatgtaaaacaattaacagaggtagtgcaaaaggtgtctacagaaagcatagta +atatggggaaagacccctaaatttagactacccatacaaaaagaaacatgggaagca +>Z|JP|K03455|2036|DUPLICATE 4 +cctcaaatcactctttggcaacgacccttagttacagcaaaaataggggaacagctaatagaagccctattagacacagg +agcagatgatacagtattagaagaaataaatttaycagggaaatggaaaccaaaaatgatagggggaattggaggtttta +tcaaagtaagacagtatgatcagatacctatagaaatctgtggacataaaactataggtacgatattaataggacctaca +cctgtcaacataattggaagaaatctgttgactcagcttggctgcactttaaattttcccattagccctattgaaaccgt +accagtaaaattaaagccaggaatggatggcccgaaagttaaacaatggccattgacagaagaaaaaataaaagcattag +tagaaatttgtacagaaatggaaaaggaagggaaaatttcaaaaattgggcctgaaaatccatacaatactccagtattt +gccataaagaaaaaggacagtactaaatggagaaaattagtagatttcagagaacttaataagagaactcaagacttctg +ggaagttcaattagggataccacatcccgcagggttaaaaaagaaaaaatcagtaacagtactagatgtgggggatgcat +atttttcagttcctttagataaagactttagaaagtatactgcattcactatacctagtataaataatgaaacaccgggg +attagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatatttcaggctagcatgacaaaaatctt +agagccctttagagcaaaaaatccagagatggtgatttaccaatacatggatgatttatatgtaggatctgacttagaga +tagggcagcatagaacaaaaatacaagagttgagagaacatctactgaagtggggattaaccacaccagataaaaaacat +cagaaagaacctccatttctttggatgggatatgaactccatcctgacaaatggacagtccagcctataaagctgccaga +aaaagacagctggactgtcaatgatatacagaaattagtgggaaagctaaattgggcaagtcagatttatgcaggaatta +aagtaaggcaactgtgtaaactcctcaggggagccaaagcactaacagagatagtaacactgactgaggaagcagaatta +gaattagcagaaaacagagaaattctaaaagaacctgttcatggagtatattatgatccatcaaaagacctagtagcaga +aatacagaaacaagggcaagaccaatggacatatcaaatttatcaagagccatttaaaaatttaaaaacaggaaaatatg +caaaaaagaagtctgcccacactaatgatgtaaaacaattaacagaggtagtgcaaaaggtgtctacagaaagcatagta +atatggggaaagacccctaaatttagactacccatacaaaaagaaacatgggaagca +>Z|JP|K03455|2036|DUPLICATE 5 +cctcaaatcactctttggcaacgacccttagttacagcaaaaataggggaacagctaatagaagccctattagacacagg +agcagatgatacagtattagaagaaataaatttaccagggaaatggaaaccaaaaatgatagggggaattggaggtttta +tcaaggtaagacaatatgatcagatacttatagaaatttgtgggaaaaaggctgtaggtacagtgttagtaggacctaca +cctgtcaacataattggacgaaatatgttgactcagattgggtgtactttaaatcttccaattagtcctattgaaactgt +accagtaaaattgaagccaggaatggatggcccaaaggttaaacaatggccattgacagaagaaaaaataaaagcattaa +cagaaatttgtacagaaatggaaagggaaggaaaaatttcaaaaattgggcctgagaatccatacaatactccaatattt +gccataaagaaaaaagatggtactaaatggagaaaattagtagatttcagagaacttaataaaagaactcaagacttctg +ggaggtccaactaggaatacctcatcccgcaggattaaaaaagaaaaaatcagtaacagtactagatgtgggggatgcat +atttttcagttcccttagataaagactttagaaagtatactgcattcactatacctagtataaataatgaaacaccgggg +attagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatatttcaggctagcatgacaaaaatctt +agagccctttagagcaaaaaatccagagatggtgatttaccaatacatggatgatttatatgtaggatctgacttagaga +tagggcagcatagaacaaaaatacaagagttgagagaacatctactgaagtggggattaaccacaccagataaaaaacat +cagaaagaacctccatttctttggatgggatatgaactccatcctgacaaatggacagtccagcctataaagctgccaga +aaaagacagctggactgtcaatgatatacagaaattagtgggaaagctaaattgggcaagtcagatttatgcaggaatta +aagtaaggcaactgtgtaaactcctcaggggagccaaagcactaacagagatagtaacactgactgaggaagcagaatta +gaattagcagaaaacagagaaattctaaaagaacctgttcatggagtatattatgatccatcaaaagacctagtagcaga +aatacagaaacaagggcaagac +>Z|JP|K03455|2036|DUPLICATE 6 +cctcaaatcactctttggcaacgacccttagttacagcaaaaataggggaacagctaatagaagccctattagacacagg +agcagatgatacagtattagaagaaataaatttaccagggaaatggaaaccaaaaatgatagggggaattggaggtttta +tcaaagtaagacaatatgatcagatacttatagaaatttgtgggaaaaaggctgtaggtacagtgttagtaggacctaca +cctgtcaacataattggacgaaatatgttgactcagattgggtgtactttaaattttccaattagtcctattgaaactgt +accagtaaaattgaagccaggaatggatggcccaaaggttaaacaatggccattgacagaagaaaaaataaaagcattaa +cagaaatttgtacagaaatggaaagggaaggaaaaatttcaaaaattgggcctgagaatccatacaatactccaatattt +gccataaagaaaaaagatggtactaaatggagaaaattagtagatttcagagaacttaataaaagaactcaagacttctg +ggaggtccaactaggaatacctcatcccgcaggattaaaaaagaaaaaatcagtaacagtactagatgtgggggatgcat +atttttcagttcccttagataaagactttagaaagtatactgcattcactatacctagtataaataatgaaacaccgggg +attagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatatttcaggctagcatgacaaaaatctt +agagccctttagagcaaaaaatccagagatggtgatttaccaatacatggatgatttatatgtaggatctgacttagaga +tagggcagcatagaacaaaaatacaagagttgagagaacatctactgaagtggggattaaccacaccagataaaaaacat +cagaaagaacctccatttctttggatgggatatgaactccatcctgacaaatggacagtccagcctataaagctgccaga +aaaagacagctggactgtcaatgatatacagaaattagtgggaaagctaaattgggcaagtcagatttatgcaggaatta +aagtaaggcaactgtgtaaactcctcaggggagccaaagcactaacagagatagtaacactgactgaggaagcagaatta +gaattagcagaaaacagagaaattctaaaagaacctgttcatggagtatattatgatccatcaaaagacctagtagcaga +aatacagaaacaagggcaagaccaatggacatatcaaatttatcaagagccatttaaaaatttaaaaacaggaaaatatg +caaaaaagaagtctgcccacactaatgatgtaaaacaattaacagaggtagtgcaaaaggtgtctacagaaagcatagta +atatggggaaagacccctaaatttagactacccatacaaaaagaaacatgggaagca +>Z|JP|K03455|2036|DUPLICATE 7 +cctcaggtcactctttggcaacgacccctcgtcacaataaagataggggggcaactaaaggaagctctattagatacagg +agcagatgatacagtattagaagaaatgagtttgccaggaagatggaaaccaaaaatgatagggggaattggaggtttta +tcaaagtaagacagtatgatcagatactcatagaaatctgtggacataaagctataggtacagtattagtaggacctaca +cctgtcaacataattggaagaaatctgttgactcagattggttgcactttaaattttcccattagccctattgagactgt +accagtaaaattaaagccaggaatggatggcccaaaagttaaacaatggccattgacagaagaaaaaataaaagcattag +tagaaatttgtacagagatggaaaaggaagggaaaatttcaaaaattgggcctgaaaatccatacaatactccagtattt +gccataaagaaaaaagacagtactaaatggagaaaattagtagatttcagagaacttaataagagaactcaagacttctg +ggaagttcaattaggaataccacatcccgcagggttaaaaaagaaaaaatcagtaacagtactggatgtgggtgatgcat +atttttcagttcccttagatgaagacttcaggaagtatactgcatttaccatacctagtataaacaatgagacaccaggg +attagatatcagtacaatgtgcttccacagggatggaaaggatcaccagcaatattccaaagtagcatgacaaaaatctt +agagccttttagaaaacaaaatccagacatagttatctatcaatacatggatgatttgtatgtaggatctgacttagaaa +tagggcagcatagaacaaaaatagaggagctgagacaacatctgttgaggtggggacttaccacaccagacaaaaaacat +cagaaagaacctccattcctttggatgggttatgaactccatcctgataaatggacagtacagcctatagtgctgccaga +aaaagacagctggactgtcaatgacatacagaagttagtggggaaattgaattgggcaagtcagatttacccagggatta +aagtaaggcaattatgtaaactccttagaggaaccaaagcactaacagaagtaataccactaacagaagaagcagagcta +gaactggcagaaaacagagagattctaaaagaaccagtacatggagtgtattatgacccatcaaaagacttaatagcaga +aatacagaagcaggggcaaggccaatggacatatcaaatttatcaagagccatttaaaaatctgaaaacaggaaaatatg +caagaatgaggggtgcccacactaatgatgtaaaacaattaacagaggcagtgcaaaaaataaccacagaaagcatagta +atatggggaaagactcctaaatttaaactgcccatacaaaaggaaacatgggaaacatggtggacagagtattggcaagc +cacctggattcctgagtgggagtttgttaatacccctcccttagtgaaattatggtaccagttagagaaagaacccatag +taggagcagaaaccttc