Repository 'khmer'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/crusoe/khmer

Changeset 46:471f3e085664 (2014-07-12)
Previous changeset 45:0b238b083f77 (2014-07-12) Next changeset 47:96da0e8a98e4 (2014-07-12)
Commit message:
add extract-partitions
added:
extract-partitions.xml
test-data/random-20-a.fa.part
b
diff -r 0b238b083f77 -r 471f3e085664 extract-partitions.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract-partitions.xml Sat Jul 12 12:10:02 2014 -0400
[
@@ -0,0 +1,75 @@
+<tool id="gedlab-khmer-extract-partitions"
+ name="Extract partitions"
+ version="1.1-1"
+ force_history_refresh="true">
+
+ <description>
+ Separate sequences that are annotated with partitions into grouped files.
+ </description>
+        <macros>
+                <token name="@BINARY@">extract-partitions.py</token>
+                <import>macros.xml</import>
+        </macros>
+        <expand macro="requirements" />
+ <command>
+mkdir -p output ;
+cd output ;
+@BINARY@
+--max-size $max_size
+--min-partition-size $min_partition_size
+$output_unasssigned
+output
+#for input in $inputs
+$input
+#end for ;
+mv output.dist $distribution
+ </command>
+
+ <inputs>
+ <expand macro="input_sequences_filenames" />
+ <param name="max_size"
+ type="integer"
+ label="Max group size"
+ help="No more than this many number of sequences will be stored in each output"
+ value="1000000" />
+ <param name="min_partition_size"
+ type="integer"
+ label="Min partition size"
+ help="The minimum partition size worth keeping"
+ value="5" />
+ <param name="output_unassigned"
+ type="boolean"
+ checked="false"
+ truevalue="--output-unassigned"
+ falsevalue=""
+ label="Output unassigned sequences" />
+ </inputs>
+ <outputs>
+ <data name="distribution"
+ format="text"
+ label="Partition size distribution from ${tool.name}" />
+ <expand macro="output_sequences" />
+ </outputs>
+  <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+ <exit_code range="1:"
+ level="fatal" />
+ </stdio>
+
+ <tests>
+ <test interactor="api">
+ <param name="inputs" value="random-20-a.fa.part"/>
+ <output name="distribution">
+ <assert_contents>
+ <has_line_matching expression="'99 1 1 99" />
+ </assert_contents>
+ </output>
+ </test>
+
+ </tests>
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!--
+ <help>
+ </help>
+    -->    
+</tool>
b
diff -r 0b238b083f77 -r 471f3e085664 test-data/random-20-a.fa.part
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa.part Sat Jul 12 12:10:02 2014 -0400
b
@@ -0,0 +1,198 @@
+>35 2
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16 3
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46 3
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40 2
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33 3
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98 0
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17 2
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89 3
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30 2
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82 2
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60 2
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83 2
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12 3
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85 2
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2 2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45 3
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11 4
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39 4
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26 2
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75 2
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81 2
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97 2
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13 2
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92 2
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56 2
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61 2
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96 2
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31 2
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29 2
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54 2
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0 2
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90 2
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34 2
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43 2
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8 2
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37 2
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51 2
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32 2
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78 2
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18 2
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36 2
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53 2
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24 2
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7 2
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9 2
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47 2
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62 2
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79 2
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48 2
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66 2
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25 2
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5 2
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72 2
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76 2
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69 2
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87 2
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27 2
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77 2
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95 2
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63 2
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38 2
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20 2
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88 2
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49 2
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91 2
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86 2
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42 2
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70 2
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19 2
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84 2
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52 2
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71 2
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93 2
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58 2
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22 2
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50 2
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21 2
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73 2
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68 2
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23 2
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94 2
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10 2
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41 2
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80 2
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64 2
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57 2
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1 2
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55 2
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67 2
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14 2
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15 2
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59 2
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28 2
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74 2
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4 2
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65 2
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6 2
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44 2
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3 2
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG