Repository 'get_orfs_or_cdss'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/peterjc/get_orfs_or_cdss

Changeset 14:65d76ca44cd2 (2015-04-06)
Previous changeset 13:72bc0335b792 (2014-11-28) Next changeset 15:2eb9e86f5f35 (2015-05-12)
Commit message:
Uploaded v0.1.0, adds BED output etc
modified:
tools/get_orfs_or_cdss/README.rst
tools/get_orfs_or_cdss/get_orfs_or_cdss.py
tools/get_orfs_or_cdss/get_orfs_or_cdss.xml
tools/get_orfs_or_cdss/tool_dependencies.xml
added:
test-data/get_orf_input.Suis_ORF.bed
test-data/get_orf_input.Suis_ORF.prot.pair_sample_C10.fasta
test-data/get_orf_input.Suis_ORF.prot.sample_C10.fasta
test-data/get_orf_input.t11_bed_out.bed
test-data/get_orf_input.t11_open_bed_out.bed
test-data/get_orf_input.t1_bed_out.bed
b
diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.Suis_ORF.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_orf_input.Suis_ORF.bed Mon Apr 06 05:21:48 2015 -0400
b
b'@@ -0,0 +1,2910 @@\n+Streptococcus_suis\t0\t1374\tStreptococcus_suis|ORF1\t0\t+\n+Streptococcus_suis\t1506\t2664\tStreptococcus_suis|ORF2\t0\t+\n+Streptococcus_suis\t1706\t2021\tStreptococcus_suis|ORF3\t0\t-\n+Streptococcus_suis\t2755\t3637\tStreptococcus_suis|ORF4\t0\t+\n+Streptococcus_suis\t3932\t4313\tStreptococcus_suis|ORF5\t0\t+\n+Streptococcus_suis\t4380\t5514\tStreptococcus_suis|ORF6\t0\t+\n+Streptococcus_suis\t4449\t4797\tStreptococcus_suis|ORF7\t0\t-\n+Streptococcus_suis\t4490\t4838\tStreptococcus_suis|ORF8\t0\t-\n+Streptococcus_suis\t5662\t6241\tStreptococcus_suis|ORF9\t0\t+\n+Streptococcus_suis\t6234\t9735\tStreptococcus_suis|ORF10\t0\t+\n+Streptococcus_suis\t6973\t7351\tStreptococcus_suis|ORF11\t0\t-\n+Streptococcus_suis\t8270\t8624\tStreptococcus_suis|ORF12\t0\t-\n+Streptococcus_suis\t10037\t10412\tStreptococcus_suis|ORF13\t0\t+\n+Streptococcus_suis\t10522\t11815\tStreptococcus_suis|ORF14\t0\t+\n+Streptococcus_suis\t11815\t13084\tStreptococcus_suis|ORF15\t0\t+\n+Streptococcus_suis\t13076\t13634\tStreptococcus_suis|ORF16\t0\t+\n+Streptococcus_suis\t13634\t15629\tStreptococcus_suis|ORF17\t0\t+\n+Streptococcus_suis\t13834\t14764\tStreptococcus_suis|ORF18\t0\t-\n+Streptococcus_suis\t15963\t16437\tStreptococcus_suis|ORF19\t0\t+\n+Streptococcus_suis\t17316\t17886\tStreptococcus_suis|ORF20\t0\t-\n+Streptococcus_suis\t19641\t19953\tStreptococcus_suis|ORF21\t0\t-\n+Streptococcus_suis\t20790\t21111\tStreptococcus_suis|ORF22\t0\t-\n+Streptococcus_suis\t21319\t21649\tStreptococcus_suis|ORF23\t0\t+\n+Streptococcus_suis\t23559\t24405\tStreptococcus_suis|ORF24\t0\t+\n+Streptococcus_suis\t23777\t24143\tStreptococcus_suis|ORF25\t0\t-\n+Streptococcus_suis\t24385\t24910\tStreptococcus_suis|ORF26\t0\t+\n+Streptococcus_suis\t24910\t26251\tStreptococcus_suis|ORF27\t0\t+\n+Streptococcus_suis\t26344\t27322\tStreptococcus_suis|ORF28\t0\t+\n+Streptococcus_suis\t27399\t28587\tStreptococcus_suis|ORF29\t0\t+\n+Streptococcus_suis\t28218\t28563\tStreptococcus_suis|ORF30\t0\t-\n+Str
eptococcus_suis\t28567\t29356\tStreptococcus_suis|ORF31\t0\t+\n+Streptococcus_suis\t28603\t28942\tStreptococcus_suis|ORF32\t0\t-\n+Streptococcus_suis\t29343\t30360\tStreptococcus_suis|ORF33\t0\t+\n+Streptococcus_suis\t30691\t31426\tStreptococcus_suis|ORF34\t0\t+\n+Streptococcus_suis\t31426\t35158\tStreptococcus_suis|ORF35\t0\t+\n+Streptococcus_suis\t32643\t33138\tStreptococcus_suis|ORF36\t0\t-\n+Streptococcus_suis\t34368\t34755\tStreptococcus_suis|ORF37\t0\t-\n+Streptococcus_suis\t34816\t35128\tStreptococcus_suis|ORF38\t0\t-\n+Streptococcus_suis\t35016\t36615\tStreptococcus_suis|ORF39\t0\t+\n+Streptococcus_suis\t36640\t37693\tStreptococcus_suis|ORF40\t0\t+\n+Streptococcus_suis\t37333\t37738\tStreptococcus_suis|ORF41\t0\t-\n+Streptococcus_suis\t37665\t38241\tStreptococcus_suis|ORF42\t0\t+\n+Streptococcus_suis\t38198\t38555\tStreptococcus_suis|ORF43\t0\t-\n+Streptococcus_suis\t38241\t39798\tStreptococcus_suis|ORF44\t0\t+\n+Streptococcus_suis\t38277\t38709\tStreptococcus_suis|ORF45\t0\t-\n+Streptococcus_suis\t39303\t39609\tStreptococcus_suis|ORF46\t0\t-\n+Streptococcus_suis\t39437\t39809\tStreptococcus_suis|ORF47\t0\t-\n+Streptococcus_suis\t39892\t40318\tStreptococcus_suis|ORF48\t0\t-\n+Streptococcus_suis\t39920\t41186\tStreptococcus_suis|ORF49\t0\t+\n+Streptococcus_suis\t40224\t40698\tStreptococcus_suis|ORF50\t0\t+\n+Streptococcus_suis\t40229\t40925\tStreptococcus_suis|ORF51\t0\t-\n+Streptococcus_suis\t41070\t41451\tStreptococcus_suis|ORF52\t0\t-\n+Streptococcus_suis\t41193\t41700\tStreptococcus_suis|ORF53\t0\t+\n+Streptococcus_suis\t41309\t41615\tStreptococcus_suis|ORF54\t0\t+\n+Streptococcus_suis\t41683\t42766\tStreptococcus_suis|ORF55\t0\t+\n+Streptococcus_suis\t41692\t42064\tStreptococcus_suis|ORF56\t0\t-\n+Streptococcus_suis\t42188\t42569\tStreptococcus_suis|ORF57\t0\t-\n+Streptococcus_suis\t42794\t43571\tStreptococcus_suis|ORF58\t0\t+\n+Streptococcus_suis\t43041\t43365\tStreptococcus_suis|ORF59\t0\t-\n+Streptococcus_suis\t43447\t43894\tStreptococcus_suis|ORF60\t
0\t-\n+Streptococcus_suis\t43619\t44870\tStreptococcus_suis|ORF61\t0\t+\n+Streptococcus_suis\t44859\t46164\tStreptococcus_suis|ORF62\t0\t+\n+Streptococcus_suis\t45179\t45524\tStreptococcus_suis|ORF63\t0\t-\n+Streptococcus_suis\t47040\t47829\tStreptococcus_suis|ORF64\t0\t+\n+Streptococcus_suis\t47829\t48417\tStreptococcus_suis|ORF65\t0\t+\n+Streptococcus_suis\t48296\t48860\tStreptococcus_suis|'..b'956142\t1956526\tStreptococcus_suis|ORF2851\t0\t+\n+Streptococcus_suis\t1956835\t1958599\tStreptococcus_suis|ORF2852\t0\t-\n+Streptococcus_suis\t1957087\t1957456\tStreptococcus_suis|ORF2853\t0\t+\n+Streptococcus_suis\t1958210\t1958606\tStreptococcus_suis|ORF2854\t0\t+\n+Streptococcus_suis\t1958681\t1959143\tStreptococcus_suis|ORF2855\t0\t-\n+Streptococcus_suis\t1959143\t1960055\tStreptococcus_suis|ORF2856\t0\t-\n+Streptococcus_suis\t1959625\t1959934\tStreptococcus_suis|ORF2857\t0\t+\n+Streptococcus_suis\t1960124\t1961171\tStreptococcus_suis|ORF2858\t0\t-\n+Streptococcus_suis\t1961182\t1963597\tStreptococcus_suis|ORF2859\t0\t-\n+Streptococcus_suis\t1963905\t1964370\tStreptococcus_suis|ORF2860\t0\t-\n+Streptococcus_suis\t1963943\t1964489\tStreptococcus_suis|ORF2861\t0\t-\n+Streptococcus_suis\t1964461\t1967398\tStreptococcus_suis|ORF2862\t0\t-\n+Streptococcus_suis\t1967597\t1968323\tStreptococcus_suis|ORF2863\t0\t+\n+Streptococcus_suis\t1968313\t1969627\tStreptococcus_suis|ORF2864\t0\t+\n+Streptococcus_suis\t1969665\t1971015\tStreptococcus_suis|ORF2865\t0\t-\n+Streptococcus_suis\t1971097\t1972630\tStreptococcus_suis|ORF2866\t0\t-\n+Streptococcus_suis\t1972747\t1973206\tStreptococcus_suis|ORF2867\t0\t-\n+Streptococcus_suis\t1973262\t1973877\tStreptococcus_suis|ORF2868\t0\t-\n+Streptococcus_suis\t1974226\t1974649\tStreptococcus_suis|ORF2869\t0\t+\n+Streptococcus_suis\t1974436\t1975804\tStreptococcus_suis|ORF2870\t0\t-\n+Streptococcus_suis\t1974991\t1975387\tStreptococcus_suis|ORF2871\t0\t+\n+Streptococcus_suis\t1975823\t1976297\tStreptococcus_suis|ORF2872\t0\t-\n+Streptococcus_s
uis\t1976286\t1978269\tStreptococcus_suis|ORF2873\t0\t-\n+Streptococcus_suis\t1978354\t1980301\tStreptococcus_suis|ORF2874\t0\t-\n+Streptococcus_suis\t1978597\t1979140\tStreptococcus_suis|ORF2875\t0\t+\n+Streptococcus_suis\t1979950\t1980460\tStreptococcus_suis|ORF2876\t0\t+\n+Streptococcus_suis\t1980267\t1980729\tStreptococcus_suis|ORF2877\t0\t-\n+Streptococcus_suis\t1980718\t1981180\tStreptococcus_suis|ORF2878\t0\t-\n+Streptococcus_suis\t1980808\t1981186\tStreptococcus_suis|ORF2879\t0\t+\n+Streptococcus_suis\t1981166\t1981703\tStreptococcus_suis|ORF2880\t0\t-\n+Streptococcus_suis\t1981512\t1981878\tStreptococcus_suis|ORF2881\t0\t+\n+Streptococcus_suis\t1981760\t1982099\tStreptococcus_suis|ORF2882\t0\t-\n+Streptococcus_suis\t1982179\t1982602\tStreptococcus_suis|ORF2883\t0\t+\n+Streptococcus_suis\t1982325\t1982631\tStreptococcus_suis|ORF2884\t0\t-\n+Streptococcus_suis\t1982644\t1983916\tStreptococcus_suis|ORF2885\t0\t-\n+Streptococcus_suis\t1983950\t1984655\tStreptococcus_suis|ORF2886\t0\t+\n+Streptococcus_suis\t1983992\t1984349\tStreptococcus_suis|ORF2887\t0\t-\n+Streptococcus_suis\t1984655\t1985540\tStreptococcus_suis|ORF2888\t0\t+\n+Streptococcus_suis\t1985996\t1986602\tStreptococcus_suis|ORF2889\t0\t-\n+Streptococcus_suis\t1986682\t1987480\tStreptococcus_suis|ORF2890\t0\t-\n+Streptococcus_suis\t1987469\t1988357\tStreptococcus_suis|ORF2891\t0\t-\n+Streptococcus_suis\t1988287\t1989124\tStreptococcus_suis|ORF2892\t0\t-\n+Streptococcus_suis\t1989120\t1989669\tStreptococcus_suis|ORF2893\t0\t-\n+Streptococcus_suis\t1989669\t1990539\tStreptococcus_suis|ORF2894\t0\t-\n+Streptococcus_suis\t1990608\t1991928\tStreptococcus_suis|ORF2895\t0\t-\n+Streptococcus_suis\t1991888\t1993154\tStreptococcus_suis|ORF2896\t0\t-\n+Streptococcus_suis\t1993212\t1993599\tStreptococcus_suis|ORF2897\t0\t+\n+Streptococcus_suis\t1993568\t1994696\tStreptococcus_suis|ORF2898\t0\t+\n+Streptococcus_suis\t1994967\t1996464\tStreptococcus_suis|ORF2899\t0\t-\n+Streptococcus_suis\t1996680\t1997781\tStrept
ococcus_suis|ORF2900\t0\t-\n+Streptococcus_suis\t1998012\t1998933\tStreptococcus_suis|ORF2901\t0\t+\n+Streptococcus_suis\t1998922\t2000620\tStreptococcus_suis|ORF2902\t0\t+\n+Streptococcus_suis\t1999704\t2000052\tStreptococcus_suis|ORF2903\t0\t-\n+Streptococcus_suis\t1999973\t2000306\tStreptococcus_suis|ORF2904\t0\t+\n+Streptococcus_suis\t2000501\t2000855\tStreptococcus_suis|ORF2905\t0\t+\n+Streptococcus_suis\t2000887\t2003506\tStreptococcus_suis|ORF2906\t0\t+\n+Streptococcus_suis\t2003906\t2004614\tStreptococcus_suis|ORF2907\t0\t-\n+Streptococcus_suis\t2004614\t2005157\tStreptococcus_suis|ORF2908\t0\t-\n+Streptococcus_suis\t2005222\t2006464\tStreptococcus_suis|ORF2909\t0\t+\n+Streptococcus_suis\t2006518\t2007289\tStreptococcus_suis|ORF2910\t0\t+\n'
b
diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.Suis_ORF.prot.pair_sample_C10.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_orf_input.Suis_ORF.prot.pair_sample_C10.fasta Mon Apr 06 05:21:48 2015 -0400
b
@@ -0,0 +1,119 @@
+>Streptococcus_suis|ORF1 length 457 aa, 1374 bp, from 1..1374 of Streptococcus_suis
+MNQEQLFWQRFIELAKVNFKPSIYDFYVADAKLLGINQQVANIFLNRPFKKDFWEKNFEE
+LMIAASFESYGEPLTIQYQFTEDEQEIRNTTNTRSSIVHQVQTLEPATPQETFKPVHSDI
+KSQYTFANFVQGDNNHWAKAAALAVSDNLGELYNPLFIFGGPGLGKTHILNAIGNKVLAD
+NPQARIKYVSSETFINEFLEHLRLNDMESFKKTYRNLDLLLIDDIQSLRNKATTQEEFFH
+TFNALHEKNKQIVLTSDRNPDHLDNLEERLVTRFKWGLTSEITPPDFETRIAILRNKCEN
+LPYNFTNETLSYLAGQFDSNVRDLEGALKDIHLIATMRQLSEISVEVAAEAIRSRKQTNP
+QNMVIPIEKIQTEVGNFYGVSLKELKGSKRVQHIVHARQVAMFLAREMTDNSLPKIGKEF
+GNRDHTTVMHAYNKIKTLLLDDENLEIEITSIKNKLR
+>Streptococcus_suis|ORF2 length 385 aa, 1158 bp, from 1507..2664 of Streptococcus_suis
+IINKGESMIQFSINKNIFLQALSITKRAISTKNAIPILSTVKITVTSEGITLTGSNGQIS
+IEHFISIQDENAGLLISSPGSILLEAGFFINVVSSMPDLVLDFNEIEQKQIVLTSGKSEI
+TLKGKEAEQYPRLQEVPTSKPLVLETKVLKQTINETAFAASTQESRPILTGVHFVLTENK
+NLKTVATDSHRMSQRKLVLDTSGDDFNVVIPSRSLREFTAVFTDDIETVEVFFSNNQILF
+RSEHISFYTRLLEGTYPDTDRLIPTEFKTTAIFDTANLRHSMERARLLSNATQNGTVKLE
+IANNVVSAHVNSPEVGRVNEELDTVEVSGEDLVISFNPTYLIEALKATTSEQVKISFISS
+VRPFTLIPNNEGEDFIQLVTPVRTN
+>Streptococcus_suis|ORF291 length 760 aa, 2283 bp, from complement(184307..186589) of Streptococcus_suis
+KRGEFMRFNQFSFIKKETSVYLQELDTLGFQLIPDASSKTNLETFVRKCHFLTANTDFAL
+SNMIAEWDTDLLTFFQSDRELTDQIFYQVAFQLLGFVPGMDYTDVMDFVEKSNFPIVYGD
+IIDNLYQLLNTRTKSGNTLIDQLVSDDLIPEDNHYHFFNGKSMATFSTKNLIREVVYVET
+PVDTAGTGQTDIVKLSILRPHFDGKIPAVITNSPYHQGVNDVASDKALHKMEGELAEKQV
+GTIQVKQASITKLDLDQRNLPVSPATEKLGHITSYSLNDYFLARGFASLHVSGVGTLGST
+GYMTSGDYQQVEGYKAVIDWLNGRTKAYTDHTRSLEVKADWANGKVATTGLSYLGTMSNA
+LATTGVDGLEVIIAEAGISSWYDYYRENGLVTSPGGYPGEDLDSLTALTYSKSLQAGDFL
+RNKAAYEKGLAAERAALDRTSGDYNQYWHDRNYLLHADRVKCEVVFTHGSQDWNVKPIHV
+WNMFHALPSHIKKHLFFHNGAHVYMNNWQSIDFRESMNALLSQKLLGYENNYQLPTVIWQ
+DNSGEQTWTTLDTFGGENETVLPLGTGSQTVANQYTQEDFERYGKSYSAFHQDLYAGKAN
+QISIELPVTEGLLLNGQVTLKLRVASSVAKGLLSAQLLDKGNKKRLAPIPAPKARLSLDN
+GRYHAQENLVELPYVEMPQRLVTKGFMNLQNRTDLMTVEEVVPGQWMNLTWKLQPTIYQL
+KKGDVLELILYTTDFECTVRDNSQWQIHLDLSQSQLILPH
+>Streptococcus_suis|ORF292 length 216 aa, 651 bp, from 185183..185833 of Streptococcus_suis
+AVGKDHLTLDPISVEQIIAVMPVLIVVTAGAVQGSTLGSQSFFVGCFIAEEVTCLQTLGV
+GQGGQAVQIFAWIATRAGHQPVFTVVVIPRGNPCFCDDDFQSVHASCCQGIGHGTEIRQS
+RRRYLTIGPIGLDLKRASVVCVGLGATVQPVNHRFIALHLLVVARCHVARRAQRANTRHM
+EAGKAASEEVVIEGVRSNVPQFFSSRADRQVPLVQV
+>Streptococcus_suis|ORF583 length 391 aa, 1176 bp, from 397805..398980 of Streptococcus_suis
+RKKMKKQFELIATAAAGLEAVVGREIRNLGYECQVENGRVRFQGDVKSIIETNIWLRSAD
+RIKIIVGQFPAKTFEELFQGVFNLDWENYLPLGCKFPISKAKCVKSKLHNEPSVQAISKK
+AVVKKLQKHFSRPEGVPLQEMGAEFKIEVSILKDVATVMIDTTGSSLFKRGYRVEKGGAP
+IKENMAAAILQLSNWYPDKPLIDPTCGSGTFCIEAAMLAKNIAPGLKRSFAFEEWPWVED
+QLVVALRKEAQASIKTDLVLDITGSDIDARMIEIAKKNAFAAGVEQDIVFKQMRVQDLRT
+DKINGVIISNPPYGERLLDDEAIVTLYREMGETFEPLKTWSKFILTSDELFETRFGQQAD
+KKRKLYNGTLKVDLYQFFGQRVKRQVQEVQG
+>Streptococcus_suis|ORF584 length 487 aa, 1464 bp, from 398981..400444 of Streptococcus_suis
+EDIVGEKNSHHLPLDEEKVLDFEVAKDLTIEEAVKKHKEIEAGVTEDDGLLDRYIKQHRA
+EIESQKFETKINHLPLVEVADEEKNQGHESAEEVEANESSLTEVSEEIAPIVEELSVTPM
+ETLEETVIASTVAMEGLSSVADDSSLELEEDETEDLDHSEGADRDQKKKFYFWSAVGLSM
+IGVMATALVWMNSVNKSNTATSSSSTSTSQTSSTASSSTDANVTAFEQLYNSFFTDSSLT
+KLKNSEFGKLAELKVLLEKLDKNSDSYTKAKEQYDHLEKAIAAIQAINGQFDKEVVVNGE
+IDTTATVKSGESLSATTTGISAVDSLLASVVNFGRSQQEVASATVASEAAVTRNQGADET
+VSTGVPATTEVASTTVSGSTTDFGIAVPAGVVLQRDRSRVPYNQAMIDDVNNEAWNFNPG
+ILENIVTISQQRGYITGNQYILEKVNIINGNGYYNMFKPDGTYLFSINCKTGYFVGNGAG
+HSDALDY
+>Streptococcus_suis|ORF873 length 343 aa, 1032 bp, from 605439..606470 of Streptococcus_suis
+TLGEETMTNVFKGRHFLAEKDFTRAELEWLIDFSAHLKDLKKRNIPHRYLEGKNIALLFE
+KTSTRTRAAFTVASIDLGAHPEYLGANDIQLGKKESTEDTAKVLGRMFDGIEFRGFSQKM
+VEELAEFSGVPVWNGLTDAWHPTQMLADYLTVKENFGKLEGLTLVYCGDGRNNVANSLLV
+TGAILGVNVHIFSPKELFPEEEVVALAEGFAKESGARVLITDNADEAVKGADVLYTDVWV
+SMGEEDKFAERVALLKPYQVNMELVKKAENENLIFLHCLPAFHDTNTVYGKDVAEKFGVE
+EMEVTDEVFRSKYARHFDQAENRMHTIKAVMAATLGDPFVPRV
+>Streptococcus_suis|ORF874 length 113 aa, 342 bp, from complement(605625..605966) of Streptococcus_suis
+VSNIVTAITTVNQSQAFQLAKVFFDSQVVRQHLSWVPCICQTIPYWHTGEFCQFFHHFLT
+ETTEFNTVEHTSQNFSSIFCRFFLTKLDVICTKIFWMGTKVNRCYCEGSTSTC
+>Streptococcus_suis|ORF1165 length 105 aa, 318 bp, from 811613..811930 of Streptococcus_suis
+AYNESVKRKECHLMKQVNMSKIINYLTILGLLILLSAFFLDNWIRDWFFPSSWGNVATML
+ILPLLGALILILSIYYKKLWTGLISIFLIISFPLIFGIGYFIFGP
+>Streptococcus_suis|ORF1166 length 125 aa, 378 bp, from 811867..812244 of Streptococcus_suis
+YLLNNLISSDIRYWLLYIWPLEGVVMNLTLLKRLNLVLYGIAIFLFVMLFLPIGQWFDIV
+NVNFKLTFFIIPFFGLASLPTAIYTKNVRQILLSVLLVALYFILFSLITALSGLFHLNFY
+SFFFK
+>Streptococcus_suis|ORF1455 length 114 aa, 345 bp, from 1026973..1027317 of Streptococcus_suis
+SCKLSLHIRWESWMGQGFYCYRFKLIHLRTNSNPFSFFRHLNSHFQHLRNEWTVMLPDSV
+LDQDISTSHCRCHHKGTRFDTILHHLMFCASQFFYTSNRNRLCTCPLNFCPHFV
+>Streptococcus_suis|ORF1456 length 116 aa, 351 bp, from complement(1027944..1028294) of Streptococcus_suis
+YGNACNSRPPTCDKSYSCWETLIYMGLNLVQFHFLISWYNGNMVISILQFFSHILFIYLA
+HHLLVTTVDWSRWLKVTGDNQRKINLLILFLAIALGYLVSTFFLELLMMGRSFANM
+>Streptococcus_suis|ORF1747 length 335 aa, 1008 bp, from complement(1225218..1226225) of Streptococcus_suis
+RMLNTDDTVTIYDVAREAGVSMATVSRVVNGNKNVKENTRKKVLEVIDRLDYRPNAVARG
+LASKKTTTVGVVIPNIANAYFATLAKGIDDIADMYKYNIVLANSDENDEKEINVVNTLFS
+KQVDGIIFMGYHLTDKIRAEFSRSRTPIVLAGTVDLEHQLPSVNIDYAAASVDAVNLLAK
+NNKKIAFVSGPLVDDINGKVRFAGYKQGLKDNGIEFNEGLVFESKYKYEEGYALAERILN
+AGATAAYVAEDEIAAGLLNGVSDMGIKVPEDFEIITSDDSLVTKFTRPNLTSINQPLYDI
+GAIAMRMLTKIMHKEELENREVVLNHGIKVRKSTK
+>Streptococcus_suis|ORF1748 length 377 aa, 1134 bp, from 1226384..1227517 of Streptococcus_suis
+TKISLFLPLHARKVSTMSKLHHVKSYLEANKMDLAIFSDPVSIYYLTGYHSDPHERHMML
+FVMPDHDSLLFLPALDVERAVATVDFPVAGYMDSENPWQIIKSKLPQKSFSAICAEFDNL
+NLTRYHGLQSIFSQPFSDITPLINTMKLIKSRDEIEKMLVAGEFADKAMQVGFNNISLDV
+TETDIIAQIEFEMKKQGISKMSFETMVLTGDNAANPHGIPSTNKIENNALLLFDLGVEAL
+GYTSDMTRTVAVGKPDQFKKDIYNLTLEAHMAAVNMIKPGVTAGEIDYAARSVIEKAGYG
+EYFNHRLGHGLGMSVHEFPSIMEGNDLVIEEGMCFSVEPGIYIPGKVGVRIEDCGYVTKN
+GFEVFTKTPKELLYFEG
+>Streptococcus_suis|ORF2037 length 234 aa, 705 bp, from complement(1422380..1423084) of Streptococcus_suis
+KSMTKTALITGVSSGIGLAQAGIFLENGWRVFGIDLASKPDLAGDFHFLQLDLTGDLSPV
+FSWCQSVDVLCNTAGILDDYRPHLDISEDELAQIFAVNFFAVTRLTRPYLQQMVDRQSGI
+IINMCSIASSLAGGGGSAYTASKHALAGFTKQLALDYAKDKVQIFGIAPGAVQTGMTQKD
+FEPGGLADWVADQTPIGRWTQPSEIAELTFMLATGKLASMQGQIITIDGGWSLK
+>Streptococcus_suis|ORF2038 length 112 aa, 339 bp, from 1422849..1423187 of Streptococcus_suis
+SSKMPAVLQRTSTDWHQEKTGDKSPVRSSCRKWKSPAKSGLLARSIPKTRQPFSKKIPAC
+ARPMPLETPVMRAVLVMDFYPVGRKDIARGRAPHGEAFTLAGHVDEEIGRRL
+>Streptococcus_suis|ORF2329 length 160 aa, 483 bp, from 1612284..1612766 of Streptococcus_suis
+LIETNWFHHLTGQEGLDVLFFHNLGFRITDQLYLEVRKFHLLQGLSQLLRRWSQESRVKG
+ARYIERNHPLDTCFLQQFNRLIHCSHLASDDDLGWCVVVGWGNNPRGNSRTDFFNQVDIC
+VENSNHLTSPCWRSQFHIFTTLSNQGNRIFKGQSSRCHQS
+>Streptococcus_suis|ORF2330 length 329 aa, 990 bp, from complement(1613050..1614039) of Streptococcus_suis
+ARKKDEGIMKTKITELLDIKYPIFQGGMAWVADGDLAGAVSNAGGLGIIGGGNAPKEVVK
+ANIDKVKSITDKPFGVNIMLLSPFADDIVDLVIEEGVKVVTTGAGNPGKYMERLHAAGIT
+VIPVVPSVALAKRMEKLGVDAVIAEGMEAGGHIGKLTTMTLVRQVVEAVSIPVIAAGGIA
+DGAGAAAAFMLGAEAVQVGTRFVVATESNAHQAYKEKVLKAKDIDTTVSASIVGHPVRAI
+KNKLSSAYAAAEKDFLAGKISADAIEELGAGALRNAVVDGDVTNGSVMAGQIAGLVSKEE
+SCEDILKDIYYGAAKVIREEASRWASVGE
+>Streptococcus_suis|ORF2619 length 107 aa, 324 bp, from 1802386..1802709 of Streptococcus_suis
+QLCVGSNPINSLFRRNFFVCCISSQSSCYVHTMWFVGIIVEIIVARYIIIAMGNFQCVCP
+CRRWSNVLNFRNDTIIQPHVFVLNIQTGVNDCNHHSATICLIFRTCF
+>Streptococcus_suis|ORF2620 length 192 aa, 579 bp, from complement(1803558..1804136) of Streptococcus_suis
+RLKIPCFQRKEVTMYDSFDKGWFVLQTYSGYENKVKENLLQRAHTYNMLENILRVEIPTQ
+TVQVEKNGEVKEVEENRFPGYVLVEMVMTDEAWFVVRNTPNVTGFVGSHGNRSKPTPLLE
+EEIRQILVSMGQTVQEFDIDVKVGDTVRIIDGAFTDYTGKITEIDNNKVKMVISMFGNDT
+IAEVNLSQIAEL
b
diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.Suis_ORF.prot.sample_C10.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_orf_input.Suis_ORF.prot.sample_C10.fasta Mon Apr 06 05:21:48 2015 -0400
b
@@ -0,0 +1,50 @@
+>Streptococcus_suis|ORF1 length 457 aa, 1374 bp, from 1..1374 of Streptococcus_suis
+MNQEQLFWQRFIELAKVNFKPSIYDFYVADAKLLGINQQVANIFLNRPFKKDFWEKNFEE
+LMIAASFESYGEPLTIQYQFTEDEQEIRNTTNTRSSIVHQVQTLEPATPQETFKPVHSDI
+KSQYTFANFVQGDNNHWAKAAALAVSDNLGELYNPLFIFGGPGLGKTHILNAIGNKVLAD
+NPQARIKYVSSETFINEFLEHLRLNDMESFKKTYRNLDLLLIDDIQSLRNKATTQEEFFH
+TFNALHEKNKQIVLTSDRNPDHLDNLEERLVTRFKWGLTSEITPPDFETRIAILRNKCEN
+LPYNFTNETLSYLAGQFDSNVRDLEGALKDIHLIATMRQLSEISVEVAAEAIRSRKQTNP
+QNMVIPIEKIQTEVGNFYGVSLKELKGSKRVQHIVHARQVAMFLAREMTDNSLPKIGKEF
+GNRDHTTVMHAYNKIKTLLLDDENLEIEITSIKNKLR
+>Streptococcus_suis|ORF292 length 216 aa, 651 bp, from 185183..185833 of Streptococcus_suis
+AVGKDHLTLDPISVEQIIAVMPVLIVVTAGAVQGSTLGSQSFFVGCFIAEEVTCLQTLGV
+GQGGQAVQIFAWIATRAGHQPVFTVVVIPRGNPCFCDDDFQSVHASCCQGIGHGTEIRQS
+RRRYLTIGPIGLDLKRASVVCVGLGATVQPVNHRFIALHLLVVARCHVARRAQRANTRHM
+EAGKAASEEVVIEGVRSNVPQFFSSRADRQVPLVQV
+>Streptococcus_suis|ORF583 length 391 aa, 1176 bp, from 397805..398980 of Streptococcus_suis
+RKKMKKQFELIATAAAGLEAVVGREIRNLGYECQVENGRVRFQGDVKSIIETNIWLRSAD
+RIKIIVGQFPAKTFEELFQGVFNLDWENYLPLGCKFPISKAKCVKSKLHNEPSVQAISKK
+AVVKKLQKHFSRPEGVPLQEMGAEFKIEVSILKDVATVMIDTTGSSLFKRGYRVEKGGAP
+IKENMAAAILQLSNWYPDKPLIDPTCGSGTFCIEAAMLAKNIAPGLKRSFAFEEWPWVED
+QLVVALRKEAQASIKTDLVLDITGSDIDARMIEIAKKNAFAAGVEQDIVFKQMRVQDLRT
+DKINGVIISNPPYGERLLDDEAIVTLYREMGETFEPLKTWSKFILTSDELFETRFGQQAD
+KKRKLYNGTLKVDLYQFFGQRVKRQVQEVQG
+>Streptococcus_suis|ORF874 length 113 aa, 342 bp, from complement(605625..605966) of Streptococcus_suis
+VSNIVTAITTVNQSQAFQLAKVFFDSQVVRQHLSWVPCICQTIPYWHTGEFCQFFHHFLT
+ETTEFNTVEHTSQNFSSIFCRFFLTKLDVICTKIFWMGTKVNRCYCEGSTSTC
+>Streptococcus_suis|ORF1165 length 105 aa, 318 bp, from 811613..811930 of Streptococcus_suis
+AYNESVKRKECHLMKQVNMSKIINYLTILGLLILLSAFFLDNWIRDWFFPSSWGNVATML
+ILPLLGALILILSIYYKKLWTGLISIFLIISFPLIFGIGYFIFGP
+>Streptococcus_suis|ORF1456 length 116 aa, 351 bp, from complement(1027944..1028294) of Streptococcus_suis
+YGNACNSRPPTCDKSYSCWETLIYMGLNLVQFHFLISWYNGNMVISILQFFSHILFIYLA
+HHLLVTTVDWSRWLKVTGDNQRKINLLILFLAIALGYLVSTFFLELLMMGRSFANM
+>Streptococcus_suis|ORF1747 length 335 aa, 1008 bp, from complement(1225218..1226225) of Streptococcus_suis
+RMLNTDDTVTIYDVAREAGVSMATVSRVVNGNKNVKENTRKKVLEVIDRLDYRPNAVARG
+LASKKTTTVGVVIPNIANAYFATLAKGIDDIADMYKYNIVLANSDENDEKEINVVNTLFS
+KQVDGIIFMGYHLTDKIRAEFSRSRTPIVLAGTVDLEHQLPSVNIDYAAASVDAVNLLAK
+NNKKIAFVSGPLVDDINGKVRFAGYKQGLKDNGIEFNEGLVFESKYKYEEGYALAERILN
+AGATAAYVAEDEIAAGLLNGVSDMGIKVPEDFEIITSDDSLVTKFTRPNLTSINQPLYDI
+GAIAMRMLTKIMHKEELENREVVLNHGIKVRKSTK
+>Streptococcus_suis|ORF2038 length 112 aa, 339 bp, from 1422849..1423187 of Streptococcus_suis
+SSKMPAVLQRTSTDWHQEKTGDKSPVRSSCRKWKSPAKSGLLARSIPKTRQPFSKKIPAC
+ARPMPLETPVMRAVLVMDFYPVGRKDIARGRAPHGEAFTLAGHVDEEIGRRL
+>Streptococcus_suis|ORF2329 length 160 aa, 483 bp, from 1612284..1612766 of Streptococcus_suis
+LIETNWFHHLTGQEGLDVLFFHNLGFRITDQLYLEVRKFHLLQGLSQLLRRWSQESRVKG
+ARYIERNHPLDTCFLQQFNRLIHCSHLASDDDLGWCVVVGWGNNPRGNSRTDFFNQVDIC
+VENSNHLTSPCWRSQFHIFTTLSNQGNRIFKGQSSRCHQS
+>Streptococcus_suis|ORF2620 length 192 aa, 579 bp, from complement(1803558..1804136) of Streptococcus_suis
+RLKIPCFQRKEVTMYDSFDKGWFVLQTYSGYENKVKENLLQRAHTYNMLENILRVEIPTQ
+TVQVEKNGEVKEVEENRFPGYVLVEMVMTDEAWFVVRNTPNVTGFVGSHGNRSKPTPLLE
+EEIRQILVSMGQTVQEFDIDVKVGDTVRIIDGAFTDYTGKITEIDNNKVKMVISMFGNDT
+IAEVNLSQIAEL
b
diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.t11_bed_out.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_orf_input.t11_bed_out.bed Mon Apr 06 05:21:48 2015 -0400
b
@@ -0,0 +1,6 @@
+alpha 67 331 alpha|CDS1 0 +
+alpha 71 326 alpha|CDS2 0 +
+alpha 75 336 alpha|CDS3 0 +
+beta 68 332 beta|CDS1 0 +
+beta 72 327 beta|CDS2 0 +
+beta 76 337 beta|CDS3 0 +
b
diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.t11_open_bed_out.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_orf_input.t11_open_bed_out.bed Mon Apr 06 05:21:48 2015 -0400
b
@@ -0,0 +1,7 @@
+alpha 67 331 alpha|CDS1 0 +
+alpha 71 326 alpha|CDS2 0 +
+alpha 75 336 alpha|CDS3 0 +
+beta 68 332 beta|CDS1 0 +
+beta 72 327 beta|CDS2 0 +
+beta 76 337 beta|CDS3 0 +
+beta 333 408 beta|CDS4 0 +
b
diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.t1_bed_out.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_orf_input.t1_bed_out.bed Mon Apr 06 05:21:48 2015 -0400
b
@@ -0,0 +1,3 @@
+alpha 67 331 alpha|CDS1 0 +
+alpha 71 326 alpha|CDS2 0 +
+alpha 75 336 alpha|CDS3 0 +
b
diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/README.rst
--- a/tools/get_orfs_or_cdss/README.rst Fri Nov 28 11:45:37 2014 -0500
+++ b/tools/get_orfs_or_cdss/README.rst Mon Apr 06 05:21:48 2015 -0400
b
@@ -41,7 +41,8 @@
 
     <tool file="get_orfs_or_cdss/get_orfs_or_cdss.xml" />
 
-You will also need to install Biopython 1.54 or later.
+You will also need to install Biopython 1.65 or later (slightly older versions
+should be fine, but will not have the latest NCBI genetic code tables).
 
 If you wish to run the unit tests, also move/copy the ``test-data/`` files
 under Galaxy's ``test-data/`` folder. Then::
@@ -70,6 +71,9 @@
         - Renamed folder and adopted README.rst naming.
 v0.0.6  - Corrected automated dependency definition.
 v0.0.7  - Tool definition now embeds citation information.
+v0.1.0  - Tool now outputs BED formatted calls (by @erasche, Eric Rasche).
+        - Using ``optparse`` for the Python command line API (Eric Rasche).
+        - Added NCBI genetic code table 24, Pterobranchia Mitochondrial.
 ======= ======================================================================
 
 
@@ -85,7 +89,7 @@
 For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder::
 
-    $ tar -czf get_orfs_or_cdss.tar.gz tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.* tools/get_orfs_or_cdss/tool_dependencies.xml test-data/get_orf_input*.fasta test-data/Ssuis.fasta
+    $ tar -czf get_orfs_or_cdss.tar.gz tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.* tools/get_orfs_or_cdss/tool_dependencies.xml test-data/get_orf_input*.fasta test-data/Ssuis.fasta test-data/get_orf_input*.bed
 
 Check this worked::
 
@@ -104,6 +108,10 @@
     test-data/get_orf_input.t1_nuc_out.fasta
     test-data/get_orf_input.t1_prot_out.fasta
     test-data/Ssuis.fasta
+    test-data/get_orf_input.Suis_ORF.bed
+    test-data/get_orf_input.t11_open_bed_out.bed
+    test-data/get_orf_input.t11_bed_out.bed
+    test-data/get_orf_input.t1_bed_out.bed
 
 
 Licence (MIT)
b
diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/get_orfs_or_cdss.py
--- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Fri Nov 28 11:45:37 2014 -0500
+++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Mon Apr 06 05:21:48 2015 -0400
[
b'@@ -1,13 +1,6 @@\n #!/usr/bin/env python\n """Find ORFs in a nucleotide sequence file.\n \n-get_orfs_or_cdss.py $input_fasta $input_format $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file\n-\n-Takes ten command line options, input sequence filename, format, genetic\n-code, CDS vs ORF, end type (open, closed), selection mode (all, top, one),\n-minimum length (in amino acids), strand (both, forward, reverse), output\n-nucleotide filename, and output protein filename.\n-\n For more details, see the help text and argument descriptions in the\n accompanying get_orfs_or_cdss.xml file which defines a Galaxy interface.\n \n@@ -24,65 +17,88 @@\n \n See accompanying text file for licence details (MIT licence).\n \n-This is version 0.0.3 of the script.\n+This is version 0.1.0 of the script.\n """\n import sys\n import re\n+from optparse import OptionParser\n \n-if "-v" in sys.argv or "--version" in sys.argv:\n-    print "v0.0.3"\n-    sys.exit(0)\n-\n-def stop_err(msg, err=1):\n+def sys_exit(msg, err=1):\n     sys.stderr.write(msg.rstrip() + "\\n")\n     sys.exit(err)\n \n+usage = """Use as follows:\n+\n+$ python get_orfs_or_cdss.py -i genome.fa -f fasta --table 11 -t CDS -e open -m all -s both --on cds.nuc.fa --op cds.protein.fa --ob cds.bed\n+"""\n+\n try:\n     from Bio.Seq import Seq, reverse_complement, translate\n     from Bio.SeqRecord import SeqRecord\n     from Bio import SeqIO\n     from Bio.Data import CodonTable\n except ImportError:\n-    stop_err("Missing Biopython library")\n+    sys_exit("Missing Biopython library")\n+\n \n-#Parse Command Line\n-try:\n-    input_file, seq_format, table, ftype, ends, mode, min_len, strand, out_nuc_file, out_prot_file = sys.argv[1:]\n-except ValueError:\n-    stop_err("Expected ten arguments, got %i:\\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))\n+parser = OptionParser(usage=usage)\n+parser.add_option(\'-i\', \'--input\', dest=\'input_file\',\n+                  default=None, help=\'Input fasta 
file\',\n+                  metavar=\'FILE\')\n+parser.add_option(\'-f\', \'--format\', dest=\'seq_format\',\n+                  default=\'fasta\', help=\'Sequence format (e.g. fasta, fastq, sff)\')\n+parser.add_option(\'--table\', dest=\'table\',\n+                  default=1, help=\'NCBI Translation table\', type=\'int\')\n+parser.add_option(\'-t\', \'--ftype\', dest=\'ftype\', type=\'choice\',\n+                  choices=[\'CDS\', \'ORF\'], default=\'ORF\',\n+                  help=\'Find ORF or CDSs\')\n+parser.add_option(\'-e\', \'--ends\', dest=\'ends\', type=\'choice\',\n+                  choices=[\'open\', \'closed\'], default=\'closed\',\n+                  help=\'Open or closed. Closed ensures start/stop codons are present\')\n+parser.add_option(\'-m\', \'--mode\', dest=\'mode\', type=\'choice\',\n+                  choices=[\'all\', \'top\', \'one\'], default=\'all\',\n+                  help=\'Output all ORFs/CDSs from sequence, all ORFs/CDSs \'\n+                  \'with max length, or first with maximum length\')\n+parser.add_option(\'--min_len\', dest=\'min_len\',\n+                  default=10, help=\'Minimum ORF/CDS length\', type=\'int\')\n+parser.add_option(\'-s\', \'--strand\', dest=\'strand\', type=\'choice\',\n+                  choices=[\'forward\', \'reverse\', \'both\'], default=\'both\',\n+                  help=\'Strand to search for features on\')\n+parser.add_option(\'--on\', dest=\'out_nuc_file\',\n+                  default=None, help=\'Output nucleotide sequences, or - for STDOUT\',\n+                  metavar=\'FILE\')\n+parser.add_option(\'--op\', dest=\'out_prot_file\',\n+                  default=None, help=\'Output protein sequences, or - for STDOUT\',\n+                  metavar=\'FILE\')\n+parser.add_option(\'--ob\', dest=\'out_bed_file\',\n+                  default=None, help=\'Output BED file, or - for STDOUT\',\n+                  metavar=\'FILE\')\n+parser.add_option(\'-v\', \'--version\', dest=\'version\',\n+            
      default=False, action=\'store_true\',\n+                  help=\'Show version and quit\')\n \n-try:\n-    table = int(table)\n-except ValueError:\n-    stop_err("Expected integer for genetic code table, got %s" % table)\n+options, args = pa'..b's.ftype=="CDS":\n             offset, n, t = start_chop_and_trans(n)\n         else:\n             offset = 0\n-            t = translate(n, table, to_stop=True)\n-        if n and len(t) >= min_len:\n+            t = translate(n, options.table, to_stop=True)\n+        if n and len(t) >= options.min_len:\n             yield start + offset, n, t\n         start = index\n-    if ends == "open":\n+    if options.ends == "open":\n         #No stop codon, Biopython\'s strict CDS translate will fail\n         n = s[start:]\n         #Ensure we have whole codons\n@@ -138,14 +154,14 @@\n             n = n[:-1]\n         if len(n) % 3:\n             n = n[:-1]\n-        if ftype=="CDS":\n+        if options.ftype=="CDS":\n             offset, n, t = start_chop_and_trans(n, strict=False)\n         else:\n             offset = 0\n-            t = translate(n, table, to_stop=True)\n-        if n and len(t) >= min_len:\n+            t = translate(n, options.table, to_stop=True)\n+        if n and len(t) >= options.min_len:\n             yield start + offset, n, t\n-                        \n+\n \n def get_all_peptides(nuc_seq):\n     """Returns start, end, strand, nucleotides, protein.\n@@ -156,12 +172,12 @@\n     #rather than making a list and sorting?\n     answer = []\n     full_len = len(nuc_seq)\n-    if strand != "reverse":\n+    if options.strand != "reverse":\n         for frame in range(0,3):\n             for offset, n, t in break_up_frame(nuc_seq[frame:]):\n                 start = frame + offset #zero based\n                 answer.append((start, start + len(n), +1, n, t))\n-    if strand != "forward":\n+    if options.strand != "forward":\n         rc = reverse_complement(nuc_seq)\n         for frame in range(0,3) :\n  
           for offset, n, t in break_up_frame(rc[frame:]):\n@@ -187,24 +203,31 @@\n         raise StopIteration\n     yield values[0]\n \n-if mode == "all":\n+if options.mode == "all":\n     get_peptides = get_all_peptides\n-elif mode == "top":\n+elif options.mode == "top":\n     get_peptides = get_top_peptides\n-elif mode == "one":\n+elif options.mode == "one":\n     get_peptides = get_one_peptide\n \n in_count = 0\n out_count = 0\n-if out_nuc_file == "-":\n+if options.out_nuc_file == "-":\n     out_nuc = sys.stdout\n else:\n-    out_nuc = open(out_nuc_file, "w")\n-if out_prot_file == "-":\n+    out_nuc = open(options.out_nuc_file, "w")\n+\n+if options.out_prot_file == "-":\n     out_prot = sys.stdout\n else:\n-    out_prot = open(out_prot_file, "w")\n-for record in SeqIO.parse(input_file, seq_format):\n+    out_prot = open(options.out_prot_file, "w")\n+\n+if options.out_bed_file == "-":\n+    out_bed = sys.stdout\n+else:\n+    out_bed = open(options.out_bed_file, "w")\n+\n+for record in SeqIO.parse(options.input_file, seq_format):\n     for i, (f_start, f_end, f_strand, n, t) in enumerate(get_peptides(str(record.seq).upper())):\n         out_count += 1\n         if f_strand == +1:\n@@ -213,14 +236,18 @@\n             loc = "complement(%i..%i)" % (f_start+1, f_end)\n         descr = "length %i aa, %i bp, from %s of %s" \\\n                 % (len(t), len(n), loc, record.description)\n-        r = SeqRecord(Seq(n), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)\n-        t = SeqRecord(Seq(t), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)\n+        fid = record.id + "|%s%i" % (options.ftype, i+1)\n+        r = SeqRecord(Seq(n), id = fid, name = "", description= descr)\n+        t = SeqRecord(Seq(t), id = fid, name = "", description= descr)\n         SeqIO.write(r, out_nuc, "fasta")\n         SeqIO.write(t, out_prot, "fasta")\n+        out_bed.write(\'\\t\'.join(map(str,[record.id, f_start, f_end, fid, 0, \'+\' if 
f_strand == +1 else \'-\'])) + \'\\n\')\n     in_count += 1\n if out_nuc is not sys.stdout:\n     out_nuc.close()\n if out_prot is not sys.stdout:\n     out_prot.close()\n+if out_bed is not sys.stdout:\n+    out_bed.close()\n \n-print "Found %i %ss in %i sequences" % (out_count, ftype, in_count)\n+print "Found %i %ss in %i sequences" % (out_count, options.ftype, in_count)\n'
b
diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/get_orfs_or_cdss.xml
--- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Fri Nov 28 11:45:37 2014 -0500
+++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Mon Apr 06 05:21:48 2015 -0400
b
@@ -1,12 +1,12 @@
-<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.0.7">
+<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.1.0">
     <description>e.g. to get peptides from ESTs</description>
     <requirements>
-        <requirement type="package" version="1.62">biopython</requirement>
+        <requirement type="package" version="1.65">biopython</requirement>
         <requirement type="python-module">Bio</requirement>
     </requirements>
     <version_command interpreter="python">get_orfs_or_cdss.py --version</version_command>
     <command interpreter="python">
-get_orfs_or_cdss.py $input_file $input_file.ext $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file
+get_orfs_or_cdss.py -i $input_file -f $input_file.ext --table $table -t $ftype -e $ends -m $mode --min_len $min_len -s $strand --on $out_nuc_file --op $out_prot_file --ob $out_bed_file
     </command>
     <stdio>
         <!-- Anything other than zero is an error -->
@@ -33,6 +33,7 @@
             <option value="21">21. Trematode Mitochondrial</option>
             <option value="22">22. Scenedesmus obliquus</option>
             <option value="23">23. Thraustochytrium Mitochondrial</option>
+            <option value="24">24. Pterobranchia Mitochondrial</option>
         </param>
         <param name="ftype" type="select" value="True" label="Look for ORFs or CDSs">
             <option value="ORF">Look for ORFs (check for stop codons only, ignore start codons)</option>
@@ -49,7 +50,7 @@
             <option value="one">First ORF/CDS from each sequence with the maximum length</option>
         </param>
         <param name="min_len" type="integer" size="5" value="30" label="Minimum length ORF/CDS (in amino acids, e.g. 30 aa = 90 bp plus any stop codon)" />
-        <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing.">
+        <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing).">
             <option value="both">Search both the forward and reverse strand</option>
             <option value="forward">Only search the forward strand</option>
             <option value="reverse">Only search the reverse strand</option>
@@ -58,6 +59,7 @@
     <outputs>
         <data name="out_nuc_file" format="fasta" label="${ftype.value}s (nucleotides)" />
         <data name="out_prot_file" format="fasta" label="${ftype.value}s (amino acids)" />
+        <data name="out_bed_file" format="bed6" label="${ftype.value}s (bed)" />
     </outputs>
     <tests>
         <test>
@@ -70,6 +72,7 @@
             <param name="strand" value="forward" />
             <output name="out_nuc_file" file="get_orf_input.t1_nuc_out.fasta" />
             <output name="out_prot_file" file="get_orf_input.t1_prot_out.fasta" />
+            <output name="out_bed_file" file="get_orf_input.t1_bed_out.bed" />
         </test>
         <test>
             <param name="input_file" value="get_orf_input.fasta" />
@@ -80,7 +83,8 @@
             <param name="min_len" value="10" />
             <param name="strand" value="forward" />
             <output name="out_nuc_file" file="get_orf_input.t11_nuc_out.fasta" />
-            <output    name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" />
+            <output name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" />
+            <output name="out_bed_file" file="get_orf_input.t11_bed_out.bed" />
         </test>
         <test>
             <param name="input_file" value="get_orf_input.fasta" />
@@ -92,6 +96,7 @@
             <param name="strand" value="forward" />
             <output name="out_nuc_file" file="get_orf_input.t11_open_nuc_out.fasta" />
             <output name="out_prot_file" file="get_orf_input.t11_open_prot_out.fasta" />
+            <output name="out_bed_file" file="get_orf_input.t11_open_bed_out.bed" />
         </test>
         <test>
             <param name="input_file" value="Ssuis.fasta" />
@@ -103,6 +108,7 @@
             <param name="strand" value="both" />
             <output name="out_nuc_file" file="get_orf_input.Suis_ORF.nuc.fasta" />
             <output name="out_prot_file" file="get_orf_input.Suis_ORF.prot.fasta" />
+            <output name="out_bed_file" file="get_orf_input.Suis_ORF.bed" />
         </test>
     </tests>
     <help>
b
diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/tool_dependencies.xml
--- a/tools/get_orfs_or_cdss/tool_dependencies.xml Fri Nov 28 11:45:37 2014 -0500
+++ b/tools/get_orfs_or_cdss/tool_dependencies.xml Mon Apr 06 05:21:48 2015 -0400
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="biopython" version="1.62">
-        <repository changeset_revision="ac9cc2992b69" name="package_biopython_1_62" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <package name="biopython" version="1.65">
+        <repository changeset_revision="f8d72690eeae" name="package_biopython_1_65" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>