| Previous changeset 13:72bc0335b792 (2014-11-28) Next changeset 15:2eb9e86f5f35 (2015-05-12) |
|
Commit message:
Uploaded v0.1.0, adds BED output etc |
|
modified:
tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.py tools/get_orfs_or_cdss/get_orfs_or_cdss.xml tools/get_orfs_or_cdss/tool_dependencies.xml |
|
added:
test-data/get_orf_input.Suis_ORF.bed test-data/get_orf_input.Suis_ORF.prot.pair_sample_C10.fasta test-data/get_orf_input.Suis_ORF.prot.sample_C10.fasta test-data/get_orf_input.t11_bed_out.bed test-data/get_orf_input.t11_open_bed_out.bed test-data/get_orf_input.t1_bed_out.bed |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.Suis_ORF.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.Suis_ORF.bed Mon Apr 06 05:21:48 2015 -0400 |
| b |
| b'@@ -0,0 +1,2910 @@\n+Streptococcus_suis\t0\t1374\tStreptococcus_suis|ORF1\t0\t+\n+Streptococcus_suis\t1506\t2664\tStreptococcus_suis|ORF2\t0\t+\n+Streptococcus_suis\t1706\t2021\tStreptococcus_suis|ORF3\t0\t-\n+Streptococcus_suis\t2755\t3637\tStreptococcus_suis|ORF4\t0\t+\n+Streptococcus_suis\t3932\t4313\tStreptococcus_suis|ORF5\t0\t+\n+Streptococcus_suis\t4380\t5514\tStreptococcus_suis|ORF6\t0\t+\n+Streptococcus_suis\t4449\t4797\tStreptococcus_suis|ORF7\t0\t-\n+Streptococcus_suis\t4490\t4838\tStreptococcus_suis|ORF8\t0\t-\n+Streptococcus_suis\t5662\t6241\tStreptococcus_suis|ORF9\t0\t+\n+Streptococcus_suis\t6234\t9735\tStreptococcus_suis|ORF10\t0\t+\n+Streptococcus_suis\t6973\t7351\tStreptococcus_suis|ORF11\t0\t-\n+Streptococcus_suis\t8270\t8624\tStreptococcus_suis|ORF12\t0\t-\n+Streptococcus_suis\t10037\t10412\tStreptococcus_suis|ORF13\t0\t+\n+Streptococcus_suis\t10522\t11815\tStreptococcus_suis|ORF14\t0\t+\n+Streptococcus_suis\t11815\t13084\tStreptococcus_suis|ORF15\t0\t+\n+Streptococcus_suis\t13076\t13634\tStreptococcus_suis|ORF16\t0\t+\n+Streptococcus_suis\t13634\t15629\tStreptococcus_suis|ORF17\t0\t+\n+Streptococcus_suis\t13834\t14764\tStreptococcus_suis|ORF18\t0\t-\n+Streptococcus_suis\t15963\t16437\tStreptococcus_suis|ORF19\t0\t+\n+Streptococcus_suis\t17316\t17886\tStreptococcus_suis|ORF20\t0\t-\n+Streptococcus_suis\t19641\t19953\tStreptococcus_suis|ORF21\t0\t-\n+Streptococcus_suis\t20790\t21111\tStreptococcus_suis|ORF22\t0\t-\n+Streptococcus_suis\t21319\t21649\tStreptococcus_suis|ORF23\t0\t+\n+Streptococcus_suis\t23559\t24405\tStreptococcus_suis|ORF24\t0\t+\n+Streptococcus_suis\t23777\t24143\tStreptococcus_suis|ORF25\t0\t-\n+Streptococcus_suis\t24385\t24910\tStreptococcus_suis|ORF26\t0\t+\n+Streptococcus_suis\t24910\t26251\tStreptococcus_suis|ORF27\t0\t+\n+Streptococcus_suis\t26344\t27322\tStreptococcus_suis|ORF28\t0\t+\n+Streptococcus_suis\t27399\t28587\tStreptococcus_suis|ORF29\t0\t+\n+Streptococcus_suis\t28218\t28563\tStreptococcus_suis|ORF30\t0\t-\n+Streptococcus_suis\t28567\t29356\tStreptococcus_suis|ORF31\t0\t+\n+Streptococcus_suis\t28603\t28942\tStreptococcus_suis|ORF32\t0\t-\n+Streptococcus_suis\t29343\t30360\tStreptococcus_suis|ORF33\t0\t+\n+Streptococcus_suis\t30691\t31426\tStreptococcus_suis|ORF34\t0\t+\n+Streptococcus_suis\t31426\t35158\tStreptococcus_suis|ORF35\t0\t+\n+Streptococcus_suis\t32643\t33138\tStreptococcus_suis|ORF36\t0\t-\n+Streptococcus_suis\t34368\t34755\tStreptococcus_suis|ORF37\t0\t-\n+Streptococcus_suis\t34816\t35128\tStreptococcus_suis|ORF38\t0\t-\n+Streptococcus_suis\t35016\t36615\tStreptococcus_suis|ORF39\t0\t+\n+Streptococcus_suis\t36640\t37693\tStreptococcus_suis|ORF40\t0\t+\n+Streptococcus_suis\t37333\t37738\tStreptococcus_suis|ORF41\t0\t-\n+Streptococcus_suis\t37665\t38241\tStreptococcus_suis|ORF42\t0\t+\n+Streptococcus_suis\t38198\t38555\tStreptococcus_suis|ORF43\t0\t-\n+Streptococcus_suis\t38241\t39798\tStreptococcus_suis|ORF44\t0\t+\n+Streptococcus_suis\t38277\t38709\tStreptococcus_suis|ORF45\t0\t-\n+Streptococcus_suis\t39303\t39609\tStreptococcus_suis|ORF46\t0\t-\n+Streptococcus_suis\t39437\t39809\tStreptococcus_suis|ORF47\t0\t-\n+Streptococcus_suis\t39892\t40318\tStreptococcus_suis|ORF48\t0\t-\n+Streptococcus_suis\t39920\t41186\tStreptococcus_suis|ORF49\t0\t+\n+Streptococcus_suis\t40224\t40698\tStreptococcus_suis|ORF50\t0\t+\n+Streptococcus_suis\t40229\t40925\tStreptococcus_suis|ORF51\t0\t-\n+Streptococcus_suis\t41070\t41451\tStreptococcus_suis|ORF52\t0\t-\n+Streptococcus_suis\t41193\t41700\tStreptococcus_suis|ORF53\t0\t+\n+Streptococcus_suis\t41309\t41615\tStreptococcus_suis|ORF54\t0\t+\n+Streptococcus_suis\t41683\t42766\tStreptococcus_suis|ORF55\t0\t+\n+Streptococcus_suis\t41692\t42064\tStreptococcus_suis|ORF56\t0\t-\n+Streptococcus_suis\t42188\t42569\tStreptococcus_suis|ORF57\t0\t-\n+Streptococcus_suis\t42794\t43571\tStreptococcus_suis|ORF58\t0\t+\n+Streptococcus_suis\t43041\t43365\tStreptococcus_suis|ORF59\t0\t-\n+Streptococcus_suis\t43447\t43894\tStreptococcus_suis|ORF60\t0\t-\n+Streptococcus_suis\t43619\t44870\tStreptococcus_suis|ORF61\t0\t+\n+Streptococcus_suis\t44859\t46164\tStreptococcus_suis|ORF62\t0\t+\n+Streptococcus_suis\t45179\t45524\tStreptococcus_suis|ORF63\t0\t-\n+Streptococcus_suis\t47040\t47829\tStreptococcus_suis|ORF64\t0\t+\n+Streptococcus_suis\t47829\t48417\tStreptococcus_suis|ORF65\t0\t+\n+Streptococcus_suis\t48296\t48860\tStreptococcus_suis|'..b'956142\t1956526\tStreptococcus_suis|ORF2851\t0\t+\n+Streptococcus_suis\t1956835\t1958599\tStreptococcus_suis|ORF2852\t0\t-\n+Streptococcus_suis\t1957087\t1957456\tStreptococcus_suis|ORF2853\t0\t+\n+Streptococcus_suis\t1958210\t1958606\tStreptococcus_suis|ORF2854\t0\t+\n+Streptococcus_suis\t1958681\t1959143\tStreptococcus_suis|ORF2855\t0\t-\n+Streptococcus_suis\t1959143\t1960055\tStreptococcus_suis|ORF2856\t0\t-\n+Streptococcus_suis\t1959625\t1959934\tStreptococcus_suis|ORF2857\t0\t+\n+Streptococcus_suis\t1960124\t1961171\tStreptococcus_suis|ORF2858\t0\t-\n+Streptococcus_suis\t1961182\t1963597\tStreptococcus_suis|ORF2859\t0\t-\n+Streptococcus_suis\t1963905\t1964370\tStreptococcus_suis|ORF2860\t0\t-\n+Streptococcus_suis\t1963943\t1964489\tStreptococcus_suis|ORF2861\t0\t-\n+Streptococcus_suis\t1964461\t1967398\tStreptococcus_suis|ORF2862\t0\t-\n+Streptococcus_suis\t1967597\t1968323\tStreptococcus_suis|ORF2863\t0\t+\n+Streptococcus_suis\t1968313\t1969627\tStreptococcus_suis|ORF2864\t0\t+\n+Streptococcus_suis\t1969665\t1971015\tStreptococcus_suis|ORF2865\t0\t-\n+Streptococcus_suis\t1971097\t1972630\tStreptococcus_suis|ORF2866\t0\t-\n+Streptococcus_suis\t1972747\t1973206\tStreptococcus_suis|ORF2867\t0\t-\n+Streptococcus_suis\t1973262\t1973877\tStreptococcus_suis|ORF2868\t0\t-\n+Streptococcus_suis\t1974226\t1974649\tStreptococcus_suis|ORF2869\t0\t+\n+Streptococcus_suis\t1974436\t1975804\tStreptococcus_suis|ORF2870\t0\t-\n+Streptococcus_suis\t1974991\t1975387\tStreptococcus_suis|ORF2871\t0\t+\n+Streptococcus_suis\t1975823\t1976297\tStreptococcus_suis|ORF2872\t0\t-\n+Streptococcus_suis\t1976286\t1978269\tStreptococcus_suis|ORF2873\t0\t-\n+Streptococcus_suis\t1978354\t1980301\tStreptococcus_suis|ORF2874\t0\t-\n+Streptococcus_suis\t1978597\t1979140\tStreptococcus_suis|ORF2875\t0\t+\n+Streptococcus_suis\t1979950\t1980460\tStreptococcus_suis|ORF2876\t0\t+\n+Streptococcus_suis\t1980267\t1980729\tStreptococcus_suis|ORF2877\t0\t-\n+Streptococcus_suis\t1980718\t1981180\tStreptococcus_suis|ORF2878\t0\t-\n+Streptococcus_suis\t1980808\t1981186\tStreptococcus_suis|ORF2879\t0\t+\n+Streptococcus_suis\t1981166\t1981703\tStreptococcus_suis|ORF2880\t0\t-\n+Streptococcus_suis\t1981512\t1981878\tStreptococcus_suis|ORF2881\t0\t+\n+Streptococcus_suis\t1981760\t1982099\tStreptococcus_suis|ORF2882\t0\t-\n+Streptococcus_suis\t1982179\t1982602\tStreptococcus_suis|ORF2883\t0\t+\n+Streptococcus_suis\t1982325\t1982631\tStreptococcus_suis|ORF2884\t0\t-\n+Streptococcus_suis\t1982644\t1983916\tStreptococcus_suis|ORF2885\t0\t-\n+Streptococcus_suis\t1983950\t1984655\tStreptococcus_suis|ORF2886\t0\t+\n+Streptococcus_suis\t1983992\t1984349\tStreptococcus_suis|ORF2887\t0\t-\n+Streptococcus_suis\t1984655\t1985540\tStreptococcus_suis|ORF2888\t0\t+\n+Streptococcus_suis\t1985996\t1986602\tStreptococcus_suis|ORF2889\t0\t-\n+Streptococcus_suis\t1986682\t1987480\tStreptococcus_suis|ORF2890\t0\t-\n+Streptococcus_suis\t1987469\t1988357\tStreptococcus_suis|ORF2891\t0\t-\n+Streptococcus_suis\t1988287\t1989124\tStreptococcus_suis|ORF2892\t0\t-\n+Streptococcus_suis\t1989120\t1989669\tStreptococcus_suis|ORF2893\t0\t-\n+Streptococcus_suis\t1989669\t1990539\tStreptococcus_suis|ORF2894\t0\t-\n+Streptococcus_suis\t1990608\t1991928\tStreptococcus_suis|ORF2895\t0\t-\n+Streptococcus_suis\t1991888\t1993154\tStreptococcus_suis|ORF2896\t0\t-\n+Streptococcus_suis\t1993212\t1993599\tStreptococcus_suis|ORF2897\t0\t+\n+Streptococcus_suis\t1993568\t1994696\tStreptococcus_suis|ORF2898\t0\t+\n+Streptococcus_suis\t1994967\t1996464\tStreptococcus_suis|ORF2899\t0\t-\n+Streptococcus_suis\t1996680\t1997781\tStreptococcus_suis|ORF2900\t0\t-\n+Streptococcus_suis\t1998012\t1998933\tStreptococcus_suis|ORF2901\t0\t+\n+Streptococcus_suis\t1998922\t2000620\tStreptococcus_suis|ORF2902\t0\t+\n+Streptococcus_suis\t1999704\t2000052\tStreptococcus_suis|ORF2903\t0\t-\n+Streptococcus_suis\t1999973\t2000306\tStreptococcus_suis|ORF2904\t0\t+\n+Streptococcus_suis\t2000501\t2000855\tStreptococcus_suis|ORF2905\t0\t+\n+Streptococcus_suis\t2000887\t2003506\tStreptococcus_suis|ORF2906\t0\t+\n+Streptococcus_suis\t2003906\t2004614\tStreptococcus_suis|ORF2907\t0\t-\n+Streptococcus_suis\t2004614\t2005157\tStreptococcus_suis|ORF2908\t0\t-\n+Streptococcus_suis\t2005222\t2006464\tStreptococcus_suis|ORF2909\t0\t+\n+Streptococcus_suis\t2006518\t2007289\tStreptococcus_suis|ORF2910\t0\t+\n' |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.Suis_ORF.prot.pair_sample_C10.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.Suis_ORF.prot.pair_sample_C10.fasta Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -0,0 +1,119 @@ +>Streptococcus_suis|ORF1 length 457 aa, 1374 bp, from 1..1374 of Streptococcus_suis +MNQEQLFWQRFIELAKVNFKPSIYDFYVADAKLLGINQQVANIFLNRPFKKDFWEKNFEE +LMIAASFESYGEPLTIQYQFTEDEQEIRNTTNTRSSIVHQVQTLEPATPQETFKPVHSDI +KSQYTFANFVQGDNNHWAKAAALAVSDNLGELYNPLFIFGGPGLGKTHILNAIGNKVLAD +NPQARIKYVSSETFINEFLEHLRLNDMESFKKTYRNLDLLLIDDIQSLRNKATTQEEFFH +TFNALHEKNKQIVLTSDRNPDHLDNLEERLVTRFKWGLTSEITPPDFETRIAILRNKCEN +LPYNFTNETLSYLAGQFDSNVRDLEGALKDIHLIATMRQLSEISVEVAAEAIRSRKQTNP +QNMVIPIEKIQTEVGNFYGVSLKELKGSKRVQHIVHARQVAMFLAREMTDNSLPKIGKEF +GNRDHTTVMHAYNKIKTLLLDDENLEIEITSIKNKLR +>Streptococcus_suis|ORF2 length 385 aa, 1158 bp, from 1507..2664 of Streptococcus_suis +IINKGESMIQFSINKNIFLQALSITKRAISTKNAIPILSTVKITVTSEGITLTGSNGQIS +IEHFISIQDENAGLLISSPGSILLEAGFFINVVSSMPDLVLDFNEIEQKQIVLTSGKSEI +TLKGKEAEQYPRLQEVPTSKPLVLETKVLKQTINETAFAASTQESRPILTGVHFVLTENK +NLKTVATDSHRMSQRKLVLDTSGDDFNVVIPSRSLREFTAVFTDDIETVEVFFSNNQILF +RSEHISFYTRLLEGTYPDTDRLIPTEFKTTAIFDTANLRHSMERARLLSNATQNGTVKLE +IANNVVSAHVNSPEVGRVNEELDTVEVSGEDLVISFNPTYLIEALKATTSEQVKISFISS +VRPFTLIPNNEGEDFIQLVTPVRTN +>Streptococcus_suis|ORF291 length 760 aa, 2283 bp, from complement(184307..186589) of Streptococcus_suis +KRGEFMRFNQFSFIKKETSVYLQELDTLGFQLIPDASSKTNLETFVRKCHFLTANTDFAL +SNMIAEWDTDLLTFFQSDRELTDQIFYQVAFQLLGFVPGMDYTDVMDFVEKSNFPIVYGD +IIDNLYQLLNTRTKSGNTLIDQLVSDDLIPEDNHYHFFNGKSMATFSTKNLIREVVYVET +PVDTAGTGQTDIVKLSILRPHFDGKIPAVITNSPYHQGVNDVASDKALHKMEGELAEKQV +GTIQVKQASITKLDLDQRNLPVSPATEKLGHITSYSLNDYFLARGFASLHVSGVGTLGST +GYMTSGDYQQVEGYKAVIDWLNGRTKAYTDHTRSLEVKADWANGKVATTGLSYLGTMSNA +LATTGVDGLEVIIAEAGISSWYDYYRENGLVTSPGGYPGEDLDSLTALTYSKSLQAGDFL +RNKAAYEKGLAAERAALDRTSGDYNQYWHDRNYLLHADRVKCEVVFTHGSQDWNVKPIHV +WNMFHALPSHIKKHLFFHNGAHVYMNNWQSIDFRESMNALLSQKLLGYENNYQLPTVIWQ +DNSGEQTWTTLDTFGGENETVLPLGTGSQTVANQYTQEDFERYGKSYSAFHQDLYAGKAN +QISIELPVTEGLLLNGQVTLKLRVASSVAKGLLSAQLLDKGNKKRLAPIPAPKARLSLDN +GRYHAQENLVELPYVEMPQRLVTKGFMNLQNRTDLMTVEEVVPGQWMNLTWKLQPTIYQL +KKGDVLELILYTTDFECTVRDNSQWQIHLDLSQSQLILPH +>Streptococcus_suis|ORF292 length 216 aa, 651 bp, from 185183..185833 of Streptococcus_suis +AVGKDHLTLDPISVEQIIAVMPVLIVVTAGAVQGSTLGSQSFFVGCFIAEEVTCLQTLGV +GQGGQAVQIFAWIATRAGHQPVFTVVVIPRGNPCFCDDDFQSVHASCCQGIGHGTEIRQS +RRRYLTIGPIGLDLKRASVVCVGLGATVQPVNHRFIALHLLVVARCHVARRAQRANTRHM +EAGKAASEEVVIEGVRSNVPQFFSSRADRQVPLVQV +>Streptococcus_suis|ORF583 length 391 aa, 1176 bp, from 397805..398980 of Streptococcus_suis +RKKMKKQFELIATAAAGLEAVVGREIRNLGYECQVENGRVRFQGDVKSIIETNIWLRSAD +RIKIIVGQFPAKTFEELFQGVFNLDWENYLPLGCKFPISKAKCVKSKLHNEPSVQAISKK +AVVKKLQKHFSRPEGVPLQEMGAEFKIEVSILKDVATVMIDTTGSSLFKRGYRVEKGGAP +IKENMAAAILQLSNWYPDKPLIDPTCGSGTFCIEAAMLAKNIAPGLKRSFAFEEWPWVED +QLVVALRKEAQASIKTDLVLDITGSDIDARMIEIAKKNAFAAGVEQDIVFKQMRVQDLRT +DKINGVIISNPPYGERLLDDEAIVTLYREMGETFEPLKTWSKFILTSDELFETRFGQQAD +KKRKLYNGTLKVDLYQFFGQRVKRQVQEVQG +>Streptococcus_suis|ORF584 length 487 aa, 1464 bp, from 398981..400444 of Streptococcus_suis +EDIVGEKNSHHLPLDEEKVLDFEVAKDLTIEEAVKKHKEIEAGVTEDDGLLDRYIKQHRA +EIESQKFETKINHLPLVEVADEEKNQGHESAEEVEANESSLTEVSEEIAPIVEELSVTPM +ETLEETVIASTVAMEGLSSVADDSSLELEEDETEDLDHSEGADRDQKKKFYFWSAVGLSM +IGVMATALVWMNSVNKSNTATSSSSTSTSQTSSTASSSTDANVTAFEQLYNSFFTDSSLT +KLKNSEFGKLAELKVLLEKLDKNSDSYTKAKEQYDHLEKAIAAIQAINGQFDKEVVVNGE +IDTTATVKSGESLSATTTGISAVDSLLASVVNFGRSQQEVASATVASEAAVTRNQGADET +VSTGVPATTEVASTTVSGSTTDFGIAVPAGVVLQRDRSRVPYNQAMIDDVNNEAWNFNPG +ILENIVTISQQRGYITGNQYILEKVNIINGNGYYNMFKPDGTYLFSINCKTGYFVGNGAG +HSDALDY +>Streptococcus_suis|ORF873 length 343 aa, 1032 bp, from 605439..606470 of Streptococcus_suis +TLGEETMTNVFKGRHFLAEKDFTRAELEWLIDFSAHLKDLKKRNIPHRYLEGKNIALLFE +KTSTRTRAAFTVASIDLGAHPEYLGANDIQLGKKESTEDTAKVLGRMFDGIEFRGFSQKM +VEELAEFSGVPVWNGLTDAWHPTQMLADYLTVKENFGKLEGLTLVYCGDGRNNVANSLLV +TGAILGVNVHIFSPKELFPEEEVVALAEGFAKESGARVLITDNADEAVKGADVLYTDVWV +SMGEEDKFAERVALLKPYQVNMELVKKAENENLIFLHCLPAFHDTNTVYGKDVAEKFGVE +EMEVTDEVFRSKYARHFDQAENRMHTIKAVMAATLGDPFVPRV +>Streptococcus_suis|ORF874 length 113 aa, 342 bp, from complement(605625..605966) of Streptococcus_suis +VSNIVTAITTVNQSQAFQLAKVFFDSQVVRQHLSWVPCICQTIPYWHTGEFCQFFHHFLT +ETTEFNTVEHTSQNFSSIFCRFFLTKLDVICTKIFWMGTKVNRCYCEGSTSTC +>Streptococcus_suis|ORF1165 length 105 aa, 318 bp, from 811613..811930 of Streptococcus_suis +AYNESVKRKECHLMKQVNMSKIINYLTILGLLILLSAFFLDNWIRDWFFPSSWGNVATML +ILPLLGALILILSIYYKKLWTGLISIFLIISFPLIFGIGYFIFGP +>Streptococcus_suis|ORF1166 length 125 aa, 378 bp, from 811867..812244 of Streptococcus_suis +YLLNNLISSDIRYWLLYIWPLEGVVMNLTLLKRLNLVLYGIAIFLFVMLFLPIGQWFDIV +NVNFKLTFFIIPFFGLASLPTAIYTKNVRQILLSVLLVALYFILFSLITALSGLFHLNFY +SFFFK +>Streptococcus_suis|ORF1455 length 114 aa, 345 bp, from 1026973..1027317 of Streptococcus_suis +SCKLSLHIRWESWMGQGFYCYRFKLIHLRTNSNPFSFFRHLNSHFQHLRNEWTVMLPDSV +LDQDISTSHCRCHHKGTRFDTILHHLMFCASQFFYTSNRNRLCTCPLNFCPHFV +>Streptococcus_suis|ORF1456 length 116 aa, 351 bp, from complement(1027944..1028294) of Streptococcus_suis +YGNACNSRPPTCDKSYSCWETLIYMGLNLVQFHFLISWYNGNMVISILQFFSHILFIYLA +HHLLVTTVDWSRWLKVTGDNQRKINLLILFLAIALGYLVSTFFLELLMMGRSFANM +>Streptococcus_suis|ORF1747 length 335 aa, 1008 bp, from complement(1225218..1226225) of Streptococcus_suis +RMLNTDDTVTIYDVAREAGVSMATVSRVVNGNKNVKENTRKKVLEVIDRLDYRPNAVARG +LASKKTTTVGVVIPNIANAYFATLAKGIDDIADMYKYNIVLANSDENDEKEINVVNTLFS +KQVDGIIFMGYHLTDKIRAEFSRSRTPIVLAGTVDLEHQLPSVNIDYAAASVDAVNLLAK +NNKKIAFVSGPLVDDINGKVRFAGYKQGLKDNGIEFNEGLVFESKYKYEEGYALAERILN +AGATAAYVAEDEIAAGLLNGVSDMGIKVPEDFEIITSDDSLVTKFTRPNLTSINQPLYDI +GAIAMRMLTKIMHKEELENREVVLNHGIKVRKSTK +>Streptococcus_suis|ORF1748 length 377 aa, 1134 bp, from 1226384..1227517 of Streptococcus_suis +TKISLFLPLHARKVSTMSKLHHVKSYLEANKMDLAIFSDPVSIYYLTGYHSDPHERHMML +FVMPDHDSLLFLPALDVERAVATVDFPVAGYMDSENPWQIIKSKLPQKSFSAICAEFDNL +NLTRYHGLQSIFSQPFSDITPLINTMKLIKSRDEIEKMLVAGEFADKAMQVGFNNISLDV +TETDIIAQIEFEMKKQGISKMSFETMVLTGDNAANPHGIPSTNKIENNALLLFDLGVEAL +GYTSDMTRTVAVGKPDQFKKDIYNLTLEAHMAAVNMIKPGVTAGEIDYAARSVIEKAGYG +EYFNHRLGHGLGMSVHEFPSIMEGNDLVIEEGMCFSVEPGIYIPGKVGVRIEDCGYVTKN +GFEVFTKTPKELLYFEG +>Streptococcus_suis|ORF2037 length 234 aa, 705 bp, from complement(1422380..1423084) of Streptococcus_suis +KSMTKTALITGVSSGIGLAQAGIFLENGWRVFGIDLASKPDLAGDFHFLQLDLTGDLSPV +FSWCQSVDVLCNTAGILDDYRPHLDISEDELAQIFAVNFFAVTRLTRPYLQQMVDRQSGI +IINMCSIASSLAGGGGSAYTASKHALAGFTKQLALDYAKDKVQIFGIAPGAVQTGMTQKD +FEPGGLADWVADQTPIGRWTQPSEIAELTFMLATGKLASMQGQIITIDGGWSLK +>Streptococcus_suis|ORF2038 length 112 aa, 339 bp, from 1422849..1423187 of Streptococcus_suis +SSKMPAVLQRTSTDWHQEKTGDKSPVRSSCRKWKSPAKSGLLARSIPKTRQPFSKKIPAC +ARPMPLETPVMRAVLVMDFYPVGRKDIARGRAPHGEAFTLAGHVDEEIGRRL +>Streptococcus_suis|ORF2329 length 160 aa, 483 bp, from 1612284..1612766 of Streptococcus_suis +LIETNWFHHLTGQEGLDVLFFHNLGFRITDQLYLEVRKFHLLQGLSQLLRRWSQESRVKG +ARYIERNHPLDTCFLQQFNRLIHCSHLASDDDLGWCVVVGWGNNPRGNSRTDFFNQVDIC +VENSNHLTSPCWRSQFHIFTTLSNQGNRIFKGQSSRCHQS +>Streptococcus_suis|ORF2330 length 329 aa, 990 bp, from complement(1613050..1614039) of Streptococcus_suis +ARKKDEGIMKTKITELLDIKYPIFQGGMAWVADGDLAGAVSNAGGLGIIGGGNAPKEVVK +ANIDKVKSITDKPFGVNIMLLSPFADDIVDLVIEEGVKVVTTGAGNPGKYMERLHAAGIT +VIPVVPSVALAKRMEKLGVDAVIAEGMEAGGHIGKLTTMTLVRQVVEAVSIPVIAAGGIA +DGAGAAAAFMLGAEAVQVGTRFVVATESNAHQAYKEKVLKAKDIDTTVSASIVGHPVRAI +KNKLSSAYAAAEKDFLAGKISADAIEELGAGALRNAVVDGDVTNGSVMAGQIAGLVSKEE +SCEDILKDIYYGAAKVIREEASRWASVGE +>Streptococcus_suis|ORF2619 length 107 aa, 324 bp, from 1802386..1802709 of Streptococcus_suis +QLCVGSNPINSLFRRNFFVCCISSQSSCYVHTMWFVGIIVEIIVARYIIIAMGNFQCVCP +CRRWSNVLNFRNDTIIQPHVFVLNIQTGVNDCNHHSATICLIFRTCF +>Streptococcus_suis|ORF2620 length 192 aa, 579 bp, from complement(1803558..1804136) of Streptococcus_suis +RLKIPCFQRKEVTMYDSFDKGWFVLQTYSGYENKVKENLLQRAHTYNMLENILRVEIPTQ +TVQVEKNGEVKEVEENRFPGYVLVEMVMTDEAWFVVRNTPNVTGFVGSHGNRSKPTPLLE +EEIRQILVSMGQTVQEFDIDVKVGDTVRIIDGAFTDYTGKITEIDNNKVKMVISMFGNDT +IAEVNLSQIAEL |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.Suis_ORF.prot.sample_C10.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.Suis_ORF.prot.sample_C10.fasta Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -0,0 +1,50 @@ +>Streptococcus_suis|ORF1 length 457 aa, 1374 bp, from 1..1374 of Streptococcus_suis +MNQEQLFWQRFIELAKVNFKPSIYDFYVADAKLLGINQQVANIFLNRPFKKDFWEKNFEE +LMIAASFESYGEPLTIQYQFTEDEQEIRNTTNTRSSIVHQVQTLEPATPQETFKPVHSDI +KSQYTFANFVQGDNNHWAKAAALAVSDNLGELYNPLFIFGGPGLGKTHILNAIGNKVLAD +NPQARIKYVSSETFINEFLEHLRLNDMESFKKTYRNLDLLLIDDIQSLRNKATTQEEFFH +TFNALHEKNKQIVLTSDRNPDHLDNLEERLVTRFKWGLTSEITPPDFETRIAILRNKCEN +LPYNFTNETLSYLAGQFDSNVRDLEGALKDIHLIATMRQLSEISVEVAAEAIRSRKQTNP +QNMVIPIEKIQTEVGNFYGVSLKELKGSKRVQHIVHARQVAMFLAREMTDNSLPKIGKEF +GNRDHTTVMHAYNKIKTLLLDDENLEIEITSIKNKLR +>Streptococcus_suis|ORF292 length 216 aa, 651 bp, from 185183..185833 of Streptococcus_suis +AVGKDHLTLDPISVEQIIAVMPVLIVVTAGAVQGSTLGSQSFFVGCFIAEEVTCLQTLGV +GQGGQAVQIFAWIATRAGHQPVFTVVVIPRGNPCFCDDDFQSVHASCCQGIGHGTEIRQS +RRRYLTIGPIGLDLKRASVVCVGLGATVQPVNHRFIALHLLVVARCHVARRAQRANTRHM +EAGKAASEEVVIEGVRSNVPQFFSSRADRQVPLVQV +>Streptococcus_suis|ORF583 length 391 aa, 1176 bp, from 397805..398980 of Streptococcus_suis +RKKMKKQFELIATAAAGLEAVVGREIRNLGYECQVENGRVRFQGDVKSIIETNIWLRSAD +RIKIIVGQFPAKTFEELFQGVFNLDWENYLPLGCKFPISKAKCVKSKLHNEPSVQAISKK +AVVKKLQKHFSRPEGVPLQEMGAEFKIEVSILKDVATVMIDTTGSSLFKRGYRVEKGGAP +IKENMAAAILQLSNWYPDKPLIDPTCGSGTFCIEAAMLAKNIAPGLKRSFAFEEWPWVED +QLVVALRKEAQASIKTDLVLDITGSDIDARMIEIAKKNAFAAGVEQDIVFKQMRVQDLRT +DKINGVIISNPPYGERLLDDEAIVTLYREMGETFEPLKTWSKFILTSDELFETRFGQQAD +KKRKLYNGTLKVDLYQFFGQRVKRQVQEVQG +>Streptococcus_suis|ORF874 length 113 aa, 342 bp, from complement(605625..605966) of Streptococcus_suis +VSNIVTAITTVNQSQAFQLAKVFFDSQVVRQHLSWVPCICQTIPYWHTGEFCQFFHHFLT +ETTEFNTVEHTSQNFSSIFCRFFLTKLDVICTKIFWMGTKVNRCYCEGSTSTC +>Streptococcus_suis|ORF1165 length 105 aa, 318 bp, from 811613..811930 of Streptococcus_suis +AYNESVKRKECHLMKQVNMSKIINYLTILGLLILLSAFFLDNWIRDWFFPSSWGNVATML +ILPLLGALILILSIYYKKLWTGLISIFLIISFPLIFGIGYFIFGP +>Streptococcus_suis|ORF1456 length 116 aa, 351 bp, from complement(1027944..1028294) of Streptococcus_suis +YGNACNSRPPTCDKSYSCWETLIYMGLNLVQFHFLISWYNGNMVISILQFFSHILFIYLA +HHLLVTTVDWSRWLKVTGDNQRKINLLILFLAIALGYLVSTFFLELLMMGRSFANM +>Streptococcus_suis|ORF1747 length 335 aa, 1008 bp, from complement(1225218..1226225) of Streptococcus_suis +RMLNTDDTVTIYDVAREAGVSMATVSRVVNGNKNVKENTRKKVLEVIDRLDYRPNAVARG +LASKKTTTVGVVIPNIANAYFATLAKGIDDIADMYKYNIVLANSDENDEKEINVVNTLFS +KQVDGIIFMGYHLTDKIRAEFSRSRTPIVLAGTVDLEHQLPSVNIDYAAASVDAVNLLAK +NNKKIAFVSGPLVDDINGKVRFAGYKQGLKDNGIEFNEGLVFESKYKYEEGYALAERILN +AGATAAYVAEDEIAAGLLNGVSDMGIKVPEDFEIITSDDSLVTKFTRPNLTSINQPLYDI +GAIAMRMLTKIMHKEELENREVVLNHGIKVRKSTK +>Streptococcus_suis|ORF2038 length 112 aa, 339 bp, from 1422849..1423187 of Streptococcus_suis +SSKMPAVLQRTSTDWHQEKTGDKSPVRSSCRKWKSPAKSGLLARSIPKTRQPFSKKIPAC +ARPMPLETPVMRAVLVMDFYPVGRKDIARGRAPHGEAFTLAGHVDEEIGRRL +>Streptococcus_suis|ORF2329 length 160 aa, 483 bp, from 1612284..1612766 of Streptococcus_suis +LIETNWFHHLTGQEGLDVLFFHNLGFRITDQLYLEVRKFHLLQGLSQLLRRWSQESRVKG +ARYIERNHPLDTCFLQQFNRLIHCSHLASDDDLGWCVVVGWGNNPRGNSRTDFFNQVDIC +VENSNHLTSPCWRSQFHIFTTLSNQGNRIFKGQSSRCHQS +>Streptococcus_suis|ORF2620 length 192 aa, 579 bp, from complement(1803558..1804136) of Streptococcus_suis +RLKIPCFQRKEVTMYDSFDKGWFVLQTYSGYENKVKENLLQRAHTYNMLENILRVEIPTQ +TVQVEKNGEVKEVEENRFPGYVLVEMVMTDEAWFVVRNTPNVTGFVGSHGNRSKPTPLLE +EEIRQILVSMGQTVQEFDIDVKVGDTVRIIDGAFTDYTGKITEIDNNKVKMVISMFGNDT +IAEVNLSQIAEL |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.t11_bed_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.t11_bed_out.bed Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -0,0 +1,6 @@ +alpha 67 331 alpha|CDS1 0 + +alpha 71 326 alpha|CDS2 0 + +alpha 75 336 alpha|CDS3 0 + +beta 68 332 beta|CDS1 0 + +beta 72 327 beta|CDS2 0 + +beta 76 337 beta|CDS3 0 + |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.t11_open_bed_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.t11_open_bed_out.bed Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -0,0 +1,7 @@ +alpha 67 331 alpha|CDS1 0 + +alpha 71 326 alpha|CDS2 0 + +alpha 75 336 alpha|CDS3 0 + +beta 68 332 beta|CDS1 0 + +beta 72 327 beta|CDS2 0 + +beta 76 337 beta|CDS3 0 + +beta 333 408 beta|CDS4 0 + |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 test-data/get_orf_input.t1_bed_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.t1_bed_out.bed Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -0,0 +1,3 @@ +alpha 67 331 alpha|CDS1 0 + +alpha 71 326 alpha|CDS2 0 + +alpha 75 336 alpha|CDS3 0 + |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/README.rst --- a/tools/get_orfs_or_cdss/README.rst Fri Nov 28 11:45:37 2014 -0500 +++ b/tools/get_orfs_or_cdss/README.rst Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -41,7 +41,8 @@ <tool file="get_orfs_or_cdss/get_orfs_or_cdss.xml" /> -You will also need to install Biopython 1.54 or later. +You will also need to install Biopython 1.65 or later (slightly older versions +should be fine, but will not have the latest NCBI genetic code tables). If you wish to run the unit tests, also move/copy the ``test-data/`` files under Galaxy's ``test-data/`` folder. Then:: @@ -70,6 +71,9 @@ - Renamed folder and adopted README.rst naming. v0.0.6 - Corrected automated dependency defintion. v0.0.7 - Tool definition now embeds citation information. +v0.1.0 - Tool now outputs BED formatted calls (by @erasche, Eric Rasche). + - Using ``optparse`` for the Python command line API (Eric Rasche). + - Added NCBI genetic code table 24, Pterobranchia Mitochondrial. ======= ====================================================================== @@ -85,7 +89,7 @@ For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use the following command from the Galaxy root folder:: - $ tar -czf get_orfs_or_cdss.tar.gz tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.* tools/get_orfs_or_cdss/tool_dependencies.xml test-data/get_orf_input*.fasta test-data/Ssuis.fasta + $ tar -czf get_orfs_or_cdss.tar.gz tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.* tools/get_orfs_or_cdss/tool_dependencies.xml test-data/get_orf_input*.fasta test-data/Ssuis.fasta test-data/get_orf_input*.bed Check this worked:: @@ -104,6 +108,10 @@ test-data/get_orf_input.t1_nuc_out.fasta test-data/get_orf_input.t1_prot_out.fasta test-data/Ssuis.fasta + test-data/get_orf_input.Suis_ORF.bed + test-data/get_orf_input.t11_open_bed_out.bed + test-data/get_orf_input.t11_bed_out.bed + test-data/get_orf_input.t1_bed_out.bed Licence (MIT) |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/get_orfs_or_cdss.py --- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Fri Nov 28 11:45:37 2014 -0500 +++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Mon Apr 06 05:21:48 2015 -0400 |
| [ |
| b'@@ -1,13 +1,6 @@\n #!/usr/bin/env python\n """Find ORFs in a nucleotide sequence file.\n \n-get_orfs_or_cdss.py $input_fasta $input_format $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file\n-\n-Takes ten command line options, input sequence filename, format, genetic\n-code, CDS vs ORF, end type (open, closed), selection mode (all, top, one),\n-minimum length (in amino acids), strand (both, forward, reverse), output\n-nucleotide filename, and output protein filename.\n-\n For more details, see the help text and argument descriptions in the\n accompanying get_orfs_or_cdss.xml file which defines a Galaxy interface.\n \n@@ -24,65 +17,88 @@\n \n See accompanying text file for licence details (MIT licence).\n \n-This is version 0.0.3 of the script.\n+This is version 0.1.0 of the script.\n """\n import sys\n import re\n+from optparse import OptionParser\n \n-if "-v" in sys.argv or "--version" in sys.argv:\n- print "v0.0.3"\n- sys.exit(0)\n-\n-def stop_err(msg, err=1):\n+def sys_exit(msg, err=1):\n sys.stderr.write(msg.rstrip() + "\\n")\n sys.exit(err)\n \n+usage = """Use as follows:\n+\n+$ python get_orfs_or_cdss.py -i genome.fa -f fasta --table 11 -t CDS -e open -m all -s both --on cds.nuc.fa --op cds.protein.fa --ob cds.bed\n+"""\n+\n try:\n from Bio.Seq import Seq, reverse_complement, translate\n from Bio.SeqRecord import SeqRecord\n from Bio import SeqIO\n from Bio.Data import CodonTable\n except ImportError:\n- stop_err("Missing Biopython library")\n+ sys_exit("Missing Biopython library")\n+\n \n-#Parse Command Line\n-try:\n- input_file, seq_format, table, ftype, ends, mode, min_len, strand, out_nuc_file, out_prot_file = sys.argv[1:]\n-except ValueError:\n- stop_err("Expected ten arguments, got %i:\\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))\n+parser = OptionParser(usage=usage)\n+parser.add_option(\'-i\', \'--input\', dest=\'input_file\',\n+ default=None, help=\'Input fasta file\',\n+ metavar=\'FILE\')\n+parser.add_option(\'-f\', \'--format\', dest=\'seq_format\',\n+ default=\'fasta\', help=\'Sequence format (e.g. fasta, fastq, sff)\')\n+parser.add_option(\'--table\', dest=\'table\',\n+ default=1, help=\'NCBI Translation table\', type=\'int\')\n+parser.add_option(\'-t\', \'--ftype\', dest=\'ftype\', type=\'choice\',\n+ choices=[\'CDS\', \'ORF\'], default=\'ORF\',\n+ help=\'Find ORF or CDSs\')\n+parser.add_option(\'-e\', \'--ends\', dest=\'ends\', type=\'choice\',\n+ choices=[\'open\', \'closed\'], default=\'closed\',\n+ help=\'Open or closed. Closed ensures start/stop codons are present\')\n+parser.add_option(\'-m\', \'--mode\', dest=\'mode\', type=\'choice\',\n+ choices=[\'all\', \'top\', \'one\'], default=\'all\',\n+ help=\'Output all ORFs/CDSs from sequence, all ORFs/CDSs \'\n+ \'with max length, or first with maximum length\')\n+parser.add_option(\'--min_len\', dest=\'min_len\',\n+ default=10, help=\'Minimum ORF/CDS length\', type=\'int\')\n+parser.add_option(\'-s\', \'--strand\', dest=\'strand\', type=\'choice\',\n+ choices=[\'forward\', \'reverse\', \'both\'], default=\'both\',\n+ help=\'Strand to search for features on\')\n+parser.add_option(\'--on\', dest=\'out_nuc_file\',\n+ default=None, help=\'Output nucleotide sequences, or - for STDOUT\',\n+ metavar=\'FILE\')\n+parser.add_option(\'--op\', dest=\'out_prot_file\',\n+ default=None, help=\'Output protein sequences, or - for STDOUT\',\n+ metavar=\'FILE\')\n+parser.add_option(\'--ob\', dest=\'out_bed_file\',\n+ default=None, help=\'Output BED file, or - for STDOUT\',\n+ metavar=\'FILE\')\n+parser.add_option(\'-v\', \'--version\', dest=\'version\',\n+ default=False, action=\'store_true\',\n+ help=\'Show version and quit\')\n \n-try:\n- table = int(table)\n-except ValueError:\n- stop_err("Expected integer for genetic code table, got %s" % table)\n+options, args = pa'..b's.ftype=="CDS":\n offset, n, t = start_chop_and_trans(n)\n else:\n offset = 0\n- t = translate(n, table, to_stop=True)\n- if n and len(t) >= min_len:\n+ t = translate(n, options.table, to_stop=True)\n+ if n and len(t) >= options.min_len:\n yield start + offset, n, t\n start = index\n- if ends == "open":\n+ if options.ends == "open":\n #No stop codon, Biopython\'s strict CDS translate will fail\n n = s[start:]\n #Ensure we have whole codons\n@@ -138,14 +154,14 @@\n n = n[:-1]\n if len(n) % 3:\n n = n[:-1]\n- if ftype=="CDS":\n+ if options.ftype=="CDS":\n offset, n, t = start_chop_and_trans(n, strict=False)\n else:\n offset = 0\n- t = translate(n, table, to_stop=True)\n- if n and len(t) >= min_len:\n+ t = translate(n, options.table, to_stop=True)\n+ if n and len(t) >= options.min_len:\n yield start + offset, n, t\n- \n+\n \n def get_all_peptides(nuc_seq):\n """Returns start, end, strand, nucleotides, protein.\n@@ -156,12 +172,12 @@\n #rather than making a list and sorting?\n answer = []\n full_len = len(nuc_seq)\n- if strand != "reverse":\n+ if options.strand != "reverse":\n for frame in range(0,3):\n for offset, n, t in break_up_frame(nuc_seq[frame:]):\n start = frame + offset #zero based\n answer.append((start, start + len(n), +1, n, t))\n- if strand != "forward":\n+ if options.strand != "forward":\n rc = reverse_complement(nuc_seq)\n for frame in range(0,3) :\n for offset, n, t in break_up_frame(rc[frame:]):\n@@ -187,24 +203,31 @@\n raise StopIteration\n yield values[0]\n \n-if mode == "all":\n+if options.mode == "all":\n get_peptides = get_all_peptides\n-elif mode == "top":\n+elif options.mode == "top":\n get_peptides = get_top_peptides\n-elif mode == "one":\n+elif options.mode == "one":\n get_peptides = get_one_peptide\n \n in_count = 0\n out_count = 0\n-if out_nuc_file == "-":\n+if options.out_nuc_file == "-":\n out_nuc = sys.stdout\n else:\n- out_nuc = open(out_nuc_file, "w")\n-if out_prot_file == "-":\n+ out_nuc = open(options.out_nuc_file, "w")\n+\n+if options.out_prot_file == "-":\n out_prot = sys.stdout\n else:\n- out_prot = open(out_prot_file, "w")\n-for record in SeqIO.parse(input_file, seq_format):\n+ out_prot = open(options.out_prot_file, "w")\n+\n+if options.out_bed_file == "-":\n+ out_bed = sys.stdout\n+else:\n+ out_bed = open(options.out_bed_file, "w")\n+\n+for record in SeqIO.parse(options.input_file, seq_format):\n for i, (f_start, f_end, f_strand, n, t) in enumerate(get_peptides(str(record.seq).upper())):\n out_count += 1\n if f_strand == +1:\n@@ -213,14 +236,18 @@\n loc = "complement(%i..%i)" % (f_start+1, f_end)\n descr = "length %i aa, %i bp, from %s of %s" \\\n % (len(t), len(n), loc, record.description)\n- r = SeqRecord(Seq(n), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)\n- t = SeqRecord(Seq(t), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)\n+ fid = record.id + "|%s%i" % (options.ftype, i+1)\n+ r = SeqRecord(Seq(n), id = fid, name = "", description= descr)\n+ t = SeqRecord(Seq(t), id = fid, name = "", description= descr)\n SeqIO.write(r, out_nuc, "fasta")\n SeqIO.write(t, out_prot, "fasta")\n+ out_bed.write(\'\\t\'.join(map(str,[record.id, f_start, f_end, fid, 0, \'+\' if f_strand == +1 else \'-\'])) + \'\\n\')\n in_count += 1\n if out_nuc is not sys.stdout:\n out_nuc.close()\n if out_prot is not sys.stdout:\n out_prot.close()\n+if out_bed is not sys.stdout:\n+ out_bed.close()\n \n-print "Found %i %ss in %i sequences" % (out_count, ftype, in_count)\n+print "Found %i %ss in %i sequences" % (out_count, options.ftype, in_count)\n' |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/get_orfs_or_cdss.xml --- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Fri Nov 28 11:45:37 2014 -0500 +++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -1,12 +1,12 @@ -<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.0.7"> +<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.1.0"> <description>e.g. to get peptides from ESTs</description> <requirements> - <requirement type="package" version="1.62">biopython</requirement> + <requirement type="package" version="1.65">biopython</requirement> <requirement type="python-module">Bio</requirement> </requirements> <version_command interpreter="python">get_orfs_or_cdss.py --version</version_command> <command interpreter="python"> -get_orfs_or_cdss.py $input_file $input_file.ext $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file +get_orfs_or_cdss.py -i $input_file -f $input_file.ext --table $table -t $ftype -e $ends -m $mode --min_len $min_len -s $strand --on $out_nuc_file --op $out_prot_file --ob $out_bed_file </command> <stdio> <!-- Anything other than zero is an error --> @@ -33,6 +33,7 @@ <option value="21">21. Trematode Mitochondrial</option> <option value="22">22. Scenedesmus obliquus</option> <option value="23">23. Thraustochytrium Mitochondrial</option> + <option value="24">24. Pterobranchia Mitochondrial</option> </param> <param name="ftype" type="select" value="True" label="Look for ORFs or CDSs"> <option value="ORF">Look for ORFs (check for stop codons only, ignore start codons)</option> @@ -49,7 +50,7 @@ <option value="one">First ORF/CDS from each sequence with the maximum length</option> </param> <param name="min_len" type="integer" size="5" value="30" label="Minimum length ORF/CDS (in amino acids, e.g. 30 aa = 90 bp plus any stop codon)" /> - <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing."> + <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing)."> <option value="both">Search both the forward and reverse strand</option> <option value="forward">Only search the forward strand</option> <option value="reverse">Only search the reverse strand</option> @@ -58,6 +59,7 @@ <outputs> <data name="out_nuc_file" format="fasta" label="${ftype.value}s (nucleotides)" /> <data name="out_prot_file" format="fasta" label="${ftype.value}s (amino acids)" /> + <data name="out_bed_file" format="bed6" label="${ftype.value}s (bed)" /> </outputs> <tests> <test> @@ -70,6 +72,7 @@ <param name="strand" value="forward" /> <output name="out_nuc_file" file="get_orf_input.t1_nuc_out.fasta" /> <output name="out_prot_file" file="get_orf_input.t1_prot_out.fasta" /> + <output name="out_bed_file" file="get_orf_input.t1_bed_out.bed" /> </test> <test> <param name="input_file" value="get_orf_input.fasta" /> @@ -80,7 +83,8 @@ <param name="min_len" value="10" /> <param name="strand" value="forward" /> <output name="out_nuc_file" file="get_orf_input.t11_nuc_out.fasta" /> - <output name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" /> + <output name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" /> + <output name="out_bed_file" file="get_orf_input.t11_bed_out.bed" /> </test> <test> <param name="input_file" value="get_orf_input.fasta" /> @@ -92,6 +96,7 @@ <param name="strand" value="forward" /> <output name="out_nuc_file" file="get_orf_input.t11_open_nuc_out.fasta" /> <output name="out_prot_file" file="get_orf_input.t11_open_prot_out.fasta" /> + <output name="out_bed_file" file="get_orf_input.t11_open_bed_out.bed" /> </test> <test> <param name="input_file" value="Ssuis.fasta" /> @@ -103,6 +108,7 @@ <param name="strand" value="both" /> <output name="out_nuc_file" file="get_orf_input.Suis_ORF.nuc.fasta" /> <output name="out_prot_file" file="get_orf_input.Suis_ORF.prot.fasta" /> + <output name="out_bed_file" file="get_orf_input.Suis_ORF.bed" /> </test> </tests> <help> |
| b |
| diff -r 72bc0335b792 -r 65d76ca44cd2 tools/get_orfs_or_cdss/tool_dependencies.xml --- a/tools/get_orfs_or_cdss/tool_dependencies.xml Fri Nov 28 11:45:37 2014 -0500 +++ b/tools/get_orfs_or_cdss/tool_dependencies.xml Mon Apr 06 05:21:48 2015 -0400 |
| b |
| @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="biopython" version="1.62"> - <repository changeset_revision="ac9cc2992b69" name="package_biopython_1_62" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <package name="biopython" version="1.65"> + <repository changeset_revision="f8d72690eeae" name="package_biopython_1_65" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency> |