changeset 1:7479dbb285b5 draft

Uploaded v0.0.10, adds unit test
author peterjc
date Wed, 17 Apr 2013 05:25:25 -0400
parents 096088373590
children 66e9d4c44ca2
files test-data/empty.fasta test-data/empty_effectiveT3.tabular test-data/four_human_proteins.effectiveT3.tabular test-data/four_human_proteins.fasta tool-data/effectiveT3.loc.sample tools/protein_analysis/effectiveT3.txt tools/protein_analysis/effectiveT3.xml
diffstat 7 files changed, 147 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty.fasta	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,2 @@
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_effectiveT3.tabular	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,1 @@
+#ID	Description	Score	Effective
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.effectiveT3.tabular	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,5 @@
+#ID	Description	Score	Effective
+sp|P08100|OPSD_HUMAN	Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1	0.461926109094959	false
+sp|Q9BS26|ERP44_HUMAN	Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1	0.000000100329473	false
+sp|Q9NSY1|BMP2K_HUMAN	BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2	0.000000000000339	false
+sp|P06213|INSR_HUMAN	Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4	0.000000000000000	false
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY
+QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE
+NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF
+PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME
+FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/effectiveT3.loc.sample	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,26 @@
+# This is a three column tab separated file to define the modules (models)
+# for the Effective T3 prediction tool.
+#
+# Column 1 - Module ID, string that Galaxy will save in its database
+# Column 2 - Human readable name, Galaxy will show this in the UI
+# Column 3 - Filename, Galaxy will use this when calling the tool
+#
+#NOTE: In EffectiveT3 v1.0.1, the modules are given as their filename
+#(with the JAR extension) but without any path. They are expected to
+#be located in a subdirectory called module under the working directory.
+#For the galaxy wrapper, we assume this means under the folder where the
+#main JAR file is, TTSS_GUI-1.0.1.jar
+#
+#e.g.
+#
+# /opt/EffectiveT3/TTSS_GUI-1.0.1.jar
+# /opt/EffectiveT3/module/TTSS_STD-1.0.1.jar
+# /opt/EffectiveT3/module/TTSS_ANIMAL-1.0.1.jar
+# /opt/EffectiveT3/module/TTSS_PLANT-1.0.1.jar
+#
+#See files ../tools/protein_analysis/effectiveT3.* for more details,
+#and http://effectors.org
+#
+standard	Type III Effector prediction with standard set	TTSS_STD-1.0.1.jar
+animal	Type III Effector prediction with animal set	TTSS_ANIMAL-1.0.1.jar
+plant	Type III Effector prediction with plant set	TTSS_PLANT-1.0.1.jar
--- a/tools/protein_analysis/effectiveT3.txt	Tue Jun 07 16:32:23 2011 -0400
+++ b/tools/protein_analysis/effectiveT3.txt	Wed Apr 17 05:25:25 2013 -0400
@@ -19,6 +19,10 @@
 
 http://effectors.org/
 
+
+Installation
+============
+
 You can change the path by editing the definition near the start of the Python
 script effectiveT3.py, but by default it expects the following files to be
 installed at these locations:
@@ -35,12 +39,18 @@
 * effectiveT3.py (the Python wrapper script)
 * effectiveT3.txt (this README file)
 
+Also copy effectiveT3.loc.sample to effectiveT3.loc in the tool-data folder
+(and edit if appropriate, e.g. to add or remove a model).
+
 You will also need to modify the tool_conf.xml file to tell Galaxy to offer the
 tool. If you are using other protein analysis tools like TMHMM or SignalP, put
 it next to them. Just add the line:
 
 <tool file="protein_analysis/effectiveT3.xml" />
 
+If you wish to run the unit tests, also add this to tool_conf.xml.sample
+and move/copy the test-data files under Galaxy's test-data folder.
+
 That's it.
 
 
@@ -48,6 +58,9 @@
 =======
 
 v0.0.7 - Initial public release
+v0.0.8 - Include effectiveT3.loc.sample in Tool Shed
+v0.0.9 - Check the return code for errors in the XML
+v0.0.10- Added unit test
 
 
 Developers
@@ -56,10 +69,11 @@
 This script and related tools are being developed on the following hg branch:
 http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder:
 
-$ tar -czf effectiveT3.tar.gz tools/protein_analysis/effectiveT3.xml tools/protein_analysis/effectiveT3.py tools/protein_analysis/effectiveT3.txt
+$ tar -czf effectiveT3.tar.gz tools/protein_analysis/effectiveT3.xml tools/protein_analysis/effectiveT3.py tools/protein_analysis/effectiveT3.txt tool-data/effectiveT3.loc.sample test-data/four_human_proteins.fasta test-data/four_human_proteins.effectiveT3.tabular test-data/empty.fasta test-data/empty_effectiveT3.tabular
+
 
 Check this worked:
 
@@ -67,6 +81,11 @@
 tools/protein_analysis/effectiveT3.xml
 tools/protein_analysis/effectiveT3.py
 tools/protein_analysis/effectiveT3.txt
+tool-data/effectiveT3.loc.sample
+test-data/four_human_proteins.fasta
+test-data/four_human_proteins.effectiveT3.tabular
+test-data/empty.fasta
+test-data/empty_effectiveT3.tabular
 
 
 Licence (MIT/BSD style)
@@ -90,5 +109,5 @@
 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 OR PERFORMANCE OF THIS SOFTWARE.
 
-NOTE: This is the licence for the Galaxy Wrapper only. NLStradamus
-is available and licenced separately.
+NOTE: This is the licence for the Galaxy Wrapper only.
+EffectiveT3 is available and licenced separately.
--- a/tools/protein_analysis/effectiveT3.xml	Tue Jun 07 16:32:23 2011 -0400
+++ b/tools/protein_analysis/effectiveT3.xml	Wed Apr 17 05:25:25 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="effectiveT3" name="Effective T3" version="0.0.7">
+<tool id="effectiveT3" name="Effective T3" version="0.0.10">
     <description>Find bacterial effectors in protein sequences</description>
     <command interpreter="python">
 effectiveT3.py $module.fields.path
@@ -8,6 +8,11 @@
   $restrict.type
 #end if
 $fasta_file $tabular_file</command>
+    <stdio>
+        <!-- Any non-zero exit code (positive or negative) is treated as an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
         <param name="module" type="select" display="radio" label="Classification module">
@@ -33,6 +38,20 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="$module.value_label results" />
     </outputs>
+    <tests>
+        <test> <!-- Four human proteins, animal module, type "selective": expected output flags none as effectors -->
+            <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="module" value="animal" />
+            <param name="type" value="selective" />
+            <output name="tabular_file" file="four_human_proteins.effectiveT3.tabular" ftype="tabular" />
+        </test>
+        <test> <!-- Empty FASTA input, plant module, type "sensitive": expected output is the header line only -->
+            <param name="fasta_file" value="empty.fasta" ftype="fasta" />
+            <param name="module" value="plant" />
+            <param name="type" value="sensitive" />
+            <output name="tabular_file" file="empty_effectiveT3.tabular" ftype="tabular" />
+        </test>
+    </tests>
     <help>
     
 **What it does**
@@ -41,10 +60,15 @@
 
 The input is a FASTA file of protein sequences, and the output is tabular with four columns (one row per protein):
 
- * Sequence identifier
- * Sequence description (from the FASTA file)
- * Score (between 0 and 1, or negative for an error such as a very short peptide)
- * Predicted effector (true/false)
+====== ==============================================================================
+Column Description
+------ ------------------------------------------------------------------------------
+     1 Sequence identifier
+     2 Sequence description (from the FASTA file)
+     3 Score (between 0 and 1, or negative for an error such as a very short peptide)
+     4 Predicted effector (true/false)
+====== ==============================================================================
+
 
 **References**