# HG changeset patch
# User peterjc
# Date 1366190725 14400
# Node ID 7479dbb285b5a17244fafdcdf13dda1f2d734aa1
# Parent  09608837359061fa1500039a2b2a907d41d6d4f3
Uploaded v0.0.10, adds unit test
diff -r 096088373590 -r 7479dbb285b5 test-data/empty.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty.fasta	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,2 @@
+
+
diff -r 096088373590 -r 7479dbb285b5 test-data/empty_effectiveT3.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_effectiveT3.tabular	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,1 @@
+#ID	Description	Score	Effective
diff -r 096088373590 -r 7479dbb285b5 test-data/four_human_proteins.effectiveT3.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.effectiveT3.tabular	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,5 @@
+#ID	Description	Score	Effective
+sp|P08100|OPSD_HUMAN	Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1	0.461926109094959	false
+sp|Q9BS26|ERP44_HUMAN	Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1	0.000000100329473	false
+sp|Q9NSY1|BMP2K_HUMAN	BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2	0.000000000000339	false
+sp|P06213|INSR_HUMAN	Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4	0.000000000000000	false
diff -r 096088373590 -r 7479dbb285b5 test-data/four_human_proteins.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY
+QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE
+NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF
+PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME
+FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
diff -r 096088373590 -r 7479dbb285b5 tool-data/effectiveT3.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/effectiveT3.loc.sample	Wed Apr 17 05:25:25 2013 -0400
@@ -0,0 +1,26 @@
+# This is a three column tab separated file to define the modules (models)
+# for the Effective T3 prediction tool.
+#
+# Column 1 - Module ID, string that Galaxy will save in its database
+# Column 2 - Human readable name, Galaxy will show this in the UI
+# Column 3 - Filename, Galaxy will use this when calling the tool
+#
+#NOTE: In EffectiveT3 v1.0.1, the modules are given as their filename
+#(with the JAR extension) but without any path. They are expected to
+#be located in a subdirectory called "module" under the working directory.
+#For the Galaxy wrapper, we assume this means under the folder where the
+#main JAR file is, TTSS_GUI-1.0.1.jar
+#
+#e.g.
+#
+# /opt/EffectiveT3/TTSS_GUI-1.0.1.jar
+# /opt/EffectiveT3/module/TTSS_STD-1.0.1.jar
+# /opt/EffectiveT3/module/TTSS_ANIMAL-1.0.1.jar
+# /opt/EffectiveT3/module/TTSS_PLANT-1.0.1.jar
+#
+#See files ../tools/protein_analysis/effectiveT3.* for more details,
+#and http://effectors.org
+#
+standard	Type III Effector prediction with standard set	TTSS_STD-1.0.1.jar
+animal	Type III Effector prediction with animal set	TTSS_ANIMAL-1.0.1.jar
+plant	Type III Effector prediction with plant set	TTSS_PLANT-1.0.1.jar
diff -r 096088373590 -r 7479dbb285b5 tools/protein_analysis/effectiveT3.txt
--- a/tools/protein_analysis/effectiveT3.txt	Tue Jun 07 16:32:23 2011 -0400
+++ b/tools/protein_analysis/effectiveT3.txt	Wed Apr 17 05:25:25 2013 -0400
@@ -19,6 +19,10 @@
 
 http://effectors.org/
 
+
+Installation
+============
+
 You can change the path by editing the definition near the start of the Python
 script effectiveT3.py, but by default it expects the following files to be
 installed at these locations:
@@ -35,12 +39,18 @@
 * effectiveT3.py (the Python wrapper script)
 * effectiveT3.txt (this README file)
 
+Also copy effectiveT3.loc.sample to effectiveT3.loc in the tool-data folder
+(and edit if appropriate, e.g. to add or remove a model).
+
 You will also need to modify the tools_conf.xml file to tell Galaxy to offer the
 tool. If you are using other protein analysis tools like TMHMM or SignalP, put
 it next to them. Just add the line:
 
 <tool file="protein_analysis/effectiveT3.xml" />
 
+If you wish to run the unit tests, also add this line to tool_conf.xml.sample
+and move/copy the test-data files under Galaxy's test-data folder.
+
 That's it.
 
 
@@ -48,6 +58,9 @@
 =======
 
 v0.0.7 - Initial public release
+v0.0.8 - Include effectiveT3.loc.sample in Tool Shed
+v0.0.9 - Check the return code for errors in the XML
+v0.0.10 - Added unit test
 
 
 Developers
@@ -56,10 +69,11 @@
 This script and related tools are being developed on the following hg branch:
 http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder:
 
-$ tar -czf effectiveT3.tar.gz tools/protein_analysis/effectiveT3.xml tools/protein_analysis/effectiveT3.py tools/protein_analysis/effectiveT3.txt
+$ tar -czf effectiveT3.tar.gz tools/protein_analysis/effectiveT3.xml tools/protein_analysis/effectiveT3.py tools/protein_analysis/effectiveT3.txt tool-data/effectiveT3.loc.sample test-data/four_human_proteins.fasta test-data/four_human_proteins.effectiveT3.tabular test-data/empty.fasta test-data/empty_effectiveT3.tabular
+
 
 Check this worked:
 
@@ -67,6 +81,11 @@
 tools/protein_analysis/effectiveT3.xml
 tools/protein_analysis/effectiveT3.py
 tools/protein_analysis/effectiveT3.txt
+tool-data/effectiveT3.loc.sample
+test-data/four_human_proteins.fasta
+test-data/four_human_proteins.effectiveT3.tabular
+test-data/empty.fasta
+test-data/empty_effectiveT3.tabular
 
 
 Licence (MIT/BSD style)
@@ -90,5 +109,5 @@
 OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 OR PERFORMANCE OF THIS SOFTWARE.
 
-NOTE: This is the licence for the Galaxy Wrapper only. NLStradamus
-is available and licenced separately.
+NOTE: This is the licence for the Galaxy Wrapper only.
+EffectiveT3 is available and licenced separately.
diff -r 096088373590 -r 7479dbb285b5 tools/protein_analysis/effectiveT3.xml
--- a/tools/protein_analysis/effectiveT3.xml	Tue Jun 07 16:32:23 2011 -0400
+++ b/tools/protein_analysis/effectiveT3.xml	Wed Apr 17 05:25:25 2013 -0400
@@ -1,4 +1,4 @@
-
+
     Find bacterial effectors in protein sequences
     
 effectiveT3.py $module.fields.path
@@ -8,6 +8,11 @@
   $restrict.type
 #end if
 $fasta_file $tabular_file
+    
+        
+        
+        
+    
     
          
         
@@ -33,6 +38,20 @@
     
         
     
+    
+        
+            
+            
+            
+            
+        
+        
+            
+            
+            
+            
+        
+    
     
     
 **What it does**
@@ -41,10 +60,15 @@
 
 The input is a FASTA file of protein sequences, and the output is tabular with four columns (one row per protein):
 
- * Sequence identifier
- * Sequence description (from the FASTA file)
- * Score (between 0 and 1, or negative for an error such as a very short peptide)
- * Predicted effector (true/false)
+====== ==============================================================================
+Column Description
+------ ------------------------------------------------------------------------------
+     1 Sequence identifier
+     2 Sequence description (from the FASTA file)
+     3 Score (between 0 and 1, or negative for an error such as a very short peptide)
+     4 Predicted effector (true/false)
+====== ==============================================================================
+
 
 **References**