# HG changeset patch # User leomrtns # Date 1558630774 14400 # Node ID 6ade7ba67f5da101572bd7fdd5b8677112167023 planemo upload diff -r 000000000000 -r 6ade7ba67f5d macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,17 @@ + + 1.1.0 + + + super_distance + + + + + super_distance --version + + + + + + + diff -r 000000000000 -r 6ade7ba67f5d super_distance.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/super_distance.xml Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,112 @@ + + Supertree estimation using matrix representation with distances + + macros.xml + + + + ./nodal.tre && + sed -n '2p' ./all.tre > ./average.tre + #else + sed -n '3p' ./all.tre > ./nodal.tre && + sed -n '9p' ./all.tre > ./average.tre + #end if + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This software implements a class of methods called Matrix Representation with Distances (MRD), with emphasis on whole + gene families (i.e. gene trees that may contain paralogs) for species tree inference.
+ The two main output trees are the Nodal and the Average supertrees. If the "--fast" option was not selected, then the file All + supertrees will have several other estimates (otherwise it will have only the two main supertrees).

+ + The "Nodal" supertree is estimated from nodal distances between gene tree leaves (equivalent to assuming equal branch lengths), but its final branch lengths + are estimated by least squares using the average branch lengths. The "Average" supertree uses this information directly, and both supertrees are estimated by + UPGMA.
+
+
+ super_distance [-h|--help] [-v|--version] [-F|--fast] [-e|--epsilon=] [-s|--species=] [-o|--output=] []... + + Based on several rescaled patristic distances, the program takes the average matrix between genes and estimates + the species tree using bioNJ, UPGMA and single-linkage after scaling back to the original values (more below). The program + also uses a distance matrix to project branch lengths on species trees missing lengths; + + The branch length rescaling per gene can be the minimum, the average, the total sum, etc. and at the end these values + averaged over trees are scaled back in the final distance matrix, such that lengths in the supertree (species tree) are interpretable. + One exception is the nodal distance, which is based on the number of nodes between two leaves (e.g. NJst). In this case it may make + more sense to use another distance matrix to infer the branch lengths. Option 'F' uses average distances projected on the nodal-estimated tree; + it uses fewer scalings/options, providing a fast estimation. We avoid using individual gene trees since they may have + missing information (missing species or species pairs). For missing comparisons (when two species are never seen in the same gene tree) + we use the ultrametric condition (comparison to a common species) to estimate its value. + + If a file with species names is given, the program allows for paralogs; otherwise it assumes orthology and that _at_least_ one tree has no missing data: + * Paralogy: the species names will be mapped to individual gene tree leaves (e.g. `ECOLI_a` and `ECOLI_b` will both map to species `ECOLI`). + Each gene tree can therefore have several copies of each species, and can also have missing species. + * Orthology: if a file with species names is not given, however, it is assumed that each species is represented at most once per gene, and + furthermore that the leaf names represent the species, and are thus identical across trees. 
This mode is the underlying assumption behind + most tree comparison software, although here missing data for some trees (not all) is allowed. I.e. as long as one tree has full information + (for all species), then others can have some absent species. + With paralogs or not, it is not recommended to have missing entries in the distance matrix (e.g. when a species pair does not appear in any tree), + and matrix representation with distances methods work better with more 'complete' gene trees. If there are no paralogs, many supertrees will be equivalent, + as well as if the input trees lack lengths (only topological information). + + ]]>
+ +
diff -r 000000000000 -r 6ade7ba67f5d test-data/HOG1.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/HOG1.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ +(DROWI_18:0.3789018505,(DROGR_7:0.2443018419,(DROMO_10:0.1316525082,DROVI_17:0.0984391551):0.122729):0.236619,((DROPE_11:0.0000023111,DROPS_12:0.0039732231):0.158232,((DROAN_0:0.0523037343,DROBP_2:0.0541929634):0.154840,(DROKI_8:0.1754046509,((((((DROYA_19:0.0454568263,DROER_4:0.0221802414):0.009433,(DROME_9:0.0154108890,(DROSI_15:0.0104437030,DROSE_14:0.0103446291):0.006553):0.007669):0.085090,DROEU_5:0.0719460572):0.010227,(DROBM_1:0.0475379790,DROTK_16:0.0428214000):0.019343):0.011966,(DRORH_13:0.0980624594,DROEL_3:0.0426114893):0.026189):0.012613,DROFC_6:0.0771805483):0.062752):0.055134):0.088303):0.111929); diff -r 000000000000 -r 6ade7ba67f5d test-data/HOG2.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/HOG2.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ +(DROEU_7:2.6707832803,(((DROVI_17:0.1659062151,(DROMO_12:0.1671047483,DROGR_9:0.1479550015):0.082093):0.201114,DROWI_18:0.2630970288):0.050379,((DROAN_0:0.0182475133,(DROBP_3:0.0125985179,DROBP_2:0.0046837882):0.028555):0.077966,(DROKI_10:0.1054285481,(((((DROFC_8:0.0754595688,DROEL_4:0.0367458921):0.017312,(DROEU_6:0.0409251258,DROBM_1:0.0200906079):0.027882):0.027118,DROYA_19:0.0159778433):0.006906,DROER_5:0.0115646703):0.003304,(DROME_11:0.0196755938,(DROSE_15:0.0070216727,DROSI_16:0.0115885956):0.010218):0.022794):0.062511):0.038220):0.038254):0.053899,(DROPE_13:0.0150971917,DROPS_14:0.0156580887):0.110366); diff -r 000000000000 -r 6ade7ba67f5d test-data/HOG3.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/HOG3.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ 
+(DROWI_18:1.3986144626,(DROGR_7:0.5020534626,(DROMO_10:0.2072433617,DROVI_17:0.1213631563):0.176415):0.400812,((DROPE_11:0.1255463088,DROPS_12:0.0062041894):0.374687,((DROAN_0:0.0529721442,DROBP_2:0.0482152226):0.260691,(DROKI_8:0.3015377200,((DROBM_1:0.1737538555,DROTK_16:0.1098459459):0.038728,(((DROFC_6:0.1814201709,(DRORH_13:0.0699182415,DROEL_3:0.0757597195):0.049046):0.028239,((DROME_9:0.0113417559,(DROSE_14:0.0148692844,DROSI_15:0.0157735255):0.009843):0.075691,(DROER_4:0.0731933903,DROYA_19:0.0512225143):0.039711):0.096678):0.030802,DROEU_5:0.1451575357):0.016826):0.081440):0.089075):0.094121):0.149309); diff -r 000000000000 -r 6ade7ba67f5d test-data/HOG4.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/HOG4.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ +(DROWI_18:0.5868382482,(((((DRORH_13:0.6652950208,DROEL_3:0.3789058064):0.132222,(DROFC_6:0.5481301714,((DROBM_1:0.3103870563,DROTK_16:0.1457783218):0.087793,(DROEU_5:0.3107900511,((DROER_4:0.0980109981,DROYA_19:0.0651305578):0.028048,(DROME_9:0.0344411278,(DROSE_14:0.0152745521,DROSI_15:0.0192488188):0.002744):0.036213):0.054291):0.045368):0.031325):0.040055):0.271090,DROKI_8:0.5026965046):0.029784,(DROAN_0:0.0293865573,DROBP_2:0.0275804120):0.132855):0.104278,(DROPE_11:0.0166948636,DROPS_12:0.0069147115):0.192926):0.248620,(DROGR_7:0.3367199283,(DROMO_10:0.2926223708,DROVI_17:0.1374444833):0.107221):0.358964); diff -r 000000000000 -r 6ade7ba67f5d test-data/HOG5.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/HOG5.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ 
+(DROWI_18:0.1885012154,((DROPE_12:0.0930361130,DROPS_13:0.0048427559):0.043936,((((DROBP_3:0.4208376176,(DROEL_5:0.0966381344,DRORH_14:0.0376203310):0.046772):0.033880,DROFC_9:0.2852242842):0.027955,(DROEU_8:0.0752885945,DROTK_17:0.0885953577):0.027643):0.065468,DROBM_2:0.0290972856):3.313400):0.054171,(((((DROEL_4:0.1366401683,DROBM_1:0.0779371466):0.016714,DROEU_7:0.0209150267):0.008409,(DROYA_19:0.0159753214,(DROER_6:0.0126351355,((DROME_11:0.0047702525,DROSE_15:0.0015834254):0.000002,DROSI_16:0.0015862134):0.007376):0.004328):0.017018):0.026086,DROKI_10:0.0982019427):0.040511,DROAN_0:0.0582216690):0.055033); diff -r 000000000000 -r 6ade7ba67f5d test-data/out_avge.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_avge.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ +[F01] (((((((((((DROSI:0.0114239190162,DROSE:0.0114239190162):0.00665311785271,DROME:0.0180770368689):0.0332289457345,(DROYA:0.0425777500307,DROER:0.0425777500307):0.00872823257268):0.0702215561382,DROEU:0.121527538742):0.0370743470615,DROEL:0.158601885803):0.00637747827539,DRORH:0.164979364078):0.119690548008,((DROTK:0.120113878085,DROBM:0.120113878085):0.08498980613,DROFC:0.205103684215):0.0795662278716):0.0226805993692,DROBP:0.307350511456):0.096308094249,((DROPS:0.0263888258223,DROPE:0.0263888258223):0.224545399278,(DROKI:0.238733687263,DROAN:0.238733687263):0.0122005378374):0.152724380604):0.0758046642449,DROWI:0.47946326995):0.133079153946,((DROVI:0.185662999163,DROMO:0.185662999163):0.127661752359,DROGR:0.313324751521):0.299217672374):1; diff -r 000000000000 -r 6ade7ba67f5d test-data/out_nodal.tre --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_nodal.tre Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,1 @@ +[F00] 
((((((DROBP:0.532439087059,DROAN:0.276636337932):0.0503083375465,DROKI:0.37993216902):0,(DROPS:0.00822020949675,DROPE:0.0445574421478):0.354616460118):0.00309136864897,(((DROVI:0.143658061175,DROMO:0.22766793715):0.103501886683,DROGR:0.337484617197):0.307155918674,DROWI:0.686976061347):0.128297584692):0.10425066443,(((DROTK:0.43557192824,DROBM:0.2093492096):0.00760527041108,DROEU:0.467252999325):0,((DRORH:0.549712406883,DROEL:0.203706841614):0,DROFC:0.310554224165):0.160342042579):0.286455631907):0.114252911488,(((DROSI:0.0123172334665,DROSE:0.0105306045659):0.00678507767638,DROME:0.0179450770452):0.0279199528065,(DROYA:0.0431176263202,DROER:0.0420378737413):0.0155720673903):0.114252911488):0; diff -r 000000000000 -r 6ade7ba67f5d test-data/species_names.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/species_names.txt Thu May 23 12:59:34 2019 -0400 @@ -0,0 +1,20 @@ +DROAN +DROBM +DROBP +DROEL +DROER +DROEU +DROFC +DROGR +DROKI +DROME +DROMO +DROPE +DROPS +DRORH +DROSE +DROSI +DROTK +DROVI +DROWI +DROYA