<?xml version="1.0" encoding="UTF-8"?>
<tool id="partiton_clust" name="Partition Clustering" force_history_refresh="True">
  <!--
    Galaxy wrapper around partition.py.

    The wrapper passes the selected data set, the clustering direction
    (rows or cols), the algorithm (pam or km) and, optionally, a distance
    metric and a cluster count to the script, and collects one RData output.

    NOTE: the tool id keeps its historical spelling ("partiton") on purpose;
    changing it would break saved workflows and histories that reference it.
  -->
  <command interpreter="python"><![CDATA[
    partition.py
      -d '$dataset'
      ## Expands to the flag -D when the input is flagged as a distance object, else to nothing.
      ${dist_obj}
      -n ${direction}
      -a ${alg_cond.algorithm}

      ## The distance metric parameter only exists in the PAM branch of the conditional.
      #if str($alg_cond.algorithm) == "pam"
        -m ${alg_cond.distance_metric}
      #end if

      ## -1 is the "use default" sentinel of the numk parameter; omit -k in that case.
      #if str($numk) != "-1"
        -k ${numk}
      #end if

      ## Exactly one of the two outputs exists per run (see the output filters below).
      #if str($direction) == "rows"
        -o '$rdata_output_rows'
      #end if
      #if str($direction) == "cols"
        -o '$rdata_output_cols'
      #end if
  ]]></command>

  <inputs>
    <param name="dataset" type="data" format="tabular" label="Data Set"
           help="Specify the data matrix (tab-delimited) to be clustered"/>
    <param name="dist_obj" type="boolean" label="Distance Object (R dist object)?"
           truevalue="-D" falsevalue="" checked="False"
           help="Check if the matrix contains the pairwise distances between a set of objects"/>

    <param name="direction" type="select" label="Cluster Columns or Rows?"
           help="Specify the matrix dimension to cluster (see help below)">
      <option value="cols">Columns (Samples)</option>
      <option value="rows" selected="true">Rows (Genes)</option>
    </param>

    <!-- The distance metric is only offered when PAM is selected; K-means has no extra options. -->
    <conditional name="alg_cond">
      <param name="algorithm" type="select" label="PAM or K-means?"
             help="Specify the partition cluster method to use (see help below)">
        <option value="km">K-means</option>
        <option value="pam" selected="true">PAM</option>
      </param>
      <when value="pam">
        <param name="distance_metric" type="select" label="Distance Metric"
               help="Specify the distance metric to use (see help below)">
          <option value="cosine" selected="true">Cosine</option>
          <option value="abscosine">Absolute Cosine</option>
          <option value="pearson">Pearson</option>
          <option value="abspearson">Absolute Pearson</option>
          <option value="spearman">Spearman</option>
          <option value="kendall">Kendall</option>
          <option value="euclidean">Euclidean</option>
          <option value="maximum">Maximum</option>
          <option value="manhattan">Manhattan (AKA city block)</option>
          <option value="canberra">Canberra</option>
          <option value="binary">Binary</option>
        </param>
      </when>
    </conditional>

    <!-- A value of -1 means "use the default"; the command template then omits the -k flag. -->
    <param name="numk" type="integer" label="Number of Clusters" value="-1"
           help="Specify the number of clusters to use (-1 to use default. See help below)."/>
  </inputs>

  <outputs>
    <!-- Only the output matching the selected direction is created. -->
    <data format="rdata" name="rdata_output_rows" label="Partition Clustering Results; Gene Clusters (RData)">
      <filter>direction == "rows"</filter>
    </data>
    <data format="rdata" name="rdata_output_cols" label="Partition Clustering Results; Sample Clusters (RData)">
      <filter>direction == "cols"</filter>
    </data>
  </outputs>

  <help><![CDATA[
.. class:: infomark

**Perform Partition Clustering (Cluster Samples or Genes) on a specified data set**

----

**Parameters**

- **Data Set** - Specify the data matrix to be clustered. Data must be formatted as follows:

  * Tab-delimited
  * Use row/column headers

- **Distance Object** - Specify whether or not the data set is a pairwise distance matrix

- **Cluster Columns or Rows?** - Specify the dimension of the matrix to cluster:

  * Rows (Genes)
  * Columns (Samples)

- **PAM or K-means?** - Specify which partition clustering method to use - users have choice of:

  * PAM (Partition Around Medoids)
  * K-means

- **Distance Metric** - Specify the distance metric to use. Note, this is ONLY AVAILABLE IF PAM IS THE ALGORITHM BEING USED. Choice of:

  * Cosine (AKA uncentered pearson)
  * Absolute Cosine (AKA uncentered pearson, absolute value)
  * Pearson (pearson correlation)
  * Absolute Pearson (pearson correlation, absolute value)
  * Spearman (spearman correlation)
  * Kendall (Kendall's Tau)
  * Euclidean (euclidean distance)
  * Maximum
  * Manhattan (AKA city block)
  * Canberra
  * Binary

- **Number of Clusters** - Specify the number of clusters to use. If set to -1, default values will be used, with the default set as follows:

  * if samples/columns are being clustered, the **default** is 5.
  * if genes/rows are being clustered, the **default** is set to num_rows/30, e.g. if there are 600 rows/genes in the matrix, the default will be 20 clusters.
  ]]></help>
</tool>