diff cluster.tools/partition.xml @ 0:0decf3fd54bc draft

Uploaded
author peter-waltman
date Thu, 28 Feb 2013 01:45:39 -0500
parents
children a58527c632b7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster.tools/partition.xml	Thu Feb 28 01:45:39 2013 -0500
@@ -0,0 +1,93 @@
+<tool id="partiton_clust" name="Partition Clustering" force_history_refresh="True">
+    <!-- Builds the partition.py call: -D is emitted only when "dist_obj" is checked, -m only for PAM; both file paths are quoted so whitespace in a path cannot split an argument. -->
+    <command interpreter="python">partition.py
+-d '${dataset}'
+${dist_obj}
+-n ${direction}
+-a ${alg_cond.algorithm}
+#if $alg_cond.algorithm == 'pam' # -m ${alg_cond.distance_metric}
+#end if
+-k ${numk}
+-o '${output}'
+</command>
+    <inputs>
+        <!-- Parameter names and option values are referenced by the <command> template; rename them only together with it. -->
+        <param name="dataset" type="data" format="tabular" label="Data Set" help="Specify the data matrix (tab-delimited) to be clustered"/>
+        <param name="dist_obj" type="boolean" label="Distance Object (R dist object)?" truevalue="-D" falsevalue="" checked="False" help="Check if the matrix contains the pairwise distances between a set of objects"/>
+        <param name="direction" type="select" label="Cluster Columns or Rows?" help="Specify the matrix dimension to cluster (see help below)">
+            <option value="cols">Columns (Samples)</option>
+            <option value="rows" selected="true">Rows (Genes)</option>
+        </param>
+        <conditional name="alg_cond">
+            <param name="algorithm" type="select" label="PAM or K-means?" help="Specify the partition cluster method to use (see help below)">
+                <option value="km">K-means</option>
+                <option value="pam" selected="true">PAM</option>
+            </param>
+            <!-- K-means has no extra settings; the empty case gives every "algorithm" option a matching <when>. -->
+            <when value="km"/>
+            <when value="pam">
+                <param name="distance_metric" type="select" label="Distance Metric" help="Specify the distance metric to use (see help below)">
+                    <option value="cosine" selected="true">Cosine</option>
+                    <option value="abscosine">Absolute Cosine</option>
+                    <option value="pearson">Pearson</option>
+                    <option value="abspearson">Absolute Pearson</option>
+                    <option value="spearman">Spearman</option>
+                    <option value="kendall">Kendall</option>
+                    <option value="euclidean">Euclidean</option>
+                    <option value="maximum">Maximum</option>
+                    <option value="manhattan">Manhattan (AKA city block)</option>
+                    <option value="canberra">Canberra</option>
+                    <option value="binary">Binary</option>
+                </param>
+            </when>
+        </conditional>
+        <param name="numk" type="integer" min="1" label="Number of Clusters" value="50" help="Specify the number of clusters to use"/> <!-- min="1": a cluster count must be positive -->
+    </inputs>
+    <outputs> <!-- Single result dataset: the file handed to partition.py through "-o ${output}", registered with the rdata format. -->
+        <data name="output" format="rdata" label="Partition Clustering Data (RData)"/>
+    </outputs>
+<help>
+.. class:: infomark
+     
+**Perform Partition Clustering (PAM or K-means) on the rows or columns of a specified data set**
+
+----
+
+**Parameters**
+
+- **Data Set** - Specify the data matrix to be clustered.  Data must be formatted as follows:
+
+         * Tab-delimited
+         * Use row/column headers
+
+- **Distance Object (R dist object)?** - Specify whether or not the data set is a pairwise distance matrix
+
+- **Cluster Columns or Rows?** - Specify the dimension of the matrix to cluster:
+
+         * Rows (Genes)
+         * Columns (Samples)
+
+- **PAM or K-means?** - Specify which partition clustering method to use; users have a choice of:
+
+         * PAM (Partitioning Around Medoids)
+         * K-means
+
+- **Distance Metric** - Specify the distance metric to use.  Note: this option is ONLY available when PAM is the selected algorithm.  Choice of:
+
+         * Cosine (AKA uncentered Pearson)
+         * Absolute Cosine (AKA uncentered Pearson, absolute value)
+         * Pearson (Pearson correlation)
+         * Absolute Pearson (Pearson correlation, absolute value)
+         * Spearman (Spearman correlation)
+         * Kendall (Kendall's Tau)
+         * Euclidean (Euclidean distance)
+         * Maximum
+         * Manhattan (AKA city block)
+         * Canberra
+         * Binary
+
+
+- **Number of Clusters** - Specify the number of clusters to use
+
+</help>
+</tool>