Repository 'w4mclassfilter'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/eschen42/w4mclassfilter

Changeset 5:ae791fe4fbe9 (2017-09-07)
Previous changeset 4:2495d0019dbe (2017-05-11) Next changeset 6:f1eabb5973b1 (2018-01-11)
Commit message:
planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit a2d17eac4a1343a34cf2908d9ab31b3202a21f64
modified:
w4mclassfilter.xml
w4mclassfilter_wrapper.R
added:
test-data/input_nofilter_dataMatrix.tsv
b
diff -r 2495d0019dbe -r ae791fe4fbe9 test-data/input_nofilter_dataMatrix.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_nofilter_dataMatrix.tsv Thu Sep 07 17:32:09 2017 -0400
b
@@ -0,0 +1,16 @@
+dataMatrix HU_017 HU_028 HU_034 HU_051 HU_060 HU_078 HU_091 HU_093 HU_099 HU_110 HU_130 HU_134 HU_138 HU_149 HU_152 HU_175 HU_178 HU_185 HU_204 HU_208
+HMDB03193 76043 412165 44943 27242 436566 173175 242549 57066 559869 3732 339188 471368 262271 127285 451270 212500 79673 NA 891129 43907
+HMDB01101 30689 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 891129 32742
+HMDB01101 6877586 52217 3158 10789748 229568 4763576 3878773 976436 831937 608298 1605075 72021 442510 1107705 1464339 31250 2724553 72900 891129 30689
+HMDB10348 47259 544877 60885 34582 529874 168264 176500 76457 610110 16262 279156 524468 451573 591487 433529 161069 214392 13781 891129 39315
+HMDB59717 357351 1030464 301983 67604 306862 1028110 1530493 270027 1378535 289677 808334 1132813 871209 895435 715190 1563158 784738 146195 891129 239030
+HMDB00822 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627 14627
+HMDB00299 250551 1046138 456162 159386 1013302 808657 614370 250403 768004 242085 504108 1014041 1362408 1057660 1110050 566050 411886 142233 891129 284775
+HMDB00191 560002 771533 575790 392284 888498 785428 645785 591569 960658 910201 639437 1092885 1409045 2292023 1246459 1945577 710519 773384 891129 622898
+HMDB00518 -34236 58249 85944 NA 342102 129886 175800 13154 230242 NA 440223 315368 10657 419508 48673 28361 514579 23108 891129 73831
+HMDB00715 1252089 2547452 905408 371059 4983588 5140022 2658555 814523 2558923 859466 4184204 3865723 3236644 2615560 3820724 3577833 2295288 625924 891129 1341900
+HMDB01032 2569205 26023086 1604999 430453 8103558 26222916 257139 675754 59906109 263055 31151730 18648127 14989438 1554658 20249262 5588731 871010 15920 891129 44276
+HMDB00208 747080 13420742 595872 1172376 7172632 3143654 4059767 1433702 5593888 5402629 2477288 3346077 4230072 7621236 8960828 10335722 7037373 1574738 891129 2540044
+HMDB04824 374028 1144386 539206 178517 1046190 959381 605191 310260 1253319 477259 477995 825691 1157093 1089284 1411802 1020206 782673 346761 891129 387811
+HMDB00512 NA 319783 280560 85009 1333877 556003 590779 209285 342532 198512 569970 525240 246282 1140422 542345 1171008 827723 222953 891129 85554
+HMDB00251 368600 616555 94936 622468 180988 293988 352855 767894 268331 167246 310918 1248919 577184 10985 335711 403815 80614 63393 891129 616061
b
diff -r 2495d0019dbe -r ae791fe4fbe9 w4mclassfilter.xml
--- a/w4mclassfilter.xml Thu May 11 00:01:40 2017 -0400
+++ b/w4mclassfilter.xml Thu Sep 07 17:32:09 2017 -0400
[
b'@@ -1,10 +1,10 @@\n-<tool id="w4mclassfilter" name="Sample_Subset" version="0.98.1">\n+<tool id="w4mclassfilter" name="Sample_Subset" version="0.98.3">\n   <description>Filter W4M data by sample class</description>\n \n   <requirements>\n-    <requirement type="package" version="3.3.1">r-base</requirement>\n+    <requirement type="package" version="3.3.2">r-base</requirement>\n     <requirement type="package" version="1.1_4">r-batch</requirement>\n-    <requirement type="package" version="0.98.1">w4mclassfilter</requirement>\n+    <requirement type="package" version="0.98.3">w4mclassfilter</requirement>\n   </requirements>\n \n   <stdio>\n@@ -14,29 +14,58 @@\n \n   <command detect_errors="aggressive"><![CDATA[\n   Rscript $__tool_directory__/w4mclassfilter_wrapper.R\n-  dataMatrix_in "$dataMatrix_in"\n-  sampleMetadata_in "$sampleMetadata_in"\n-  variableMetadata_in "$variableMetadata_in"\n-  sampleclassNames "$sampleclassNames"\n-  inclusive "$inclusive"\n-  classnameColumn "$classnameColumn"\n-  samplenameColumn "$samplenameColumn"\n-  dataMatrix_out "$dataMatrix_out"\n-  sampleMetadata_out "$sampleMetadata_out"\n-  variableMetadata_out "$variableMetadata_out"\n+  dataMatrix_in \'$dataMatrix_in\'\n+  sampleMetadata_in \'$sampleMetadata_in\'\n+  variableMetadata_in \'$variableMetadata_in\'\n+  sampleclassNames \'$sampleclassNames\'\n+  inclusive \'$inclusive\'\n+  wildcards \'$wildcards\'\n+  classnameColumn \'$classnameColumn\'\n+  samplenameColumn \'$samplenameColumn\'\n+  dataMatrix_out \'$dataMatrix_out\'\n+  sampleMetadata_out \'$sampleMetadata_out\'\n+  variableMetadata_out \'$variableMetadata_out\'\n   ]]></command>\n \n   <inputs>\n     <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: \'.\', missing: NA, mode: numerical, separator: tab" />\n     <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />\n   
  <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />\n-    <param name="sampleclassNames" label="Names of sample classes" type="text" value = "" help="comma-separated names of sample classes to filter in or out; defaults to no names" />\n+    <param name="samplenameColumn" label="Column that names the sample" type="text" value = "sampleMetadata" help="name of the column in the sample metadata file that has the name of the sample - defaults to \'sampleMetadata\'" />\n+    <param name="classnameColumn" label="Column that names the sample-class" type="text" value = "class" help="name of the column in sample metadata that has the values to be tested against the \'classes\' input parameter - defaults to \'class\'" />\n+    <param name="sampleclassNames" label="Names of sample classes" type="text" value = "" help="comma-separated names (or comma-less regular expressions to match names) of sample-classes to filter in or out; defaults to no names">\n+      <sanitizer>\n+        <valid initial="string.letters">\n+          <add preset="string.digits"/>\n+          <add value="&#36;"  /> <!-- dollar, dollar-sign -->\n+          <add value="&#40;"  /> <!-- left-paren -->\n+          <add value="&#41;"  /> <!-- right-paren -->\n+          <add value="&#42;"  /> <!-- splat, asterisk -->\n+          <add value="&#43;"  /> <!-- plus -->\n+          <add value="&#44;"  /> <!-- comma -->\n+          <add value="&#46;"  /> <!-- dot, period -->\n+          <add value="&#58;"  /> <!-- colon -->\n+          <add value="&#59;"  /> <!-- semi, semicolon -->\n+          <add value="&#63;"  /> <!-- what, question mark -->\n+          <add value="&#91;"  /> <!-- l-squib, left-square-bracket -->\n+          <add value="&#92;"  /> <!-- whack, backslash -->\n+          <add value="&#93;"  /> <!-- r-squib, right-square-bracket -->\n+          <add value="&#94;"  /> <!-- hat, caret -->\n+          <add 
value="&#123;" /> <!-- l-cube, left-curly-bracket -->\n+          <add value="&#124;" /> <!-- pipe '..b' \'``my.own.sample``\'\n+\n+------------------------------------------------\n+Regular expression patterns to match class-names\n+------------------------------------------------\n+\n+Beginning with v0.98.2, w4mclassfilter supports use of R "regular expression" patterns to select class-names.\n+\n+R uses POSIX 1003.2 standard regular expressions, which allow precise pattern-matching and are exhaustively defined at:\n+http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html\n+\n+However, only a few basic building blocks of regular expressions need to be mastered for most cases:\n+\n+- \'``^``\' matches the beginning of a class-name\n+- \'``$``\' matches the end of a class-name\n+- \'``.``\' outside of square brackets matches a single character\n+- \'``*``\' matches the character specified immediately before it zero or more times\n+- square brackets specify a set of characters to be matched.\n+\n+Within square brackets\n+\n+- \'``^``\' as the first character specifies that the listed characters are those that should **not** be matched.\n+- \'``-``\' is used to specify ranges of characters\n+\n+Caveat: The tool wrapper uses the comma (\'``,``\') to split a list of sample-class names, so **commas may not be used within regular expressions for this tool**\n+\n+First Example: Consider a field of class-names consisting of \'``marq3,marq6,marq9,marq12,front3,front6,front9,front12``\'\n+\n+- The regular expression \'``^front[0-9][0-9]*$``\' will match the same sample-classes as \'``front3,front6,front9,front12``\'\n+- The regular expression \'``^[a-z][a-z]*3$``\' will match the same sample-classes as \'``front3,marq3``\'\n+- The regular expression \'``^[a-z][a-z]*12$``\' will match the same sample-classes as \'``front12,marq12``\'\n+- The regular expression \'``^[a-z][a-z]*[0-9]$``\' will match the same sample-classes as 
\'``front3,front6,front9,marq3,marq6,marq9``\'\n+\n+Second Example: Consider these regular expression patterns as possible matches to a sample-class name \'``AB0123``\':\n+\n+- \'``^[A-Z][A-Z][0-9][0-9]*$``\' - MATCHES \'``**^AB0123$**``\'\n+- \'``^[A-Z][A-Z]*[0-9][0-9]*$``\' - MATCHES \'``**^AB0123$**``\'\n+- \'``^[A-Z][0-9]*``\' - MATCHES  \'``**^A** B0123$``\' - first character is a letter, \'``*``\' can specify zero characters, and end of line did not need to be matched.\n+- \'``^[A-Z][A-Z][0-9]``\' - MATCHES  \'``**^AB0** 123$``\' - first two characters are letters and the third is a digit.\n+- \'``^[A-Z][A-Z]*[0-9][0-9]$``\' - NO MATCH - the name does not end with the pattern \'``[A-Z][0-9][0-9]$``\', i.e., it ends with four digits, not two.\n+- \'``^[A-Z][0-9]*$``\' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.\n+\n+\n ---------------\n Working example\n ---------------\n@@ -348,6 +568,28 @@\n NEWS\n ----\n \n+CHANGES IN VERSION 0.98.3\n+=========================\n+\n+INTERNAL MODIFICATIONS\n+\n+* Improved input handling.\n+* Now uses w4mclassfilter R package v0.98.3, although that version has no functional implications for this tool.\n+* Improved reference-list.\n+\n+CHANGES IN VERSION 0.98.2\n+=========================\n+\n+NEW FEATURES\n+\n+* Added support for R-flavored regular expression pattern-matching when selecting names of sample-classes.\n+* Empty classes argument or zero-length class_column result in no samples filtered out.\n+\n+INTERNAL MODIFICATIONS\n+\n+* Support and tests for new features.\n+\n+\n CHANGES IN VERSION 0.98.1\n =========================\n \n@@ -363,11 +605,16 @@\n \n none\n \n-    ]]>\n-  </help>\n+  ]]></help>\n   <citations>\n+    <!-- Giacomoni_2014 W4M 2.5 -->\n+    <citation type="doi">10.1093/bioinformatics/btu813</citation>\n+    <!-- Guitton_2017 W4M 3.0 -->\n+    <citation type="doi">10.1016/j.biocel.2017.07.002</citation>\n+    <!-- Smith_2006 XCMS 
-->\n     <citation type="doi">10.1021/ac051437y</citation>\n-    <citation type="doi">10.1093/bioinformatics/btu813</citation>\n+    <!-- Th_venot_2015 Urinary metabolome statistics -->\n+    <citation type="doi">10.1021/acs.jproteome.5b00354</citation>\n   </citations>\n   <!--\n      vim:noet:sw=4:ts=4\n'
b
diff -r 2495d0019dbe -r ae791fe4fbe9 w4mclassfilter_wrapper.R
--- a/w4mclassfilter_wrapper.R Thu May 11 00:01:40 2017 -0400
+++ b/w4mclassfilter_wrapper.R Thu Sep 07 17:32:09 2017 -0400
[
@@ -83,15 +83,13 @@
 # other parameters
 
 sampleclassNames <- as.character(argVc["sampleclassNames"])
-# if (sampleclassNames == "NONE_SPECIFIED") {
-#     sampleclassNames <- as.character(c())
-# 
-# } else {
-#     sampleclassNames <- strsplit(x = sampleclassNames, split = ",", fixed = TRUE)[[1]]
-# }
+wildcards <- as.logical(argVc["wildcards"])
 sampleclassNames <- strsplit(x = sampleclassNames, split = ",", fixed = TRUE)[[1]]
+if (wildcards) {
+  sampleclassNames <- gsub("[.]", "[.]", sampleclassNames)
+  sampleclassNames <- utils::glob2rx(sampleclassNames, trim.tail = FALSE)
+}
 inclusive <- as.logical(argVc["inclusive"])
-# print(sprintf("inclusive = '%s'", as.character(inclusive)))
 classnameColumn <- as.character(argVc["classnameColumn"])
 samplenameColumn <- as.character(argVc["samplenameColumn"])