changeset 0:21b1788b85a4

Version 1
author greg
date Fri, 05 Aug 2011 16:31:46 -0400
parents
children e28fb7f1f45d
files convert_characters.pl convert_characters.py convert_characters.xml
diffstat 3 files changed, 200 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_characters.pl	Fri Aug 05 16:31:46 2011 -0400
@@ -0,0 +1,101 @@
+#! /usr/bin/perl -w
+
+use strict;
+use warnings;
+
+# converts all characters of one type into another 
+# convert_characters.pl [input] [convert_from] [convert_to] [output]
+
+# Exactly four command-line arguments are required.
+@ARGV == 4 or die "Check argument\n";
+
+# Unpack them in the order given in the usage line: input file,
+# convert-from code, convert-to code, output file.
+my ($inputfile, $convert_from, $convert_to, $outputfile) = @ARGV;
+
+# Translate the symbolic "convert from" code into the regex fragment that
+# matches one character of the requested kind.  The fragments are written
+# in single quotes so the backslash escapes reach the regex engine intact.
+#
+# Each fragment matches a single character; the substitution further down
+# adds the quantifier that collapses a run of them.
+my %from_pattern_for = (
+    # Any whitespace character.
+    's' => '\s',
+
+    # Tab only.
+    'T' => '\t',
+
+    # A literal space only.  This used to be '\s', which made "Sp" behave
+    # exactly like "s" and convert tabs and other whitespace as well.
+    'Sp' => ' ',
+
+    # Dot and pipe are regex metacharacters, so they are escaped.
+    'Dt' => '\.',
+    'P'  => '\|',
+
+    # Comma, dash and underscore need no escaping.
+    'C' => ',',
+    'D' => '-',
+    'U' => '_',
+);
+
+# Reject unknown codes before any file is opened.
+if (!exists $from_pattern_for{$convert_from})
+{
+    die "Invalid value specified for convert from\n";
+}
+
+# From here on $convert_from holds the regex fragment rather than the
+# symbolic code.
+$convert_from = $from_pattern_for{$convert_from};
+
+
+# Translate the symbolic "convert to" code into the literal character that
+# is written to the output.  The value is interpolated into the replacement
+# side of s///, where its contents are taken literally, so every entry is
+# the plain character itself.
+#
+# There is no "s" entry here, unlike the convert-from codes, so "s" is
+# rejected as a target.
+my %replacement_for = (
+    # A real tab character.
+    'T' => "\t",
+
+    # A real space.  This used to be '\s', which on the replacement side
+    # is the two-character text backslash-s and ended up in the output.
+    'Sp' => ' ',
+
+    # Punctuation is inserted verbatim.
+    'Dt' => '.',
+    'C'  => ',',
+    'D'  => '-',
+    'U'  => '_',
+    'P'  => '|',
+);
+
+# Reject unknown codes before any file is opened.
+if (!exists $replacement_for{$convert_to})
+{
+    die "Invalid value specified for convert to\n";
+}
+
+# From here on $convert_to holds the replacement character rather than the
+# symbolic code.
+$convert_to = $replacement_for{$convert_to};
+
+# Copy the input to the output line by line, collapsing every run of the
+# "from" character into a single "to" character.
+open(my $fhIn, '<', $inputfile)
+    or die "Cannot open source file '$inputfile': $!\n";
+open(my $fhOut, '>', $outputfile)
+    or die "Cannot open output file '$outputfile': $!\n";
+
+while (my $thisLine = <$fhIn>)
+{
+    chomp $thisLine;
+    $thisLine =~ s/(?:$convert_from)+/$convert_to/g;
+    print {$fhOut} $thisLine, "\n";
+}
+close($fhIn)  or die "Cannot close source file\n";
+close($fhOut) or die "Cannot close output file\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_characters.py	Fri Aug 05 16:31:46 2011 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+#By, Guruprasad Ananda.
+
+from galaxy import eggs
+import sys, re
+
+def stop_err(msg):
+    """Print msg on stderr and abort with a non-zero exit status."""
+    sys.exit(msg)
+    
+def main():
+    """Copy infile to outfile, turning each run of from_char into one TAB."""
+    if len(sys.argv) != 4:
+        stop_err("usage: convert_characters infile from_char outfile")
+    # Raw strings keep the regex escapes intact.  The code is validated
+    # before any file is opened so a bad value cannot truncate the output.
+    char_dict = {'T': '\t', 's': r'\s', 'Dt': r'\.', 'C': ',', 'D': '-',
+                 'U': '_', 'P': r'\|'}
+    if sys.argv[2] not in char_dict:
+        stop_err("Invalid value specified for from_char: %s" % sys.argv[2])
+    from_re = re.compile(char_dict[sys.argv[2]] + '+')  # 1 or more in a row
+    try:
+        fin = open(sys.argv[1], 'r')
+    except IOError:
+        stop_err("Input file cannot be opened for reading.")
+    try:
+        fout = open(sys.argv[3], 'w')
+    except IOError:
+        stop_err("Output file cannot be opened for writing.")
+    skipped = 0
+    for line in fin:
+        try:
+            fout.write("%s\n" % from_re.sub('\t', line.strip()))
+        except Exception:  # best effort: count the bad line and carry on
+            skipped += 1
+    fin.close()
+    fout.close()
+    if skipped:
+        print("Skipped %d lines as invalid." % skipped)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_characters.xml	Fri Aug 05 16:31:46 2011 -0400
@@ -0,0 +1,57 @@
+<tool id="Convert characters1" name="Convert" version="1.0.0">
+  <description>delimiters to TAB</description>
+  <command interpreter="python">convert_characters.py $input $convert_from $out_file1</command>
+  <inputs>
+    <param name="convert_from" type="select" label="Convert all">
+      <option value="s">Whitespaces</option>
+      <option value="T">Tabs</option>
+      <!-- Not offered: char_dict in convert_characters.py has no "Sp" entry. <option value="Sp">Spaces</option> -->
+      <option value="Dt">Dots</option>
+      <option value="C">Commas</option>
+      <option value="D">Dashes</option>
+      <option value="U">Underscores</option>
+      <option value="P">Pipes</option>
+    </param>
+    <param format="txt" name="input" type="data" label="in Query"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="out_file1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="convert_from" value="s"/>
+      <param name="input" value="1.bed"/>
+      <output name="out_file1" file="eq-convert.dat"/>
+    </test>
+    <test>
+      <param name="convert_from" value="s"/>
+      <param name="input" value="a.txt"/>
+      <output name="out_file1" file="a.tab"/>
+    </test>
+  </tests>
+  <help>
+
+**What it does**
+
+Converts all delimiters of a specified type into TABs.  Consecutive characters are condensed. For example, if columns are separated by 5 spaces they will be converted into 1 tab.
+
+-----
+
+**Example**
+
+- Input file::
+
+    chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|-
+    chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|-
+    chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+
+    chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+
+
+- Converting all pipe delimiters of the above file to TABs will get::
+
+    chrX  151283558  151283724  NM_000808_exon_8_0_chrX_151283559_r  0  -
+    chrX  151370273  151370486  NM_000808_exon_9_0_chrX_151370274_r  0  -
+    chrX  151559494  151559583  NM_018558_exon_1_0_chrX_151559495_f  0  +
+    chrX  151564643  151564711  NM_018558_exon_2_0_chrX_151564644_f  0  +
+
+</help>
+</tool>