Mercurial > repos > greg > convert_chars
changeset 0:21b1788b85a4
Version 1
author | greg |
---|---|
date | Fri, 05 Aug 2011 16:31:46 -0400 |
parents | |
children | e28fb7f1f45d |
files | convert_characters.pl convert_characters.py convert_characters.xml |
diffstat | 3 files changed, 200 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#! /usr/bin/perl -w

use strict;
use warnings;

# Converts every run of one delimiter character into a single replacement
# character, line by line.
#
# Usage:
#   convert_characters.pl [input] [convert_from] [convert_to] [output]
#
# convert_from codes: s  (any whitespace), T (tab), Sp (space), Dt (dot),
#                     C  (comma), D (dash), U (underscore), P (pipe)
# convert_to codes:   T, Sp, Dt, C, D, U, P
#
# Consecutive occurrences of the source delimiter are condensed into one
# replacement character.  Every output line is terminated with "\n", even
# when the last input line had no trailing newline.

die "Check argument\n" unless @ARGV == 4;

my ($inputfile, $convert_from, $convert_to, $outputfile) = @ARGV;

# Regex fragment that matches ONE occurrence of each source delimiter.
# "Sp" is a literal space; "s" is the option that matches all whitespace.
my %pattern_for = (
    s  => '\s',
    T  => '\t',
    Sp => ' ',
    Dt => '\.',
    C  => ',',
    D  => '-',
    U  => '_',
    P  => '\|',
);

# Literal text written in place of each delimiter run.  These are plain
# strings, not regex fragments, so "Sp" must be a real space: a '\s' here
# would be emitted verbatim as backslash-s.
my %replacement_for = (
    T  => "\t",
    Sp => ' ',
    Dt => '.',
    C  => ',',
    D  => '-',
    U  => '_',
    P  => '|',
);

die "Invalid value specified for convert from\n"
    unless exists $pattern_for{$convert_from};
die "Invalid value specified for convert to\n"
    unless exists $replacement_for{$convert_to};

# Compile once, outside the loop: one or more consecutive delimiters.
my $pattern       = $pattern_for{$convert_from};
my $delimiter_run = qr/(?:$pattern)+/;
my $replacement   = $replacement_for{$convert_to};

# Three-argument open so that characters in the file names can never be
# interpreted as an open mode.
open my $fhIn, '<', $inputfile
    or die "Cannot open source file: $!\n";
open my $fhOut, '>', $outputfile
    or die "Cannot open output file: $!\n";

while (my $line = <$fhIn>) {
    # Remove the newline first so that a whitespace pattern cannot
    # swallow the line terminator.
    chomp $line;
    $line =~ s/$delimiter_run/$replacement/g;
    print {$fhOut} $line, "\n"
        or die "Cannot write to output file: $!\n";
}

close $fhIn  or die "Cannot close source file\n";
close $fhOut or die "Cannot close output file\n";
#!/usr/bin/env python
#By, Guruprasad Ananda.
"""Convert runs of a chosen delimiter character into single TABs.

Usage: convert_characters.py infile from_char outfile

from_char is one of the codes T (tab), s (whitespace), Dt (dot),
C (comma), D (dash), U (underscore) or P (pipe).
"""

try:
    # Nothing else in this script references `eggs`, so the converter
    # still works when the Galaxy package is not importable.
    from galaxy import eggs
except ImportError:
    eggs = None
import sys, re

def stop_err(msg):
    """Write msg to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    sys.exit(1)

def main():
    """Read infile, collapse each run of the delimiter to a TAB, write outfile.

    Arguments are taken from sys.argv: infile, from_char, outfile.
    Each line is stripped of leading/trailing whitespace before conversion.
    Lines whose conversion or write raises are counted and reported on
    stdout instead of aborting the run.
    """
    if len(sys.argv) != 4:
        stop_err("usage: convert_characters infile from_char outfile")

    from_char = sys.argv[2]

    # Raw strings keep the regex escapes intact (no invalid-escape warnings).
    char_dict = {'T': r'\t', 's': r'\s', 'Dt': r'\.', 'C': ',', 'D': '-', 'U': '_', 'P': r'\|'}
    # Validate before touching any file so a bad code cannot truncate outfile.
    if from_char not in char_dict:
        stop_err("Invalid value specified for from_char: %s" % from_char)
    from_re = re.compile(char_dict[from_char] + '+')  # 1 or more occurrences

    try:
        fin = open(sys.argv[1], 'r')
    except IOError:
        stop_err("Input file cannot be opened for reading.")

    try:
        fout = open(sys.argv[3], 'w')
    except IOError:
        fin.close()
        stop_err("Output file cannot be opened for writing.")

    skipped = 0
    try:
        for line in fin:
            line = line.strip()
            try:
                fout.write("%s\n" % from_re.sub('\t', line))
            except Exception:
                # Best effort: count the bad line and carry on.
                skipped += 1
    finally:
        fin.close()
        fout.close()

    if skipped:
        sys.stdout.write("Skipped %d lines as invalid.\n" % skipped)

if __name__ == "__main__":
    main()
<!--
  Galaxy tool wrapper for convert_characters.py.
  The selected delimiter code is passed as the second argument; the script
  replaces each run of that delimiter with a single TAB, so the output is
  declared as tabular.
-->
<tool id="Convert characters1" name="Convert" version="1.0.0">
  <description>delimiters to TAB</description>
  <command interpreter="python">convert_characters.py $input $convert_from $out_file1</command>
  <inputs>
    <!-- Option values must match the keys of char_dict in convert_characters.py. -->
    <param name="convert_from" type="select" label="Convert all">
      <option value="s">Whitespaces</option>
      <option value="T">Tabs</option>
      <!--<option value="Sp">Spaces</option>-->
      <option value="Dt">Dots</option>
      <option value="C">Commas</option>
      <option value="D">Dashes</option>
      <option value="U">Underscores</option>
      <option value="P">Pipes</option>
    </param>
    <param format="txt" name="input" type="data" label="in Query"/>
  </inputs>
  <outputs>
    <data format="tabular" name="out_file1" />
  </outputs>
  <tests>
    <test>
      <param name="convert_from" value="s"/>
      <param name="input" value="1.bed"/>
      <output name="out_file1" file="eq-convert.dat"/>
    </test>
    <test>
      <param name="convert_from" value="s"/>
      <param name="input" value="a.txt"/>
      <output name="out_file1" file="a.tab"/>
    </test>
  </tests>
  <help>

**What it does**

Converts all delimiters of a specified type into TABs. Consecutive characters are condensed. For example, if columns are separated by 5 spaces they will be converted into 1 tab.

-----

**Example**

- Input file::

    chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|-
    chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|-
    chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+
    chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+

- Converting all pipe delimiters of the above file to TABs will get::

    chrX 151283558 151283724 NM_000808_exon_8_0_chrX_151283559_r 0 -
    chrX 151370273 151370486 NM_000808_exon_9_0_chrX_151370274_r 0 -
    chrX 151559494 151559583 NM_018558_exon_1_0_chrX_151559495_f 0 +
    chrX 151564643 151564711 NM_018558_exon_2_0_chrX_151564644_f 0 +

</help>
</tool>