# HG changeset patch # User julius # Date 1343821291 14400 # Node ID dc24da0a3bd5bacc031d43d513fc9bfba16c2419 # Parent 1dfc6d345abd98eeb9d736fad2f51b46ae8a0626 Deleted selected files diff -r 1dfc6d345abd -r dc24da0a3bd5 convert_characters.pl --- a/convert_characters.pl Wed Aug 01 07:36:48 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -#! /usr/bin/perl -w - -use strict; -use warnings; - -# converts all characters of one type into another -# convert_characters.pl [input] [convert_from] [convert_to] [output] - -die "Check argument\n" unless @ARGV == 4; - -my $inputfile = $ARGV[0]; -my $convert_from = $ARGV[1]; -my $convert_to = $ARGV[2]; -my $outputfile = $ARGV[3]; - -if ($convert_from eq "s") -{ - $convert_from = '\s'; -} -elsif ($convert_from eq "T") -{ - $convert_from = '\t'; -} -elsif ($convert_from eq "Sp") -{ - $convert_from = '\s'; -} -elsif ($convert_from eq "Dt") -{ - $convert_from = '\.'; -} -elsif ($convert_from eq "C") -{ - $convert_from = ","; -} -elsif ($convert_from eq "D") -{ - $convert_from = "-"; -} -elsif ($convert_from eq "U") -{ - $convert_from = "_"; -} -elsif ($convert_from eq "P") -{ - $convert_from = '\|'; -} -else -{ - die "Invalid value specified for convert from\n"; -} - - -if ($convert_to eq "T") -{ - $convert_to = "\t"; -} -elsif ($convert_to eq "Sp") -{ - $convert_to = '\s'; -} -elsif ($convert_to eq "Dt") -{ - $convert_to = "\."; -} -elsif ($convert_to eq "C") -{ - $convert_to = ","; -} -elsif ($convert_to eq "D") -{ - $convert_to = "-"; -} -elsif ($convert_to eq "U") -{ - $convert_to = "_"; -} -elsif ($convert_to eq "P") -{ - $convert_to = "|"; -} -else -{ - die "Invalid value specified for convert to\n"; -} - -my $fhIn; -open ($fhIn, "< $inputfile") or die "Cannot open source file"; - -my $fhOut; -open ($fhOut, "> $outputfile"); - -while (<$fhIn>) -{ - my $thisLine = $_; - chomp $thisLine; - $thisLine =~ s/$convert_from{1,}/$convert_to/g; - print $fhOut $thisLine,"\n"; -} -close ($fhIn) or die "Cannot close source file\n"; -close ($fhOut) or die "Cannot close output fil\n"; diff -r 1dfc6d345abd -r dc24da0a3bd5 convert_characters.py --- a/convert_characters.py Wed Aug 01 07:36:48 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -#!/usr/bin/env python -#By, Guruprasad Ananda. - -from galaxy import eggs -import sys, re - -def stop_err(msg): - sys.stderr.write(msg) - sys.exit() - -def main(): - if len(sys.argv) != 4: - stop_err("usage: convert_characters infile from_char outfile") - - try: - fin = open(sys.argv[1],'r') - except: - stop_err("Input file cannot be opened for reading.") - - from_char = sys.argv[2] - - try: - fout = open(sys.argv[3],'w') - except: - stop_err("Output file cannot be opened for writing.") - - char_dict = {'T':'\t','s':'\s','Dt':'\.','C':',','D':'-','U':'_','P':'\|'} - from_ch = char_dict[from_char] + '+' #making an RE to match 1 or more occurences. - skipped = 0 - - for line in fin: - line = line.strip() - try: - fout.write("%s\n" %(re.sub(from_ch,'\t',line))) - except: - skipped += 1 - - if skipped: - print "Skipped %d lines as invalid." %skipped - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 1dfc6d345abd -r dc24da0a3bd5 convert_characters.xml --- a/convert_characters.xml Wed Aug 01 07:36:48 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ - - delimiters to TAB - convert_characters.py $input $convert_from $out_file1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Converts all delimiters of a specified type into TABs. Consecutive characters are condensed. For example, if columns are separated by 5 spaces they will converted into 1 tab. - ------ - -**Example** - -- Input file:: - - chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|- - chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|- - chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+ - chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+ - -- Converting all pipe delimiters of the above file to TABs will get:: - - chrX 151283558 151283724 NM_000808_exon_8_0_chrX_151283559_r 0 - - chrX 151370273 151370486 NM_000808_exon_9_0_chrX_151370274_r 0 - - chrX 151559494 151559583 NM_018558_exon_1_0_chrX_151559495_f 0 + - chrX 151564643 151564711 NM_018558_exon_2_0_chrX_151564644_f 0 + - - -