Mercurial > repos > mingchen0919 > split_multifasta
diff split.pl @ 0:efd5c022b54d draft
planemo upload
| author | mingchen0919 |
|---|---|
| date | Mon, 09 Apr 2018 12:27:49 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split.pl Mon Apr 09 12:27:49 2018 -0400 @@ -0,0 +1,52 @@ +#!/usr/bin/perl + + + if($ARGV[0] eq "" || $ARGV[1] eq ""){ + die "\n\t Usage : perl <thisScript.pl> <file to be split> <number of partitions> \n\n"; + } + + + $homfile = $ARGV[0]; + $numOfFiles = $ARGV[1]; + + + system("grep -c '^>' $homfile > out"); + open IN, "out" || die "File not found - 2\n"; + $numOfSeqs = <IN>; + close IN; + + print "Number of seqs is $numOfSeqs\n"; + my $numPerFile = $numOfSeqs/$numOfFiles; + print "Num per File is $numPerFile\n"; + + open IN, $homfile || die "File not found - 1\n"; + $lineIn = <IN>; + + for($i = 1; $i <= $numOfFiles; $i++){ + print "$i\n"; + open FILE, ">".$homfile.".".$i || die "Can't open file"; + print FILE $lineIn; + $seqs = 1; + $lineIn = <IN>; + while(defined $lineIn && $seqs < $numPerFile){ + print FILE $lineIn; + if ($lineIn =~ /^>/) { $seqs++; } + $lineIn = <IN>; + } + while(defined $lineIn && $lineIn !~ /^>/){ + print FILE $lineIn; + $lineIn = <IN>; + } + close FILE; + } + $i = $i -1; + open FILE, ">>".$homfile.".".$i; + while ($lineIn = <IN>){ + print FILE $lineIn; + } + close FILE; + + close IN; + + +
