Mercurial > repos > mingchen0919 > split_multifasta
comparison split.pl @ 0:efd5c022b54d draft
planemo upload
| author | mingchen0919 |
|---|---|
| date | Mon, 09 Apr 2018 12:27:49 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:efd5c022b54d |
|---|---|
#!/usr/bin/perl
#
# Split a multi-FASTA file into a fixed number of partition files.
#
# Usage : perl <thisScript.pl> <file to be split> <number of partitions>
#
# For an input file F and N partitions, the files F.1 .. F.N are written
# next to F.  Records (a line starting with '>' plus the lines that follow
# it) are never split across two partitions.  Each partition receives up to
# ceil(records / N) records, so trailing partitions may hold fewer records
# or be empty when the records do not divide evenly.

use strict;
use warnings;

my $usage = "\n\t Usage : perl <thisScript.pl> <file to be split> <number of partitions> \n\n";

my ($fasta_path, $num_files) = @ARGV;
die $usage
    if !defined $fasta_path || $fasta_path eq ''
    || !defined $num_files  || $num_files  eq '';

# A zero or non-numeric partition count would otherwise surface later as an
# "Illegal division by zero" error.
die "Number of partitions must be a positive integer, got '$num_files'\n"
    unless $num_files =~ /\A[0-9]+\z/ && $num_files > 0;
$num_files += 0;    # normalise values such as "03"

# Count the records in Perl instead of shelling out to grep: no shell
# quoting problems with unusual file names and no scratch file left in the
# current directory.
open my $count_fh, '<', $fasta_path
    or die "Cannot open '$fasta_path' for reading: $!\n";
my $num_seqs = 0;
while (my $line = <$count_fh>) {
    $num_seqs++ if $line =~ /^>/;
}
close $count_fh;

print "Number of seqs is $num_seqs\n";

# Deliberately left fractional: the loop below stops adding records once
# the running count reaches this value, which rounds the quota up.
my $num_per_file = $num_seqs / $num_files;
print "Num per File is $num_per_file\n";

open my $in_fh, '<', $fasta_path
    or die "Cannot open '$fasta_path' for reading: $!\n";

# $pending always holds the next line that has been read but not yet
# written; undef means end of input.
my $pending = <$in_fh>;

for my $part (1 .. $num_files) {
    print "$part\n";

    my $part_path = "$fasta_path.$part";
    open my $out_fh, '>', $part_path
        or die "Cannot open '$part_path' for writing: $!\n";

    if (defined $pending) {
        # The pending line opens this partition and counts as its first
        # record (for partition 1 it may be a line preceding any header).
        print {$out_fh} $pending;
        my $seqs_in_part = 1;
        $pending = <$in_fh>;

        # Copy lines, counting headers, until the quota is reached.
        while (defined $pending && $seqs_in_part < $num_per_file) {
            print {$out_fh} $pending;
            $seqs_in_part++ if $pending =~ /^>/;
            $pending = <$in_fh>;
        }

        # Finish the record in progress: copy up to the next header.
        while (defined $pending && $pending !~ /^>/) {
            print {$out_fh} $pending;
            $pending = <$in_fh>;
        }
    }

    # Whatever is left goes into the last partition.  The pending line is
    # written first so that a header already read is not lost.
    if ($part == $num_files) {
        while (defined $pending) {
            print {$out_fh} $pending;
            $pending = <$in_fh>;
        }
    }

    # Buffered write errors (e.g. disk full) only surface at close.
    close $out_fh or die "Error writing '$part_path': $!\n";
}

close $in_fh;
