#!/usr/bin/perl
#
# Split a sequence file whose records each begin with a '>' header line
# (FASTA-style) into a fixed number of partition files.
#
# Usage : perl <thisScript.pl> <file to be split> <number of partitions>
#
# For an input named FILE and N partitions, the parts are written to
# FILE.1 .. FILE.N.  Records are never split across parts: a part keeps
# receiving lines until it has seen (total records / N) headers and then
# runs on to the end of the record it is in.  Later parts may therefore be
# smaller than earlier ones, or empty when there are fewer records than
# partitions.

use strict;
use warnings;

# Run as a script only; when this file is loaded with require/do the subs
# below can be called directly.
main(@ARGV) unless caller;

# Entry point: validate the command line and perform the split.
#   $hom_file     - path of the file to be split
#   $num_of_files - number of partitions (positive integer)
# Dies with the usage text when an argument is missing, and with a specific
# message when the partition count is not a positive integer.
sub main {
    my ($hom_file, $num_of_files) = @_;

    if (   !defined $hom_file
        || $hom_file eq ""
        || !defined $num_of_files
        || $num_of_files eq "")
    {
        die "\n\t Usage : perl <thisScript.pl> <file to be split> <number of partitions> \n\n";
    }

    # A zero count would divide by zero below; a fractional or non-numeric
    # one has no sensible meaning as a number of output files.
    if ($num_of_files !~ /\A[0-9]+\z/ || $num_of_files < 1) {
        die "Number of partitions must be a positive integer, got '$num_of_files'\n";
    }

    split_fasta($hom_file, $num_of_files);
    return;
}

# Count the lines of $hom_file that start with '>'.
# Done in Perl rather than through a shell so that the file name is never
# interpreted by a shell and no scratch file is left behind.
# Returns the count; dies if the file cannot be opened.
sub count_sequences {
    my ($hom_file) = @_;

    open my $in, '<', $hom_file
        or die "File not found - 1: $hom_file: $!\n";

    my $count = 0;
    while (my $line = <$in>) {
        $count++ if $line =~ /^>/;
    }
    close $in;

    return $count;
}

# Write $hom_file out as $num_of_files parts named "$hom_file.<i>".
#   $hom_file     - path of the file to be split
#   $num_of_files - number of parts to create (positive integer)
# Prints the record count, the records-per-part ratio and each part index
# to STDOUT as it goes.  Returns the number of records found.
# Dies if the input cannot be read or a part cannot be written.
sub split_fasta {
    my ($hom_file, $num_of_files) = @_;

    my $num_of_seqs = count_sequences($hom_file);
    print "Number of seqs is $num_of_seqs\n";

    my $num_per_file = $num_of_seqs / $num_of_files;
    print "Num per File is $num_per_file\n";

    open my $in, '<', $hom_file
        or die "File not found - 1: $hom_file: $!\n";

    # $line always holds the next line that has been read but not yet
    # written; it is undef once the input is exhausted.
    my $line = <$in>;
    my $out;

    for my $i (1 .. $num_of_files) {
        print "$i\n";

        my $part = "$hom_file.$i";
        open $out, '>', $part
            or die "Can't open file $part: $!\n";

        if (defined $line) {
            # The pending line opens this part and counts as its first
            # record, whether or not it is actually a header.
            print {$out} $line;
            my $seqs = 1;
            $line = <$in>;

            # Copy lines until enough headers have been seen for this part.
            while (defined $line && $seqs < $num_per_file) {
                print {$out} $line;
                $seqs++ if $line =~ /^>/;
                $line = <$in>;
            }

            # Finish the record in progress: stop at the next header.
            while (defined $line && $line !~ /^>/) {
                print {$out} $line;
                $line = <$in>;
            }
        }

        # The last part stays open so any leftover input can be added to it.
        if ($i < $num_of_files) {
            close $out or die "Can't close file $part: $!\n";
        }
    }

    # Anything still unread goes to the last part.  The pending line is
    # written first so that a header already read is not dropped.
    while (defined $line) {
        print {$out} $line;
        $line = <$in>;
    }
    close $out or die "Can't close file $hom_file.$num_of_files: $!\n";

    close $in;

    return $num_of_seqs;
}