#!/bin/sh
#
# Fetch Ensembl release files and prepare the inputs for snpEff builds.
#
# Stages, in order:
#   1. Download GTF, FASTA, cDNA and protein files from ftp.ensembl.org
#   2. Copy the downloaded files into a data/ directory tree
#   3. Print config file entries for every genome
#   4. Write queue_build.txt with one snpEffXL.sh build command per genome
#
# NOTE: every stage below is currently disabled (commented out). As written,
# the script only enters ./download and returns to the starting directory.
# Uncomment the stages you need before running.

# Abort on the first failing command. This is set here rather than on the
# shebang line so that it still applies when the script is run as
# `sh <script>`.
set -e

# Ensembl release number; used to build the FTP URLs in the stages below
RELEASE=66

# Create the working directory when it is missing (-p keeps re-runs from
# failing when it already exists), then work from inside it
mkdir -p download
cd download

#---
# Download
#---

# # Download GTF files (annotations)
# wget -r -A "*gtf.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
#
# # Download FASTA files (reference genomes)
# wget -r -A "*toplevel.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
#
# # Download CDS sequences
# wget -r -A "*cdna.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
#
# # Download PROTEIN sequences
# wget -r -A "*.pep.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"

#---
# Create directory structure
#---

# # Move all downloaded files to this directory
# mv `find ftp.ensembl.org -type f` .

# # Gene annotation files
# for gtf in *.gtf.gz
# do
#   short=`../scripts/file2GenomeName.pl $gtf | cut -f 5`
#   echo ANNOTATIONS: $short
#
#   mkdir -p data/$short
#   cp $gtf data/$short/genes.gtf.gz
# done
#
# # Reference genome files
# mkdir -p data/genomes
# for fasta in *.dna.toplevel.fa.gz
# do
#   genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
#   echo REFERENCE: $genome
#
#   cp $fasta data/genomes/$genome.fa.gz
# done
#
# # CDS genome files
# for fasta in *.cdna.all.fa.gz
# do
#   genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
#   echo CDS: $genome
#
#   cp $fasta data/$genome/cds.fa.gz
# done
#
# # Protein sequence files
# for pep in *.pep.all.fa.gz
# do
#   short=`../scripts/file2GenomeName.pl $pep | cut -f 5`
#   echo PROTEIN: $short
#
#   mkdir -p data/$short
#   cp $pep data/$short/protein.fa.gz
# done

#---
# Config file entries
#---

# for fasta in *.cdna.all.fa.gz
# do
#   genome=`../scripts/file2GenomeName.pl $fasta | cut -f 4`
#   short=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
#
#   # Individual genome entry
#   echo -e "$short.genome : $genome"
#   echo -e "$short.reference : ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
#   echo
# done

# Back to parent dir (`cd -` prints the directory it switches to; discard it)
cd - > /dev/null

#---
# Create build queue entries
#---

# rm -vf queue_build.txt
#
# # Build from TXT files
# for genes in data/*/genes.txt*
# do
#   dir=`dirname $genes`
#   genomeName=`basename $dir`
#   echo "./scripts/snpEffXL.sh build -v $genomeName"
# done | sort >> queue_build.txt
#
# # Build from GFF2 files
# echo "./scripts/snpEffXL.sh build -v -gff2 amel2" >> queue_build.txt
#
# # Build from GFF3 files
# for genes in `ls data/*/genes.gff* | grep -v amel2`
# do
#   dir=`dirname $genes`
#   genomeName=`basename $dir`
#   echo "./scripts/snpEffXL.sh build -v -gff3 $genomeName"
# done | sort >> queue_build.txt
#
# # Build from GTF22 files
# for genes in data/*/genes.gtf*
# do
#   dir=`dirname $genes`
#   genomeName=`basename $dir`
#   echo "./scripts/snpEffXL.sh build -v -gtf22 $genomeName"
# done | sort >> queue_build.txt