Mercurial > repos > greg > snpeff_v2_from_pablo
diff snpEff_2_1a/scripts/download.sh @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
| author | greg |
|---|---|
| date | Fri, 20 Apr 2012 14:47:09 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpEff_2_1a/scripts/download.sh Fri Apr 20 14:47:09 2012 -0400 @@ -0,0 +1,120 @@ +#!/bin/sh -e + +RELEASE=66 + +# mkdir download +cd download + +#--- +# Download +#--- + +# # Download GTF files (annotations) +# wget -r -A "*gtf.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/" +# +# # Download FASTA files (reference genomes) +# wget -r -A "*toplevel.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/" +# +# # Download CDS sequences +# wget -r -A "*cdna.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/" +# +# # Download PROTEIN sequences +# wget -r -A "*.pep.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/" + +#--- +# Create directory structure +#--- + +# # Move all downloaded file to this directory +# mv `find ftp.ensembl.org -type f` . + +# # Gene annotations files +# for gtf in *.gtf.gz +# do +# short=`../scripts/file2GenomeName.pl $gtf | cut -f 5` +# echo ANNOTATIONS: $short +# +# mkdir -p data/$short +# cp $gtf data/$short/genes.gtf.gz +# done +# +# # Reference genomes files +# mkdir -p data/genomes +# for fasta in *.dna.toplevel.fa.gz +# do +# genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5` +# echo REFERENCE: $genome +# +# cp $fasta data/genomes/$genome.fa.gz +# done +# +# # CDS genomes files +# for fasta in *.cdna.all.fa.gz +# do +# genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5` +# echo CDS: $genome +# +# cp $fasta data/$genome/cds.fa.gz +# done +# +# # Protein seuqence files +# for pep in *.pep.all.fa.gz +# do +# short=`../scripts/file2GenomeName.pl $pep | cut -f 5` +# echo PROTEIN: $short +# +# mkdir -p data/$short +# cp $pep data/$short/protein.fa.gz +# done + +#--- +# Config file entries +#--- + +# for fasta in *.cdna.all.fa.gz +# do +# genome=`../scripts/file2GenomeName.pl $fasta | cut -f 4` +# short=`../scripts/file2GenomeName.pl $fasta | cut -f 5` +# +# # Individual genome entry +# echo -e "$short.genome : $genome" +# echo -e "$short.reference : ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/" +# echo +# done + +# Back to parent dir +cd - > /dev/null + +#--- +# Create build queue entries +#--- + +# rm -vf queue_build.txt +# +# # Build from TXT files +# for genes in data/*/genes.txt* +# do +# dir=`dirname $genes` +# genomeName=`basename $dir` +# echo "./scripts/snpEffXL.sh build -v $genomeName" +# done | sort >> queue_build.txt +# +# # Build from GFF2 files +# echo "./scripts/snpEffXL.sh build -v -gff2 amel2" >> queue_build.txt +# +# # Build from GFF3 files +# for genes in `ls data/*/genes.gff* | grep -v amel2` +# do +# dir=`dirname $genes` +# genomeName=`basename $dir` +# echo "./scripts/snpEffXL.sh build -v -gff3 $genomeName" +# done | sort >> queue_build.txt +# +# # Build from GTF22 files +# for genes in data/*/genes.gtf* +# do +# dir=`dirname $genes` +# genomeName=`basename $dir` +# echo "./scripts/snpEffXL.sh build -v -gtf22 $genomeName" +# done | sort >> queue_build.txt +
