view galaxy_runCircos.sh @ 0:46f7f689b929 draft default tip

Uploaded
author saskia-hiltemann
date Tue, 17 Sep 2013 11:29:11 -0400
parents
children
line wrap: on
line source

#!/bin/bash


#######################################
#  Location specific settings

#scriptsdir="/data/galaxy-dist/tools/trait/circos"     # location of tool directory
#confdir=${scriptsdir}        						  # location of circos conf files (circos_JSV.conf, circos_JS.conf etc)
#hg18bed="${scriptsdir}/hg18Genes"
#hg19bed="${scriptsdir}/hg19Genes"
#######################################


# Usage: galaxy_runCircos.sh <junctions file> <SNParray probes> <SNParray snps> <varfile>
#            <WG file> <montage file> <per_chr_file> <hg18|hg19> <customplotfile>
#            <custom_region> <genetrack> <imageformat> <scriptsdir> <confdir>
#
# The four input files (arguments 1-4) are given as the literal string "None"
# when they are not supplied. Exactly 14 arguments are required.
echo "arguments: $*"


# A wrong argument count is a hard error: report it on stderr and exit non-zero
# so the caller does not mistake the aborted run for a successful one.
if (( $# != 14 )); then
	echo "error, unexpected number of arguments in $0" >&2
	exit 1
fi

# input files ("None" when absent)
junctions=$1
probes=$2
snps=$3
variations=$4

# output locations, forwarded unchanged to the plotting script
wg_out=$5
montage_out=$6
per_chr_file=$7

# plot options
build=$8                        # genome build; "hg18" is special-cased, anything else is handled as hg19
customplot_file=$9
custom_region=${10//chr/hs}     # every "chr" in the region is rewritten to "hs"
show_gene_track=${11}           # "yes" enables the impacted-genes text track
imageformat=${12}

# installation-specific locations
scriptsdir=${13}                # directory holding the helper scripts and the hg18Genes/hg19Genes files
confdir=${14}                   # directory holding circos_JSV.conf


# Without at least one of the four inputs there is nothing to draw. Treat this
# as an error: message on stderr and a non-zero exit status (a bare `exit`
# would return the status of the preceding echo, i.e. success).
if [[ "$junctions" == "None" && "$probes" == "None" && "$snps" == "None" && "$variations" == "None" ]]; then
	echo "error, not enough data to make a plot" >&2
	exit 1
fi


# Gene BED files (one per genome build) used for the impacted-genes track.
hg18bed="${scriptsdir}/hg18Genes"
hg19bed="${scriptsdir}/hg19Genes"


#######################################
# Write a one-line "show = yes|no" switch file for a single track.
# Arguments: $1 - input value for the track; the literal "None" means the
#                 input was not supplied, any other value enables the track
#            $2 - file to (over)write in the current directory
#######################################
write_show_flag() {
	local input=$1 outfile=$2

	# Quoted comparison: empty values and paths containing spaces no longer
	# make the test itself fail with a syntax error.
	if [[ "$input" == "None" ]]; then
		echo "show = no" > "$outfile"
	else
		echo "show = yes" > "$outfile"
	fi
}

# which tracks to show (depending on which files are provided)
# NOTE(review): these files are presumably included by the circos configuration — confirm.
write_show_flag "$junctions" show_junctions.txt
write_show_flag "$variations" show_variations.txt
write_show_flag "$snps" show_snps.txt
write_show_flag "$probes" show_probes.txt


# Karyotype definition: hg18 only when explicitly requested, hg19 otherwise.
if [[ "$build" == "hg18" ]]; then
	echo "karyotype = data/karyotype/karyotype.human.hg18.txt" > karyotype.txt
else
	echo "karyotype = data/karyotype/karyotype.human.hg19.txt" > karyotype.txt
fi

# All intermediate file names are derived from the base name of the junctions
# input (when no junctions file was given this is the literal "None").
bn=$(basename -- "${junctions}")
junctions_circos="${bn}_J_circos"
probes_circos="${bn}_probes_circos"
snps_circos="${bn}_snps_circos"
variations_circos="${bn}_variations_circos"
variations_circos_chr="${bn}_variations_circos_chr"
bed_junctions="bedfile_junctions_${bn}"
bed_snps="bedfile_snps_${bn}"
impacted_genes="ImpactedGenes.tsv"
circos_out_wg="${bn}_out_wg"
circos_out_montage="${bn}_out_montage"

# Make sure these files exist (possibly empty) even when the conversion step
# that would normally produce them is skipped.
touch -- "$bed_junctions" "$bed_snps" "$impacted_genes"



# Log the effective settings (wg_out was previously printed through the
# misspelled, always-empty variable "wgout").
echo ""
echo "galaxy:runCircos.sh: "
echo "junctions $junctions, probes: $probes, snps: $snps, variations: $variations, wg_out: $wg_out, montage_out: $montage_out, per_chr_file: $per_chr_file"
echo ""

## conf file
# NOTE(review): "conffile" is not referenced under this name anywhere else in
# the visible script — confirm which variable the circos invocation should use.
conffile=${confdir}/circos_JSV.conf



#####################################
#
#      input file conversions
#
#####################################


# Switch file for the gene track; the value is written through unchanged.
echo "show = ${show_gene_track}" > showgenes.txt

# Junctions: convert to circos format and, when the gene track is enabled,
# build the list of genes overlapping a junction. An empty value is treated
# like "None" (no file supplied).
if [[ -n "$junctions" && "$junctions" != "None" ]]; then
	echo "junctions file specified"

	# An already converted file is reused instead of being regenerated.
	if [[ ! -f "$junctions_circos" ]]; then
		"${scriptsdir}/junctions2circos.sh" "$junctions" "$junctions_circos" "$bed_junctions"
	fi

	if [[ "${show_gene_track}" == "yes" ]]; then

		#cat $bed_junctions $bed_snps > ImpactedGenes.tmp
		mv -- "$bed_junctions" ImpactedGenes.tmp

		# Gene annotation matching the genome build (anything but hg18 uses hg19).
		if [[ "$build" == "hg18" ]]; then
			genes_bed=$hg18bed
		else
			genes_bed=$hg19bed
		fi
		intersectBed -a ImpactedGenes.tmp -b "$genes_bed" -wb > ImpactedGenes.tmp2

		#generate text-track input file: columns 1, 11, 12 and 19 of the
		#tab-separated intersect output, written space-separated
		awk 'BEGIN { FS = "\t"; OFS = " " } { print $1, $11, $12, $19 }' ImpactedGenes.tmp2 > ImpactedGenes.tmp3

		# circos names chromosomes hsN instead of chrN
		sed -i 's/chr/hs/g' ImpactedGenes.tmp3

		# Sort on the 4th field so equal entries become adjacent for uniq.
		sort -k4 ImpactedGenes.tmp3 > ImpactedGenes.tmp4

		# NOTE(review): -u keeps only lines whose 4th field occurs exactly once,
		# so an entry that appears several times is dropped entirely rather than
		# reduced to one line — confirm this is intended.
		uniq -u -f 3 ImpactedGenes.tmp4 ImpactedGenes.tsv
		cat ImpactedGenes.tsv
	fi

fi

#######################################
# Thin out a data file until it holds at most a given number of lines.
# Every pass keeps the odd-numbered lines (1st, 3rd, 5th, ...), i.e. halves
# the file; passes are repeated until the limit is met.
# Arguments: $1 - data file, rewritten in place
#            $2 - track label used in the log messages ("probes" or "snps")
#            $3 - maximum number of lines to keep (default 25000)
# Outputs:   progress messages on stdout
# Returns:   non-zero when a thinning pass could not be written
#######################################
circos_reduce_datapoints() {
	local datafile=$1 label=$2 maxpoints=${3:-25000}
	local numpoints="" previous

	# Feed wc through stdin so it prints the bare count; the arithmetic
	# expansion strips the padding some wc implementations add.
	if [[ -f "$datafile" ]]; then
		numpoints=$(( $(wc -l < "$datafile") ))
	fi
	echo "numpoints ${label}.txt: $numpoints"

	# Nothing to thin when the conversion step produced no file.
	[[ -n "$numpoints" ]] || return 0

	while (( numpoints > maxpoints )); do
		echo "reducing number of datapoints in ${label} file"
		awk 'FNR % 2 == 1' "$datafile" > "${datafile}.reduced" || return 1
		mv -- "${datafile}.reduced" "$datafile" || return 1

		previous=$numpoints
		numpoints=$(( $(wc -l < "$datafile") ))
		# A pass that removes nothing can never reach the limit; stop instead of looping forever.
		(( numpoints < previous )) || break
	done
}

# SNP array data is only plotted when both the probes and the snps file are given.
if [[ "$probes" != "None" && "$snps" != "None" ]]; then
	echo "probes and snps files specified"

	"${scriptsdir}/SNParray2circos.sh" "$probes" "$snps" "$probes_circos" "$snps_circos"

	#if too many points, reduce (max 25000)
	circos_reduce_datapoints "$probes_circos" probes
	circos_reduce_datapoints "$snps_circos" snps
fi


# Variations: convert to circos format unless a converted file already exists.
# An empty value is treated like "None" (no file supplied).
if [[ -n "$variations" && "$variations" != "None" ]]; then
	echo "variations file specified"
	if [[ ! -f "$variations_circos" ]]; then
		"${scriptsdir}/variations2circos.sh" "$variations" "$variations_circos" "$variations_circos_chr" "$bed_snps"
		# Dump the converted file into the job log.
		echo "variations_circos"
		cat -- "$variations_circos"
	fi
fi





#####################################
#
#      run circos
#
#####################################


# Hand the converted track files, output locations and plot options to the
# plotting script.
# NOTE(review): ${conffile_JSV} is never assigned in the visible part of this
# script (only "conffile" is). Being unquoted and empty it expands to nothing,
# so ${junctions_circos} becomes the first argument and every later argument
# shifts one position to the left. Confirm against runcircos_JSV.sh whether
# ${conffile} was intended here.
# NOTE(review): all expansions are unquoted, so any other empty value
# (e.g. $customplot_file or $custom_region) likewise disappears and shifts
# the arguments that follow it.
echo "running circos"
${scriptsdir}/runcircos_JSV.sh ${conffile_JSV} ${junctions_circos} ${probes_circos} ${snps_circos} $wg_out $montage_out $per_chr_file $build ${variations_circos} ${variations_circos_chr} $customplot_file $custom_region $imageformat