annotate macs2npk.sh @ 12:887f2ae7c266 draft default tip

Uploaded
author modencode-dcc
date Mon, 21 Jan 2013 18:41:35 -0500
parents 09f355281d57
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
09f355281d57 Uploaded
modencode-dcc
parents:
diff changeset
#!/bin/bash
# Converts macs xls output to narrowPeak output

# Command Usage: macs2npk.sh INPUTFILE OUTPUTFILE
#   INPUTFILE   existing MACS xls file to convert
#   OUTPUTFILE  file the narrowPeak records are written to
# Exits 1 with a message on stderr when an argument is missing or INPUTFILE
# does not exist.

# OUTPUTFILE is mandatory: it is the redirect target of the conversion, so a
# run with only INPUTFILE could never write any output. Fail up front with
# the usage text instead of failing late on an empty redirect target.
if [[ "$#" -lt 2 ]]
then
    script_name=$(basename "$0")
    {
        echo "${script_name}"
        echo "Converts MACS peak caller xls output file to narrowPeak format"
        echo "USAGE:"
        echo "${script_name} <MACSXlsFile> <outputFile>"
    } 1>&2
    exit 1
fi

MACSFILE=$1
if [[ ! -e ${MACSFILE} ]]
then
    echo "MACS xls file ${MACSFILE} does not exist" 1>&2
    exit 1
fi

# Disabled legacy behaviour: the second argument used to be an output
# directory and the output name was derived from the input file name.
# ODIR=$(dirname ${MACSFILE})
# [[ $# -gt 1 ]] && ODIR=$2
# if [[ ! -d ${ODIR} ]]
# then
#     echo "Output directory ${ODIR} does not exist" 1>&2
#     exit 1
# fi

# OFILE="${ODIR}/$(echo $(basename ${MACSFILE} '_peaks.xls')).regionPeak.gz"

# The second argument is now the output file path itself.
OFILE="${2}"
09f355281d57 Uploaded
modencode-dcc
parents:
diff changeset
31
09f355281d57 Uploaded
modencode-dcc
parents:
diff changeset
# XLS format
# chr start stop length summit tags -10log10(pvalue) fold_enrichment %FDR

# narrowPeak format
# chr start stop name score strand signalValue -log10(pValue) -log10(qValue) summit

#######################################
# Convert a MACS xls peak table to narrowPeak records.
#
# Steps:
#   - remove '#' comment lines and empty lines
#   - detect from the header row whether an FDR column is present
#   - remove the header row
#   - sort by column 7 (-10log10(pvalue)), numerically, descending
#   - rearrange columns and adjust start coordinates
#
# Column mapping performed by awk:
#   name         = rank of the peak after sorting (NR)
#   score        = tags (column 6)
#   strand       = "."
#   signalValue  = fold_enrichment (column 8)
#   pValue       = column 7 divided by 10
#   qValue       = -log10(FDR% / 100) when the header mentions FDR, else -1
#   summit       = column 5, copied as-is
#   start        = clamped to at least 1, then decremented by one
#
# Arguments: $1 - path of the MACS xls file
# Outputs:   narrowPeak lines on stdout
# Returns:   exit status of the conversion pipeline
#######################################
macs_xls_to_narrowpeak() {
    local xls_file=$1
    local header
    # Local and explicitly initialised, so a stray value inherited from the
    # environment can never select the FDR branch.
    local has_fdr=0

    # Check if header has FDR column
    header=$(sed -e '/^#/d' -e '/^$/d' "${xls_file}" | head -n 1)
    if [[ "${header}" == *FDR* ]]
    then
        has_fdr=1
    fi

    sed -e '/^#/d' -e '/^$/d' "${xls_file}" | \
        sed 1d | \
        sort -k7nr,7nr | \
        awk -v has_fdr="${has_fdr}" '
            $2 < 1 { $2 = 1 }
            {
                qvalue = "-1"
                if (has_fdr) {
                    # 1e-30 keeps log() finite when the FDR is exactly 0
                    qvalue = sprintf("%f", -log(($9 + 1e-30) / 100) / log(10))
                }
                printf "%s\t%d\t%d\t%d\t%s\t.\t%s\t%s\t%s\t%d\n", \
                    $1, $2 - 1, $3, NR, $6, $8, $7 / 10, qvalue, $5
            }'
}

macs_xls_to_narrowpeak "${MACSFILE}" > "${OFILE}"