plot_selected: dnp-subset-dinuc-profile.sh comparison

comparison dnp-subset-dinuc-profile.sh @ 0:448204d12325 draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"

author	erinija
date	Fri, 01 May 2020 12:13:00 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:448204d12325
#!/bin/sh
#
# dnp-subset-dinuc-profile.sh
#
# Purpose
#   For every dinucleotide named in the second argument, run dnp-diprofile
#   twice on the input FASTA (once plain, once with -c) and paste the two
#   result columns, headed "<DI>.f" and "<DI>.r", side by side into one table.
#
# Arguments
#   $1  input FASTA file
#   $2  whitespace-separated dinucleotides in ONE quoted word, e.g. 'AA AC AG'
#   $3  output file for the pasted table
#
# Exit status
#   0 on success, and also after printing the usage text (see below);
#   1 when the input is unreadable, dnp-diprofile is missing or fails, or
#   the table cannot be written.
#
# Requirement
#   dnp-diprofile on PATH (conda install -c bioconda dnp-diprofile)

# Print the help text on stdout; the wording is reproduced unchanged.
usage() {
  printf '%s\n' \
    " CALL " \
    "   sh subset_dinuc_profile.sh input.fasta  dinucleotides output" \
    "" \
    " INPUT" \
    "   input.fasta - a batch of nucleosome (or any DNA) DNA sequences " \
    "   dinucleotides - any subset of dinucleotides enclosed by quotes as 'AA AC AG AT CA CC' " \
    "" \
    " OUTPUT" \
    "   output - file name to write the output  in tabular format, columns have names as AA.f AA.r ..." \
    "" \
    " DESCRIPTION" \
    "   Compute dinucleotide frequency profiles on forward and its complementary " \
    "   sequences from a batch of fasta sequences. Output columns are labelled by AA.f, AA.r ... " \
    "" \
    " Example of input fasta lines" \
    "  >chr9:42475963-42476182" \
    "  CCAGGCAGACCCCATATTCAAGCTGCTGCCCCAGGGTGGTGTACAGATCTGGGGAGAAGAAGGATGA" \
    "  >chr9:42476175-42476394" \
    "  TCTGCACTCCAGCATGCCTGAGGAGAGGAGGGAATGCAGGATCCTAGTGGAAAGAGTACCAAGCTGG" \
    "" \
    " Example of output  table" \
    "  AA.f            AA.r            AC.f            AC.r   ..." \
    "  0.076000        0.059000        0.065000        0.078000 ..." \
    "  0.082000        0.060000        0.057000        0.076000 ..." \
    "  0.067000        0.075000        0.049000        0.071000 ..." \
    "" \
    "" \
    " REQUIREMENT" \
    "   dnp-diprofile installed" \
    "   conda install -c bioconda dnp-diprofile"
}

# Report a fatal error on stderr and stop with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if test "$#" -ne 3; then
  usage
  # Status deliberately stays 0, as before, so that callers which run the
  # script without arguments just to display the help keep working.
  exit 0
fi

name=$1
diset=$2
out=$3
call=dnp-diprofile

test -r "$name" || die "cannot read input fasta: $name"
command -v "$call" >/dev/null 2>&1 ||
  die "$call not found; install it with: conda install -c bioconda dnp-diprofile"

# All intermediate files live in a private scratch directory. This avoids
# overwriting or deleting an existing "<input>.fa" next to the input, avoids
# removing unrelated *.f / *.r files from the caller's working directory, and
# guarantees cleanup on every exit path.
workdir=$(mktemp -d) || die "cannot create a temporary directory"
trap 'rm -rf -- "$workdir"' EXIT
trap 'exit 1' HUP INT TERM

# dnp-diprofile requires the input to end in .fa, which Galaxy dataset
# names do not, so the tool is given a suitably named copy.
fasta=$workdir/input.fa
cp -- "$name" "$fasta" || die "cannot copy $name"

# Sequence length is taken from line 2, i.e. the first sequence of a FASTA
# whose records are a header line followed by one sequence line.
seq=$(sed -n '2{p;q;}' "$fasta")
len=${#seq}
test "$len" -gt 0 || die "no sequence found on line 2 of $name"

# Positional parameters are reused as the ordered list of column files;
# $1..$3 were saved above.
set --

# $diset is split on whitespace on purpose; globbing is switched off so a
# stray '*' or '?' in the list cannot expand to file names.
set -f
for di in $diset; do
  fwd=$workdir/$di.f
  rev=$workdir/$di.r

  # Forward profile: column header first, then the tool's output.
  {
    printf '%s\n' "$di.f"
    "$call" "$fasta" -di "$di" -sl "$len"
  } > "$fwd" || die "$call failed for $di (forward)"

  # Same call with -c, which yields the complementary-sequence profile.
  {
    printf '%s\n' "$di.r"
    "$call" "$fasta" -di "$di" -sl "$len" -c
  } > "$rev" || die "$call failed for $di (complementary)"

  set -- "$@" "$fwd" "$rev"
done
set +f

# Without operands paste would read stdin, so an empty list is an error.
test "$#" -gt 0 || die "no dinucleotides given"

paste "$@" > "$out" || die "cannot write $out"
exit 0

Mercurial > repos > erinija > plot_selected

comparison dnp-subset-dinuc-profile.sh @ 0:448204d12325 draft default tip