comparison shuffle_sequences.sh @ 0:611156829647 draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:07:46 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:611156829647
1 #!/bin/sh
2
# Print the help text for this wrapper on standard output.
usage() {
  printf '%s\n' \
    "" \
    "" \
    " CALL " \
    " sh shuffle_sequences.sh input.fasta k output.fasta" \
    "" \
    " INPUT" \
    " input.fasta - input fasta file " \
    " k - 'preserve frequency of k letters (dinucleotide k=2)'" \
    "" \
    " OUTPUT" \
    " output.fasta - shuffle fasta file " \
    "" \
    " DESCRIPTION" \
    " Wrapper for ushuffle program Created by M Jiang. Reference PMID: 18405375 ." \
    " The ushuffle shuffles nucleotides in fasta sequence preserving frequency" \
    " of k-mers, a positional order is changed. By default k=2." \
    " A sequence in fasta file is shuffled only once. The output fasta" \
    " in the identifier >line contains the original sequence; a shuffled sequence " \
    " is output on the second line." \
    "" \
    " REQUIREMENT" \
    " ushuffle installed" \
    " conda install -c bioconda ushuffle" \
    ""
}

# The script takes exactly three positional arguments (input, k, output).
# Any other count shows the help text and ends with a failure status.
case $# in
  3) ;;
  *)
    usage
    exit 1
    ;;
esac
32
# ---------------------------------------------------------------------------
# Shuffle every record of a FASTA file with ushuffle.
#
# Positional arguments:
#   $1  input FASTA file
#   $2  k, the k-let size passed to ushuffle via -k
#   $3  output file; every input record produces two lines:
#         ">id original_sequence"
#         "shuffled_sequence"
#
# Exits non-zero (with a message on stderr) when the input cannot be read,
# k is not a non-negative integer, or ushuffle is missing or fails.
# ---------------------------------------------------------------------------
name=$1
diset=$2
out=$3

# External shuffler executable; looked up on PATH.
call=ushuffle

# Report a fatal error on stderr and stop.
die() {
  printf 'shuffle_sequences.sh: %s\n' "$*" >&2
  exit 1
}

case $diset in
  '' | *[!0-9]*) die "k must be a non-negative integer, got: '$diset'" ;;
esac
command -v "$call" >/dev/null 2>&1 || die "'$call' not found on PATH"

# Private scratch file instead of a fixed name in the working directory, so
# concurrent runs cannot clobber each other; removed on every exit path.
tmp_tab=$(mktemp "${TMPDIR:-/tmp}/shuffle_sequences.XXXXXX") \
  || die "cannot create temporary file"
trap 'rm -f -- "$tmp_tab"' EXIT
trap 'exit 1' HUP INT TERM

# Convert FASTA into one "<id><TAB><sequence>" line per record.
# Records are split on ">" and fields on newlines, so:
#   - the id is the first word of the header line (any description is dropped),
#   - all remaining lines are joined, so multi-line sequences stay complete.
# The input is fed through stdin so an unusual file name (e.g. containing "=")
# is never interpreted by awk as an operand.
awk '
  BEGIN { RS = ">"; FS = "\n"; OFS = "\t" }
  NR > 1 {
    id = $1
    sub(/^[ \t]+/, "", id)
    sub(/[ \t\r].*$/, "", id)
    seq = ""
    for (i = 2; i <= NF; i++) seq = seq $i
    gsub(/[ \t\r]/, "", seq)
    print ">" id, seq
  }
' < "$name" > "$tmp_tab" || die "cannot read input file: $name"

# Read the tabular file back and write the shuffled FASTA.
tab=$(printf '\t')
while IFS="$tab" read -r id seq; do
  if [ -z "$seq" ]; then
    # Nothing to shuffle: keep the record, but do not call ushuffle with an
    # empty -s argument.
    printf 'shuffle_sequences.sh: warning: record %s has no sequence\n' "$id" >&2
    printf '%s\n\n' "$id"
    continue
  fi
  # stdin is detached so the shuffler can never consume the loop's input.
  useq=$("$call" -s "$seq" -k "$diset" -n 1 </dev/null) \
    || die "'$call' failed on record $id"
  printf '%s %s\n' "$id" "$seq"
  printf '%s\n' "$useq"
done < "$tmp_tab" > "$out"
52