# HG changeset patch
# User davidvanzessen
# Date 1404217677 14400
# Node ID 79be0752711d9b094f972f65adca6286329acfe5
# Parent afddfd016ba6614328ca17aeaab509ec46e92616
Uploaded
diff -r afddfd016ba6 -r 79be0752711d demultiplex.xml
--- a/demultiplex.xml Thu May 08 07:50:42 2014 -0400
+++ b/demultiplex.xml Tue Jul 01 08:27:57 2014 -0400
@@ -4,7 +4,7 @@
fastx_toolkit
- r_wrapper.sh $input $out_file $out_file.files_path $where $mismatches $partial $input.name
+ r_wrapper.sh $input $out_file $out_file.files_path $where $mismatches $partial $input.name $trim_start $trim_end
#for $i, $b in enumerate($barcodes)
"$b.id"
"$b.mid"
@@ -173,10 +173,17 @@
-
+
+
+
+
+
+
+
+
diff -r afddfd016ba6 -r 79be0752711d r_wrapper.sh
--- a/r_wrapper.sh Thu May 08 07:50:42 2014 -0400
+++ b/r_wrapper.sh Tue Jul 01 08:27:57 2014 -0400
@@ -10,9 +10,11 @@
ext="${name##*.}"
name="${name%.*}"
prefix=$name"_"
+trim_start=$8
+trim_end=$9
dir="$(cd "$(dirname "$0")" && pwd)"
-for ((i=8;i<=$#;i=i+2))
+for ((i=10;i<=$#;i=i+2))
do
j=$((i+1))
echo -e "${!i}\t${!j}" >> $outDir/barcodes.txt
@@ -22,7 +24,7 @@
echo "$3"
result=`$dir/sff2fastq $input | $dir/fastx_barcode_splitter.pl --bcfile $outDir/barcodes.txt --prefix "$prefix" --suffix ".fastq" --$EOL --mismatches $mismatches --partial $partial`
echo "$result" | tail -n +2 | sed 's/\t/,/g' > output.txt
-echo "$name demultiplexID | Count | FASTQ | FASTA |
" >> $output
+echo "$name demultiplexID | Count | FASTQ | FASTA | Trimmed FASTA |
" >> $output
ls
while IFS=, read barcode count location
do
@@ -33,8 +35,7 @@
fi
file=$name"_"$barcode
cat $file.fastq | awk 'NR%4==1{printf ">%s\n", substr($0,2)}NR%4==2{print}' > $file.fasta
- #cat $file.fastq | perl -e '$i=0;while(< >){if(/^\@/&&$i==0){s/^\@/\>/;print;}elsif($i==1){print;$i=-3}$i++;}' > $file.fasta
- #cat $file.fastq
- echo "$barcode | $count | $file.fastq | $file.fasta |
" >> $output
+ python "$dir/trim.py" --input "$file.fasta" --output "${file}_trimmed.fasta" --start "$trim_start" --end "$trim_end"
+ echo "$barcode | $count | $file.fastq | $file.fasta | ${file}_trimmed.fasta |
" >> $output
done < output.txt
-echo "" >> $output
+echo "Original fasta after trim" >> $output
diff -r afddfd016ba6 -r 79be0752711d trim.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trim.py Tue Jul 01 08:27:57 2014 -0400
@@ -0,0 +1,35 @@
+"""Trim a fixed number of nucleotides from both ends of every FASTA record."""
+import argparse
+import shutil
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Input FASTA file")
+parser.add_argument("--output", help="Output file")
+parser.add_argument("--start", help="How many nucleotides to trim from the start", type=int, default=0)
+parser.add_argument("--end", help="How many nucleotides to trim from the end", type=int, default=0)
+
+args = parser.parse_args()
+start = max(args.start, 0)  # a negative count trims nothing
+# seq[start:-0] would be empty, so "no end trim" must be an open-ended slice.
+stop = -args.end if args.end > 0 else None
+
+if start == 0 and stop is None:  # nothing to trim: pass the file through as-is
+    shutil.copy(args.input, args.output)
+    sys.exit()
+
+def flush(out, header, seq):
+    """Write one record: its header line, then the trimmed one-line sequence."""
+    if header or seq:  # nothing buffered (e.g. empty input): write nothing
+        out.write(header)
+        out.write(seq[start:stop] + "\n")
+
+currentId = currentSeq = ""
+with open(args.input, 'r') as i, open(args.output, 'w') as o:
+    for line in i:
+        if line.startswith(">"):
+            flush(o, currentId, currentSeq)  # a new header closes the previous record
+            currentId, currentSeq = line, ""
+        else:
+            currentSeq += line.rstrip()  # join wrapped sequence lines
+    flush(o, currentId, currentSeq)