annotate runfastq_dump.py @ 0:500ceb70e4bd draft

planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
author charles_s_test
date Sun, 12 Nov 2017 09:12:36 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
1 import sys, os
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
2
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
# SRA accession to fetch, taken from the first command-line argument.
# NOTE: the name shadows the builtin `input`, but it is kept because the
# rest of the script reads this module-level variable.
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit('usage: ' + sys.argv[0] + ' <accession>')
input = sys.argv[1]

# Echo the accession so it shows up in the job's standard output.
print(input)
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
6
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
7 import re, os
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
8
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
def fix_seq_ids(filename):
    '''
    Make sequence ids the same for paired reads.

    Rewrites the file ``filename`` in place. A line is treated as a header
    when it starts with ``@`` or ``+`` AND contains a space. For such lines
    the first space-separated field has a trailing "<any character><digit>"
    pair removed (e.g. ``@SRR1.1.1 desc`` becomes ``@SRR1.1 desc``). Every
    other line is written back unchanged.

    The space requirement applies to both ``@`` and ``+`` lines so that a
    quality line that merely begins with ``@`` (and has no space) is never
    modified.
    '''
    with open(filename, 'r') as handle:
        lines = handle.readlines()

    # Build the full output before reopening the file for writing, so that
    # an error while processing does not leave a truncated file behind.
    fixed = []
    for line in lines:
        if line.startswith(('@', '+')) and ' ' in line:
            fields = line.split(' ')
            # NOTE(review): '.' matches any character here, not only a
            # literal dot; kept as-is to preserve which suffixes get removed.
            fields[0] = re.sub(r'.\d$', '', fields[0])
            line = ' '.join(fields)
        fixed.append(line)

    with open(filename, 'w') as handle:
        handle.writelines(fixed)
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
import subprocess

# Absolute path of the SRA Toolkit fastq-dump binary used by this script.
fastq_dump = '/nfs/sw/apps/sratoolkit/sratoolkit.2.8.0-centos_linux64/bin/fastq-dump'

# Equivalent command line:
# fastq-dump --log-level fatal --split-3 --accession accession_number --ncbi_error_report never
# Arguments are passed as a list (no shell involved) so the accession taken
# from the command line cannot be interpreted as shell syntax.
subprocess.call([
    fastq_dump,
    '--log-level', 'fatal',
    '--split-3',
    '--accession', input,
    '--ncbi_error_report', 'never',
])

# Show the directory entries matching the accession; same pipeline as
# `ls -lh | grep <accession>`, built without a shell.
listing = subprocess.Popen(['ls', '-lh'], stdout=subprocess.PIPE)
subprocess.call(['grep', '-e', input], stdin=listing.stdout)
listing.stdout.close()
listing.wait()

# Sequence-id rewriting is currently disabled:
#fix_seq_ids(input + '_1.fastq')
#fix_seq_ids(input + '_2.fastq')

# Rename the two read files to fixed output names. As before, the exit
# status of each command is ignored (a missing file only produces mv's own
# error message).
subprocess.call(['mv', '-v', '--', input + '_1.fastq', 'R1.fastq'])
subprocess.call(['mv', '-v', '--', input + '_2.fastq', 'R2.fastq'])
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
36
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
37
500ceb70e4bd planemo upload commit d4d558b27a09f684be8311f3cfb659b16545e167-dirty
charles_s_test
parents:
diff changeset
38