annotate yac.py @ 3:222f48abaac4 draft

Deleted selected files
author drosofff
date Sat, 17 May 2014 18:22:46 -0400
parents 35fbec9e4bda
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
1 #!/usr/bin/python
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
2 # yac = yet another clipper
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
3 # v 0.9.1
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
4 # Usage yac.py $input $output $adapter_to_clip $min $max $Nmode
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
5 # Christophe Antoniewski <drosofff@gmail.com>
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
6
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
7 import sys, string
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
8
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
class Clip:
    """Clip an adapter from sequencing reads and write size-selected inserts.

    The input is read as four-line records with the sequence on the second
    line of each record (FASTQ layout). Each sequence is truncated at the
    last occurrence of an adapter motif, and the trimmed reads whose length
    lies within [minsize, maxsize] are written as numbered FASTA records.
    """

    # Bases substituted at each of the first six adapter positions when
    # building the one-mismatch motifs. The order is significant: it fixes the
    # order of ``adaptmotifs`` and therefore which motif wins in scanadapt().
    _SUBSTITUTIONS = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}

    def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
        """Store the job parameters and precompute the adapter motifs.

        inputfile  -- path of the reads file (four lines per record)
        outputfile -- path of the FASTA file to write
        adapter    -- adapter sequence to clip
        minsize    -- smallest trimmed length kept (int or numeric string)
        maxsize    -- largest trimmed length kept (int or numeric string)
        """
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.adaptmotifs = self._motives(self.adapter)

    def _motives(self, sequence):
        """Return the search motifs derived from the start of ``sequence``.

        The first motif is the perfect match on the first 6 nt. It is followed
        by the variants of the first 7 nt that carry exactly one substitution
        within the first six positions.
        """
        sequencevariants = [sequence[0:6]]  # perfect 6-mer comes first
        for index, base in enumerate(sequence[:6]):
            substitutions = self._SUBSTITUTIONS.get(base)
            if substitutions is None:
                # Base outside A/T/G/C (e.g. an ambiguous N in the adapter):
                # try every other letter instead of failing with a KeyError.
                substitutions = "".join(b for b in "ATGCN" if b != base)
            for subst in substitutions:
                sequencevariants.append(
                    sequence[:index] + subst + sequence[index + 1:7])
        return sequencevariants

    def scanadapt(self, adaptmotives=(), sequence=""):
        """Return ``sequence`` truncated at the adapter, if one is found.

        Motifs are tried in the given order; the first motif present in the
        sequence decides the cut, which is made at its last (rightmost)
        occurrence. The sequence is returned unchanged when no motif matches
        or when ``adaptmotives`` is empty.
        """
        for motif in adaptmotives:
            position = sequence.rfind(motif)
            if position != -1:
                return sequence[:position]
        return sequence

    def _clip(self, discard_n):
        """Clip every read and write the size-selected ones as FASTA.

        discard_n -- when true, trimmed reads that still contain an "N" are
                     skipped and do not consume a record number.
        """
        kept = 0
        # Input is opened first so that a missing input file does not leave
        # an empty output file behind; both files are closed even on error.
        with open(self.inputfile, "r") as reads:
            with open(self.outputfile, "w") as fasta:
                for line_number, line in enumerate(reads, 1):
                    if line_number % 4 != 2:
                        continue  # only the second line of a record is sequence
                    trim = self.scanadapt(self.adaptmotifs, line.rstrip())
                    if discard_n and "N" in trim:
                        continue
                    if self.minsize <= len(trim) <= self.maxsize:
                        kept += 1
                        # write() instead of the Python 2-only "print >>"
                        # statement; the trailing newline is now explicit.
                        fasta.write(">%i\n%s\n" % (kept, trim))

    def clip_with_N(self):
        """Clip the reads, keeping trimmed reads that contain N."""
        self._clip(False)

    def clip_without_N(self):
        """Clip the reads, discarding trimmed reads that contain N."""
        self._clip(True)
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
63
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
def __main__(inputfile, outputfile, adapter, minsize, maxsize, Nmode):
    """Build a Clip job and run it in the requested N-handling mode.

    Nmode equal to "accepted" keeps trimmed reads containing N; any other
    value runs the variant that discards them.
    """
    clipper = Clip(inputfile, outputfile, adapter, minsize, maxsize)
    # Pick the bound method first, then invoke it once.
    run = clipper.clip_with_N if Nmode == "accepted" else clipper.clip_without_N
    run()
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
70
35fbec9e4bda Uploaded
drosofff
parents:
diff changeset
if __name__ == "__main__":
    # Command line: yac.py $input $output $adapter_to_clip $min $max $Nmode
    # Fail with a usage message rather than a bare IndexError when arguments
    # are missing; extra trailing arguments are ignored, as before.
    if len(sys.argv) < 7:
        sys.exit("Usage: yac.py $input $output $adapter_to_clip $min $max $Nmode")
    # Named input_path/output_path so the builtin input() is not shadowed.
    input_path = sys.argv[1]
    output_path = sys.argv[2]
    adapter = sys.argv[3]
    minsize = sys.argv[4]
    maxsize = sys.argv[5]
    Nmode = sys.argv[6]
    __main__(input_path, output_path, adapter, minsize, maxsize, Nmode)