Mercurial > repos > drosofff > yet_another_clipper
comparison YAC/yac.py @ 5:ad813be00215 draft default tip
Uploaded
author | drosofff |
---|---|
date | Sat, 31 May 2014 15:12:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:2f536ef15f49 | 5:ad813be00215 |
---|---|
1 #!/usr/bin/python | |
2 # yac = yet another clipper | |
3 # v 1.0.0 | |
4 # Usage yac.py $input $output $adapter_to_clip $min $max $Nmode | |
5 # Christophe Antoniewski <drosofff@gmail.com> | |
6 | |
7 import sys, string | |
8 | |
class Clip:
    '''Clip an adapter from sequencing reads and write the kept reads as ">index" records.

    The input file is read in groups of four lines and only the second line
    of each group (the sequence line of a four-line read record) is used.
    Each sequence is cut at the last occurrence of an adapter motif, and is
    written to the output file when its clipped length lies between
    minsize and maxsize (both inclusive).
    '''

    # For each canonical base, the characters tried in its place when
    # building the one-mismatch adapter motifs. The order matters: it fixes
    # the order of the motif list, and scanadapt() uses the first motif found.
    _SUBSTITUTIONS = {"A": "TGCN", "T": "AGCN", "G": "TACN", "C": "GATN"}

    def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
        '''Store the file paths and size limits and precompute the adapter motifs.

        minsize and maxsize may be given as strings (e.g. straight from the
        command line); they are converted with int().
        '''
        self.inputfile = inputfile
        self.outputfile = outputfile
        self.adapter = adapter
        self.minsize = int(minsize)
        self.maxsize = int(maxsize)
        self.adaptmotifs = self._motives(self.adapter)

    @classmethod
    def _motives(cls, sequence):
        '''Return the list of motifs used to locate the adapter.

        The first element is the perfect match: the first 6 characters of
        sequence. It is followed by imperfect motifs built from the first
        7 characters, each with exactly one of the first 6 positions
        substituted.
        '''
        variants = [sequence[0:6]]  # perfect 6-mer match comes first
        for index, base in enumerate(sequence[:6]):
            # Characters without an entry in the substitution table (for
            # instance an N in the adapter) yield no variant at this position
            # instead of aborting with a KeyError.
            for subst in cls._SUBSTITUTIONS.get(base, ""):
                variants.append(sequence[:index] + subst + sequence[index + 1:7])
        return variants

    def scanadapt(self, adaptmotives=None, sequence=""):
        '''Return sequence cut just before the last occurrence of an adapter motif.

        Motifs are tried in list order and the first one present in sequence
        decides the cut, so the perfect match (first element) takes priority
        over the imperfect ones. The sequence is returned unchanged when no
        motif is found or when no motifs are supplied.
        '''
        if not adaptmotives:
            return sequence
        for motif in adaptmotives:
            position = sequence.rfind(motif)
            if position != -1:
                return sequence[:position]
        return sequence

    def _clip(self, reject_n):
        '''Clip every read of inputfile and write the kept ones to outputfile.

        When reject_n is true, reads whose clipped sequence contains "N" are
        skipped and do not consume an output index.
        '''
        kept = 0
        # Context managers close both files even if an error interrupts the loop.
        with open(self.inputfile, "r") as reads:
            with open(self.outputfile, "w") as out:
                for linenumber, line in enumerate(reads, 1):
                    # Only the second line of each four-line record is a sequence.
                    if linenumber % 4 != 2:
                        continue
                    trim = self.scanadapt(self.adaptmotifs, line.rstrip())
                    if reject_n and "N" in trim:
                        continue
                    if self.minsize <= len(trim) <= self.maxsize:
                        kept += 1
                        # write() behaves the same on Python 2 and Python 3.
                        out.write(">%i\n%s\n" % (kept, trim))

    def clip_with_N(self):
        '''clips adapter sequences from inputfile.
        Reads containing N are retained.'''
        self._clip(reject_n=False)

    def clip_without_N(self):
        '''clips adapter sequences from inputfile.
        Reads containing N are rejected.'''
        self._clip(reject_n=True)
68 | |
def __main__(inputfile, outputfile, adapter, minsize, maxsize, Nmode):
    '''Build a Clip from the given settings and run it in the requested mode.

    Nmode equal to "accept" keeps clipped reads that contain N; any other
    value rejects them.
    '''
    clipper = Clip(inputfile, outputfile, adapter, minsize, maxsize)
    run = clipper.clip_with_N if Nmode == "accept" else clipper.clip_without_N
    run()
75 | |
if __name__ == "__main__":
    # Command line: yac.py $input $output $adapter_to_clip $min $max $Nmode
    __main__(
        sys.argv[1],  # input file
        sys.argv[2],  # output file
        sys.argv[3],  # adapter to clip
        sys.argv[4],  # minimum clipped size
        sys.argv[5],  # maximum clipped size
        sys.argv[6],  # N mode
    )