annotate script.py @ 8:2e5223259a56 draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:38:36 +0000
parents 2ed60a09d6b6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
1 import os
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
2 import argparse
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
3 from collections import Counter
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
4 from jinja2 import Template
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
5
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
6
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
def _link_into_workdir(source_path, workdir, link_name):
    """Symlink *source_path* as *link_name* inside *workdir*; return the link path."""
    link_path = os.path.join(workdir, link_name)
    os.symlink(source_path, link_path)
    return link_path


def _build_sample(input_spec, workdir, phenotype_counter):
    """Parse one ``--input`` value, link its read files and describe the sample.

    Args:
        input_spec: ``forward:phenotype`` (single end) or
            ``forward:reverse:phenotype`` (paired end).
        workdir: Directory in which the symlinks are created.
        phenotype_counter: ``Counter`` shared across calls; it is incremented
            for the phenotype of this sample so that repeated phenotypes get
            distinct, numbered names.

    Returns:
        A dict with the keys ``forward``, ``description`` and ``phenotype``,
        plus ``reverse`` for paired-end input.

    Raises:
        ValueError: If *input_spec* does not contain one or two ``:``.
        OSError: If a symlink cannot be created (propagated from
            ``os.symlink``).
    """
    fields = input_spec.split(":")
    if len(fields) not in (2, 3):
        raise ValueError(
            "Expected 'forward:phenotype' or 'forward:reverse:phenotype', "
            "got: {spec!r}".format(spec=input_spec)
        )

    read_files, phenotype = fields[:-1], fields[-1]

    # Count the phenotype as a whole. Counter.update(<str>) would count its
    # individual characters instead, leaving the count for any phenotype
    # longer than one character stuck at 0 and making link names collide.
    phenotype_counter[phenotype] += 1
    phenotype_count = phenotype_counter[phenotype]

    description = "{phenotype}_{phenotype_index}".format(
        phenotype=phenotype, phenotype_index=phenotype_count
    )

    sample = {}
    if len(read_files) == 1:  # single end
        sample["forward"] = _link_into_workdir(
            read_files[0], workdir, description + ".fastq.gz"
        )
    else:  # paired end
        sample["forward"] = _link_into_workdir(
            read_files[0], workdir, description + "_R1.fastq.gz"
        )
        sample["reverse"] = _link_into_workdir(
            read_files[1], workdir, description + "_R2.fastq.gz"
        )
    sample["description"] = description
    sample["phenotype"] = phenotype
    return sample


def main():
    """Link the input files into the work directory and render the config.

    Command-line arguments:
        --workdir / -w      directory that receives the symlinks (required)
        --output-dir / -o   value passed to the template as ``output_dir``
                            (required)
        --template / -t     path of the Jinja2 template to render (required)
        --output-conf / -c  path the rendered template is written to
                            (required)
        --input / -i        repeatable; ``forward:phenotype`` or
                            ``forward:reverse:phenotype`` (required)
        --bed / -b          optional BED file, linked as ``bed_file.bed``

    The template is rendered with ``samples`` (list of sample dicts),
    ``output_dir`` and ``bed_file_path`` (``None`` when no BED file is given).

    Raises:
        ValueError: If an ``--input`` value is malformed.
        OSError: If a symlink or file operation fails.
    """
    # --workdir `pwd` --output-dir `pwd`/output --input
    parser = argparse.ArgumentParser()

    parser.add_argument("--workdir", "-w", required=True)
    parser.add_argument("--output-dir", "-o", required=True)
    parser.add_argument("--template", "-t", required=True)
    parser.add_argument("--output-conf", "-c", required=True)
    parser.add_argument("--input", "-i", action="append", required=True)
    parser.add_argument("--bed", "-b", default=None)

    args = parser.parse_args()

    workdir = args.workdir
    bed_file_path = args.bed

    if bed_file_path:
        # The template is given the path of the link, not of the original file.
        bed_file_path = _link_into_workdir(bed_file_path, workdir, "bed_file.bed")

    phenotype_counter = Counter()
    input_files = [
        _build_sample(input_file, workdir, phenotype_counter)
        for input_file in args.input
    ]

    # Read the template before opening the output so that a missing template
    # does not leave an empty config file behind.
    with open(args.template, 'r') as template_file_handle:
        template = Template(template_file_handle.read())

    with open(args.output_conf, 'w') as config_file_handle:
        config_file_handle.write(template.render(
            samples=input_files,
            output_dir=args.output_dir,
            bed_file_path=bed_file_path
        ))
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
97
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
98
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()