0
|
1 import os
|
|
2 import argparse
|
|
3 from collections import Counter
|
|
4 from jinja2 import Template
|
|
5
|
|
6
|
|
def _link_into_workdir(source_path, workdir, new_name):
    """Symlink *source_path* into *workdir* as *new_name*; return the link path."""
    link_path = os.path.join(workdir, new_name)
    os.symlink(source_path, link_path)
    return link_path


def main():
    """Stage input files in the work directory and render the config template.

    Command-line arguments:
        --workdir / -w      directory in which the symlinks are created
        --output-dir / -o   passed to the template as ``output_dir``
        --template / -t     path of the Jinja2 template to render
        --output-conf / -c  path the rendered configuration is written to
        --input / -i        repeatable; ``FORWARD:PHENOTYPE`` (single end) or
                            ``FORWARD:REVERSE:PHENOTYPE`` (paired end)
        --bed / -b          optional BED file, linked as ``bed_file.bed``

    Every input file is symlinked into the work directory under a name built
    from its phenotype and a per-phenotype running number (starting at 1).
    The template is rendered with ``samples`` (a list of dicts with the keys
    ``forward``, optionally ``reverse``, ``description`` and ``phenotype``),
    ``output_dir`` and ``bed_file_path``.

    Exits through ``parser.error`` when an ``--input`` value is malformed.
    ``os.symlink`` raises ``OSError`` if a link name already exists.
    """
    # Example: --workdir `pwd` --output-dir `pwd`/output --input ...
    parser = argparse.ArgumentParser()

    parser.add_argument("--workdir", "-w", required=True)
    parser.add_argument("--output-dir", "-o", required=True)
    parser.add_argument("--template", "-t", required=True)
    parser.add_argument("--output-conf", "-c", required=True)
    parser.add_argument("--input", "-i", action="append", required=True)
    parser.add_argument("--bed", "-b", default=None)

    args = parser.parse_args()

    workdir = args.workdir
    output_dir = args.output_dir
    template_file_path = args.template
    output_config_path = args.output_conf
    bed_file_path = args.bed

    # Validate every input spec before touching the filesystem, so a typo in
    # a later --input does not leave a half-populated work directory behind.
    parsed_inputs = []
    for input_file in args.input:
        fields = input_file.split(":")
        if len(fields) not in (2, 3) or not all(fields):
            parser.error(
                "invalid --input value {!r}: expected FORWARD:PHENOTYPE "
                "or FORWARD:REVERSE:PHENOTYPE".format(input_file)
            )
        parsed_inputs.append((fields[:-1], fields[-1]))

    if bed_file_path:
        bed_file_path = _link_into_workdir(bed_file_path, workdir, "bed_file.bed")

    input_files = []
    phenotype_counter = Counter()
    for read_files, phenotype in parsed_inputs:
        # Increment the phenotype key itself: Counter.update(<str>) would
        # count the individual characters and leave this key at zero.
        phenotype_counter[phenotype] += 1
        phenotype_count = phenotype_counter[phenotype]
        description = "{phenotype}_{phenotype_count}".format(
            phenotype=phenotype, phenotype_count=phenotype_count
        )

        if len(read_files) == 1:  # single end
            sample = {
                "forward": _link_into_workdir(
                    read_files[0], workdir, description + ".fastq.gz"
                ),
            }
        else:  # paired end
            sample = {
                "forward": _link_into_workdir(
                    read_files[0], workdir, description + "_R1.fastq.gz"
                ),
                "reverse": _link_into_workdir(
                    read_files[1], workdir, description + "_R2.fastq.gz"
                ),
            }
        sample["description"] = description
        sample["phenotype"] = phenotype
        input_files.append(sample)

    # Read and render the template before opening the output file, so a
    # missing or broken template does not leave a truncated config behind.
    with open(template_file_path, 'r') as template_file_handle:
        template = Template(template_file_handle.read())
    rendered_config = template.render(
        samples=input_files,
        output_dir=output_dir,
        bed_file_path=bed_file_path
    )

    with open(output_config_path, 'w') as config_file_handle:
        config_file_handle.write(rendered_config)
|
|
97
|
|
98
|
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|