annotate script.py @ 1:fd8fe1448616 draft

Uploaded
author dvanzessen
date Mon, 22 Jul 2019 04:35:02 -0400
parents 2ed60a09d6b6
children 2e5223259a56
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
1 import os
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
2 import argparse
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
3 from collections import Counter
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
4 from jinja2 import Template
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
5
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
6
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
def _parse_input_spec(input_spec):
    """Split one ``--input`` value into ``(forward, reverse, phenotype)``.

    Args:
        input_spec: A string of the form ``FORWARD:REVERSE:PHENOTYPE``.

    Returns:
        A 3-tuple of the non-empty, colon-separated fields.

    Raises:
        ValueError: If the value does not consist of exactly three non-empty
            fields.
    """
    fields = input_spec.split(":")
    if len(fields) != 3 or not all(fields):
        raise ValueError(
            "--input must look like FORWARD:REVERSE:PHENOTYPE, "
            "got {!r}".format(input_spec)
        )
    return tuple(fields)


def _link_into_workdir(source_path, workdir, new_name):
    """Symlink *source_path* into *workdir* as *new_name*; return the link path."""
    link_path = os.path.join(workdir, new_name)
    os.symlink(source_path, link_path)
    return link_path


def main(argv=None):
    """Link the input files into the work directory and render the config.

    Example arguments: --workdir `pwd` --output-dir `pwd`/output --input ...

    Every ``--input`` value has the form ``FORWARD:REVERSE:PHENOTYPE``.  The
    two files are symlinked into the work directory as
    ``<phenotype>_<n>_R1.fastq.gz`` and ``<phenotype>_<n>_R2.fastq.gz``, where
    ``n`` is the 1-based occurrence of that phenotype among the inputs.  An
    optional ``--bed`` file is symlinked as ``bed_file.bed``.  The template is
    then rendered with ``samples``, ``output_dir`` and ``bed_file_path`` and
    written to the ``--output-conf`` path.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        ValueError: If an ``--input`` value is malformed.
        OSError: If a symlink cannot be created (for example because the
            link name already exists) or a file cannot be opened.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--workdir", "-w", required=True)
    parser.add_argument("--output-dir", "-o", required=True)
    parser.add_argument("--template", "-t", required=True)
    parser.add_argument("--output-conf", "-c", required=True)
    parser.add_argument("--input", "-i", action="append", required=True)
    parser.add_argument("--bed", "-b", default=None)

    args = parser.parse_args(argv)

    workdir = args.workdir
    output_dir = args.output_dir
    template_file_path = args.template
    output_config_path = args.output_conf
    bed_file_path = args.bed

    # Validate every input up front so a malformed value is reported before
    # anything is created on disk.
    input_specs = [_parse_input_spec(input_file) for input_file in args.input]

    if bed_file_path:
        bed_file_path = _link_into_workdir(bed_file_path, workdir, "bed_file.bed")

    input_files = []
    phenotype_counter = Counter()
    for forward_file, reverse_file, phenotype in input_specs:
        # Increment the key itself: Counter.update(<str>) would iterate the
        # string and count its individual characters instead.
        phenotype_counter[phenotype] += 1
        phenotype_count = phenotype_counter[phenotype]

        description = "{phenotype}_{phenotype_index}".format(
            phenotype=phenotype, phenotype_index=phenotype_count
        )
        forward_new_file_path = _link_into_workdir(
            forward_file, workdir, description + "_R1.fastq.gz"
        )
        reverse_new_file_path = _link_into_workdir(
            reverse_file, workdir, description + "_R2.fastq.gz"
        )

        input_files.append(
            {
                "forward": forward_new_file_path,
                "reverse": reverse_new_file_path,
                "description": description,
                "phenotype": phenotype,
            }
        )

    # Read and render the template before opening the output file, so a
    # missing or broken template never leaves an empty/truncated config behind.
    with open(template_file_path, "r") as template_file_handle:
        template = Template(template_file_handle.read())
    rendered_config = template.render(
        samples=input_files,
        output_dir=output_dir,
        bed_file_path=bed_file_path,
    )
    with open(output_config_path, "w") as config_file_handle:
        config_file_handle.write(rendered_config)
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
77
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
78
2ed60a09d6b6 Uploaded
dvanzessen
parents:
diff changeset
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()