comparison fastp_json_to_tabular.py @ 0:dc655c5e77d6 draft

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/fastp_json_to_tabular commit c6ac9f7bd10c58d2b4168cab62b812d4d76283fe"
author public-health-bioinformatics
date Wed, 09 Mar 2022 23:40:04 +0000
parents
children e342bf27e717
comparison
equal deleted inserted replaced
-1:000000000000 0:dc655c5e77d6
1 #!/usr/bin/env python
2
3 import argparse
4 import json
5
def main(args):
    """Print selected fastp summary metrics as a two-line delimited table.

    Reads the fastp JSON report at ``args.fastp_json`` and writes a header
    line followed by a single data line to stdout, both joined with
    ``args.delimiter``. When ``args.sample_id`` is truthy, a leading
    ``sample_id`` column holding that value is added.

    ``read2_mean_length`` and the ``adapter_cutting`` section are treated as
    optional (fastp is understood to omit them for single-end input and when
    adapter trimming is disabled, respectively); a missing optional value is
    reported as ``NA``. Every other metric is required.

    Raises:
        KeyError: if the report lacks the ``summary`` section, one of its
            ``before_filtering``/``after_filtering`` sub-sections, or a
            required metric.
    """
    missing_value = 'NA'
    stages = ('before_filtering', 'after_filtering')
    # Listed in output column order; each metric yields a "before" column
    # immediately followed by its "after" column.
    summary_metrics = (
        'total_reads',
        'total_bases',
        'read1_mean_length',
        'read2_mean_length',
        'q20_bases',
        'q20_rate',
        'q30_bases',
        'q30_rate',
        'gc_content',
    )
    optional_summary_metrics = ('read2_mean_length',)
    adapter_metrics = ('adapter_trimmed_reads', 'adapter_trimmed_bases')

    with open(args.fastp_json, 'r') as f:
        fastp_report = json.load(f)

    output_fields = []
    output_data = []
    if args.sample_id:
        output_fields.append('sample_id')
        output_data.append(args.sample_id)

    # Build header and data together so the two rows cannot drift apart.
    summary = fastp_report['summary']
    for metric in summary_metrics:
        for stage in stages:
            output_fields.append(metric + '_' + stage)
            if metric in optional_summary_metrics:
                output_data.append(summary[stage].get(metric, missing_value))
            else:
                output_data.append(summary[stage][metric])

    adapter_cutting = fastp_report.get('adapter_cutting', {})
    for metric in adapter_metrics:
        output_fields.append(metric)
        output_data.append(adapter_cutting.get(metric, missing_value))

    # Nothing is printed until every value has been resolved, so a malformed
    # report fails without leaving a dangling header line on stdout.
    print(args.delimiter.join(output_fields))
    print(args.delimiter.join(map(str, output_data)))
85
86
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description='Convert summary metrics from a fastp JSON report '
                    'into a single-row delimited table on stdout.'
    )
    parser.add_argument(
        'fastp_json',
        help='path to the fastp JSON report to read',
    )
    parser.add_argument(
        '-s', '--sample-id',
        help='if given, add a leading sample_id column with this value',
    )
    parser.add_argument(
        '-d', '--delimiter', default='\t',
        help='string placed between output fields (default: tab)',
    )
    args = parser.parse_args()
    main(args)