# HG changeset patch # User bgruening # Date 1619111107 0 # Node ID b2d0e92f81c2c4515c395672fdf60d714e27de20 # Parent 79df147da6334e6b382a1569e24a6297751eacfb "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798" diff -r 79df147da633 -r b2d0e92f81c2 bismark2report_wrapper.py --- a/bismark2report_wrapper.py Fri Oct 04 11:33:01 2019 -0400 +++ b/bismark2report_wrapper.py Thu Apr 22 17:05:07 2021 +0000 @@ -12,30 +12,68 @@ def log_subprocess_output(logger, pipe): - for line in iter(pipe.readline, b''): + for line in iter(pipe.readline, b""): logger.debug(line.decode().rstrip()) def get_arg(): parser = argparse.ArgumentParser() - parser.add_argument('--alignment_report', dest='alignment_report', action='store', metavar='alignment_report', - type=str) - parser.add_argument('--dedup_report', dest='dedup_report', action='store', metavar='dedup_report', type=str) - parser.add_argument('--splitting_report', dest='splitting_report', action='store', metavar='splitting_report', - type=str) - parser.add_argument('--mbias_report', dest='mbias_report', action='store', metavar='mbias_report', type=str) - parser.add_argument('--nucleotide_report', dest='nucleotide_report', action='store', metavar='nucleotide_report', - type=str) - parser.add_argument('--output_html_report', dest='output_html_report', action='store', metavar='output_html_report', - type=str) - parser.add_argument('--log_report', dest='log_report', action='store', metavar='log_report', type=str) + parser.add_argument( + "--alignment_report", + dest="alignment_report", + action="store", + metavar="alignment_report", + type=str, + ) + parser.add_argument( + "--dedup_report", + dest="dedup_report", + action="store", + metavar="dedup_report", + type=str, + ) + parser.add_argument( + "--splitting_report", + dest="splitting_report", + action="store", + metavar="splitting_report", + type=str, + ) + parser.add_argument( + "--mbias_report", + dest="mbias_report", + action="store", + metavar="mbias_report", + type=str, + ) + parser.add_argument( + "--nucleotide_report", + dest="nucleotide_report", + action="store", + metavar="nucleotide_report", + type=str, + ) + parser.add_argument( + "--output_html_report", + dest="output_html_report", + action="store", + metavar="output_html_report", + type=str, + ) + parser.add_argument( + "--log_report", + dest="log_report", + action="store", + metavar="log_report", + type=str, + ) args = parser.parse_args() return args def __main__(): args = get_arg() - logger = logging.getLogger('bismark_deduplicate_wrapper') + logger = logging.getLogger("bismark_deduplicate_wrapper") logger.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) if args.log_report: @@ -47,17 +85,23 @@ ch.setLevel(logging.DEBUG) logger.addHandler(ch) - cmd = ['bismark2report', '--verbose', '--alignment_report', args.alignment_report, - '--output', args.output_html_report] + cmd = [ + "bismark2report", + "--verbose", + "--alignment_report", + args.alignment_report, + "--output", + args.output_html_report, + ] if args.dedup_report: - cmd.extend(['--dedup_report', args.dedup_report]) + cmd.extend(["--dedup_report", args.dedup_report]) if args.splitting_report: - cmd.extend(['--splitting_report', args.splitting_report]) + cmd.extend(["--splitting_report", args.splitting_report]) if args.mbias_report: - cmd.extend(['--mbias_report', args.mbias_report]) + cmd.extend(["--mbias_report", args.mbias_report]) if args.nucleotide_report: - cmd.extend(['--nucleotide_report', args.nucleotide_report]) + cmd.extend(["--nucleotide_report", args.nucleotide_report]) logger.info("Generating report with: '%s'", " ".join(cmd)) process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) @@ -65,7 +109,12 @@ log_subprocess_output(logger, process.stdout) exitcode = process.wait() if exitcode != 0: - stop_err(logger, "Bismark pretty report error (also check the log file if any)!\n%s" % process.stderr) + stop_err( + logger, + "Bismark pretty report error (also check the log file if any)!\n%s" + % process.stderr, + ) -if __name__ == "__main__": __main__() +if __name__ == "__main__": + __main__() diff -r 79df147da633 -r b2d0e92f81c2 bismark_bowtie2_wrapper.xml --- a/bismark_bowtie2_wrapper.xml Fri Oct 04 11:33:01 2019 -0400 +++ b/bismark_bowtie2_wrapper.xml Thu Apr 22 17:05:07 2021 +0000 @@ -315,8 +315,8 @@ label="Write the bismark output and summary information to an extra file"/> - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 79df147da633 -r b2d0e92f81c2 bismark_deduplicate_wrapper.py --- a/bismark_deduplicate_wrapper.py Fri Oct 04 11:33:01 2019 -0400 +++ b/bismark_deduplicate_wrapper.py Thu Apr 22 17:05:07 2021 +0000 @@ -4,11 +4,10 @@ import logging import os import shutil +import signal import subprocess import sys -import signal import tempfile -from glob import glob def stop_err(logger, msg): @@ -24,17 +23,21 @@ def log_subprocess_output(logger, pipe): - for line in iter(pipe.readline, b''): + for line in iter(pipe.readline, b""): logger.debug(line.decode().rstrip()) def get_arg(): parser = argparse.ArgumentParser() - parser.add_argument('--single_or_paired', dest='single_or_paired') - parser.add_argument('--input', dest='input', metavar='input') - parser.add_argument('--output_report', dest='output_report', metavar='output_report') - parser.add_argument('--output_bam', dest='output_bam', metavar='output_report') - parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str) + parser.add_argument("--single_or_paired", dest="single_or_paired") + parser.add_argument("--input", dest="input", metavar="input") + parser.add_argument( + "--output_report", dest="output_report", metavar="output_report" + ) + parser.add_argument("--output_bam", dest="output_bam", metavar="output_report") + parser.add_argument( + "--log_report", dest="log_report", metavar="log_filename", type=str + ) args = parser.parse_args() return args @@ -42,7 +45,7 @@ def __main__(): args = get_arg() - logger = logging.getLogger('bismark_deduplicate_wrapper') + logger = logging.getLogger("bismark_deduplicate_wrapper") logger.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) if args.log_report: @@ -55,27 +58,39 @@ logger.addHandler(ch) # ensure the input has a .bam suffix - tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='') + tmp_dir = tempfile.mkdtemp(prefix="tmp", suffix="") os.chdir(tmp_dir) - default_reads_name = 'submitted_reads.bam' + default_reads_name = "submitted_reads.bam" os.symlink(args.input, default_reads_name) - single_or_paired = '-s' if args.single_or_paired == 'single' else '-p' - cmd = ['deduplicate_bismark', single_or_paired, default_reads_name, '--bam'] + single_or_paired = "-s" if args.single_or_paired == "single" else "-p" + cmd = ["deduplicate_bismark", single_or_paired, default_reads_name, "--bam"] logger.info("Deduplicating with: '%s'", " ".join(cmd)) - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - preexec_fn=restore_sigpipe) + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + preexec_fn=restore_sigpipe, + ) proc_out, proc_err = process.communicate() logger.info(proc_out) if process.returncode != 0: - stop_err(logger, "Bismark deduplication error (also check the log file if any)!\n%s" % proc_err) + stop_err( + logger, + "Bismark deduplication error (also check the log file if any)!\n%s" + % proc_err, + ) - deduplicated_out_name = 'submitted_reads.deduplicated.bam' - deduplicated_report_name = 'submitted_reads.deduplication_report.txt' + deduplicated_out_name = "submitted_reads.deduplicated.bam" + deduplicated_report_name = "submitted_reads.deduplication_report.txt" logger.debug("Moving '%s' to galaxy: '%s'.", deduplicated_out_name, args.output_bam) - shutil.move(deduplicated_out_name, args.output_bam ) - logger.debug("Moving '%s' to galaxy: '%s'.", deduplicated_report_name, args.output_report) - shutil.move('submitted_reads.deduplication_report.txt', args.output_report) + shutil.move(deduplicated_out_name, args.output_bam) + logger.debug( + "Moving '%s' to galaxy: '%s'.", deduplicated_report_name, args.output_report + ) + shutil.move("submitted_reads.deduplication_report.txt", args.output_report) logger.debug("Done.") -if __name__=="__main__": __main__() \ No newline at end of file + +if __name__ == "__main__": + __main__() diff -r 79df147da633 -r b2d0e92f81c2 bismark_methylation_extractor.py --- a/bismark_methylation_extractor.py Fri Oct 04 11:33:01 2019 -0400 +++ b/bismark_methylation_extractor.py Thu Apr 22 17:05:07 2021 +0000 @@ -19,14 +19,14 @@ def log_subprocess_output(logger, pipe): - for line in iter(pipe.readline, b''): + for line in iter(pipe.readline, b""): logger.debug(line.decode().rstrip()) def zipper(dir, zip_file): - output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*') - bedgraph_regex = re.compile('.*bedGraph.gz') - zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) + output_files_regex = re.compile("^(Non_)?C[pH][GH]_.*") + bedgraph_regex = re.compile(".*bedGraph.gz") + zip = zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED) root_len = len(os.path.abspath(dir)) for root, dirs, files in os.walk(dir): archive_root = os.path.abspath(root)[root_len:] @@ -40,46 +40,52 @@ def build_genome_dir(genome_file): - tmp_genome_dir = tempfile.mkdtemp(prefix='tmp') - genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1])) + tmp_genome_dir = tempfile.mkdtemp(prefix="tmp") + genome_path = os.path.join( + tmp_genome_dir, ".".join(os.path.split(genome_file)[1].split(".")[:-1]) + ) try: # Create a hard link pointing to genome_file named 'genome_path'.fa. - os.symlink(genome_file, genome_path + '.fa') + os.symlink(genome_file, genome_path + ".fa") except Exception as e: if os.path.exists(tmp_genome_dir): shutil.rmtree(tmp_genome_dir) - stop_err('Error in linking the reference database!\n%s' % e) + stop_err("Error in linking the reference database!\n%s" % e) return tmp_genome_dir def __main__(): # Parse Command Line - parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') + parser = argparse.ArgumentParser( + description="Wrapper for the bismark methylation caller." + ) # input options - parser.add_argument('--infile', help='Input file in SAM or BAM format.') - parser.add_argument('--single-end', dest='single_end', action="store_true") - parser.add_argument('--paired-end', dest='paired_end', action="store_true") + parser.add_argument("--infile", help="Input file in SAM or BAM format.") + parser.add_argument("--single-end", dest="single_end", action="store_true") + parser.add_argument("--paired-end", dest="paired_end", action="store_true") + + parser.add_argument("--multicore", dest="multicore", type=int, default=1) + parser.add_argument("--splitting_report", dest="splitting_report") + parser.add_argument("--mbias_report", dest="mbias_report") + parser.add_argument("--cytosine_report", dest="cytosine_report") + parser.add_argument("--genome_file", dest="genome_file") + parser.add_argument("--cx_context", action="store_true") - parser.add_argument('--multicore', dest='multicore', type=int, default=1) - parser.add_argument('--splitting_report', dest='splitting_report') - parser.add_argument('--mbias_report', dest='mbias_report') - parser.add_argument('--cytosine_report', dest="cytosine_report") - parser.add_argument('--genome_file', dest="genome_file") - parser.add_argument('--cx_context', action="store_true") - - parser.add_argument('--comprehensive', action="store_true") - parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true") - parser.add_argument('--no-overlap', dest='no_overlap', action="store_true") - parser.add_argument('--compress', dest='compress') - parser.add_argument('--ignore', dest='ignore', type=int) - parser.add_argument('--ignore_r2', dest='ignore_r2', type=int) - parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int) - parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int) - parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str) + parser.add_argument("--comprehensive", action="store_true") + parser.add_argument("--merge-non-cpg", dest="merge_non_cpg", action="store_true") + parser.add_argument("--no-overlap", dest="no_overlap", action="store_true") + parser.add_argument("--compress", dest="compress") + parser.add_argument("--ignore", dest="ignore", type=int) + parser.add_argument("--ignore_r2", dest="ignore_r2", type=int) + parser.add_argument("--ignore_3prime", dest="ignore_3prime", type=int) + parser.add_argument("--ignore_3prime_r2", dest="ignore_3prime_r2", type=int) + parser.add_argument( + "--log_report", dest="log_report", metavar="log_filename", type=str + ) args = parser.parse_args() - logger = logging.getLogger('bismark_methylation_extractor_wrapper') + logger = logging.getLogger("bismark_methylation_extractor_wrapper") logger.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) if args.log_report: @@ -93,52 +99,68 @@ # Build methylation extractor command output_dir = tempfile.mkdtemp() - cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir] + cmd = ["bismark_methylation_extractor", "--no_header", "-o", output_dir] # Set up all options if args.multicore > 3: # divide multicore by 3 here since bismark will spawn ~3 jobs. - cmd.extend(['--multicore', str(int(math.floor(args.multicore / 3)))]) + cmd.extend(["--multicore", str(int(math.floor(args.multicore / 3)))]) if args.single_end: - cmd.append('--single-end') + cmd.append("--single-end") else: - cmd.append('--paired-end') + cmd.append("--paired-end") if args.no_overlap: - cmd.append('--no_overlap') + cmd.append("--no_overlap") if args.ignore: - cmd.extend(['--ignore', str(args.ignore)]) + cmd.extend(["--ignore", str(args.ignore)]) if args.ignore_r2: - cmd.extend(['--ignore_r2', str(args.ignore_r2)]) + cmd.extend(["--ignore_r2", str(args.ignore_r2)]) if args.ignore_3prime: - cmd.extend(['--ignore_3prime', str(args.ignore_3prime)]) + cmd.extend(["--ignore_3prime", str(args.ignore_3prime)]) if args.ignore_3prime_r2: - cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)]) + cmd.extend(["--ignore_3prime_r2", str(args.ignore_3prime_r2)]) if args.comprehensive: - cmd.append('--comprehensive') + cmd.append("--comprehensive") if args.merge_non_cpg: - cmd.append('--merge_non_CpG') + cmd.append("--merge_non_CpG") if args.splitting_report: - cmd.append('--report') + cmd.append("--report") tmp_genome_dir = None if args.cytosine_report: tmp_genome_dir = build_genome_dir(args.genome_file) if args.cx_context: cmd.extend( - ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir]) + [ + "--bedGraph", + "--CX_context", + "--cytosine_report", + "--CX_context", + "--genome_folder", + tmp_genome_dir, + ] + ) else: - cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir]) + cmd.extend( + ["--bedGraph", "--cytosine_report", "--genome_folder", tmp_genome_dir] + ) cmd.append(args.infile) # Run logger.info("Methylation extractor run with: '%s'", " ".join(cmd)) prev_dir = os.getcwd() - os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found + os.chdir( + output_dir + ) # needed due to a bug in bismark where the coverage file cannot be found process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) with process.stdout: log_subprocess_output(logger, process.stdout) exitcode = process.wait() if exitcode != 0: - stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr) + stop_err( + logger, + "Bismark methylation extractor error (also check the log file if any)!\n%s" + % process.stderr, + ) logger.info("Finished methylation extractor.") # collect and copy output files logger.debug("Zip output files to '%s'.", args.compress) @@ -149,16 +171,25 @@ if args.cytosine_report: logger.debug("Collecting cytosine report.") if args.cx_context: - shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report) + shutil.move( + glob(os.path.join(output_dir, "*CX_report.txt"))[0], + args.cytosine_report, + ) else: - shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report) + shutil.move( + glob(os.path.join(output_dir, "*CpG_report.txt"))[0], + args.cytosine_report, + ) # splitting report if args.splitting_report: logger.debug("Collecting splitting report.") - shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report) + shutil.move( + glob(os.path.join(output_dir, "*_splitting_report.txt"))[0], + args.splitting_report, + ) if args.mbias_report: logger.debug("Collecting M-Bias file.") - shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report) + shutil.move(glob(os.path.join(output_dir, "*M-bias.txt"))[0], args.mbias_report) # Clean up temp dirs logger.debug("Cleanup temp dirs.") @@ -168,4 +199,6 @@ shutil.rmtree(tmp_genome_dir) logger.info("Done.") -if __name__ == "__main__": __main__() + +if __name__ == "__main__": + __main__() diff -r 79df147da633 -r b2d0e92f81c2 bismark_wrapper.py --- a/bismark_wrapper.py Fri Oct 04 11:33:01 2019 -0400 +++ b/bismark_wrapper.py Thu Apr 22 17:05:07 2021 +0000 @@ -18,79 +18,137 @@ def log_subprocess_output(logger, pipe): - for line in iter(pipe.readline, b''): + for line in iter(pipe.readline, b""): logger.debug(line.decode().rstrip()) def __main__(): # Parse Command Line - parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.') - parser.add_argument('-p', '--num-threads', dest='num_threads', - type=int, default=4, help='Use this many threads to align reads. The default is 4.') + parser = argparse.ArgumentParser( + description="Wrapper for the bismark bisulfite mapper." + ) + parser.add_argument( + "-p", + "--num-threads", + dest="num_threads", + type=int, + default=4, + help="Use this many threads to align reads. The default is 4.", + ) # input options - parser.add_argument('--own-file', dest='own_file', help='') - parser.add_argument('-D', '--indexes-path', dest='index_path', - help='Indexes directory; location of .ebwt and .fa files.') - parser.add_argument('-O', '--output', dest='output') + parser.add_argument("--own-file", dest="own_file", help="") + parser.add_argument( + "-D", + "--indexes-path", + dest="index_path", + help="Indexes directory; location of .ebwt and .fa files.", + ) + parser.add_argument("-O", "--output", dest="output") - parser.add_argument('--output-report-file', dest='output_report_file') - parser.add_argument('--suppress-header', dest='suppress_header', action="store_true") + parser.add_argument("--output-report-file", dest="output_report_file") + parser.add_argument( + "--suppress-header", dest="suppress_header", action="store_true" + ) - parser.add_argument('--mate-paired', dest='mate_paired', action='store_true', help='Reads are mate-paired', - default=False) + parser.add_argument( + "--mate-paired", + dest="mate_paired", + action="store_true", + help="Reads are mate-paired", + default=False, + ) - parser.add_argument('-1', '--mate1', dest='mate1', - help='The forward reads file in Sanger FASTQ or FASTA format.') - parser.add_argument('-2', '--mate2', dest='mate2', - help='The reverse reads file in Sanger FASTQ or FASTA format.') - parser.add_argument('--sort-bam', dest='sort_bam', action="store_true") + parser.add_argument( + "-1", + "--mate1", + dest="mate1", + help="The forward reads file in Sanger FASTQ or FASTA format.", + ) + parser.add_argument( + "-2", + "--mate2", + dest="mate2", + help="The reverse reads file in Sanger FASTQ or FASTA format.", + ) + parser.add_argument("--sort-bam", dest="sort_bam", action="store_true") - parser.add_argument('--output-unmapped-reads', dest='output_unmapped_reads', - help='Additional output file with unmapped reads (single-end).') - parser.add_argument('--output-unmapped-reads-l', dest='output_unmapped_reads_l', - help='File name for unmapped reads (left, paired-end).') - parser.add_argument('--output-unmapped-reads-r', dest='output_unmapped_reads_r', - help='File name for unmapped reads (right, paired-end).') + parser.add_argument( + "--output-unmapped-reads", + dest="output_unmapped_reads", + help="Additional output file with unmapped reads (single-end).", + ) + parser.add_argument( + "--output-unmapped-reads-l", + dest="output_unmapped_reads_l", + help="File name for unmapped reads (left, paired-end).", + ) + parser.add_argument( + "--output-unmapped-reads-r", + dest="output_unmapped_reads_r", + help="File name for unmapped reads (right, paired-end).", + ) - parser.add_argument('--output-suppressed-reads', dest='output_suppressed_reads', - help='Additional output file with suppressed reads (single-end).') - parser.add_argument('--output-suppressed-reads-l', dest='output_suppressed_reads_l', - help='File name for suppressed reads (left, paired-end).') - parser.add_argument('--output-suppressed-reads-r', dest='output_suppressed_reads_r', - help='File name for suppressed reads (right, paired-end).') - parser.add_argument('--stdout', dest='output_stdout', - help='File name for the standard output of bismark.') + parser.add_argument( + "--output-suppressed-reads", + dest="output_suppressed_reads", + help="Additional output file with suppressed reads (single-end).", + ) + parser.add_argument( + "--output-suppressed-reads-l", + dest="output_suppressed_reads_l", + help="File name for suppressed reads (left, paired-end).", + ) + parser.add_argument( + "--output-suppressed-reads-r", + dest="output_suppressed_reads_r", + help="File name for suppressed reads (right, paired-end).", + ) + parser.add_argument( + "--stdout", + dest="output_stdout", + help="File name for the standard output of bismark.", + ) - parser.add_argument('--single-paired', dest='single_paired', - help='The single-end reads file in Sanger FASTQ or FASTA format.') + parser.add_argument( + "--single-paired", + dest="single_paired", + help="The single-end reads file in Sanger FASTQ or FASTA format.", + ) - parser.add_argument('--fastq', action='store_true', help='Query filetype is in FASTQ format') - parser.add_argument('--fasta', action='store_true', help='Query filetype is in FASTA format') - parser.add_argument('--phred64-quals', dest='phred64', action="store_true") - parser.add_argument('--non-directional', dest='non_directional', action="store_true") - parser.add_argument('--pbat', dest='pbat', action="store_true") + parser.add_argument( + "--fastq", action="store_true", help="Query filetype is in FASTQ format" + ) + parser.add_argument( + "--fasta", action="store_true", help="Query filetype is in FASTA format" + ) + parser.add_argument("--phred64-quals", dest="phred64", action="store_true") + parser.add_argument( + "--non_directional", dest="non_directional", action="store_true" + ) + parser.add_argument("--pbat", dest="pbat", action="store_true") - parser.add_argument('--skip-reads', dest='skip_reads', type=int) - parser.add_argument('--score-min', dest='score_min', type=str) - parser.add_argument('--qupto', type=int) + parser.add_argument("--skip-reads", dest="skip_reads", type=int) + parser.add_argument("--score-min", dest="score_min", type=str) + parser.add_argument("--qupto", type=int) # paired end options - parser.add_argument('-I', '--minins', dest='min_insert') - parser.add_argument('-X', '--maxins', dest='max_insert') - parser.add_argument('--no-mixed', dest='no_mixed', action="store_true") - parser.add_argument('--no-discordant', dest='no_discordant', action="store_true") + parser.add_argument("-I", "--minins", dest="min_insert") + parser.add_argument("-X", "--maxins", dest="max_insert") + parser.add_argument("--no-mixed", dest="no_mixed", action="store_true") + parser.add_argument("--no-discordant", dest="no_discordant", action="store_true") # parse general options # default 20 - parser.add_argument('--seed-len', dest='seed_len', type=int) + parser.add_argument("--seed-len", dest="seed_len", type=int) # default 15 - parser.add_argument('--seed-extention-attempts', dest='seed_extention_attempts', type=int) + parser.add_argument( + "--seed-extention-attempts", dest="seed_extention_attempts", type=int + ) # default 0 - parser.add_argument('--seed-mismatches', dest='seed_mismatches', type=int) + parser.add_argument("--seed-mismatches", dest="seed_mismatches", type=int) # default 2 - parser.add_argument('--max-reseed', dest='max_reseed', type=int) - + parser.add_argument("--max-reseed", dest="max_reseed", type=int) """ The number of megabytes of memory a given thread is given to store path @@ -101,11 +159,11 @@ saying that chunk memory has been exhausted in --best mode, try adjusting this parameter up to dedicate more memory to the descriptors. Default: 512. """ - parser.add_argument('--chunkmbs', type=int, default=512) + parser.add_argument("--chunkmbs", type=int, default=512) args = parser.parse_args() - logger = logging.getLogger('bismark_wrapper') + logger = logging.getLogger("bismark_wrapper") logger.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) if args.output_stdout: @@ -121,22 +179,28 @@ index_dir = "" tmp_index_dir = None if args.own_file: - logger.info("Create a temporary index with the offered files from the user. " - "Utilizing the script: bismark_genome_preparation") + logger.info( + "Create a temporary index with the offered files from the user. " + "Utilizing the script: bismark_genome_preparation" + ) tmp_index_dir = tempfile.mkdtemp() - index_path = os.path.join(tmp_index_dir, '.'.join(os.path.split(args.own_file)[1].split('.')[:-1])) + index_path = os.path.join( + tmp_index_dir, ".".join(os.path.split(args.own_file)[1].split(".")[:-1]) + ) try: # Create a hard link pointing to args.own_file named 'index_path'.fa. - os.symlink(args.own_file, index_path + '.fa') + os.symlink(args.own_file, index_path + ".fa") except Exception as e: if os.path.exists(tmp_index_dir): shutil.rmtree(tmp_index_dir) - stop_err(logger, 'Error in linking the reference database!\n%s' % e) + stop_err(logger, "Error in linking the reference database!\n%s" % e) # bismark_genome_preparation needs the complete path to the folder in which the database is stored - cmd_index = ['bismark_genome_preparation', '--bowtie2', tmp_index_dir] + cmd_index = ["bismark_genome_preparation", "--bowtie2", tmp_index_dir] try: logger.info("Generating index with: '%s'", " ".join(cmd_index)) - process = subprocess.Popen(cmd_index, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + process = subprocess.Popen( + cmd_index, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) with process.stdout: log_subprocess_output(logger, process.stdout) exitcode = process.wait() @@ -145,7 +209,7 @@ except Exception as e: if os.path.exists(tmp_index_dir): shutil.rmtree(tmp_index_dir) - stop_err(logger, 'Error indexing reference sequence!\n%s' % e) + stop_err(logger, "Error indexing reference sequence!\n%s" % e) index_dir = tmp_index_dir else: # bowtie path is the path to the index directory and the first path of the index file name @@ -153,57 +217,80 @@ # Build bismark command tmp_bismark_dir = tempfile.mkdtemp() - output_dir = os.path.join(tmp_bismark_dir, 'results') - cmd = ['bismark', '--bam', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet'] + output_dir = os.path.join(tmp_bismark_dir, "results") + cmd = [ + "bismark", + "--bam", + "--temp_dir", + tmp_bismark_dir, + "-o", + output_dir, + "--quiet", + ] if not args.pbat: - cmd.append('--gzip') + cmd.append("--gzip") if args.fasta: # the query input files (specified as mate1,mate2 or singles) are FastA - cmd.append('--fasta') + cmd.append("--fasta") elif args.fastq: - cmd.append('--fastq') + cmd.append("--fastq") # alignment options if args.num_threads > 2: # divide num_threads by 2 here since bismark will spawn 2 jobs with -p threads each - cmd.extend(['-p', str(int(math.floor(args.num_threads / 2)))]) + cmd.extend(["-p", str(int(math.floor(args.num_threads / 2)))]) if args.seed_mismatches: - cmd.extend(['-N', str(args.seed_mismatches)]) + cmd.extend(["-N", str(args.seed_mismatches)]) if args.seed_len: - cmd.extend(['-L', str(args.seed_len)]) + cmd.extend(["-L", str(args.seed_len)]) if args.seed_extention_attempts: - cmd.extend(['-D', str(args.seed_extention_attempts)]) + cmd.extend(["-D", str(args.seed_extention_attempts)]) if args.max_reseed: - cmd.extend(['-R', str(args.max_reseed)]) + cmd.extend(["-R", str(args.max_reseed)]) if args.no_discordant: - cmd.append('--no-discordant') + cmd.append("--no-discordant") if args.no_mixed: - cmd.append('--no-mixed') + cmd.append("--no-mixed") if args.skip_reads: - cmd.extend(['--skip', str(args.skip_reads)]) + cmd.extend(["--skip", str(args.skip_reads)]) if args.score_min: - cmd.extend(['--score_min', str(args.score_min)]) + cmd.extend(["--score_min", str(args.score_min)]) if args.qupto: - cmd.extend(['--upto', 'args.qupto']) + cmd.extend(["--upto", "args.qupto"]) if args.phred64: - cmd.append('--phred64-quals') + cmd.append("--phred64-quals") if args.non_directional: - cmd.append('--non-directional') + cmd.append("--non_directional") if args.pbat: - cmd.append('--pbat') + cmd.append("--pbat") if args.suppress_header: - cmd.append('--sam-no-hd') - if args.output_unmapped_reads or (args.output_unmapped_reads_l and args.output_unmapped_reads_r): - cmd.append('--un') - if args.output_suppressed_reads or (args.output_suppressed_reads_l and args.output_suppressed_reads_r): - cmd.append('--ambiguous') + cmd.append("--sam-no-hd") + if args.output_unmapped_reads or ( + args.output_unmapped_reads_l and args.output_unmapped_reads_r + ): + cmd.append("--un") + if args.output_suppressed_reads or ( + args.output_suppressed_reads_l and args.output_suppressed_reads_r + ): + cmd.append("--ambiguous") cmd.append(index_dir) # Set up the reads if args.mate_paired: # paired-end reads library - cmd.extend(['-1', args.mate1, '-2', args.mate2, '-I', str(args.min_insert), '-X', str(args.max_insert)]) + cmd.extend( + [ + "-1", + args.mate1, + "-2", + args.mate2, + "-I", + str(args.min_insert), + "-X", + str(args.max_insert), + ] + ) else: # single paired reads library cmd.append(args.single_paired) @@ -211,51 +298,81 @@ # Run try: logger.info("Running bismark with: '%s'", " ".join(cmd)) - process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + process = subprocess.Popen( + args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) with process.stdout: log_subprocess_output(logger, process.stdout) exitcode = process.wait() if exitcode != 0: raise Exception(process.stderr) except Exception as e: - stop_err(logger, 'Error in running bismark!\n%s' % e) + stop_err(logger, "Error in running bismark!\n%s" % e) # collect and copy output files if args.output_report_file: - output_report_file = open(args.output_report_file, 'w+') - for line in fileinput.input(glob(os.path.join(output_dir, '*report.txt'))): + output_report_file = open(args.output_report_file, "w+") + for line in fileinput.input(glob(os.path.join(output_dir, "*report.txt"))): output_report_file.write(line) output_report_file.close() if args.output_suppressed_reads: - if glob(os.path.join(output_dir, '*ambiguous_reads.*')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.*'))[0], args.output_suppressed_reads) + if glob(os.path.join(output_dir, "*ambiguous_reads.*")): + shutil.move( + glob(os.path.join(output_dir, "*ambiguous_reads.*"))[0], + args.output_suppressed_reads, + ) if args.output_suppressed_reads_l: - if glob(os.path.join(output_dir, '*ambiguous_reads_1.*')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.*'))[0], args.output_suppressed_reads_l) + if glob(os.path.join(output_dir, "*ambiguous_reads_1.*")): + shutil.move( + glob(os.path.join(output_dir, "*ambiguous_reads_1.*"))[0], + args.output_suppressed_reads_l, + ) if args.output_suppressed_reads_r: - if glob(os.path.join(output_dir, '*ambiguous_reads_2.*')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.*'))[0], args.output_suppressed_reads_r) + if glob(os.path.join(output_dir, "*ambiguous_reads_2.*")): + shutil.move( + glob(os.path.join(output_dir, "*ambiguous_reads_2.*"))[0], + args.output_suppressed_reads_r, + ) if args.output_unmapped_reads: - if glob(os.path.join(output_dir, '*unmapped_reads.*')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.*'))[0], args.output_unmapped_reads) + if glob(os.path.join(output_dir, "*unmapped_reads.*")): + shutil.move( + glob(os.path.join(output_dir, "*unmapped_reads.*"))[0], + args.output_unmapped_reads, + ) if args.output_unmapped_reads_l: - if glob(os.path.join(output_dir, '*unmapped_reads_1.*')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.*'))[0], args.output_unmapped_reads_l) + if glob(os.path.join(output_dir, "*unmapped_reads_1.*")): + shutil.move( + glob(os.path.join(output_dir, "*unmapped_reads_1.*"))[0], + args.output_unmapped_reads_l, + ) if args.output_unmapped_reads_r: - if glob(os.path.join(output_dir, '*unmapped_reads_2.*')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.*'))[0], args.output_unmapped_reads_r) + if glob(os.path.join(output_dir, "*unmapped_reads_2.*")): + shutil.move( + glob(os.path.join(output_dir, "*unmapped_reads_2.*"))[0], + args.output_unmapped_reads_r, + ) try: # merge all bam files tmp_res = tempfile.NamedTemporaryFile(dir=output_dir).name - bam_files = glob(os.path.join(output_dir, '*.bam')) + bam_files = glob(os.path.join(output_dir, "*.bam")) if len(bam_files) > 1: - cmd = ['samtools', 'merge', '-@', str(args.num_threads), '-f', tmp_res, ' '.join(bam_files)] + cmd = [ + "samtools", + "merge", + "-@", + str(args.num_threads), + "-f", + tmp_res + ] + [cmd.append(x) for x in bam_files] logger.info("Merging bams with: '%s'", cmd) - process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + process = subprocess.Popen( + args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) with process.stdout: log_subprocess_output(logger, process.stdout) exitcode = process.wait() @@ -268,22 +385,31 @@ if os.path.exists(bam_path): if args.sort_bam: - cmd = ['samtools', 'sort', '-@', str(args.num_threads), bam_path, 'sorted_bam'] + cmd = [ + "samtools", + "sort", + "-@", + str(args.num_threads), + bam_path, + "sorted_bam", + ] logger.info("Sorting bam with: '%s'", cmd) - process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + process = subprocess.Popen( + args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) with process.stdout: log_subprocess_output(logger, process.stdout) exitcode = process.wait() if exitcode != 0: raise Exception(process.stderr) - shutil.move('sorted_bam.bam', args.output) + shutil.move("sorted_bam.bam", args.output) else: shutil.move(bam_path, args.output) else: - stop_err(logger, 'BAM file no found:\n%s' % bam_path) + stop_err(logger, "BAM file no found:\n%s" % bam_path) except Exception as e: - stop_err(logger, 'Error in merging bam files!\n%s' % e) + stop_err(logger, "Error in merging bam files!\n%s" % e) # Clean up temp dirs if tmp_index_dir and os.path.exists(tmp_index_dir): @@ -294,4 +420,5 @@ shutil.rmtree(output_dir) -if __name__ == "__main__": __main__() +if __name__ == "__main__": + __main__() diff -r 79df147da633 -r b2d0e92f81c2 test-data/input2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input2.fq Thu Apr 22 17:05:07 2021 +0000 @@ -0,0 +1,4000 @@ +@1_1 +TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA ++ +AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE +@1_2 +TAATTTTGAATTTTGGTTGGGTAGTTTGTTTTAGAATTATTATGGTTATATAGTTTTTTAGTAAGATTGTTATTTATTATTTGAT ++ +AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEET: 146875 +G->A: 150504 + +Step II - Genome bisulfite conversions - completed + + +Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer +Please be aware that this process can - depending on genome size - take several hours! +Settings: + Output files: "BS_CT.*.bt2" + Line rate: 6 (line is 64 bytes) + Lines per side: 1 (side is 64 bytes) + Offset rate: 4 (one in 16) + FTable chars: 10 + Strings: unpacked + Max bucket size: default + Max bucket size, sqrt multiplier: default + Max bucket size, len divisor: 4 + Difference-cover sample period: 1024 + Endianness: little + Actual local endianness: little + Sanity checking: disabled + Assertions: disabled + Random seed: 0 + Sizeofs: void*:8, int:4, long:8, size_t:8 +Input files DNA, FASTA: + genome_mfa.CT_conversion.fa +Building a SMALL index +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: 00:00:00 +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 235897 +fchr[G]: 235897 +fchr[T]: 386401 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_CT.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_CT.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 0 +Total time for call to driver() for forward index: 00:00:00 +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 + Time to reverse reference sequence: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: 00:00:00 +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 235897 +fchr[G]: 235897 +fchr[T]: 386401 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_CT.rev.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_CT.rev.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 1 +Total time for backward call to driver() for mirror index: 00:00:01 +Settings: + Output files: "BS_GA.*.bt2" + Line rate: 6 (line is 64 bytes) + Lines per side: 1 (side is 64 bytes) + Offset rate: 4 (one in 16) + FTable chars: 10 + Strings: unpacked + Max bucket size: default + Max bucket size, sqrt multiplier: default + Max bucket size, len divisor: 4 + Difference-cover sample period: 1024 + Endianness: little + Actual local endianness: little + Sanity checking: disabled + Assertions: disabled + Random seed: 0 + Sizeofs: void*:8, int:4, long:8, size_t:8 +Input files DNA, FASTA: + genome_mfa.GA_conversion.fa +Building a SMALL index +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: 00:00:00 +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 386401 +fchr[G]: 533276 +fchr[T]: 533276 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_GA.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_GA.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 0 +Total time for call to driver() for forward index: 00:00:00 +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 + Time to reverse reference sequence: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: 00:00:00 +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 386401 +fchr[G]: 533276 +fchr[T]: 533276 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_GA.rev.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_GA.rev.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 1 +Total time for backward call to driver() for mirror index: 00:00:01 +Running bismark with: 'bismark --bam --temp_dir /tmp/tmp0n2gudqm -o /tmp/tmp0n2gudqm/results --quiet --gzip --fastq -L 20 -D 15 -R 2 --un --ambiguous /tmp/tmpfw7btd9x -1 input1.fq_1.fq,input2.fq_1.fq -2 input1.fq_2.fq,input2.fq_2.fq -I 0 -X 500' +Bowtie 2 seems to be working fine (tested command 'bowtie2 --version' [2.3.5]) +Output format is BAM (default) +Alignments will be written out in BAM format. Samtools found here: '/usr/local/bin/samtools' +Reference genome folder provided is /tmp/tmpfw7btd9x/ (absolute path is '/tmp/tmpfw7btd9x/)' +FastQ format specified + +Input files to be analysed (in current folder '/tmp/tmpfl_1r4y7/job_working_directory/000/17/working'): +input1.fq_1.fq +input1.fq_2.fq +input2.fq_1.fq +input2.fq_2.fq +Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!) +Created output directory /tmp/tmp0n2gudqm/results/! + +Output will be written into the directory: /tmp/tmp0n2gudqm/results/ + +Using temp directory: /tmp/tmp0n2gudqm +Temporary files will be written into the directory: /tmp/tmp0n2gudqm/ +Setting parallelization to single-threaded (default) + +Summary of all aligner options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet +Current working directory is: /tmp/tmpfl_1r4y7/job_working_directory/000/17/working + +Now reading in and storing sequence information of the genome specified in: /tmp/tmpfw7btd9x/ + +chr chrY_JH584300_random (182347 bp) +chr chrY_JH584301_random (259875 bp) +chr chrY_JH584302_random (155838 bp) +chr chrY_JH584303_random (158099 bp) + +Single-core mode: setting pid to 1 + +Paired-end alignments will be performed +======================================= + +The provided filenames for paired-end alignments are input1.fq_1.fq and input1.fq_2.fq +Input files are in FastQ format +Writing a C -> T converted version of the input file input1.fq_1.fq to /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz + +Created C -> T converted version of the FastQ file input1.fq_1.fq (1000 sequences in total) + +Writing a G -> A converted version of the input file input1.fq_2.fq to /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz + +Created G -> A converted version of the FastQ file input1.fq_2.fq (1000 sequences in total) + +Input files are input1.fq_1.fq_C_to_T.fastq.gz and input1.fq_2.fq_G_to_A.fastq.gz (FastQ) +Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpfw7btd9x/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet + +Now starting a Bowtie 2 paired-end alignment for CTread1GAread2CTgenome (reading in sequences from /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --norc)) +Found first alignment: +1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP +1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP +Now starting a Bowtie 2 paired-end alignment for CTread1GAread2GAgenome (reading in sequences from /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --nofw)) +Found first alignment: +1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP +1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP + +>>> Writing bisulfite mapping results to input1.fq_1_bismark_bt2_pe.bam <<< + +Unmapped sequences will be written to input1.fq_1.fq_unmapped_reads_1.fq.gz and input1.fq_2.fq_unmapped_reads_2.fq.gz +Ambiguously mapping sequences will be written to input1.fq_1.fq_ambiguous_reads_1.fq.gz and input1.fq_2.fq_ambiguous_reads_2.fq.gz + +Reading in the sequence files input1.fq_1.fq and input1.fq_2.fq +Processed 1000 sequences in total + + +Successfully deleted the temporary files /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz + +Final Alignment report +====================== +Sequence pairs analysed in total: 1000 +Number of paired-end alignments with a unique best hit: 0 +Mapping efficiency: 0.0% + +Sequence pairs with no alignments under any condition: 1000 +Sequence pairs did not map uniquely: 0 +Sequence pairs which were discarded because genomic sequence could not be extracted: 0 + +Number of sequence pairs with unique best (first) alignment came from the bowtie output: +CT/GA/CT: 0 ((converted) top strand) +GA/CT/CT: 0 (complementary to (converted) top strand) +GA/CT/GA: 0 (complementary to (converted) bottom strand) +CT/GA/GA: 0 ((converted) bottom strand) + +Number of alignments to (merely theoretical) complementary strands being rejected in total: 0 + +Final Cytosine Methylation Report +================================= +Total number of C's analysed: 0 + +Total methylated C's in CpG context: 0 +Total methylated C's in CHG context: 0 +Total methylated C's in CHH context: 0 +Total methylated C's in Unknown context: 0 + +Total unmethylated C's in CpG context: 0 +Total unmethylated C's in CHG context: 0 +Total unmethylated C's in CHH context: 0 +Total unmethylated C's in Unknown context: 0 + +Can't determine percentage of methylated Cs in CpG context if value was 0 +Can't determine percentage of methylated Cs in CHG context if value was 0 +Can't determine percentage of methylated Cs in CHH context if value was 0 +Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0 + + +Bismark completed in 0d 0h 0m 6s + +==================== +Bismark run complete +==================== + +Single-core mode: setting pid to 1 + +Paired-end alignments will be performed +======================================= + +The provided filenames for paired-end alignments are input2.fq_1.fq and input2.fq_2.fq +Input files are in FastQ format +Writing a C -> T converted version of the input file input2.fq_1.fq to /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz + +Created C -> T converted version of the FastQ file input2.fq_1.fq (1000 sequences in total) + +Writing a G -> A converted version of the input file input2.fq_2.fq to /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz + +Created G -> A converted version of the FastQ file input2.fq_2.fq (1000 sequences in total) + +Input files are input2.fq_1.fq_C_to_T.fastq.gz and input2.fq_2.fq_G_to_A.fastq.gz (FastQ) +Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpfw7btd9x/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet + +Now starting a Bowtie 2 paired-end alignment for CTread1GAread2CTgenome (reading in sequences from /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --norc)) +Found first alignment: +1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP +1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP +Now starting a Bowtie 2 paired-end alignment for CTread1GAread2GAgenome (reading in sequences from /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --nofw)) +Found first alignment: +1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP +1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP + +>>> Writing bisulfite mapping results to input2.fq_1_bismark_bt2_pe.bam <<< + +Unmapped sequences will be written to input2.fq_1.fq_unmapped_reads_1.fq.gz and input2.fq_2.fq_unmapped_reads_2.fq.gz +Ambiguously mapping sequences will be written to input2.fq_1.fq_ambiguous_reads_1.fq.gz and input2.fq_2.fq_ambiguous_reads_2.fq.gz + +Reading in the sequence files input2.fq_1.fq and input2.fq_2.fq +Processed 1000 sequences in total + + +Successfully deleted the temporary files /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz + +Final Alignment report +====================== +Sequence pairs analysed in total: 1000 +Number of paired-end alignments with a unique best hit: 0 +Mapping efficiency: 0.0% + +Sequence pairs with no alignments under any condition: 1000 +Sequence pairs did not map uniquely: 0 +Sequence pairs which were discarded because genomic sequence could not be extracted: 0 + +Number of sequence pairs with unique best (first) alignment came from the bowtie output: +CT/GA/CT: 0 ((converted) top strand) +GA/CT/CT: 0 (complementary to (converted) top strand) +GA/CT/GA: 0 (complementary to (converted) bottom strand) +CT/GA/GA: 0 ((converted) bottom strand) + +Number of alignments to (merely theoretical) complementary strands being rejected in total: 0 + +Final Cytosine Methylation Report +================================= +Total number of C's analysed: 0 + +Total methylated C's in CpG context: 0 +Total methylated C's in CHG context: 0 +Total methylated C's in CHH context: 0 +Total methylated C's in Unknown context: 0 + +Total unmethylated C's in CpG context: 0 +Total unmethylated C's in CHG context: 0 +Total unmethylated C's in CHH context: 0 +Total unmethylated C's in Unknown context: 0 + +Can't determine percentage of methylated Cs in CpG context if value was 0 +Can't determine percentage of methylated Cs in CHG context if value was 0 +Can't determine percentage of methylated Cs in CHH context if value was 0 +Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0 + + +Bismark completed in 0d 0h 0m 8s + +==================== +Bismark run complete +==================== + +Merging bams with: '['samtools', 'merge', '-@', '1', '-f', '/tmp/tmp0n2gudqm/results/tmpnqe_dadr', '/tmp/tmp0n2gudqm/results/input1.fq_1_bismark_bt2_pe.bam', '/tmp/tmp0n2gudqm/results/input2.fq_1_bismark_bt2_pe.bam']'