# HG changeset patch
# User bgruening
# Date 1621973509 0
# Node ID 4522bc2f7ccaefbd9fa104ad1dea4d2d6a37ed7f
# Parent 5afc675c2d1c7f1d2f23a4ec78a555ec1130b80d
"planemo upload for repository https://bionanogenomics.com/support/software-downloads/ commit a3d75aba3a21d88adb3706fbcefcaed4fbcb80fe"
diff -r 5afc675c2d1c -r 4522bc2f7cca bionano_scaffold.xml
--- a/bionano_scaffold.xml Sun May 23 17:21:47 2021 +0000
+++ b/bionano_scaffold.xml Tue May 25 20:11:49 2021 +0000
@@ -7,6 +7,8 @@
total_contigs_raw.fasta
+ #if $trim_cut_sites
+ && python '$__tool_directory__/remove_fake_cut_sites.py' 'total_contigs_raw.fasta' 'total_contigs_trimmed.fasta' 'output.log'
+ #end if
]]>
-
+
@@ -384,6 +388,7 @@
+
-
-
-
+
+ trim_cut_sites == False
+
+
+ trim_cut_sites
+
+
+ trim_cut_sites
+
zip_file
-
+
@@ -434,13 +445,12 @@
-
-
+
+
-
+
-
-
@@ -500,7 +509,7 @@
-
+
@@ -510,15 +519,14 @@
-
+
+
-
-
@@ -542,7 +550,7 @@
-
+
@@ -552,15 +560,14 @@
-
+
+
-
-
@@ -583,6 +590,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
3.6.1
- galaxy0
+ galaxy1
Bionano Genomics has agreed to provide the licensed Bionano Solve
software to enable the VGP to package the software in a container.
@@ -23,7 +23,7 @@
- bionanodocker/bionano-docker-scaffold:latest
+ quay.io/galaxy/bionano-docker-scaffold:1.6.01-bio
diff -r 5afc675c2d1c -r 4522bc2f7cca remove_fake_cut_sites.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/remove_fake_cut_sites.py Tue May 25 20:11:49 2021 +0000
@@ -0,0 +1,120 @@
+"""Replace cut-site sequences that sit inside N runs of a FASTA with N.
+
+Usage: remove_fake_cut_sites.py <input.fasta> <output.fasta> <log.txt>
+"""
+import re
+import sys
+
+from Bio import SeqIO
+from Bio.Seq import Seq
+
+# Cut-site sequences to look for; each is also searched as its reverse
+# complement.
+CUT_SITES = (
+    "CTTAAG",
+    "CTTCTCG",
+    "GCTCTTC",
+    "CCTCAGC",
+    "GAATGC",
+    "GCAATG",
+    "ATCGAT",
+    "CACGAG",
+)
+
+# Number of N bases that must directly flank a cut site on each side.
+N_FLANK_LENGTH = 1
+
+# Short (1-10 base) non-N stretch enclosed by N on both sides.  Lookarounds
+# keep the bounding N unconsumed so back-to-back stretches are all counted.
+POSSIBLE_FAKE_CUT_SITE = re.compile(r"(?<=N)[^N]{1,10}(?=N)")
+
+
+def _compile_cut_site_patterns():
+    """Build one (pattern, replacement) pair per cut-site orientation.
+
+    Returns:
+        A list of ``(compiled_regex, replacement)`` tuples.  Each regex
+        matches a cut site that has ``N_FLANK_LENGTH`` N bases directly on
+        both sides; the replacement is a run of N as long as the site.
+    """
+    flank = "N" * N_FLANK_LENGTH
+    patterns = []
+    for site in CUT_SITES:
+        reverse = str(Seq(site).reverse_complement())
+        # dict.fromkeys drops the duplicate orientation of palindromic sites
+        # while keeping forward-then-reverse order.
+        for oriented in dict.fromkeys((site, reverse)):
+            # The flanks are lookarounds, not part of the match: a single N
+            # shared by two neighbouring sites can then flank both of them.
+            regex = re.compile(
+                "(?<=%s)%s(?=%s)" % (flank, oriented, flank),
+                flags=re.IGNORECASE,
+            )
+            patterns.append((regex, "N" * len(oriented)))
+    return patterns
+
+
+def _mask_cut_sites(sequence, patterns):
+    """Apply every pattern in turn to *sequence*.
+
+    Args:
+        sequence: Sequence text to scan.
+        patterns: Pairs as returned by ``_compile_cut_site_patterns``.
+
+    Returns:
+        ``(masked_sequence, number_of_replacements)``.
+    """
+    change_count = 0
+    for regex, replacement in patterns:
+        sequence, changes = regex.subn(replacement, sequence)
+        change_count += changes
+    return sequence, change_count
+
+
+def main():
+    """Mask N-flanked cut sites in a FASTA file and log what was found.
+
+    Reads ``sys.argv[1]`` (input FASTA), writes every record, upper-cased
+    and masked, to ``sys.argv[2]``, and writes per-record counts to
+    ``sys.argv[3]``.
+    """
+    if len(sys.argv) < 4:
+        sys.exit(
+            "usage: remove_fake_cut_sites.py <input.fasta> <output.fasta> <log>"
+        )
+    fasta_file, output_file, log_file = sys.argv[1:4]
+    patterns = _compile_cut_site_patterns()
+
+    # Context managers close all three files even if parsing fails midway.
+    with open(fasta_file, "r") as fasta_input_handle, open(
+        output_file, "w"
+    ) as output_handle, open(log_file, "w") as log_handle:
+        for record in SeqIO.parse(fasta_input_handle, "fasta"):
+            masked, change_count = _mask_cut_sites(
+                str(record.seq.upper()), patterns
+            )
+            record.seq = Seq(masked)
+
+            if change_count > 0:
+                log_handle.write(
+                    " ".join([record.id, ":", str(change_count), "changes\n"])
+                )
+            SeqIO.write([record], output_handle, "fasta")
+
+            # Finally, count leftover short stretches still enclosed by N.
+            possible_fake_cut_sites = POSSIBLE_FAKE_CUT_SITE.findall(masked)
+            if possible_fake_cut_sites:
+                log_handle.write(
+                    " ".join(
+                        [
+                            record.id,
+                            ":",
+                            str(len(possible_fake_cut_sites)),
+                            "possible non-standard fake cut sites\n",
+                        ]
+                    )
+                )
+
+
+if __name__ == "__main__":
+    main()
diff -r 5afc675c2d1c -r 4522bc2f7cca test-data/test_05_report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_05_report.txt Tue May 25 20:11:49 2021 +0000
@@ -0,0 +1,45 @@
+Original BioNano Genome Map statistics:
+Count = 2
+Min length (Mbp) = 0.720
+Median length (Mbp) = 2.313
+Mean length (Mbp) = 2.313
+N50 length (Mbp) = 3.906
+Max length (Mbp) = 3.906
+Total length (Mbp) = 4.625
+
+Original NGS sequences statistics:
+Count = 1
+Min length (Mbp) = 4.753
+Median length (Mbp) = 4.753
+Mean length (Mbp) = 4.753
+N50 length (Mbp) = 4.753
+Max length (Mbp) = 4.753
+Total length (Mbp) = 4.753
+
+NGS FASTA sequence in hybrid scaffold statistics:
+Count = 1
+Min length (Mbp) = 4.753
+Median length (Mbp) = 4.753
+Mean length (Mbp) = 4.753
+N50 length (Mbp) = 4.753
+Max length (Mbp) = 4.753
+Total length (Mbp) = 4.753
+
+Hybrid scaffold FASTA statistics:
+Count = 1
+Min length (Mbp) = 4.753
+Median length (Mbp) = 4.753
+Mean length (Mbp) = 4.753
+N50 length (Mbp) = 4.753
+Max length (Mbp) = 4.753
+Total length (Mbp) = 4.753
+
+Hybrid scaffold FASTA plus not scaffolded NGS FASTA statistics:
+Count = 1
+Min length (Mbp) = 4.753
+Median length (Mbp) = 4.753
+Mean length (Mbp) = 4.753
+N50 length (Mbp) = 4.753
+Max length (Mbp) = 4.753
+Total length (Mbp) = 4.753
+