diff utils.py @ 31:d8049deb0c97 draft

planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
author yating-l
date Fri, 17 Mar 2017 12:28:32 -0400
parents a4a54b925c73
children 3e5b6f6a9abe
line wrap: on
line diff
--- a/utils.py	Wed Mar 15 11:46:38 2017 -0400
+++ b/utils.py	Fri Mar 17 12:28:32 2017 -0400
@@ -1,21 +1,21 @@
 #!/usr/bin/env python
 
-'''
+"""
 This file include common used functions for converting file format to gff3
-'''
+"""
 from collections import OrderedDict
 import json
 import subprocess
 import os
 import tempfile
-
+import string
 
 def write_features(field, attribute, gff3):
-    '''
+    """
     The function write the features to gff3 format (defined in https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md)
     field, attribute are ordered dictionary 
     gff3 is the file handler
-    '''
+    """
     attr = []
     for v in field.values():
         gff3.write(str(v) + '\t')
@@ -47,10 +47,10 @@
     return chrom_sizes
 
 def sequence_region(chrom_sizes):
-    '''
+    """
     This function read from a chromatin size file generated by twoBitInfo and write the information to dict
     return a dict
-    '''
+    """
     f = open(chrom_sizes, 'r')
     sizes = f.readlines()
     sizes_dict = {}
@@ -76,11 +76,11 @@
         num = num + 1
 
 def add_tracks_to_json(trackList_json, new_tracks, modify_type):
-    '''
+    """
     Add to track configuration (trackList.json)
     # modify_type =  'add_tracks': add a new track like bam or bigwig, new_track = dict()
     # modify_type = 'add_attr': add configuration to the existing track, new_track = dict(track_name: dict())
-    '''
+    """
     with open(trackList_json, 'r+') as f:
         data = json.load(f)
         if modify_type == 'add_tracks':
@@ -98,9 +98,9 @@
         f.close()
 
 def gtfToGff3(gtf_file, gff3_file, chrom_sizes):
-    '''
+    """
     Covert gtf file output from StringTie to gff3 format
-    '''
+    """
     gff3 = open(gff3_file, 'w')
     gff3.write("##gff-version 3\n")
     sizes_dict = sequence_region(chrom_sizes)
@@ -144,12 +144,15 @@
             write_features(field, attribute, gff3)
     gff3.close()
 
-def sanitize_name_path(input_path):
-    '''
+
+def sanitize_name(input_name):
+    """
     Galaxy will name all the files and dirs as *.dat, 
-    the function is simply replacing '.' to '_' for the dirs
-    '''
-    return input_path.replace('.', '_')
+    the function replaces every character that is not an ASCII letter, digit, '_' or '-' with '_' and prepends 'gonramp_'
+    """
+    validChars = "_-%s%s" % (string.ascii_letters, string.digits)
+    sanitized_name = ''.join([c if c in validChars else '_' for c in input_name])
+    return "gonramp_" + sanitized_name
 
 def createBamIndex(bamfile):
     p = subprocess.Popen(['samtools', 'index', bamfile])