comparison util/subtools.py @ 39:4a69515eed63 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 91271a6c0d39c923f0d460b2979247baa297286b-dirty
| | |
|---|---|
| author | yating-l |
| date | Fri, 13 Apr 2018 18:21:35 -0400 |
| parents | d17f629f5486 |
| children | 061da5d3a219 |
| 38:d17f629f5486 | 39:4a69515eed63 |
|---|---|
| 235 if os.path.exists(filename): | 235 if os.path.exists(filename): |
| 236 return filename | 236 return filename |
| 237 else: | 237 else: |
| 238 raise ValueError('Did not find fai file') | 238 raise ValueError('Did not find fai file') |
| 239 | 239 |
| | 240 def generate_indexed_refseq_track(fastaFile, referenceName, outputFolder): |
| | 241 faiFile = createFastaIndex(fastaFile) |
| | 242 refSeqFile = os.path.join(outputFolder, referenceName) |
| | 243 refSeqIndexFile = os.path.join(outputFolder, referenceName+'.fai') |
| | 244 shutil.copy(fastaFile, refSeqFile) |
| | 245 shutil.copy(faiFile, refSeqIndexFile) |
| | 246 |
| | 247 def remove_gene_lines(gff3_file, gff3_filtered): |
| | 248 with open(gff3_file, 'r') as f: |
| | 249 with open(gff3_filtered, 'w') as out: |
| | 250 for line in f: |
| | 251 if not line.startswith('#'): |
| | 252 feature_type = line.split('\t')[2].rstrip() |
| | 253 if feature_type == 'transcript' or feature_type == 'mRNA': |
| | 254 arr = line.split('\t') |
| | 255 # as we remove the gene features, we should also remove the Parent attribute (gene id) from the transcript |
| | 256 arr[8] = ';'.join([item for item in arr[8].split(';') if 'Parent=' not in item]).rstrip() |
| | 257 line = '\t'.join(arr) + '\n' |
| | 258 if feature_type == 'gene': |
| | 259 continue |
| | 260 out.write(line) |
| | 261 |
| 240 def gff3sort(inputFile, outputFile, precise=False): | 262 def gff3sort(inputFile, outputFile, precise=False): |
| 241 array_call = ['gff3sort.pl', inputFile, '>', outputFile] | 263 array_call = ['gff3sort.pl', inputFile] |
| 242 if precise: | 264 if precise: |
| 243 array_call.append('--precise') | 265 array_call.append('--precise') |
| 244 p = _handleExceptionAndCheckCall(array_call) | 266 p = _handleExceptionAndCheckCall(array_call, stdout=outputFile) |
| 245 return p | 267 return p |
| 246 | 268 |
| 247 def bedSort(inputFile, outputFile): | 269 def bedSort(inputFile, outputFile): |
| 248 array_call = ['sort', '-k1,1', '-k2,2n', '-k6,6', inputFile, '>', outputFile] | 270 array_call = ['sort', '-k1,1', '-k2,2n', '-k6,6', inputFile] |
| 249 p = _handleExceptionAndCheckCall(array_call) | 271 p = _handleExceptionAndCheckCall(array_call, stdout=outputFile) |
| 250 return p | 272 return p |
| 251 | 273 |
| 252 def bgzip(inputFile): | 274 def bgzip(inputFile): |
| 253 subprocess.call(['bgzip', inputFile]) | 275 subprocess.call(['bgzip', inputFile]) |
| 254 filename = inputFile + '.gz' | 276 filename = inputFile + '.gz' |
| 263 if os.path.exists(filename): | 285 if os.path.exists(filename): |
| 264 return filename | 286 return filename |
| 265 else: | 287 else: |
| 266 raise ValueError('Did not find tbi file') | 288 raise ValueError('Did not find tbi file') |
| 267 | 289 |
| 268 def generate_tabix_indexed_track(inputFile, dataType, outputFolder): | 290 def generate_tabix_indexed_track(inputFile, dataType, trackName, outputFolder): |
| 269 if "bed" in dataType: | 291 if "bed" in dataType: |
| 270 fileType = 'bed' | 292 fileType = 'bed' |
| 271 sortedFile = tempfile.NamedTemporaryFile(bufsize=0) | 293 sortedFile = tempfile.NamedTemporaryFile(bufsize=0) |
| 272 bedSort(inputFile, sortedFile.name) | 294 bedSort(inputFile, sortedFile) |
| 273 elif "gff" in dataType: | 295 elif "gff" in dataType: |
| 274 fileType = 'gff' | 296 fileType = 'gff' |
| | 297 filteredFile = tempfile.NamedTemporaryFile(bufsize=0) |
| | 298 remove_gene_lines(inputFile, filteredFile.name) |
| 275 sortedFile = tempfile.NamedTemporaryFile(bufsize=0) | 299 sortedFile = tempfile.NamedTemporaryFile(bufsize=0) |
| 276 gff3sort(inputFile, sortedFile.name) | 300 gff3sort(filteredFile.name, sortedFile) |
| 277 compressedFile = bgzip(sortedFile) | 301 # add .gff3.gz extension to Tabix GFF3 files, in order to enable creating name index with generate-names.pl |
| | 302 trackName = trackName + '.gff3.gz' |
| | 303 compressedFile = bgzip(sortedFile.name) |
| 278 tabixFile = createTabix(compressedFile, fileType) | 304 tabixFile = createTabix(compressedFile, fileType) |
| 279 trackPath = os.path.join(outputFolder, inputFile) | 305 trackPath = os.path.join(outputFolder, trackName) |
| 280 trackIndexPath = os.path.join(outputFolder, inputFile+'.tbi') | 306 trackIndexPath = os.path.join(outputFolder, trackName+'.tbi') |
| 281 shutil.copy(compressedFile, trackPath) | 307 shutil.copy(compressedFile, trackPath) |
| 282 shutil.copy(tabixFile, trackIndexPath) | 308 shutil.copy(tabixFile, trackIndexPath) |
| 283 | 309 |
| 284 def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=True): | 310 def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=True): |
| 285 if "bed" in dataType: | 311 if "bed" in dataType: |
| 347 track_json = json.dumps(track_json) | 373 track_json = json.dumps(track_json) |
| 348 new_track = subprocess.Popen(['echo', track_json], stdout=subprocess.PIPE) | 374 new_track = subprocess.Popen(['echo', track_json], stdout=subprocess.PIPE) |
| 349 p = subprocess.call(['add-track-json.pl', trackList], stdin=new_track.stdout) | 375 p = subprocess.call(['add-track-json.pl', trackList], stdin=new_track.stdout) |
| 350 return p | 376 return p |
| 351 | 377 |
| 352 def prepare_refseqs(fasta_file_name, outputFolder): | 378 def prepare_refseqs(fastaFile, outputFolder): |
| 353 array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder] | 379 #array_call = ['prepare-refseqs.pl', '--fasta', fasta_file_name, '--out', outputFolder] |
| | 380 createFastaIndex(fastaFile) |
| | 381 array_call = ['prepare-refseqs.pl', '--indexed_fasta', fastaFile, '--out', outputFolder] |
| 354 p = _handleExceptionAndCheckCall(array_call) | 382 p = _handleExceptionAndCheckCall(array_call) |
| 355 return p | 383 return p |
| 356 | 384 |
| 357 def generate_names(outputFolder): | 385 def generate_names(outputFolder, hashBits=4): |
| 358 array_call = ['generate-names.pl', '-v', '--out', outputFolder] | 386 array_call = ['generate-names.pl', '--hashBits', '4', '-v', '--out', outputFolder] |
| 359 p = _handleExceptionAndCheckCall(array_call) | 387 p = _handleExceptionAndCheckCall(array_call) |
| 360 return p | 388 return p |
| 361 | 389 |
| 362 def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None): | 390 def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None): |
| 363 """ | 391 """ |
