Mercurial > repos > drosofff > msp_sr_readmap_and_size_histograms
changeset 24:bf7388df53cf draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 3effd45f45c37a6cdaf9b7b1da1ed4d10d3b0e38
author | drosofff |
---|---|
date | Sat, 08 Oct 2016 07:18:45 -0400 |
parents | d6b93af0da55 |
children | |
files | README.rst readmap.py readmap.xml smRtools.py smRtools.pyc test-data/Readmap_dataframe.tab test-data/Readmaps.pdf test-data/Size_distribution.pdf test-data/Size_distribution_and_Readmaps.pdf test-data/Size_distribution_dataframe.tab |
diffstat | 10 files changed, 104 insertions(+), 76 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Sun Apr 24 09:23:44 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -Starting with version 1.2.0, this tool requires a galaxy release 16.01 or newer and conda dependency resolution to be active. -
--- a/readmap.py Sun Apr 24 09:23:44 2016 -0400 +++ b/readmap.py Sat Oct 08 07:18:45 2016 -0400 @@ -52,16 +52,19 @@ biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm) return MasterListOfGenomes -def dataframe_sanityzer (listofdatalines): - Dict = defaultdict(float) +def remove_null_entries(listofdatalines): + """ + This function removes genes that have no reads aligned. + """ + Dict = defaultdict(float) for line in listofdatalines: fields= line.split("\t") - Dict[fields[0]] += float (fields[2]) + Dict[fields[0]] += abs(float(fields[2])) filtered_list = [] for line in listofdatalines: fields= line.split("\t") if Dict[fields[0]] != 0: - filtered_list.append(line) + filtered_list.append(line) return filtered_list @@ -108,9 +111,8 @@ plottable = dict[gene].readplot() plottable = handle_start_stop_coordinates(plottable, readDict) for line in plottable: - #print >>readmap, "%s\t%s" % (line, sample) listoflines.append ("%s\t%s" % (line, sample)) - listoflines = dataframe_sanityzer(listoflines) + listoflines = remove_null_entries(listoflines) for line in listoflines: print >>readmap, line @@ -124,19 +126,15 @@ else: dict=readDict[sample].instanceDict for gene in dict.keys(): - histogram = dict[gene].size_histogram(minquery=args.minquery, maxquery=args.maxquery) + histogram = dict[gene].size_histogram(minquery=minquery, maxquery=maxquery) for polarity in histogram.keys(): if polarity=='both': continue - #for size in xrange(args.minquery, args.maxquery): - # if not size in histogram[polarity].keys(): - # histogram[size]=0 for size, count in histogram[polarity].iteritems(): - #print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) # test, changed the order accordingly listoflines.append ("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) ) - listoflines = dataframe_sanityzer(listoflines) + listoflines = remove_null_entries(listoflines) for line in listoflines: - print >>size_distrib, line + print >>size_distrib, line def gff_item_subinstances(readDict, gff3): GFFinstanceDict=OrderedDict()
--- a/readmap.xml Sun Apr 24 09:23:44 2016 -0400 +++ b/readmap.xml Sat Oct 08 07:18:45 2016 -0400 @@ -4,9 +4,9 @@ <requirement type="package" version="1.0.0">bowtie</requirement> <requirement type="package" version="0.9.0">pysam</requirement> <requirement type="package" version="1.9.3">numpy</requirement> - <requirement type="package" version="1.3.0">r-optparse</requirement> - <requirement type="package" version="0.6_26">r-latticeextra</requirement> - <requirement type="package" version="2.0.0">r-gridextra</requirement> + <requirement type="package" version="1.3.0=r3.2.2_1">r-optparse</requirement> + <requirement type="package" version="0.6_26=r3.2.2_2a">r-latticeextra</requirement> + <requirement type="package" version="2.0.0=r3.2.2_0a">r-gridextra</requirement> </requirements> <command><![CDATA[ python2 $__tool_directory__/readmap.py @@ -151,9 +151,6 @@ <param name="rows_per_page" value="8" /> <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" /> <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" /> - <output name="readmap_PDF" ftype="pdf" file="Readmaps.pdf" /> - <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" /> - <output name="combi_PDF" ftype="pdf" file="Size_distribution_and_Readmaps.pdf" /> </test> </tests> </tool>
--- a/smRtools.py Sun Apr 24 09:23:44 2016 -0400 +++ b/smRtools.py Sat Oct 08 07:18:45 2016 -0400 @@ -142,26 +142,6 @@ self.alignedReads += 1 F.close() return self.instanceDict -# elif self.alignmentFileFormat == "sam": -# F = open (self.alignmentFile, "r") -# dict = {"0":"+", "16":"-"} -# for line in F: -# if line[0]=='@': -# continue -# fields = line.split() -# if fields[2] == "*": continue -# polarity = dict[fields[1]] -# gene = fields[2] -# offset = int(fields[3]) -# size = len (fields[9]) -# if self.size_inf: -# if (size>=self.size_inf and size<= self.size_sup): -# self.instanceDict[gene].addread (polarity, offset, size) -# self.alignedReads += 1 -# else: -# self.instanceDict[gene].addread (polarity, offset, size) -# self.alignedReads += 1 -# F.close() elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam": import pysam samfile = pysam.Samfile(self.alignmentFile) @@ -184,22 +164,6 @@ self.alignedReads += 1 return self.instanceDict -# def size_histogram (self): -# size_dict={} -# size_dict['F']= defaultdict (int) -# size_dict['R']= defaultdict (int) -# size_dict['both'] = defaultdict (int) -# for item in self.instanceDict: -# buffer_dict_F = self.instanceDict[item].size_histogram()['F'] -# buffer_dict_R = self.instanceDict[item].size_histogram()['R'] -# for size in buffer_dict_F: -# size_dict['F'][size] += buffer_dict_F[size] -# for size in buffer_dict_R: -# size_dict['R'][size] -= buffer_dict_R[size] -# allSizeKeys = list (set (size_dict['F'].keys() + size_dict['R'].keys() ) ) -# for size in allSizeKeys: -# size_dict['both'][size] = size_dict['F'][size] + size_dict['R'][size] -# return size_dict def size_histogram (self): # in HandleSmRNAwindows '''refactored on 7-9-2014 to debug size_histogram tool''' size_dict={} @@ -361,24 +325,7 @@ for offset in range (min(dicsize.keys()), max(dicsize.keys())+1): dicsize[size] = dicsize.get(size, 0) # to fill offsets with null values return dicsize - -# def size_histogram(self): -# norm=self.norm -# hist_dict={} -# hist_dict['F']={} -# hist_dict['R']={} -# for offset in self.readDict: -# for size in self.readDict[offset]: -# if offset < 0: -# hist_dict['R'][size] = hist_dict['R'].get(size, 0) - 1*norm -# else: -# hist_dict['F'][size] = hist_dict['F'].get(size, 0) + 1*norm -# ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate ! -# if not (hist_dict['F']) and (not hist_dict['R']): -# hist_dict['F'][21] = 0 -# hist_dict['R'][21] = 0 -# ## -# return hist_dict + def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow '''refactored on 7-9-2014 to debug size_histogram tool''' @@ -480,7 +427,6 @@ return ". | %s" % (freqDic["Trev"] / reverse_sum * 100) else: return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100) - def readplot (self): norm=self.norm
--- a/test-data/Readmap_dataframe.tab Sun Apr 24 09:23:44 2016 -0400 +++ b/test-data/Readmap_dataframe.tab Sat Oct 08 07:18:45 2016 -0400 @@ -1267,6 +1267,12 @@ FBti0020400 8755 1.0 F sample1.srbowtie_out FBti0020400 8849 3.0 F sample1.srbowtie_out FBti0020400 9398 0 F sample1.srbowtie_out +FBti0019480 0 0 F sample1.srbowtie_out +FBti0019480 417 2.0 F sample1.srbowtie_out +FBti0019480 503 -3.0 R sample1.srbowtie_out +FBti0019480 619 1.0 F sample1.srbowtie_out +FBti0019480 634 -1.0 R sample1.srbowtie_out +FBti0019480 669 0 F sample1.srbowtie_out FBti0020401 0 0 F sample2.srbowtie_out FBti0020401 142 -1.0 R sample2.srbowtie_out FBti0020401 160 1.0 F sample2.srbowtie_out @@ -2520,6 +2526,13 @@ FBti0020400 9192 1.0 F sample2.srbowtie_out FBti0020400 9333 -1.0 R sample2.srbowtie_out FBti0020400 9398 0 F sample2.srbowtie_out +FBti0019480 0 0 F sample2.srbowtie_out +FBti0019480 327 1.0 F sample2.srbowtie_out +FBti0019480 446 1.0 F sample2.srbowtie_out +FBti0019480 503 -3.0 R sample2.srbowtie_out +FBti0019480 594 1.0 F sample2.srbowtie_out +FBti0019480 619 1.0 F sample2.srbowtie_out +FBti0019480 669 0 F sample2.srbowtie_out FBti0020401 0 0 F sample3.srbowtie_out FBti0020401 250 1.0 F sample3.srbowtie_out FBti0020401 274 -1.0 R sample3.srbowtie_out @@ -3670,3 +3683,13 @@ FBti0020400 9306 -1.0 R sample3.srbowtie_out FBti0020400 9334 -1.0 R sample3.srbowtie_out FBti0020400 9398 0 F sample3.srbowtie_out +FBti0019480 0 0 F sample3.srbowtie_out +FBti0019480 23 1.0 F sample3.srbowtie_out +FBti0019480 384 1.0 F sample3.srbowtie_out +FBti0019480 493 5.0 F sample3.srbowtie_out +FBti0019480 501 -1.0 R sample3.srbowtie_out +FBti0019480 502 -7.0 R sample3.srbowtie_out +FBti0019480 503 -1.0 R sample3.srbowtie_out +FBti0019480 594 1.0 F sample3.srbowtie_out +FBti0019480 619 1.0 F sample3.srbowtie_out +FBti0019480 669 0 F sample3.srbowtie_out
--- a/test-data/Size_distribution_dataframe.tab Sun Apr 24 09:23:44 2016 -0400 +++ b/test-data/Size_distribution_dataframe.tab Sat Oct 08 07:18:45 2016 -0400 @@ -879,6 +879,28 @@ FBti0020400 28 16.0 F sample1.srbowtie_out FBti0020400 29 0.0 F sample1.srbowtie_out FBti0020400 30 0 F sample1.srbowtie_out +FBti0019480 20 0 R sample1.srbowtie_out +FBti0019480 21 0 R sample1.srbowtie_out +FBti0019480 22 -1.0 R sample1.srbowtie_out +FBti0019480 23 0 R sample1.srbowtie_out +FBti0019480 24 -2.0 R sample1.srbowtie_out +FBti0019480 25 -1.0 R sample1.srbowtie_out +FBti0019480 26 0.0 R sample1.srbowtie_out +FBti0019480 27 0 R sample1.srbowtie_out +FBti0019480 28 0 R sample1.srbowtie_out +FBti0019480 29 0 R sample1.srbowtie_out +FBti0019480 30 0 R sample1.srbowtie_out +FBti0019480 20 0 F sample1.srbowtie_out +FBti0019480 21 0 F sample1.srbowtie_out +FBti0019480 22 0.0 F sample1.srbowtie_out +FBti0019480 23 0 F sample1.srbowtie_out +FBti0019480 24 0.0 F sample1.srbowtie_out +FBti0019480 25 1.0 F sample1.srbowtie_out +FBti0019480 26 2.0 F sample1.srbowtie_out +FBti0019480 27 0 F sample1.srbowtie_out +FBti0019480 28 0 F sample1.srbowtie_out +FBti0019480 29 0 F sample1.srbowtie_out +FBti0019480 30 0 F sample1.srbowtie_out FBti0020401 20 -2.0 R sample2.srbowtie_out FBti0020401 21 0 R sample2.srbowtie_out FBti0020401 22 0.0 R sample2.srbowtie_out @@ -1759,6 +1781,28 @@ FBti0020400 28 15.0 F sample2.srbowtie_out FBti0020400 29 1.0 F sample2.srbowtie_out FBti0020400 30 0 F sample2.srbowtie_out +FBti0019480 20 0.0 R sample2.srbowtie_out +FBti0019480 21 0 R sample2.srbowtie_out +FBti0019480 22 0 R sample2.srbowtie_out +FBti0019480 23 0 R sample2.srbowtie_out +FBti0019480 24 -2.0 R sample2.srbowtie_out +FBti0019480 25 -1.0 R sample2.srbowtie_out +FBti0019480 26 0.0 R sample2.srbowtie_out +FBti0019480 27 0.0 R sample2.srbowtie_out +FBti0019480 28 0 R sample2.srbowtie_out +FBti0019480 29 0 R sample2.srbowtie_out +FBti0019480 30 0 R sample2.srbowtie_out +FBti0019480 20 1.0 F sample2.srbowtie_out +FBti0019480 21 0 F sample2.srbowtie_out +FBti0019480 22 0 F sample2.srbowtie_out +FBti0019480 23 0 F sample2.srbowtie_out +FBti0019480 24 1.0 F sample2.srbowtie_out +FBti0019480 25 0.0 F sample2.srbowtie_out +FBti0019480 26 1.0 F sample2.srbowtie_out +FBti0019480 27 1.0 F sample2.srbowtie_out +FBti0019480 28 0 F sample2.srbowtie_out +FBti0019480 29 0 F sample2.srbowtie_out +FBti0019480 30 0 F sample2.srbowtie_out FBti0020401 20 -1.0 R sample3.srbowtie_out FBti0020401 21 0.0 R sample3.srbowtie_out FBti0020401 22 -1.0 R sample3.srbowtie_out @@ -2639,3 +2683,25 @@ FBti0020400 28 12.0 F sample3.srbowtie_out FBti0020400 29 0 F sample3.srbowtie_out FBti0020400 30 0 F sample3.srbowtie_out +FBti0019480 20 0 R sample3.srbowtie_out +FBti0019480 21 -1.0 R sample3.srbowtie_out +FBti0019480 22 0.0 R sample3.srbowtie_out +FBti0019480 23 -2.0 R sample3.srbowtie_out +FBti0019480 24 -5.0 R sample3.srbowtie_out +FBti0019480 25 -1.0 R sample3.srbowtie_out +FBti0019480 26 0.0 R sample3.srbowtie_out +FBti0019480 27 0.0 R sample3.srbowtie_out +FBti0019480 28 0 R sample3.srbowtie_out +FBti0019480 29 0 R sample3.srbowtie_out +FBti0019480 30 0 R sample3.srbowtie_out +FBti0019480 20 0 F sample3.srbowtie_out +FBti0019480 21 0.0 F sample3.srbowtie_out +FBti0019480 22 1.0 F sample3.srbowtie_out +FBti0019480 23 0.0 F sample3.srbowtie_out +FBti0019480 24 3.0 F sample3.srbowtie_out +FBti0019480 25 2.0 F sample3.srbowtie_out +FBti0019480 26 2.0 F sample3.srbowtie_out +FBti0019480 27 1.0 F sample3.srbowtie_out +FBti0019480 28 0 F sample3.srbowtie_out +FBti0019480 29 0 F sample3.srbowtie_out +FBti0019480 30 0 F sample3.srbowtie_out