changeset 3:45a01281f796 draft default tip

planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author public-health-bioinformatics
date Mon, 04 Feb 2019 18:33:05 -0500
parents eb0701da22d1
children
files aggregate_linelisting.py aggregate_linelisting.xml test-data/test_output.csv
diffstat 3 files changed, 28 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/aggregate_linelisting.py	Thu Jan 17 19:08:33 2019 -0500
+++ b/aggregate_linelisting.py	Mon Feb 04 18:33:05 2019 -0500
@@ -160,7 +160,7 @@
         #write first member of unique sequence list to csv
         agg_lineListFile.write(comma_sep_output)
         #print sequence records in sequevar to console
-        print "\n\t\t%i SeqRecords matching Sequevar: %s" % (len(listOfSeqs), u)
+        print("\n\t\t%i SeqRecords matching Sequevar: %s" % (len(listOfSeqs), u))
 
         #to uncollapse sequevar group, print each member of the sequevar list to csv output
         '''for i in range(1,len(listOfSeqs)):
@@ -214,7 +214,7 @@
     #print column headers for sample sequences
     row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n"
     agg_lineListFile.write(row4)
-    print ("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
+    print("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
 
 with open(cladeDefinitionFile,'r') as cladeFile:
     """Read clade definition file and store clade names in list."""
@@ -232,14 +232,14 @@
         seqList.append(record) #add Seq to list of Sequences
 
 #print number of sequences to be processed as user check
-print "\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList)
+print("\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList))
 for record in seqList:
     #assign SeqRecords to province-specific dictionaries
     sort_by_location(record)
 
 #access prov segregated lists in order
 sorted_prov_keys = sorted(prov_lists.keys())
-print "\nSequence Lists Sorted by Province: "
+print("\nSequence Lists Sorted by Province: ")
 for prov in sorted_prov_keys:
     current_list = prov_lists[prov]
     #mask AA's identical to reference sequence with dot
@@ -253,7 +253,7 @@
 for prov in sorted_prov_keys:
     prov_list = prov_lists[prov]
     by_clades_dict = {} #empty dict for clade:seqRecord list groups
-    print "\n'%s' List (Amino Acids identical to Reference are Masked): " % (prov)
+    print("\n'%s' List (Amino Acids identical to Reference are Masked): " % (prov))
     for rec in prov_list:
         clade = extract_clade(rec)
         if clade in by_clades_dict:
@@ -263,13 +263,13 @@
             by_clades_dict[clade] = [rec]
     #get list of alphabetically sorted clade keys
     sorted_clade_keys = sorted(by_clades_dict.keys())
-    print "\tNumber of clades: ", len(by_clades_dict)
+    print("\tNumber of clades: ", len(by_clades_dict))
     #group each list of sequences in clade by sequevars
     for key in sorted_clade_keys:
-        print "\n\tCLADE: %s Number of Members: %i" % (key, len(by_clades_dict[key]))
+        print("\n\tCLADE: %s Number of Members: %i" % (key, len(by_clades_dict[key])))
         a_list = by_clades_dict[key]
         for seqrec in a_list:
-            print "\t %s: %s" %(seqrec.id,str(seqrec.seq))
+            print("\t %s: %s" %(seqrec.id,str(seqrec.seq)))
         #output the list to csv as aggregated linelist
         output_aggregated_linelist(a_list)
     
--- a/aggregate_linelisting.xml	Thu Jan 17 19:08:33 2019 -0500
+++ b/aggregate_linelisting.xml	Mon Feb 04 18:33:05 2019 -0500
@@ -21,7 +21,7 @@
   </outputs>
   <tests>
     <test>
-      <param name="input_fasta" value="2017_summer_Nov23_2017_antigenic_maps.fasta"/>
+      <param name="input_fasta" value="fluA_H3_clade_assigned_antigenic_sites_extracted.fasta"/>
       <param name="ref_fasta" value="MAP_3C.2a_A_Hong_Kong_4801_2014_X-263B_EGG.fasta" />
       <param name="index_array_csv" value="FluA_H3_antigenic_aa_indices.csv" />
       <param name="clade_def_csv" value="Flu_Clade_Definitions_H3_20171121.csv" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_output.csv	Mon Feb 04 18:33:05 2019 -0500
@@ -0,0 +1,19 @@
+
+,,,,44,45,46,47,48,50,51,53,54,57,59,62,63,67,75,78,80,81,82,83,86,87,88,91,92,94,96,102,103,109,117,121,122,124,126,128,129,130,131,132,133,135,137,138,140,142,143,144,145,146,150,152,155,156,157,158,159,160,163,164,165,167,168,170,171,172,173,174,175,176,177,179,182,186,187,188,189,190,192,193,194,196,197,198,201,203,207,208,209,212,213,214,215,216,217,218,219,226,227,228,229,230,238,240,242,244,246,247,248,260,261,262,265,273,275,276,278,279,280,294,297,299,300,304,305,307,308,309,310,311,312
+Clade_3C.2a_A/Hong_Kong/4801/2014_X-263B_EGG,,,,Q,N,S,S,I,E,I,D,S,Q,L,E,N,I,Q,G,Q,N,K,K,L,F,V,S,K,Y,S,V,P,R,T,N,N,S,N,T,G,V,T,Q,N,T,S,A,I,R,S,S,S,S,R,N,T,H,L,N,Y,K,A,L,N,T,M,N,N,E,Q,F,D,K,L,I,V,G,T,D,K,D,I,F,P,A,Q,S,R,X,K,R,S,A,V,I,P,N,I,G,S,I,P,S,R,I,K,G,I,L,N,S,T,I,R,S,S,P,G,K,K,S,E,F,V,R,I,A,C,R,Y,V,K,H,S
+Sequence Name,N,Clade,Extra Substitutions,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues
+A-BC-324-2017,1,3C.2a1_+_N121K_+_K92R_+_H311Q, ,.,.,.,.,M,.,.,.,.,.,.,G,.,.,.,.,.,.,.,.,.,.,.,.,R,.,N,.,.,.,.,K,.,.,.,.,.,.,.,.,.,K,.,.,.,G,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,12,0.9083969465648855,
+A-BC-024-2018,1,3C.2a1_+_N121K_+_T135K, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,D,.,.,.,.,.,.,.,.,.,H,N,.,.,.,.,K,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,I,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,10,0.9236641221374046,
+A-BC-325-2017,1,3C.2a_+_N121K_+_S144K, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,.,.,K,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,K,.,.,K,.,.,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,9,0.9312977099236641,
+A-BC-330-2017,1,No_Match, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,K,.,R,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,8,0.9389312977099237,
+A-AB-308-2017,2,3C.2a1_+_N121K_+_T135K, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,D,.,.,.,.,.,.,.,.,.,H,N,.,.,.,.,K,D,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,I,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,11,0.916030534351145,
+A-AB-319-2017,2,3C.2a_+_N31S_+_D53N_+_R142G_+_S144R_+_N171K_+_I192T_+_Q197H, ,.,.,.,.,.,.,.,N,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,R,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,T,.,L,.,H,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,11,0.916030534351145,
+A-AB-400-2017,2,3C.2a_+_T131K_+_R142K_+_R261Q, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,R,.,N,.,.,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,8,0.9389312977099237,
+A-AB-415-2017,1,3C.2a_+_T131K_+_R142K_+_R261Q, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,R,.,N,.,.,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,F,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,9,0.9312977099236641,
+A-ON-003-2018,2,3C.2a_+_N121K_+_S144K, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,.,.,K,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,K,.,.,K,.,.,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,9,0.9312977099236641,
+A-ON-314-2017,1,3C.3a, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,N,.,.,.,.,.,.,.,.,A,.,.,.,.,.,.,.,S,.,G,.,K,.,.,.,.,.,.,.,.,S,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,S,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,11,0.916030534351145,
+A-ON-309-2017,1,No_Match, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,K,T,.,I,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,8,0.9389312977099237,
+A-QC-315-2017,1,3C.2a1_+_N121K_+_K92R_+_H311Q, ,.,.,.,.,.,.,.,.,.,.,.,G,.,.,.,.,.,.,.,.,.,.,.,.,R,.,N,.,.,.,.,K,.,.,.,A,.,.,.,.,.,K,.,.,.,G,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,12,0.9083969465648855,
+A-QC-309-2017,1,3C.2a1_+_N121K_+_K92R_+_H311Q, ,.,.,.,.,.,.,.,.,.,.,.,G,.,.,.,D,.,.,.,.,.,.,.,.,R,.,N,.,.,.,.,K,D,.,.,.,.,.,.,.,.,K,.,.,.,G,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,13,0.9007633587786259,
+A-QC-303-2017,1,3C.2a_+_N31S_+_D53N_+_R142G_+_S144R_+_N171K_+_I192T_+_Q197H, ,.,.,.,.,.,.,.,N,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,N,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,R,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,.,.,T,.,L,.,H,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,11,0.916030534351145,
+A-QC-316-2017,1,3C.2a_+_T131K_+_R142K_+_R261Q, ,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,R,.,N,.,.,.,.,.,.,.,.,.,.,.,K,.,.,.,.,.,.,K,.,.,.,.,.,.,.,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,L,.,.,.,.,T,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,Q,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,8,0.9389312977099237,