Mercurial > repos > bgruening > nn_classifier
comparison nn_classifier.xml @ 18:c64f57fe1b97 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
author | bgruening |
---|---|
date | Mon, 09 Jul 2018 14:26:44 -0400 |
parents | 25a68adb2ade |
children | fa36c40c2990 |
comparison legend:
- equal
- deleted
- inserted
- replaced
17:9e0d360e54ea | 18:c64f57fe1b97 |
---|---|
19 import numpy as np | 19 import numpy as np |
20 import sklearn.neighbors | 20 import sklearn.neighbors |
21 import pandas | 21 import pandas |
22 import pickle | 22 import pickle |
23 | 23 |
24 @COLUMNS_FUNCTION@ | |
25 @GET_X_y_FUNCTION@ | |
26 | |
24 input_json_path = sys.argv[1] | 27 input_json_path = sys.argv[1] |
25 params = json.load(open(input_json_path, "r")) | 28 params = json.load(open(input_json_path, "r")) |
26 | 29 |
27 | 30 |
28 #if $selected_tasks.selected_task == "load": | 31 #if $selected_tasks.selected_task == "load": |
29 | 32 |
30 classifier_object = pickle.load(open("$infile_model", 'r')) | 33 classifier_object = pickle.load(open("$infile_model", 'r')) |
31 | 34 |
32 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) | 35 header = 'infer' if params["selected_tasks"]["header"] else None |
36 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | |
33 prediction = classifier_object.predict(data) | 37 prediction = classifier_object.predict(data) |
34 prediction_df = pandas.DataFrame(prediction) | 38 prediction_df = pandas.DataFrame(prediction) |
35 res = pandas.concat([data, prediction_df], axis=1) | 39 res = pandas.concat([data, prediction_df], axis=1) |
36 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) | 40 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) |
37 | 41 |
38 #else: | 42 #else: |
39 | 43 |
40 data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) | 44 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2") |
41 | |
42 data = data_train.ix[:,0:len(data_train.columns)-1] | |
43 labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]]) | |
44 | 45 |
45 selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] | 46 selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] |
46 | 47 |
47 if selected_algorithm == "nneighbors": | 48 if selected_algorithm == "nneighbors": |
48 classifier = params["selected_tasks"]["selected_algorithms"]["sampling_methods"]["sampling_method"] | 49 classifier = params["selected_tasks"]["selected_algorithms"]["sampling_methods"]["sampling_method"] |
53 options = params["selected_tasks"]["selected_algorithms"]["options"] | 54 options = params["selected_tasks"]["selected_algorithms"]["options"] |
54 classifier = "NearestCentroid" | 55 classifier = "NearestCentroid" |
55 | 56 |
56 my_class = getattr(sklearn.neighbors, classifier) | 57 my_class = getattr(sklearn.neighbors, classifier) |
57 classifier_object = my_class(**options) | 58 classifier_object = my_class(**options) |
58 classifier_object.fit(data,labels) | 59 classifier_object.fit(X, y) |
59 | 60 |
60 pickle.dump(classifier_object,open("$outfile_fit", 'w+')) | 61 pickle.dump(classifier_object,open("$outfile_fit", 'w+')) |
61 | 62 |
62 #end if | 63 #end if |
63 | 64 |
64 ]]> | 65 ]]> |
65 </configfile> | 66 </configfile> |
66 </configfiles> | 67 </configfiles> |
67 <inputs> | 68 <inputs> |
68 <expand macro="train_loadConditional" model="zip"><!--Todo: add sparse to targets--> | 69 <expand macro="sl_Conditional" model="zip"><!--Todo: add sparse to targets--> |
69 <param name="selected_algorithm" type="select" label="Classifier type"> | 70 <param name="selected_algorithm" type="select" label="Classifier type"> |
70 <option value="nneighbors">Nearest Neighbors</option> | 71 <option value="nneighbors">Nearest Neighbors</option> |
71 <option value="ncentroid">Nearest Centroid</option> | 72 <option value="ncentroid">Nearest Centroid</option> |
72 </param> | 73 </param> |
73 <when value="nneighbors"> | 74 <when value="nneighbors"> |
75 <expand macro="sl_mixed_input"/> | |
74 <conditional name="sampling_methods"> | 76 <conditional name="sampling_methods"> |
75 <param name="sampling_method" type="select" label="Neighbor selection method"> | 77 <param name="sampling_method" type="select" label="Neighbor selection method"> |
76 <option value="KNeighborsClassifier" selected="true">K-nearest neighbors</option> | 78 <option value="KNeighborsClassifier" selected="true">K-nearest neighbors</option> |
77 <option value="RadiusNeighborsClassifier">Radius-based</option> | 79 <option value="RadiusNeighborsClassifier">Radius-based</option> |
78 </param> | 80 </param> |
88 </expand> | 90 </expand> |
89 </when> | 91 </when> |
90 </conditional> | 92 </conditional> |
91 </when> | 93 </when> |
92 <when value="ncentroid"> | 94 <when value="ncentroid"> |
95 <expand macro="sl_mixed_input"/> | |
93 <section name="options" title="Advanced Options" expanded="False"> | 96 <section name="options" title="Advanced Options" expanded="False"> |
94 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" | 97 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" |
95 help="The metric to use when calculating distance between instances in a feature array."/> | 98 help="The metric to use when calculating distance between instances in a feature array."/> |
96 <param argument="shrink_threshold" type="float" optional="true" value="" label="Shrink threshold" | 99 <param argument="shrink_threshold" type="float" optional="true" value="" label="Shrink threshold" |
97 help="Floating point number for shrinking centroids to remove features."/> | 100 help="Floating point number for shrinking centroids to remove features."/> |
102 | 105 |
103 <expand macro="output"/> | 106 <expand macro="output"/> |
104 | 107 |
105 <tests> | 108 <tests> |
106 <test> | 109 <test> |
107 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> | 110 <param name="infile1" value="train_set.tabular" ftype="tabular"/> |
111 <param name="infile2" value="train_set.tabular" ftype="tabular"/> | |
112 <param name="header1" value="True"/> | |
113 <param name="header2" value="True"/> | |
114 <param name="col1" value="1,2,3,4"/> | |
115 <param name="col2" value="5"/> | |
108 <param name="selected_task" value="train"/> | 116 <param name="selected_task" value="train"/> |
109 <param name="selected_algorithm" value="nneighbors"/> | 117 <param name="selected_algorithm" value="nneighbors"/> |
110 <param name="sampling_method" value="KNeighborsClassifier" /> | 118 <param name="sampling_method" value="KNeighborsClassifier" /> |
111 <param name="algorithm" value="brute" /> | 119 <param name="algorithm" value="brute" /> |
112 <output name="outfile_fit" file="nn_model01.txt"/> | 120 <output name="outfile_fit" file="nn_model01.txt"/> |
113 </test> | 121 </test> |
114 <test> | 122 <test> |
115 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> | 123 <param name="infile1" value="train_set.tabular" ftype="tabular"/> |
124 <param name="infile2" value="train_set.tabular" ftype="tabular"/> | |
125 <param name="header1" value="True"/> | |
126 <param name="header2" value="True"/> | |
127 <param name="col1" value="1,2,3,4"/> | |
128 <param name="col2" value="5"/> | |
116 <param name="selected_task" value="train"/> | 129 <param name="selected_task" value="train"/> |
117 <param name="selected_algorithm" value=""/> | 130 <param name="selected_algorithm" value=""/> |
118 <param name="selected_algorithm" value="nneighbors"/> | 131 <param name="selected_algorithm" value="nneighbors"/> |
119 <param name="sampling_method" value="RadiusNeighborsClassifier" /> | 132 <param name="sampling_method" value="RadiusNeighborsClassifier" /> |
120 <output name="outfile_fit" file="nn_model02.txt"/> | 133 <output name="outfile_fit" file="nn_model02.txt"/> |
121 </test> | 134 </test> |
122 <test> | 135 <test> |
123 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> | 136 <param name="infile1" value="train_set.tabular" ftype="tabular"/> |
137 <param name="infile2" value="train_set.tabular" ftype="tabular"/> | |
138 <param name="header1" value="True"/> | |
139 <param name="header2" value="True"/> | |
140 <param name="col1" value="1,2,3,4"/> | |
141 <param name="col2" value="5"/> | |
124 <param name="selected_task" value="train"/> | 142 <param name="selected_task" value="train"/> |
125 <param name="selected_algorithm" value="ncentroid"/> | 143 <param name="selected_algorithm" value="ncentroid"/> |
126 <output name="outfile_fit" file="nn_model03.txt"/> | 144 <output name="outfile_fit" file="nn_model03.txt"/> |
127 </test> | 145 </test> |
128 <test> | 146 <test> |
129 <param name="infile_model" value="nn_model01.txt" ftype="txt"/> | 147 <param name="infile_model" value="nn_model01.txt" ftype="txt"/> |
130 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | 148 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> |
149 <param name="header" value="True"/> | |
131 <param name="selected_task" value="load"/> | 150 <param name="selected_task" value="load"/> |
132 <output name="outfile_predict" file="nn_prediction_result01.tabular"/> | 151 <output name="outfile_predict" file="nn_prediction_result01.tabular"/> |
133 </test> | 152 </test> |
134 <test> | 153 <test> |
135 <param name="infile_model" value="nn_model02.txt" ftype="txt"/> | 154 <param name="infile_model" value="nn_model02.txt" ftype="txt"/> |
136 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | 155 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> |
156 <param name="header" value="True"/> | |
137 <param name="selected_task" value="load"/> | 157 <param name="selected_task" value="load"/> |
138 <output name="outfile_predict" file="nn_prediction_result02.tabular"/> | 158 <output name="outfile_predict" file="nn_prediction_result02.tabular"/> |
139 </test> | 159 </test> |
140 <test> | 160 <test> |
141 <param name="infile_model" value="nn_model03.txt" ftype="txt"/> | 161 <param name="infile_model" value="nn_model03.txt" ftype="txt"/> |
142 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | 162 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> |
163 <param name="header" value="True"/> | |
143 <param name="selected_task" value="load"/> | 164 <param name="selected_task" value="load"/> |
144 <output name="outfile_predict" file="nn_prediction_result03.tabular"/> | 165 <output name="outfile_predict" file="nn_prediction_result03.tabular"/> |
145 </test> | 166 </test> |
146 </tests> | 167 </tests> |
147 <help><![CDATA[ | 168 <help><![CDATA[ |