library.py 11 KB
Newer Older
HippoHoppy's avatar
HippoHoppy committed
1 2 3

def file_url(input_dict):
    """Build a ``discomll`` ``dataset.Data`` object from widget parameters.

    Keys read from ``input_dict``:

    * ``"range"`` -- the string ``"true"`` selects range mode: the URLs are
      passed as a flat list and ``generate_urls`` is set to ``True``.
      Otherwise every URL is wrapped in its own one-element list and must
      not use the ``https`` scheme.
    * ``"url"`` -- newline-separated URLs; blank and whitespace-only lines
      are ignored.
    * ``"X_indices"`` -- feature indices, either ``"a-b"`` (expanded with
      ``range(a, b)``, so ``b`` itself is excluded) or a comma-separated
      list of integers.
    * ``"data_type"`` -- ``"true"`` is translated to ``"gzip"``, anything
      else to ``""``.
    * ``"atr_meta"`` -- ``"numeric"`` marks every feature as ``"c"``,
      ``"discrete"`` as ``"d"``; any other value takes the metadata from
      ``input_dict["custom"]``.

    All remaining entries (everything except ``"url"`` and ``"X_indices"``)
    are forwarded to ``dataset.Data`` as keyword arguments.

    The caller's dictionary is not modified.

    Returns:
        ``{"dataset": data}`` where ``data`` is the created ``dataset.Data``.

    Raises:
        Exception: if a URL uses ``https`` outside range mode, or if the
            bounds of an ``"a-b"`` index range are not non-negative integers.
        ValueError: if a comma-separated index is not an integer.
    """
    # Work on a copy so the caller's dictionary stays intact and the function
    # can safely be called again with the same arguments.
    params = dict(input_dict)

    use_range = params["range"] == "true"

    # Strip first and filter afterwards, so lines that contain only
    # whitespace (for example a stray "\r") do not end up as empty URLs.
    stripped = [line.strip() for line in params.pop("url").split("\n")]
    stripped = [address for address in stripped if address]

    if use_range:
        urls = stripped
    else:
        for address in stripped:
            if address.split("://")[0] == "https":
                raise Exception("Dataset should be accessible over HTTP.")
        urls = [[address] for address in stripped]

    raw_indices = params.pop("X_indices").replace(" ", "")
    bounds = raw_indices.split("-")
    if len(bounds) == 2:
        a, b = bounds
        if not a.isdigit() or not b.isdigit():
            raise Exception("Feature indices should be integers. Example: 1-10")
        # list(...) keeps the value a plain list on Python 2 and Python 3.
        X_indices = list(range(int(a), int(b)))
    else:
        X_indices = [int(v) for v in raw_indices.split(",") if v != ""]

    params["data_type"] = "gzip" if params["data_type"] == "true" else ""

    if params["atr_meta"] == "numeric":
        X_meta = ["c"] * len(X_indices)
    elif params["atr_meta"] == "discrete":
        X_meta = ["d"] * len(X_indices)
    else:
        X_meta = params["custom"]

    # Imported only after the input has been validated, so malformed
    # parameters are reported without needing the discomll package.
    from discomll import dataset

    data = dataset.Data(data_tag=urls,
                        X_indices=X_indices,
                        X_meta=X_meta,
                        generate_urls=use_range,
                        **params)

    return {"dataset": data}

romanorac's avatar
romanorac committed
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
def big_data_apply_classifier(input_dict):
    """Dispatch to the prediction routine that matches the fitted model.

    ``input_dict["fitmodel_url"]`` is tested with ``in`` against each known
    model marker, and the first match decides which ``*_predict`` function
    of this module receives ``input_dict``.

    Returns:
        The result of the selected ``*_predict`` function, or ``None`` when
        no marker matches.
    """
    fitmodel_url = input_dict["fitmodel_url"]

    # "wrf_fitmodel" is tested before "rf_fitmodel": the latter is a
    # substring of the former, so with a string-valued fitmodel_url a
    # weighted-forest model would otherwise be routed to rf_predict.
    dispatch = (
        ("naivebayes_fitmodel", naivebayes_predict),
        ("logreg_fitmodel", logreg_predict),
        ("linsvm_fitmodel", linsvm_predict),
        ("kmeans_fitmodel", kmeans_predict),
        ("dt_fitmodel", dt_predict),
        ("wrf_fitmodel", wrf_predict),
        ("rf_fitmodel", rf_predict),
        ("linreg_fitmodel", linreg_predict),
    )
    for marker, predict in dispatch:
        if marker in fitmodel_url:
            return predict(input_dict)

    # Unknown model type: keep the historical behaviour of returning None.
    return None

def lwlr_fit_predict(input_dict):
    """Run locally weighted linear regression and return its predictions.

    Reads ``"fitting_dataset"``, ``"training_dataset"`` and ``"tau"`` from
    ``input_dict`` and forwards them to
    ``locally_weighted_linear_regression.fit_predict`` with
    ``save_results=True``.

    Returns:
        ``{"string": <value returned by fit_predict>}``.
    """
    from discomll.regression import locally_weighted_linear_regression as lwlr

    run = lwlr.fit_predict
    options = {
        "fitting_data": input_dict["fitting_dataset"],
        "training_data": input_dict["training_dataset"],
        "tau": input_dict["tau"],
        "save_results": True,
    }
    return {"string": run(**options)}

def dt_fit(input_dict):
    """Fit a forest of distributed decision trees.

    ``input_dict["seed"]`` is turned into ``random_state`` (the string
    ``"None"`` becomes ``None``, anything else is converted with ``int``)
    and ``input_dict["bootstrap"]`` into a boolean (``True`` only for the
    string ``"true"``). The remaining settings are forwarded unchanged to
    ``forest_distributed_decision_trees.fit`` with ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.ensemble import forest_distributed_decision_trees

    seed = input_dict["seed"]
    if seed == "None":
        random_state = None
    else:
        random_state = int(seed)
    use_bootstrap = input_dict["bootstrap"] == "true"

    train = forest_distributed_decision_trees.fit
    settings = {
        "input": input_dict["dataset"],
        "max_tree_nodes": input_dict["tree_nodes"],
        "min_samples_leaf": input_dict["min_samples_leaf"],
        "min_samples_split": input_dict["min_samples_split"],
        "class_majority": input_dict["majority"],
        "bootstrap": use_bootstrap,
        "measure": input_dict["measure"],
        "accuracy": input_dict["accuracy"],
        "separate_max": input_dict["separate_max"],
        "random_state": random_state,
        "save_results": True,
    }
    model_url = train(**settings)
    return {"fitmodel_url": model_url}

def dt_predict(input_dict):
    """Predict with a fitted forest of distributed decision trees.

    Passes ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` to
    ``forest_distributed_decision_trees.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.ensemble import forest_distributed_decision_trees as trees

    result_url = trees.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        save_results=True,
    )
    return {"string": result_url}

def rf_fit(input_dict):
    """Fit a distributed random forest.

    ``input_dict["seed"]`` becomes ``random_state``: the string ``"None"``
    maps to ``None``, any other value is converted with ``int``. The other
    settings are forwarded unchanged to ``distributed_random_forest.fit``
    with ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.ensemble import distributed_random_forest

    seed = input_dict["seed"]
    if seed == "None":
        random_state = None
    else:
        random_state = int(seed)

    train = distributed_random_forest.fit
    settings = {
        "input": input_dict["dataset"],
        "trees_per_chunk": input_dict["trees_per_subset"],
        "max_tree_nodes": input_dict["tree_nodes"],
        "leaf_min_inst": input_dict["leaf_min_inst"],
        "class_majority": input_dict["majority"],
        "measure": input_dict["measure"],
        "split_fun": input_dict["split_fun"],
        "random_state": random_state,
        "save_results": True,
    }
    model_url = train(**settings)
    return {"fitmodel_url": model_url}

def rf_predict(input_dict):
    """Predict with a fitted distributed random forest.

    ``input_dict["seed"]`` becomes ``random_state`` (``"None"`` maps to
    ``None``, anything else goes through ``int``). The dataset, model URL
    and ``"diff"`` setting are forwarded to
    ``distributed_random_forest.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.ensemble import distributed_random_forest

    seed = input_dict["seed"]
    if seed == "None":
        random_state = None
    else:
        random_state = int(seed)

    apply_model = distributed_random_forest.predict
    settings = {
        "input": input_dict["dataset"],
        "fitmodel_url": input_dict["fitmodel_url"],
        "diff": input_dict["diff"],
        "random_state": random_state,
        "save_results": True,
    }
    return {"string": apply_model(**settings)}

def wrf_fit(input_dict):
    """Fit a distributed weighted forest.

    ``input_dict["seed"]`` becomes ``random_state``: the string ``"None"``
    maps to ``None``, any other value is converted with ``int``. The other
    settings are forwarded unchanged to ``distributed_weighted_forest.fit``
    with ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.ensemble import distributed_weighted_forest

    seed = input_dict["seed"]
    if seed == "None":
        random_state = None
    else:
        random_state = int(seed)

    train = distributed_weighted_forest.fit
    settings = {
        "input": input_dict["dataset"],
        "trees_per_chunk": input_dict["trees_per_subset"],
        "max_tree_nodes": input_dict["tree_nodes"],
        "leaf_min_inst": input_dict["leaf_min_inst"],
        "class_majority": input_dict["majority"],
        "measure": input_dict["measure"],
        "split_fun": input_dict["split_fun"],
        "save_results": True,
        "random_state": random_state,
    }
    model_url = train(**settings)
    return {"fitmodel_url": model_url}
HippoHoppy's avatar
HippoHoppy committed
142

romanorac's avatar
romanorac committed
143
def wrf_predict(input_dict):
    """Predict with a fitted distributed weighted forest.

    Passes ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` to
    ``distributed_weighted_forest.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.ensemble import distributed_weighted_forest

    apply_model = distributed_weighted_forest.predict
    settings = {
        "input": input_dict["dataset"],
        "fitmodel_url": input_dict["fitmodel_url"],
        "save_results": True,
    }
    return {"string": apply_model(**settings)}
HippoHoppy's avatar
HippoHoppy committed
150

romanorac's avatar
romanorac committed
151 152 153
def linsvm_fit(input_dict):
    """Fit a linear SVM model.

    Passes ``input_dict["dataset"]`` and the ``"nu"`` setting to
    ``linear_svm.fit`` with ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.classification import linear_svm

    model_url = linear_svm.fit(
        input_dict["dataset"],
        nu=input_dict["nu"],
        save_results=True,
    )
    return {"fitmodel_url": model_url}

romanorac's avatar
romanorac committed
158 159
def linsvm_predict(input_dict):
    """Predict with a fitted linear SVM model.

    Passes ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` to
    ``linear_svm.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.classification import linear_svm

    result_url = linear_svm.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        save_results=True,
    )
    return {"string": result_url}
HippoHoppy's avatar
HippoHoppy committed
165

romanorac's avatar
romanorac committed
166
def linreg_fit(input_dict):
    """Fit a linear regression model.

    Passes ``input_dict["dataset"]`` to ``linear_regression.fit`` with
    ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.regression import linear_regression

    training_data = input_dict["dataset"]
    model_url = linear_regression.fit(training_data, save_results=True)
    return {"fitmodel_url": model_url}

romanorac's avatar
romanorac committed
174
def linreg_predict(input_dict):
    """Predict with a fitted linear regression model.

    Passes ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` to
    ``linear_regression.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.regression import linear_regression

    result_url = linear_regression.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        save_results=True,
    )
    return {"string": result_url}
HippoHoppy's avatar
HippoHoppy committed
182 183 184 185

def kmeans_fit(input_dict):
    """Fit a k-means clustering model.

    ``input_dict["seed"]`` becomes ``random_state``: the string ``"None"``
    maps to ``None``, any other value is converted with ``int``. The
    dataset, ``"clusters"`` and ``"itr"`` settings are forwarded to
    ``kmeans.fit`` with ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.clustering import kmeans

    seed = input_dict["seed"]
    if seed == "None":
        random_state = None
    else:
        random_state = int(seed)

    model_url = kmeans.fit(
        input_dict["dataset"],
        n_clusters=input_dict["clusters"],
        max_iterations=input_dict["itr"],
        random_state=random_state,
        save_results=True,
    )
    return {"fitmodel_url": model_url}

def kmeans_predict(input_dict):
    """Predict with a fitted k-means model.

    Passes ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` to
    ``kmeans.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.clustering import kmeans

    result_url = kmeans.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        save_results=True,
    )
    return {"string": result_url}
HippoHoppy's avatar
HippoHoppy committed
203

romanorac's avatar
romanorac committed
204
def logreg_fit(input_dict):
    """Fit a logistic regression model.

    Passes ``input_dict["dataset"]`` together with the ``"alpha"`` and
    ``"itr"`` settings to ``logistic_regression.fit`` with
    ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.classification import logistic_regression

    model_url = logistic_regression.fit(
        input_dict["dataset"],
        alpha=input_dict["alpha"],
        max_iterations=input_dict["itr"],
        save_results=True,
    )
    return {"fitmodel_url": model_url}
HippoHoppy's avatar
HippoHoppy committed
212

romanorac's avatar
romanorac committed
213
def logreg_predict(input_dict):
    """Predict with a fitted logistic regression model.

    Passes ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` to
    ``logistic_regression.predict`` with ``save_results=True``.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.classification import logistic_regression

    result_url = logistic_regression.predict(
        input_dict["dataset"],
        fitmodel_url=input_dict["fitmodel_url"],
        save_results=True,
    )
    return {"string": result_url}
HippoHoppy's avatar
HippoHoppy committed
220

romanorac's avatar
romanorac committed
221 222
def naivebayes_fit(input_dict):
    """Fit a naive Bayes model.

    Passes ``input_dict["dataset"]`` to ``naivebayes.fit`` with
    ``save_results=True``.

    Returns:
        ``{"fitmodel_url": <value returned by fit>}``.
    """
    from discomll.classification import naivebayes

    training_data = input_dict["dataset"]
    model_url = naivebayes.fit(training_data, save_results=True)
    return {"fitmodel_url": model_url}
HippoHoppy's avatar
HippoHoppy committed
227

romanorac's avatar
romanorac committed
228 229 230
def naivebayes_predict(input_dict):
    """Predict with a fitted naive Bayes model.

    Passes ``input_dict["dataset"]``, ``input_dict["fitmodel_url"]`` and the
    ``m`` setting to ``naivebayes.predict`` with ``save_results=True``.

    ``input_dict["m"]`` falls back to ``1`` when it is the empty string;
    any other value is forwarded unchanged.
    NOTE(review): no type conversion is applied to a non-empty ``m`` --
    confirm that ``naivebayes.predict`` accepts the value as supplied.

    Returns:
        ``{"string": <value returned by predict>}``.
    """
    from discomll.classification import naivebayes

    # Previously the default was computed but the raw input_dict["m"] was
    # still passed on, so an empty field reached predict() as "".
    m = 1 if input_dict["m"] == "" else input_dict["m"]

    predictions_url = naivebayes.predict(input=input_dict["dataset"],
                                         fitmodel_url=input_dict["fitmodel_url"],
                                         m=m,
                                         save_results=True)

    return {"string": predictions_url}
HippoHoppy's avatar
HippoHoppy committed
238

romanorac's avatar
romanorac committed
239 240 241
def results_to_file(input_dict):
    """Placeholder: the implementation is in visualization_views.py.

    ``input_dict`` is ignored and an empty dict is returned.
    """
    return dict()
HippoHoppy's avatar
HippoHoppy committed
242

romanorac's avatar
romanorac committed
243 244 245
def measure_distribution(input_dict):
    """Placeholder: the implementation is in visualization_views.py.

    ``input_dict`` is ignored and an empty dict is returned.
    """
    return dict()
HippoHoppy's avatar
HippoHoppy committed
246

romanorac's avatar
romanorac committed
247 248 249
def model_view(input_dict):
    """Placeholder: the implementation is in visualization_views.py.

    ``input_dict`` is ignored and an empty dict is returned.
    """
    return dict()
HippoHoppy's avatar
HippoHoppy committed
250

romanorac's avatar
romanorac committed
251 252 253 254 255 256 257
def bigdata_ca(input_dict):
    """Placeholder: the implementation is in visualization_views.py.

    ``input_dict`` is ignored and an empty dict is returned.
    """
    return dict()

def bigdata_mse(input_dict):
    """Placeholder: the implementation is in visualization_views.py.

    ``input_dict`` is ignored and an empty dict is returned.
    """
    return dict()
HippoHoppy's avatar
HippoHoppy committed
258