Commit aebb943b authored by HippoHoppy's avatar HippoHoppy

dodal big data package

parent d9b6d75e
This diff is collapsed.
from django.shortcuts import render
def big_data_filter_integers(request, input_dict, output_dict, widget):
    """Render the interaction dialog for the integer filter widget.

    The template is given the widget itself and the list found under
    ``input_dict['intList']``. ``output_dict`` is accepted but not used.
    """
    template_name = 'interactions/big_data_filter_integers.html'
    context = {
        'widget': widget,
        'intList': input_dict['intList'],
    }
    return render(request, template_name, context)
\ No newline at end of file
def file_url(input_dict):
    """Build a discomll ``dataset.Data`` object from the widget inputs.

    ``input_dict["X_indices"]`` selects the feature indices. Spaces are
    ignored and two notations are accepted:

    * ``"a-b"`` -- expanded with ``range(a, b)``, so ``b`` itself is excluded;
    * ``"1,2,5"`` -- a comma-separated list (empty items are skipped).

    ``input_dict["data_type"]`` equal to the string ``"true"`` is translated
    to ``"gzip"``; any other value becomes ``""``. ``input_dict["url"]`` is
    passed as the single element of ``data_tag``. Every entry except
    ``"X_indices"`` is also forwarded to ``dataset.Data`` as a keyword
    argument.

    The caller's dictionary is not modified; adjustments are made on a
    shallow copy.

    Returns a dict holding the constructed object under ``"dataset"``.

    Raises ``ValueError`` if the feature indices are not integers.
    """
    from discomll import dataset

    # Work on a copy so the widget's inputs are not altered as a side effect.
    params = dict(input_dict)
    indices_spec = params.pop("X_indices").replace(" ", "")

    bounds = indices_spec.split("-")
    if len(bounds) == 2:
        start, stop = bounds
        if not (start.isdigit() and stop.isdigit()):
            raise ValueError("Feature indices should be integers. Example: 1-10")
        X_indices = range(int(start), int(stop))
    else:
        X_indices = [int(v) for v in indices_spec.split(",") if v != ""]

    params["data_type"] = "gzip" if params["data_type"] == "true" else ""
    data = dataset.Data(data_tag=[params["url"]],
                        X_indices=X_indices,
                        **params)
    return {"dataset": data}
def log_reg_fit(input_dict):
    """Fit a discomll logistic regression model.

    Reads the dataset from ``input_dict["dataset"]``, ``alpha`` from
    ``input_dict["alpha"]`` and the iteration limit from
    ``input_dict["itr"]``. The value returned by
    ``logistic_regression.fit`` is handed back under ``"fitmodel_url"``.
    """
    from discomll.classification import logistic_regression

    training_data = input_dict["dataset"]
    alpha = input_dict["alpha"]
    iteration_limit = input_dict["itr"]

    model_url = logistic_regression.fit(training_data,
                                        alpha=alpha,
                                        max_iterations=iteration_limit)
    return {"fitmodel_url": model_url}
def log_reg_predict(input_dict):
    """Run logistic regression prediction and format the results as text.

    ``input_dict["dataset"]`` is the data to classify and
    ``input_dict["fitmodel_url"]`` is passed to
    ``logistic_regression.predict`` as ``thetas_url``.

    Returns a dict whose ``"string"`` entry starts with the header line
    ``ID__Pred__Real__Probs`` followed by one space-separated line per
    result: id, predicted value, real value and the list of probabilities
    rounded to four decimals.
    """
    from discomll.classification import logistic_regression
    from disco.core import result_iterator

    predictions_url = logistic_regression.predict(input_dict["dataset"],
                                                  thetas_url=input_dict["fitmodel_url"])

    # Collect the lines and join once instead of growing a string with +=,
    # which is quadratic for large result sets.
    lines = ["ID__Pred__Real__Probs\n"]
    for X_id, (y_predicted, y_real, probs) in result_iterator(predictions_url):
        rounded = [round(p, 4) for p in probs]
        fields = [str(X_id), str(y_predicted), str(y_real), str(rounded)]
        lines.append(" ".join(fields) + "\n")
    return {"string": "".join(lines)}
def gaussian_naive_bayes_fit(input_dict):
    """Fit a discomll Gaussian naive Bayes model.

    Calls ``naivebayes_gaussian.fit`` on ``input_dict["dataset"]`` with
    ``save_results=True`` and returns its result under ``"fitmodel_url"``.
    """
    from discomll.classification import naivebayes_gaussian

    training_data = input_dict["dataset"]
    model_url = naivebayes_gaussian.fit(training_data, save_results=True)
    return {"fitmodel_url": model_url}
def gaussian_naive_bayes_predict(input_dict):
    """Run Gaussian naive Bayes prediction and format the results as text.

    ``input_dict["dataset"]`` is the data to classify,
    ``input_dict["fitmodel_url"]`` identifies the fitted model, and
    ``input_dict["log_probs"]`` enables ``log_probs`` when it equals the
    string ``"true"``. The prediction is requested with
    ``save_results=True``.

    Returns a dict whose ``"string"`` entry starts with the header line
    ``ID__Pred__Real__Probs`` followed by one space-separated line per
    result: id, predicted value, real value and the list of probabilities
    rounded to four decimals.
    """
    from discomll.classification import naivebayes_gaussian
    from disco.core import result_iterator

    predictions_url = naivebayes_gaussian.predict(input=input_dict["dataset"],
                                                  fit_model_url=input_dict["fitmodel_url"],
                                                  log_probs=input_dict["log_probs"] == "true",
                                                  save_results=True)

    # Collect the lines and join once instead of growing a string with +=,
    # which is quadratic for large result sets.
    lines = ["ID__Pred__Real__Probs\n"]
    for X_id, (y_predicted, y_real, probs) in result_iterator(predictions_url):
        rounded = [round(p, 4) for p in probs]
        fields = [str(X_id), str(y_predicted), str(y_real), str(rounded)]
        lines.append(" ".join(fields) + "\n")
    return {"string": "".join(lines)}
def multinomail_naive_bayes_fit(input_dict):
    """Fit a discomll multinomial naive Bayes model.

    Calls ``naivebayes_multinomial.fit`` on ``input_dict["dataset"]`` with
    ``save_results=True`` and returns its result under ``"fitmodel_url"``.

    NOTE: the function name is misspelled ("multinomail"); it is kept as-is
    because callers refer to it by this name.
    """
    from discomll.classification import naivebayes_multinomial

    training_data = input_dict["dataset"]
    model_url = naivebayes_multinomial.fit(training_data, save_results=True)
    return {"fitmodel_url": model_url}
def multinomial_naive_bayes_predict(input_dict):
    """Run multinomial naive Bayes prediction and format the results as text.

    ``input_dict["dataset"]`` is the data to classify and
    ``input_dict["fitmodel_url"]`` identifies the fitted model.
    ``input_dict["m"]`` is forwarded as ``m``; an empty string is replaced
    by ``1``. The prediction is requested with ``save_results=True``.

    Returns a dict whose ``"string"`` entry starts with the header line
    ``ID__Pred__Real__Probs`` followed by one space-separated line per
    result: id, predicted value, real value and the list of probabilities
    rounded to four decimals.
    """
    from discomll.classification import naivebayes_multinomial
    from disco.core import Disco
    from disco.core import result_iterator

    ddfs = Disco().ddfs
    # Debug output: blobs behind the fitted model. Written as a call so the
    # line is valid on both Python 2 and Python 3.
    # NOTE(review): consider dropping these prints or routing them to a logger.
    print(list(ddfs.blobs(input_dict["fitmodel_url"])))

    # NOTE(review): a non-empty "m" is forwarded unchanged; confirm whether
    # the widget supplies it as a number or as a string.
    m = 1 if input_dict["m"] == "" else input_dict["m"]
    predictions_url = naivebayes_multinomial.predict(input=input_dict["dataset"],
                                                     fit_model_url=input_dict["fitmodel_url"],
                                                     m=m,
                                                     save_results=True)
    # Debug output: blobs behind the stored predictions.
    print(list(ddfs.blobs(predictions_url)))

    # This part belongs in the results widget.
    # Collect the lines and join once instead of growing a string with +=,
    # which is quadratic for large result sets.
    lines = ["ID__Pred__Real__Probs\n"]
    for X_id, (y_predicted, y_real, probs) in result_iterator(predictions_url):
        rounded = [round(p, 4) for p in probs]
        fields = [str(X_id), str(y_predicted), str(y_real), str(rounded)]
        lines.append(" ".join(fields) + "\n")
    return {"string": "".join(lines)}
import os
# === STANDARD PACKAGE SETTINGS ===
# Directory that contains this settings module.
PACKAGE_ROOT = os.path.dirname(__file__)
# === AUTO IMPORT OPTIONS ===
# If AUTO_IMPORT_DB is True, the given data file is automatically imported
# when the ClowdFlows project is newly deployed or refreshed from git.
AUTO_IMPORT_DB = True
# For a description of AUTO_IMPORT_DB_REPLACE_OPTION see the 'replace' option
# of the workflows/import_package command.
AUTO_IMPORT_DB_REPLACE_OPTION = True
# If file(s) other than ./db/package_data.json should be imported, adjust
# AUTO_IMPORT_DB_FILES accordingly.
AUTO_IMPORT_DB_FILES = [os.path.join(PACKAGE_ROOT,'db/package_data.json')]
<div id="widgetinteract-{{widget.pk}}" rel="{{widget.pk}}" class="widgetinteractdialog" title="{{widget.name}} wants your input!">
<form id="interactionform-{{widget.pk}}" name="interactionform-{{widget.pk}}">
{% for i in intList %}
<input type="checkbox" name="intListOut" value="{{i}}" style="display:inline;width:auto;">{{i}}<br>
{% endfor %}
<input type="hidden" name="widget_id" value="{{widget.pk}}">
</form>
</div>
\ No newline at end of file
<div id="widgetvisualization-{{widget.pk}}" rel="{{widget.pk}}" class="widgetvisualizationdialog" title="{{widget.name}} visualization">
<div style="width:400px;font-family:monospace;">
<table style="width:auto; border-collapse:collapse;">
{% for i in input_dict.intList %}
<tr style="width:auto">
<td style="text-align: right; width:auto; line-height: 0.5em; padding: 2px; padding-bottom: 4px; padding-top: 0; margin:2px;">
{% if forloop.first %} {% else %}+{% endif %}
</td>
<td style="text-align: right; width:auto; line-height: 0.5em; padding: 2px; padding-bottom: 4px; padding-top: 0; margin:2px;">
{{ i }}
</td>
</tr>
{% endfor %}
<tr style="width:auto">
<td style="text-align: right; width:auto; line-height: 0.5em; border-top: 1px solid black; padding: 2px; padding-top: 4px; margin:2px;">
=
</td>
<td style="text-align: right; width:auto; line-height: 0.5em; border-top: 1px solid black; padding: 2px;margin:2px;">
{{ input_dict.sum }}
</td>
</tr>
</table>
<br/>
{{ check }}
</div>
</div>
\ No newline at end of file
from django.conf.urls.defaults import patterns, include, url
# URL configuration for this package. No routes are registered: patterns()
# receives only the empty view prefix. The commented-out entries below point
# at workflows.latino.views and are presumably kept as templates for future
# routes -- TODO confirm or remove.
urlpatterns = patterns('',
#url(r'^get-adc-index/widget(?P<widget_id>[0-9]+)/nx/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'),
#url(r'^get-adc-index/widget(?P<widget_id>[0-9]+)/(?P<narrow_doc>n?)x/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'),
#url(r'^get-adc-index/widget(?P<widget_id>[0-9]+)/(?P<narrow_doc>n?)x/Index(?P<document_id_from>[0-9]+)-(?P<document_id_to>[0-9]+).html$', 'workflows.latino.views.get_adc_index', name='get adc index'),
#url(r'^get-adc-index/widget(?P<widget_id>[0-9]+)/(?P<narrow_doc>n?)x/Document(?P<document_id>[0-9]+).html', 'workflows.latino.views.get_adc_page', name='get adc page'),
)
\ No newline at end of file
from django.shortcuts import render
def big_data_display_summation(request, input_dict, output_dict, widget):
    """Render the summation visualization together with a sanity check.

    The sum of ``input_dict['intList']`` is compared with
    ``input_dict['sum']`` and a matching message is passed to the template
    as ``check``, alongside the widget and both dictionaries.
    """
    totals_match = sum(input_dict['intList']) == input_dict['sum']
    check = ('The calculation appears correct.' if totals_match
             else 'The calculation appears incorrect!')
    context = {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict,
        'check': check,
    }
    return render(request, 'visualizations/big_data_display_integers.html', context)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment