Commit 8cac1c12 authored by borut's avatar borut
Browse files

vipercharts package commit

parent 3ce419b0
......@@ -206,6 +206,7 @@ INSTALLED_APPS_WORKFLOWS_SUB = (
'workflows.streaming',
'workflows.bio3graph',
'workflows.noise',
'workflows.vipercharts',
#WORKFLOWS_SUBAPP_PLACEHOLDER
)
......
......@@ -8,11 +8,11 @@ django-orderable-inlines==0.0.6
django-picklefield==0.2.1
httplib2==0.7.5
wsgiref==0.1.2
mysql-connector-python==1.0.9
mysql-connector-python>=1.0.9
numpy==1.7.1
pydot==1.0.28
pyparsing==1.5.7
scikit-learn==0.13.1
scikit-learn==0.13
scipy==0.12.0
feedparser==5.1.2
networkx==1.7
......
def uci_to_odt(input_dict):
    """Load one of the bundled UCI datasets as an Orange example table.

    Args:
        input_dict: widget inputs; reads 'filename', the name of the dataset
            file inside the "uci-datasets" folder under FILES_FOLDER.

    Returns:
        dict with key 'data' holding the loaded orange.ExampleTable.
    """
    import os
    from mothra.settings import FILES_FOLDER
    import orange
    # Build the path with os.path.join instead of a hard-coded "/" or "\\"
    # so the same code works on every platform, and load the table only once.
    dataset_path = os.path.join(FILES_FOLDER, "uci-datasets", input_dict['filename'])
    output_dict = {}
    output_dict['data'] = orange.ExampleTable(dataset_path)
    return output_dict
def odt_to_arff(input_dict):
......
[
{
"pk": 19,
"model": "workflows.category",
"fields": {
"uid": "d2dfe5ca-7d73-4b4d-b922-b389ec610d99",
"parent": null,
"workflow": null,
"user": null,
"order": 1,
"name": "Orange"
}
},
{
"pk": 26,
"model": "workflows.category",
"fields": {
"uid": "aa3ed779-1e0b-4c6b-883e-24d85b9a6009",
"parent": 19,
"workflow": null,
"user": null,
"order": 3,
"name": "Utilities"
}
},
{
"pk": 212,
"model": "workflows.abstractwidget",
"fields": {
"category": 26,
"treeview_image": "",
"name": "Select Class from ODT",
"is_streaming": false,
"uid": "14b90c25-47ae-4df1-8781-de2baad6cff3",
"interaction_view": "class_from_odt_interactive",
"image": "",
"package": "vipercharts",
"static_image": "",
"post_interact_action": "class_from_odt_selection",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "class_from_odt",
"wsdl_method": "",
"wsdl": "",
"interactive": true,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 448,
"model": "workflows.abstractinput",
"fields": {
"widget": 212,
"name": "Orange data table",
"short_name": "odt",
"uid": "660b5fa0-92dd-4462-83dd-75908e3d9d45",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "data",
"parameter": false,
"order": 1,
"description": ""
}
},
{
"pk": 228,
"model": "workflows.abstractoutput",
"fields": {
"widget": 212,
"name": "Selected Class",
"short_name": "cls",
"variable": "target",
"uid": "dd95e10c-9746-4332-8273-0339cbe0e221",
"order": 1,
"description": ""
}
},
{
"pk": 38,
"model": "workflows.category",
"fields": {
"uid": "f2821411-7b06-46ea-b577-3a06fecd02de",
"parent": null,
"workflow": null,
"user": null,
"order": 1,
"name": "Vipercharts"
}
},
{
"pk": 206,
"model": "workflows.abstractwidget",
"fields": {
"category": 38,
"treeview_image": "",
"name": "Create Integer List",
"is_streaming": false,
"uid": "52de92eb-a762-42ab-88c6-80b70a8bc604",
"interaction_view": "",
"image": "",
"package": "vipercharts",
"static_image": "construction_work .png",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "vipercharts_create_integers",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 439,
"model": "workflows.abstractinput",
"fields": {
"widget": 206,
"name": "Integer List String",
"short_name": "str",
"uid": "ca2ca91a-448f-411a-8e35-e7fce21fad01",
"default": "3\r\n2\r\n1\r\n4",
"required": false,
"multi": false,
"parameter_type": "textarea",
"variable": "intStr",
"parameter": true,
"order": 1,
"description": "Comma or new-line separated list of integers"
}
},
{
"pk": 440,
"model": "workflows.abstractinput",
"fields": {
"widget": 206,
"name": "Sort list",
"short_name": "bol",
"uid": "a67f2073-9b5d-4c0d-a9bb-f04babedebaa",
"default": "true",
"required": true,
"multi": false,
"parameter_type": "checkbox",
"variable": "sort",
"parameter": true,
"order": 2,
"description": "Should the list be sorted"
}
},
{
"pk": 224,
"model": "workflows.abstractoutput",
"fields": {
"widget": 206,
"name": "Integer List",
"short_name": "lst",
"variable": "intList",
"uid": "87b9d0f3-219f-4ebd-afdf-db66da53c182",
"order": 1,
"description": "List of integers"
}
},
{
"pk": 210,
"model": "workflows.abstractwidget",
"fields": {
"category": 38,
"treeview_image": "",
"name": "PR space chart",
"is_streaming": false,
"uid": "5dc72a81-347c-4a40-8658-2591961477e5",
"interaction_view": "",
"image": "",
"package": "vipercharts",
"static_image": "",
"post_interact_action": "",
"user": null,
"visualization_view": "vipercharts_pr_space_view",
"streaming_visualization_view": "",
"action": "vipercharts_pr_space",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 445,
"model": "workflows.abstractinput",
"fields": {
"widget": 210,
"name": "Performance results",
"short_name": "prf",
"uid": "33e1ee25-2b74-49c9-99fb-30d9141f9119",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "perf",
"parameter": false,
"order": 1,
"description": ""
}
},
{
"pk": 207,
"model": "workflows.abstractwidget",
"fields": {
"category": 38,
"treeview_image": "",
"name": "Filter Integers",
"is_streaming": false,
"uid": "14ddceb7-987a-42c4-b72e-3e203026d8d9",
"interaction_view": "vipercharts_filter_integers",
"image": "",
"package": "vipercharts",
"static_image": "construction_work .png",
"post_interact_action": "vipercharts_post_filter_integers",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "vipercharts_pre_filter_integers",
"wsdl_method": "",
"wsdl": "",
"interactive": true,
"has_progress_bar": false,
"order": 2,
"description": ""
}
},
{
"pk": 441,
"model": "workflows.abstractinput",
"fields": {
"widget": 207,
"name": "Integer List",
"short_name": "lst",
"uid": "b69481cc-be4b-430a-b9b8-62c2805776ee",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "intList",
"parameter": false,
"order": 1,
"description": "List of integers"
}
},
{
"pk": 225,
"model": "workflows.abstractoutput",
"fields": {
"widget": 207,
"name": "Filtered Integer List",
"short_name": "lst",
"variable": "intList",
"uid": "00bad29b-3ecf-4545-84f2-7fb79f52f856",
"order": 1,
"description": "Filtered list of integers"
}
},
{
"pk": 208,
"model": "workflows.abstractwidget",
"fields": {
"category": 38,
"treeview_image": "",
"name": "Sum Integers",
"is_streaming": false,
"uid": "dd6946da-4cbe-454e-98de-69c89f7e831b",
"interaction_view": "",
"image": "",
"package": "vipercharts",
"static_image": "construction_work .png",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "vipercharts_sum_integers",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 3,
"description": ""
}
},
{
"pk": 442,
"model": "workflows.abstractinput",
"fields": {
"widget": 208,
"name": "Integer List",
"short_name": "lst",
"uid": "8e42ebf0-9f19-47f9-81f7-3bf0ef475d85",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "intList",
"parameter": false,
"order": 1,
"description": "List of integers"
}
},
{
"pk": 226,
"model": "workflows.abstractoutput",
"fields": {
"widget": 208,
"name": "Sum",
"short_name": "int",
"variable": "sum",
"uid": "7a54ced2-2d92-48bd-9b2a-9327f42ffb8e",
"order": 1,
"description": "Sum of integer list"
}
},
{
"pk": 209,
"model": "workflows.abstractwidget",
"fields": {
"category": 38,
"treeview_image": "",
"name": "Display Summation",
"is_streaming": false,
"uid": "bd990642-f04d-454a-9703-7fde35fb9cdb",
"interaction_view": "",
"image": "",
"package": "vipercharts",
"static_image": "construction_work .png",
"post_interact_action": "",
"user": null,
"visualization_view": "vipercharts_display_summation",
"streaming_visualization_view": "",
"action": "vipercharts_pre_display_summation",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 4,
"description": ""
}
},
{
"pk": 443,
"model": "workflows.abstractinput",
"fields": {
"widget": 209,
"name": "Integer List",
"short_name": "lst",
"uid": "b518fe4a-d6e2-43fa-87be-3c0d845c0878",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "intList",
"parameter": false,
"order": 1,
"description": "List of integers"
}
},
{
"pk": 444,
"model": "workflows.abstractinput",
"fields": {
"widget": 209,
"name": "Sum",
"short_name": "int",
"uid": "72dc1e14-9a56-4734-9d6e-54f84e8eb054",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "sum",
"parameter": false,
"order": 2,
"description": "Sum (possibly correct) of integer list"
}
}
]
\ No newline at end of file
from django.shortcuts import render
def vipercharts_filter_integers(request, input_dict, output_dict, widget):
    """Render the interaction page for filtering the integer list.

    The template receives the widget and the incoming 'intList' from
    input_dict. output_dict is accepted but not used here.
    """
    template_name = 'interactions/vipercharts_filter_integers.html'
    context = {
        'widget': widget,
        'intList': input_dict['intList'],
    }
    return render(request, template_name, context)
def class_from_odt_interactive(request, input_dict, output_dict, widget):
    """Render the class-selection page for an Orange data table.

    Reads the class variable's values from input_dict['data'] and the
    current 'target' from output_dict, and passes both to the template
    together with the widget.
    """
    # Read the inputs in the same order as before: data table first, then target.
    class_values = input_dict['data'].domain.class_var.values
    current_target = output_dict['target']
    context = {
        'widget': widget,
        'classes': class_values,
        'target': current_target,
    }
    return render(request, 'interactions/class_selection.html', context)
\ No newline at end of file
import re
def vipercharts_create_integers(input_dict):
    """Parse a free-form string into a list of integers.

    Args:
        input_dict: widget inputs; reads 'intStr' (the text to parse) and
            'sort' (the string "true", compared case-insensitively, or a
            boolean) which requests ascending order.

    Returns:
        dict with key 'intList' holding the parsed integers. Tokens that
        are not valid integers are skipped.
    """
    # A leading sign belongs to the number only when it starts a token; the
    # lookbehind keeps "1-2" parsing as 1 and 2 while "-2" now yields -2
    # instead of silently losing its sign.
    tokens = re.findall(r'(?<!\w)[-+]?\w+', input_dict['intStr'])
    intList = []
    for token in tokens:
        try:
            intList.append(int(token))
        except ValueError:
            # Not an integer (e.g. a word): skip it, as the input is free text.
            continue
    sort_flag = input_dict['sort']
    if isinstance(sort_flag, bool):
        wants_sort = sort_flag
    else:
        wants_sort = sort_flag.lower() == "true"
    if wants_sort:
        intList.sort()
    return {'intList': intList}
def vipercharts_sum_integers(input_dict):
    """Return the sum of input_dict['intList'] under the key 'sum'."""
    return {'sum': sum(input_dict['intList'])}
def vipercharts_pre_filter_integers(input_dict):
    """Pass the inputs through unchanged.

    The very same dict object is returned; no copy is made.
    """
    passthrough = input_dict
    return passthrough
def vipercharts_post_filter_integers(postdata, input_dict, output_dict):
    """Build the filtered integer list from the submitted interaction form.

    Args:
        postdata: mapping of submitted form data; reads 'intListOut', an
            iterable of the values the user kept.
        input_dict: accepted but not used here.
        output_dict: accepted but not used here.

    Returns:
        dict with key 'intList' holding the submitted values converted to
        int. Values that cannot be converted are skipped.
    """
    # When nothing is selected the key is absent from the submitted data;
    # treat that as an empty selection instead of raising KeyError.
    # NOTE(review): if postdata is a raw Django QueryDict this yields a single
    # string, which is then iterated per character - confirm the caller
    # passes lists of values.
    submitted = postdata.get('intListOut', [])
    intList = []
    for value in submitted:
        try:
            intList.append(int(value))
        except (TypeError, ValueError):
            # Best effort: drop entries that are not integers.
            continue
    return {'intList': intList}
def vipercharts_pre_display_summation(input_dict):
    """Return an empty output dict; input_dict is not read."""
    return dict()
# Prepare curve data
def vipercharts_prepareCurveData(input_dict): #, subtype
import math
nPoints=4
performance = input_dict['predictions']#chartdata
subtype = input_dict['subtype']
kenmax = 0.5
ratemax = 0.5
for curve in performance:
n = len(curve['actual'])
negs = curve['actual'].count(0)
poss = curve['actual'].count(1)
if poss == 0 or negs == 0:
print "Class Error, zero poss or zero negs, only one class or other type error."
return []
try:
ranks = curve['rank']
except:
ranks = range(n+1)[1:] # ranks from 1
paralel =[]
for i in range(n):
paralel.append([curve['actual'][i], float(curve['predicted'][i])])
if (subtype == '-score'):
ROCseries = [[0,0, '-Inf']]; PRseries = [[0,1, '-Inf']]; LIFTseries = [[0,0, '-Inf']]
ROChull = [[0,0,'-Inf']]; COSTseries = [[0,0,'-Inf']]; RATEseries = []; KENseries = [[0,0]]; KENup=[[0,1]]; KENdown=[[0,0]]
_oldrate = 0
_oldloss = 0
AUC = 0
AUPR = 0
ranked = sorted(paralel, key = lambda pair:pair[1], reverse=True)
print "ranked:"
print ranked
k = 0
tp = 0; fp = 0; tp_old = 0; fp_old = 0; n1 = 0; concordant_pairs = 0; discordant_pairs = 0;
while k < len(ranked):
addedconc = 0; addeddisc = 0;
threshold = ranked[k][1];
group = [x[0] for x in ranked if x[1] >= threshold]
tp = group.count(1)
fp = group.count(0)
#next k is len(group).
ties = len(group) - k
n1 += ties * (ties-1)/2
concordant_pairs += tp_old * (fp - fp_old)
discordant_pairs += fp_old * (tp - tp_old)
ROCpoint = [fp*1.0/negs,tp*1.0/poss, threshold]
ROCseries.append(ROCpoint)
AUC += (ROCpoint[1] + ROCseries[-2][1]) * (ROCpoint[0] - ROCseries[-2][0]) * 0.5
PRseries.append([tp*1.0/poss, tp*1.0/(tp+fp), threshold])
AUPR += (PRseries[-1][1] + PRseries[-2][1]) * (PRseries[-1][0] - PRseries[-2][0]) * 0.5
LIFTseries.append([len(group)*1.0/n, tp*1.0/poss, threshold])
#Convex hull and lower envelope:
while len(ROChull)>=2 and (ROChull[-1][0]==ROCpoint[0] or (ROChull[-2][0]!=ROChull[-1][0] and (ROChull[-1][1]-ROChull[-2][1])/(ROChull[-1][0]-ROChull[-2][0]) <= (ROCpoint[1]-ROChull[-1][1])/(ROCpoint[0]-ROChull[-1][0]))):
ROChull.pop()
COSTseries.pop()
ROChull.append(ROCpoint)
if(ROCpoint[0] != ROChull[-2][0]):
slope = (ROCpoint[1] - ROChull[-2][1]) / (ROCpoint[0] - ROChull[-2][0])
intercept = ROCpoint[1] - slope * ROCpoint[0]
COSTseries.append([1 / (slope + 1), (1 - intercept) / (1 + slope), threshold])
else:
if len(COSTseries) == 0:
COSTseries.append([0,0,threshold])
else:
COSTseries[0][2] = threshold
COSTend = 1 - ROCpoint[1]
#Rate driven curve:
#The Rate driven curve is a list of intervals. Each interval is a set of points on the appropriate parabola. There are nPoints number of points
RATEinterval = []
pi0 = poss * 1.0 / n
pi1 = 1 - pi0
_newrate = pi1*ROCpoint[0]+pi0*ROCpoint[1]
_newloss = 2*(_newrate*(pi0-_newrate) + pi1*ROCpoint[0])
RATEinterval.append([_oldrate, _oldloss, threshold, performance.index(curve)+1])
for i in range(1, nPoints):
alpha = i * 1.0/nPoints
rate = _oldrate + alpha * (_newrate - _oldrate)
loss = 2 * (rate * (pi0 - rate) + pi1 * (ROCseries[-2][0] + alpha * (ROCpoint[0] - ROCseries[-2][0])))
RATEinterval.append([rate, loss, 0])
RATEinterval.append([_newrate, _newloss, 0])
RATEseries.append(RATEinterval)
if _newloss > ratemax:
ratemax = _newloss
m = 0.5*(pi0+pi1*(ROCseries[-2][0]-ROCpoint[0])/(_newrate-_oldrate))
if m<_newrate and m>_oldrate:
mvalue=2*(m*(pi0-m)+pi1*((_newrate-m)*ROCseries[-2][0] + (m-_oldrate)*ROCpoint[0])/(_newrate - _oldrate))
if mvalue > ratemax:
ratemax = mvalue
#Kendall curve:
if _newrate <= pi0:
KENseries.append([_newrate, 2*pi1*ROCpoint[0], threshold])
else:
if _oldrate < pi0:
KENseries.append([pi0,(2*pi1*ROCpoint[0]-KENseries[-1][1])*(pi0-KENseries[-1][0])/(_newrate - KENseries[-1][0])+(KENseries[-1][1]), ''])
KENseries.append([_newrate, 2*pi0*(1-ROCpoint[1]), threshold])
if KENseries[-1][1] > kenmax:
kenmax = KENseries[-1][1]
_oldrate = _newrate
_oldloss = _newloss
k += len(group) - k
tp_old = tp
fp_old = fp
else