Commit 0ab54a0d authored by Anze Vavpetic's avatar Anze Vavpetic
Browse files

Added roc curve SD visualization.

parent 29db77ce
This diff is collapsed.
......@@ -10,7 +10,7 @@ def sensitivity_analysis_viewer(request, input_dict, output_dict, widget):
'''
Computes the sensitivity analysis graph.
@author: Anze Vavpeltic, 2012
@author: Anze Vavpetic, 2012
'''
model = input_dict['model']
attributes = [att.name for att in input_dict['model'].data.domain.features]
......@@ -43,7 +43,7 @@ def ds_charts_viewer(request, input_dict, output_dict, widget):
'''
Decision support visualization.
@author: Anze Vavpeltic, 2012
@author: Anze Vavpetic, 2012
'''
model = input_dict['model']
norm_data = model()
......
......@@ -12,6 +12,7 @@ from Beam_SD import *
from Apriori_SD import *
from OWSubgroupROCVizualizerGraph import *
from PyQt4 import QtGui
from calcHull import *
#from qttable import *
white = QColor(255,255,255)
......@@ -85,11 +86,11 @@ class OWSubgroupROCVizualizer(OWWidget):
indx = ids.index(id)
subgroups.isSelected = self.subgroups[indx][1].isSelected
self.subgroups[indx] = (id, subgroups)
self.calcRates(subgroups) ######## calc rates
calcRates(subgroups) ######## calc rates
else: # add new subgroups
subgroups.isSelected = True
self.subgroups.append((id, subgroups))
self.calcRates(subgroups) ######## calc rates
calcRates(subgroups) ######## calc rates
self.edtRules.appendPlainText("%d subgroups sets on input." % len(self.subgroups))
self.updateclb()
......@@ -138,95 +139,12 @@ class OWSubgroupROCVizualizer(OWWidget):
symbol = QwtSymbol.Ellipse, xData =s[1].FPR, yData = s[1].TPR)
self.graphROC.replot()
# #############################################
# calculations for the vizualization
# #############################################
def calcRates(self, subgroups):
    '''
    Stores the ROC coordinates of every rule, and the hull over them, on `subgroups`.

    Four list attributes are (re)written:
      TPR, FPR         -- len(rule.TP) / P and len(rule.FP) / N for each rule in
                          subgroups.rules, where P and N are the lengths of
                          subgroups.targetClassRule.TP and .FP
      hullTPR, hullFPR -- start with 0, then whatever self.calcHull appends,
                          then end with 1

    If P or N is zero, processing the first rule raises ZeroDivisionError.
    '''
    subgroups.TPR = []
    subgroups.FPR = []
    target = subgroups.targetClassRule
    positives = float(len(target.TP))  # number of all positive examples
    negatives = float(len(target.FP))  # number of all negative examples
    for rule in subgroups.rules:
        subgroups.TPR.append(len(rule.TP) / positives)  # true positive rate of this rule
        subgroups.FPR.append(len(rule.FP) / negatives)  # false positive rate of this rule

    # Convex hull.  Keep these five statements together: calcHull appends its
    # vertices between the (0, 0) seed and the closing (1, 1) point.
    subgroups.hullTPR = [0]
    subgroups.hullFPR = [0]
    self.calcHull(subgroups, subgroups.TPR[:], subgroups.FPR[:], A=(0, 0), B=(1, 1))
    subgroups.hullTPR.append(1)
    subgroups.hullFPR.append(1)
def calcRatesSubset(self, subgroups):
    '''
    Stores the ROC coordinates of every rule on `subgroups`, without a hull.

    subgroups.TPR and subgroups.FPR are reset and then filled, in the order of
    subgroups.rules, with len(rule.TP) / P and len(rule.FP) / N, where P and N
    are the lengths of subgroups.targetClassRule.TP and .FP.
    '''
    subgroups.TPR = []
    subgroups.FPR = []
    target = subgroups.targetClassRule
    positives = float(len(target.TP))  # number of all positive examples
    negatives = float(len(target.FP))  # number of all negative examples
    for rule in subgroups.rules:
        # Both rates are computed before either list is touched.
        tp_rate = len(rule.TP) / positives
        fp_rate = len(rule.FP) / negatives
        subgroups.TPR.append(tp_rate)
        subgroups.FPR.append(fp_rate)
def calcHull(self, subgroups, Y, X, A, B):
    '''
    Recursively adds to subgroups.hullFPR / subgroups.hullTPR the points that
    lie above the chord from A to B.

    The point with the largest positive vertical distance above the chord is
    taken as a hull vertex C; the remaining points are split into those
    between A and C and those between C and B, and each side is processed the
    same way.  The left side is handled before C is appended and the right
    side after, so vertices are appended going from A towards B.

    subgroups -- object owning the hullTPR and hullFPR lists that are appended to
    Y, X      -- parallel lists of point coordinates (calcRates passes TPR as Y
                 and FPR as X); the chosen point is deleted from both lists
    A, B      -- (x, y) end points of the chord

    A vertical chord (A and B share x) is reported in self.edtRules and adds
    nothing.
    '''
    run = B[0] - A[0]
    if run == 0:
        self.edtRules.appendPlainText("vertical line!!!")
        return

    # float() keeps the slope exact when both end points are integers;
    # a bare `/` would truncate under Python 2.
    slope = (B[1] - A[1]) / float(run)

    # Find the point furthest above the chord; ties keep the earliest one.
    best_gap = 0
    best_idx = -1
    for idx, (px, py) in enumerate(zip(X, Y)):
        gap = py - (slope * (px - A[0]) + A[1])  # vertical distance to line AB
        if gap > best_gap:
            best_gap = gap
            best_idx = idx
    if best_idx < 0:
        return  # nothing lies strictly above the chord

    C = (X[best_idx], Y[best_idx])
    del X[best_idx]
    del Y[best_idx]

    # Split what is left into candidates left and right of C.
    left_x, left_y, right_x, right_y = [], [], [], []
    for px, py in zip(X, Y):
        if A[0] <= px <= C[0] and py > A[1]:
            left_x.append(px)
            left_y.append(py)
        elif C[0] <= px <= B[0] and py > C[1]:
            right_x.append(px)
            right_y.append(py)

    self.calcHull(subgroups, left_y, left_x, A, C)   # vertices before C
    subgroups.hullTPR.append(C[1])
    subgroups.hullFPR.append(C[0])
    self.calcHull(subgroups, right_y, right_x, C, B)  # vertices after C
####################_______________________##### O L D
def vizualizeSubset(self, subset):
if subset:
self.subset = subset
#draw dots on the canvas
self.calcRatesSubset(subset)
calcRatesSubset(subset)
self.graphROC.addCurve("dots", self.scolor, self.scolor, 6, style = QwtPlotCurve.NoCurve,\
symbol = QwtSymbol.Ellipse, xData =self.subset.FPR, yData = self.subset.TPR)
self.graphROC.replot()
......
......@@ -26,8 +26,10 @@ class SDRule :
distribution[int(d.getclass())]+=1
distribution = map (lambda d: d/len(self.examples), distribution)
self.classDistribution = orange.Distribution(distribution) # set distribution
self.TP = filter(lambda e: e.getclass()==self.targetClass, self.examples) # True positives
self.FP = filter(lambda e: e.getclass()!=self.targetClass, self.examples) # flase positives
self.TP = self.examples.filter({self.examples.domain.classVar : self.targetClass})
self.FP = self.examples.filter({self.examples.domain.classVar : self.targetClass}, negate=1)
# self.TP = filter(lambda e: e.getclass()==self.targetClass, self.examples) # True positives
# self.FP = filter(lambda e: e.getclass()!=self.targetClass, self.examples) # false positives
TPlen = len(self.TP) * 1.0
self.quality = TPlen / (len(self.FP) + self.g) # set rule quality: generalization quocient
self.support = 1.0* len(self.examples)/len(self.data) # set rule support
......
......@@ -277,10 +277,10 @@ class SD_Classifier(orange.Classifier):
#___________________________________________________________________________________
if __name__=="__main__":
filename = "..\\..\\doc\\datasets\\lenses.tab"
if 'linux' in sys.platform:
filename= "/usr/doc/orange/datasets/lenses.tab"
data = orange.ExampleTable(filename)
# filename = "..\\..\\doc\\datasets\\lenses.tab"
# if 'linux' in sys.platform:
# filename= "/usr/doc/orange/datasets/lenses.tab"
data = orange.ExampleTable('lenses')
learner2 = SD_learner(algorithm = "Apriori-SD", minSupport = 0.1, minConfidence= 0.6)
......@@ -300,6 +300,15 @@ if __name__=="__main__":
print d.getclass(), classifier2(d, orange.GetValue), classifier3(d, orange.GetValue), classifier4(d, orange.GetValue)
import cPickle
one= classifier2.rulesClass[0].rules.rules[0]
for obj in dir(one):
try:
cPickle.dump(getattr(one, obj), open('foo.pkl','w'))
print obj, 'ok'
except Exception, e:
print obj, str(e)
print "\n\n---> PMML model <---"
......
def calcRates(subgroups):
    '''
    Stores the ROC coordinates of every rule, and the hull over them, on `subgroups`.

    Four list attributes are (re)written:
      TPR, FPR         -- len(rule.TP) / P and len(rule.FP) / N for each rule in
                          subgroups.rules, where P and N are the lengths of
                          subgroups.targetClassRule.TP and .FP
      hullTPR, hullFPR -- start with 0, then whatever calcHull appends, then
                          end with 1

    If P or N is zero, processing the first rule raises ZeroDivisionError.
    '''
    subgroups.TPR = []
    subgroups.FPR = []
    target = subgroups.targetClassRule
    positives = float(len(target.TP))  # number of all positive examples
    negatives = float(len(target.FP))  # number of all negative examples
    for rule in subgroups.rules:
        subgroups.TPR.append(len(rule.TP) / positives)  # true positive rate of this rule
        subgroups.FPR.append(len(rule.FP) / negatives)  # false positive rate of this rule

    # Convex hull.  Keep these five statements together: calcHull appends its
    # vertices between the (0, 0) seed and the closing (1, 1) point.
    subgroups.hullTPR = [0]
    subgroups.hullFPR = [0]
    calcHull(subgroups, subgroups.TPR[:], subgroups.FPR[:], A=(0, 0), B=(1, 1))
    subgroups.hullTPR.append(1)
    subgroups.hullFPR.append(1)
def calcRatesSubset(subgroups):
    '''
    Stores the ROC coordinates of every rule on `subgroups`, without a hull.

    subgroups.TPR and subgroups.FPR are reset and then filled, in the order of
    subgroups.rules, with len(rule.TP) / P and len(rule.FP) / N, where P and N
    are the lengths of subgroups.targetClassRule.TP and .FP.
    '''
    subgroups.TPR = []
    subgroups.FPR = []
    target = subgroups.targetClassRule
    positives = float(len(target.TP))  # number of all positive examples
    negatives = float(len(target.FP))  # number of all negative examples
    for rule in subgroups.rules:
        # Both rates are computed before either list is touched.
        tp_rate = len(rule.TP) / positives
        fp_rate = len(rule.FP) / negatives
        subgroups.TPR.append(tp_rate)
        subgroups.FPR.append(fp_rate)
def calcHull(subgroups, Y, X, A, B):
    '''
    Recursively adds to subgroups.hullFPR / subgroups.hullTPR the points that
    lie above the chord from A to B.

    The point with the largest positive vertical distance above the chord is
    taken as a hull vertex C; the remaining points are split into those
    between A and C and those between C and B, and each side is processed the
    same way.  The left side is handled before C is appended and the right
    side after, so vertices are appended going from A towards B.

    subgroups -- object owning the hullTPR and hullFPR lists that are appended to
    Y, X      -- parallel lists of point coordinates (calcRates passes TPR as Y
                 and FPR as X); the chosen point is deleted from both lists
    A, B      -- (x, y) end points of the chord

    A vertical chord (A and B share x) adds nothing.
    '''
    run = B[0] - A[0]
    if run == 0:
        return  # vertical chord: no slope to measure against

    # float() keeps the slope exact when both end points are integers;
    # a bare `/` would truncate under Python 2.
    slope = (B[1] - A[1]) / float(run)

    # Find the point furthest above the chord; ties keep the earliest one.
    best_gap = 0
    best_idx = -1
    for idx, (px, py) in enumerate(zip(X, Y)):
        gap = py - (slope * (px - A[0]) + A[1])  # vertical distance to line AB
        if gap > best_gap:
            best_gap = gap
            best_idx = idx
    if best_idx < 0:
        return  # nothing lies strictly above the chord

    C = (X[best_idx], Y[best_idx])
    del X[best_idx]
    del Y[best_idx]

    # Split what is left into candidates left and right of C.
    left_x, left_y, right_x, right_y = [], [], [], []
    for px, py in zip(X, Y):
        if A[0] <= px <= C[0] and py > A[1]:
            left_x.append(px)
            left_y.append(py)
        elif C[0] <= px <= B[0] and py > C[1]:
            right_x.append(px)
            right_y.append(py)

    calcHull(subgroups, left_y, left_x, A, C)   # vertices before C
    subgroups.hullTPR.append(C[1])
    subgroups.hullFPR.append(C[0])
    calcHull(subgroups, right_y, right_x, C, B)  # vertices after C
......@@ -33,19 +33,24 @@ class SubgroupDiscovery:
}
def build_subgroups(input_dict):
return {'rules' : None}
return {'rules' : None, 'classifier' : None}
def build_subgroups_finished(postdata, input_dict, output_dict):
data = input_dict['data']
widget_id = postdata['widget_id'][0]
alg = postdata['algorithm'+widget_id][0]
classValue = str(postdata['class'+widget_id][0])
params = {'name' : alg, 'algorithm' : alg}
for param in SubgroupDiscovery.algorithms[alg]:
value = postdata[param+widget_id][0]
if value != '':
params[param] = SubgroupDiscovery.parameter_types[param](value)
rules = SD_learner(**params)
return {'rules' : rules}
learner = SD_learner(**params)
classifier = learner(data)
return {'rules' : classifier.getRules(classValue), 'classifier' : learner}
def subgroup_bar_visualization(input_dict):
    '''
    Returns a dict whose only entry, 'rules', is None; input_dict is not read.
    '''
    return dict(rules=None)
def subgroup_visualization(input_dict):
    '''
    Returns input_dict itself, unchanged (the same object, not a copy).
    '''
    return input_dict
def subgroup_roc_visualization(input_dict):
    '''
    Returns a dict whose only entry, 'rules', is a new empty list;
    input_dict is not read.
    '''
    return dict(rules=[])
\ No newline at end of file
......@@ -4,12 +4,43 @@ Subgroup discovery visualization views.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
from django.shortcuts import render
import json
from SubgroupDiscovery.calcHull import calcRates
def subgroup_visualization(request, input_dict, output_dict, widget):
def subgroup_bar_visualization(request, input_dict, output_dict, widget):
'''
Subgroup visualizations.
Subgroup bar visualization.
@author: Anze Vavpeltic, 2012
@author: Anze Vavpetic, 2012
'''
sd_rules = input_dict['rules']
rules = sd_rules.rules
P, N = float(len(sd_rules.targetClassRule.TP)), float(len(sd_rules.targetClassRule.FP))
fpr = [-len(rule.FP)/N for rule in rules]
tpr = [len(rule.TP)/P for rule in rules]
subgroups = [rule.ruleToString() for rule in rules]
return render(request, 'visualizations/subgroup_bar_visualization.html', {
'widget' : widget,
'model_name' : sd_rules.algorithmName,
'fpr' : json.dumps(fpr),
'tpr' : json.dumps(tpr),
'subgroups' : json.dumps(subgroups)
})
return render(request, 'visualizations/subgroup_visualization.html', {})
\ No newline at end of file
def subgroup_roc_visualization(request, input_dict, output_dict, widget):
    '''
    Subgroup roc visualization.

    Renders visualizations/subgroup_roc_visualization.html with one 'line'
    series per rule set in input_dict['rules'].  Each series is named after
    the rule set's algorithmName and holds the (FPR, TPR) hull points that
    calcRates stores on the rule set.  The series list is passed to the
    template as the JSON string 'roc_data', alongside 'widget'.

    output_dict is not used.
    @author: Anze Vavpetic, 2012
    '''
    roc_data = []
    for sd_rules in input_dict['rules']:
        # Fills sd_rules.hullFPR / hullTPR (and the per-rule FPR / TPR lists).
        calcRates(sd_rules)
        roc_data.append({
            'type': 'line',
            'name': sd_rules.algorithmName,
            # list() keeps the points JSON-serialisable where zip() returns
            # an iterator instead of a list.
            'data': list(zip(sd_rules.hullFPR, sd_rules.hullTPR)),
        })
    return render(request, 'visualizations/subgroup_roc_visualization.html', {
        'widget': widget,
        'roc_data': json.dumps(roc_data),
    })
\ No newline at end of file
......@@ -11,7 +11,7 @@
{% endfor %}
<input type="hidden" name="widget_id" value="{{widget.pk}}">
<label for="class{{widget.pk}}">Class</label>
<select id="class{{widget.pk}}">
<select id="class{{widget.pk}}" name="class{{widget.pk}}">
{% for class in classValues %}
<option value="{{class}}">{{class}}</option>
{% endfor %}
......
......@@ -23,7 +23,7 @@
<script type="text/javascript">
$(function () {
// Pretty radio buttons.
// Pretty tab buttons.
$("#weight_charts_tabs").tabs({
collapsible: true
});
......@@ -36,14 +36,7 @@
$(document).ready(function(){
plot();
});
// Handles radio button selection events.
$("form input[class=target_att]").change(function () {
var target_att = $("form input[class=target_att]:checked").val();
plot(target_att);
});
// Plots for the selected attribute.
function plot() {
......
<div id="widgetvisualization-{{widget.pk}}" rel="{{widget.pk}}" class="widgetvisualizationdialog" title="{{widget.name}} visualization" width="700px" height="670px">
<div style="width:620px; margin: 0 auto;">
<form>
<div id="bar_chart{{widget.pk}}" style="margin-bottom: 10px;">
</div>
<input type="hidden" name="widget_id" value="{{widget.pk}}"/>
</form>
<script type="text/javascript">
$(function () {
$(document).ready(function(){
plot();
});
// Draws the subgroup bar chart (takes no arguments).
function plot() {
// Stacked bar chart of the false/true positive rate of each subgroup
new Highcharts.Chart({
chart: {
renderTo: 'bar_chart{{widget.pk}}',
type: 'bar'
},
title: {
text: 'Subgroup BAR visualization'
},
subtitle: {
text: 'Model: {{ model_name }}'
},
xAxis: [{
categories: {{subgroups|safe}},
reversed: false
}],
yAxis: {
title: {
text: null
},
labels: {
formatter: function(){
return Math.abs(this.value);
}
},
min: -1.0,
max: 1.0
},
plotOptions: {
series: {
stacking: 'normal'
}
},
tooltip: {
formatter: function() {
return ''+
this.series.name +': '+ Math.abs(this.y.toFixed(3));
}
},
series: [{
name: 'False positive rate',
data: {{fpr|safe}}
}, {
name: 'True positive rate',
data: {{tpr|safe}}
}],
credits: {
enabled: false
}
});
}
});
</script>
</div>
</div>
\ No newline at end of file
<div id="widgetvisualization-{{widget.pk}}" rel="{{widget.pk}}" class="widgetvisualizationdialog" title="{{widget.name}} visualization" width="700px" height="670px">
<div style="width:620px; margin: 0 auto;">
<form>
<div id="roc_curve{{widget.pk}}" style="margin-bottom: 10px;">
</div>
<input type="hidden" name="widget_id" value="{{widget.pk}}"/>
</form>
<script type="text/javascript">
$(function () {
$(document).ready(function(){
plot();
});
// Draws the subgroup ROC chart (takes no arguments).
function plot() {
// ROC chart: one series per entry of roc_data
new Highcharts.Chart({
chart: {
renderTo: 'roc_curve{{widget.pk}}',
},
title: {
text: 'Subgroup ROC visualization'
},
xAxis: [{
title: {
text: 'FP rate (1-specificity)'
},
min: -0.01,
max: 1.0
}],
yAxis: {
title: {
text: 'TP rate (sensitivity)'
},
min: 0.0,
max: 1.0
},
series: {{roc_data|safe}},
credits: {
enabled: false
}
});
}
});
</script>
</div>
</div>
\ No newline at end of file
......@@ -2,6 +2,7 @@ from django.shortcuts import render
from django.http import Http404, HttpResponse
import nlp
from decision_support.visualization import *
from subgroup_discovery.visualization import *
def odt_to_tab(request,input_dict,output_dict,widget):
import Orange
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment