Commit 8431ad28 authored by Anze Vavpetic
Browse files

removed some unnecessary files

parent 46a455c9
def test(input_dict):
print 'great success'
return input_dict
\ No newline at end of file
"""
<name>PMML Rule Parser</name>
<description>Parses the given PMML XML string and returns the rules as a SDRules object.</description>
<icon></icon>
<priority>100</priority>
<contact>Anze Vavpetic (anze.vavpetic@ijs.si)</contact>
"""
import sys
import orange
from SDRule import SDRule, SDRules
from Beam_SD import Beam_SD
import xml.dom.minidom as xml
from xml.dom import NotSupportedErr
from PyQt4.Qt import *
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.Qwt5 import *
from webServices import guiTools
try:
from OWBaseWidget import OWBaseWidget
except ImportError:
OWBaseWidget = guiTools.importOWBaseWidget()
class OWParsePMMLRules(OWBaseWidget):
    '''
    Widget that converts a PMML rule set into an SDRules object.

    Inputs are an example table ("Examples") and a PMML XML string
    ("RulesAsPMML"). Whenever both are available the rules are parsed and
    sent on the "Subgroup Descriptions" output.
    '''

    def __init__(self, parent=None, signalManager = None, name='ParsePMMLRules'):
        super(OWParsePMMLRules, self).__init__(parent, signalManager, name)
        self.inputs = [("Examples", orange.ExampleTable, self.getExamples), ("RulesAsPMML", str, self.getPMML)]
        self.outputs = [("Subgroup Descriptions", SDRules)]
        self.pmml = None
        self.data = None
        self.init()

    def init(self):
        '''
        Builds the (parameterless) GUI: a single informational label.
        '''
        self.gridlayout = QGridLayout(self)
        self.label = QLabel()
        self.label.setText("This widget has no parameters.")
        self.gridlayout.addWidget(self.label, 0, 0)

    def getExamples(self, data):
        '''
        Input handler for "Examples": stores the table and re-parses.
        '''
        self.data = data
        self.parse()

    def getPMML(self, xml):
        '''
        Input handler for "RulesAsPMML": stores the XML string and re-parses.
        '''
        # The parameter name shadows the module alias `xml` only inside this
        # method; toSDRules still sees the minidom module.
        self.pmml = xml
        self.parse()

    def parse(self):
        '''
        Parses the stored PMML against the stored data and sends the result.

        If either input is missing, None is sent so that a rule set produced
        from earlier inputs does not linger on the output.
        '''
        if not self.pmml or not self.data:
            self.send('Subgroup Descriptions', None)
            return
        rules = self.toSDRules(self.pmml, self.data)
        self.send('Subgroup Descriptions', rules)

    def toSDRules(self, pmml, data):
        '''
        Converts the PMML XML to SDRules.

        The algorithm name is read from the first RuleSetModel element and
        every SimpleRule element is turned into an SDRule. The target class
        of the first rule is used to build the default (condition-free) rule.
        Raises IndexError if the document has no RuleSetModel or no
        SimpleRule element.
        '''
        dom = xml.parseString(pmml)
        try:
            alg = dom.getElementsByTagName('RuleSetModel')[0].getAttribute('algorithmName')
            rules = [self.parseRule(node, data) for node in dom.getElementsByTagName('SimpleRule')]
        finally:
            # Break the DOM's internal reference cycles so its memory is
            # released promptly; only plain strings were taken from it.
            dom.unlink()
        targetClassRule = SDRule(data, rules[0].targetClass, conditions=[])
        sdrules = SDRules(rules, targetClassRule, algorithmName=alg)
        sdrules.name = alg
        return sdrules

    def parseRule(self, node, data):
        '''
        Handles the parsing of one rule element.

        The rule's target class comes from its 'score' attribute; each
        SimplePredicate contributes one condition built from its 'field' and
        'value' attributes (the value is converted to float for continuous
        attributes). The predicate's own operator attribute is not read.
        Raises NotSupportedErr if a CompoundPredicate uses a boolean operator
        other than AND.
        '''
        # Only conjunctions are supported; an empty node list simply skips the loop.
        for pred in node.getElementsByTagName('CompoundPredicate'):
            op = pred.getAttribute('booleanOperator')
            if op.lower() != 'and':
                raise NotSupportedErr('Operator %s is not supported.' % op)
        targetClass = orange.Value(data.domain.classVar, str(node.getAttribute('score')))
        conditions = []
        for pred in node.getElementsByTagName('SimplePredicate'):
            attribute = str(pred.getAttribute('field'))
            attIdx = data.domain.index(attribute)
            parseVal = str(pred.getAttribute('value'))
            value = float(parseVal) if data.domain[attIdx].varType == orange.VarTypes.Continuous else parseVal
            conditions.append(orange.ValueFilter_discrete(position = attIdx, values = [orange.Value(data.domain[attIdx], value)]))
        return SDRule(data, targetClass, conditions=conditions)
if __name__ == '__main__':
    # Manual smoke test: learn a few subgroups on the 'lenses' data set,
    # serialise them to PMML and feed both inputs to the widget.
    lenses = orange.ExampleTable('lenses')
    beamLearner = Beam_SD(minSupport=0.2, beamWidth=5, g=6)
    softClass = orange.Value(lenses.domain.classVar, "soft")
    learnedRules = beamLearner(lenses, targetClass=softClass, num_of_rules=3)
    serialized = learnedRules.toPMML().getvalue()

    application = QApplication(sys.argv)
    widget = OWParsePMMLRules()
    widget.getExamples(lenses)
    widget.getPMML(serialized)
    widget.show()
    application.exec_()

    # Once the window is closed, print the rules that were learned.
    learnedRules.printRules()
\ No newline at end of file
"""
<name>Subgroup BAR Vizualization</name>
<description>Subgroup vizualization with BAR representation</description>
<icon>icons/SubgroupBARVizualization.png</icon>
<priority>1010</priority>
"""
from OWWidget import *
import OWGUI
import sys
from Beam_SD import *
from PyQt4 import QtGui
class OWSubgroupBARVizualizer(OWWidget):
    """Shows each subgroup as a red/green bar and lets the user pick a subset.

    Input: "Subgroup Descriptions" (SDRules). Outputs: the selected rules
    ("Subgroup Descriptions Subset") and the examples of the selected
    rules ("Examples").
    """

    def __init__(self, parent = None, signalManager = None, name = "SubgroupBARVizualizer"):
        OWWidget.__init__(self, parent, signalManager, name, TRUE)
        self.inputs = [("Subgroup Descriptions", SDRules, self.vizualize)]
        self.outputs = [("Subgroup Descriptions Subset", SDRules),("Examples", orange.ExampleTable)]
        # No rule set received yet; sendSubset() relies on this being defined.
        self.subgroups = None

        #######################
        # control area
        self.edtRules = QtGui.QPlainTextEdit(self.controlArea)  # rules are printed in this multi-line edit
        self.controlArea.layout().addWidget(self.edtRules)
        OWGUI.button(self.controlArea, self, "Send subset", callback=self.sendSubset)

        #######################
        # main area - start of content (right) area
        self.tabs = OWGUI.tabWidget(self.mainArea)
        # list box BAR widget
        self.lbBarRules = QtGui.QListWidget(self.tabs)
        self.lbBarRules.setSelectionMode(QtGui.QAbstractItemView.MultiSelection)
        # QListWidget announces selection changes through itemSelectionChanged();
        # it has no selectionChanged() signal, so that connection never fired.
        self.connect(self.lbBarRules, SIGNAL("itemSelectionChanged()"), self.sendSubset)
        OWGUI.createTabPage (self.tabs,"Vizualization BAR", widgetToAdd = self.lbBarRules, canScroll = True)

        # description of the visualization
        labeltxt = """
This widget provides a vizualization of subgroups in a
bar form. The first line shows the distribution of the
entire dataset. The green color symbolizes the number
of positive examples and the red color symbolizes the
number of negative examples. Each following line provides
a visualization of one subgroup. The green and red color
represent the part of positive and negative examples each
subgroup covers. The green and the red color together
represent the size of the subgroup.
It is possible to select a subset of the subgroups. That
could be an input to other vizualizations.
"""
        labelDescription = QLabel ( labeltxt, self.tabs )
        OWGUI.createTabPage (self.tabs,"Vizualization description", widgetToAdd = labelDescription, canScroll = True)
        self.resize(700,400)

    def vizualize(self, subgroups):
        """Input handler: draw the given rule set, or reset when it is empty.

        On an empty/None input the stored rules and the list are cleared and
        None is sent on both outputs so downstream widgets do not keep data
        derived from the previous input.
        """
        if subgroups:
            self.subgroups = subgroups
            # number of positive and negative examples
            self.calcRates()
            # call for visualization
            self.vizualizeBAR()
            # debug only
            self.edtRules.appendPlainText(("i\ttpr\tfpr\trule"))
            for i, rule in enumerate(self.subgroups.rules):
                self.edtRules.appendPlainText(("%2d\t%0.2f\t%0.2f\t%s")%(i, self.subgroups.TPR[i], self.subgroups.FPR[i], rule.ruleToString() ) )
        else:
            self.subgroups = None
            self.lbBarRules.clear()
            self.send("Subgroup Descriptions Subset", None)
            self.send("Examples", None)

    def calcRates(self):
        """Sort the rules by confidence and store per-rule TPR/FPR lists.

        Sets self.P / self.N (counts of the default rule's TP / FP examples,
        as floats) and self.subgroups.TPR / FPR. A rate is 0.0 when its
        denominator is zero.
        """
        self.subgroups.sortByConf()  # sort by confidence
        self.P = len(self.subgroups.targetClassRule.TP) * 1.0  # number of all positive examples as a float
        self.N = len(self.subgroups.targetClassRule.FP) * 1.0  # number of all negative examples as a float
        tprs = []
        fprs = []
        for rule in self.subgroups.rules:
            # Guard against data with no positive (or no negative) examples.
            tprs.append(len(rule.TP) / self.P if self.P else 0.0)  # true positive rate for this rule
            fprs.append(len(rule.FP) / self.N if self.N else 0.0)  # false positive rate for this rule
        self.subgroups.TPR = tprs
        self.subgroups.FPR = fprs

    def vizualizeBAR(self):
        """Rebuild the list: first the default rule, then one item per rule."""
        self.lbBarRules.clear()
        self.lbBarRules.setIconSize(QSize(200,20))
        # the default rule covers everything, so both rates are 1.0
        icon = QtGui.QIcon(self.createPixMap(1.0, 1.0))
        QtGui.QListWidgetItem ( icon, self.subgroups.targetClassRule.ruleToString(), self.lbBarRules)
        # other rules
        for i, rule in enumerate(self.subgroups.rules):
            icon = QtGui.QIcon(self.createPixMap(self.subgroups.TPR[i], self.subgroups.FPR[i]))
            QtGui.QListWidgetItem ( icon, rule.ruleToString(), self.lbBarRules )

    def createPixMap(self, tpr, fpr):
        """Return a 200x21 pixmap with a red (fpr) and a green (tpr) bar.

        The dividing line is placed according to the share of negative
        examples (self.N) among all examples; red grows to the left of it,
        green to the right. Both rates are also drawn as text.
        """
        w = 200
        h = 20
        label = QString ("tmp")
        pix = QPixmap ( w, h+1)
        p = QPainter ( pix )
        p.fillRect ( 0, 0, w, h, QBrush(QColor(240,240,245)) )
        # line in the middle
        p.setPen( QColor(155,155,155))
        total = self.N + self.P
        # With no examples at all there is nothing to split; put the line at 0.
        sredina = int( self.N / total * (w-1)) if total else 0
        p.drawLine ( sredina, 0 , sredina, h )
        w1 = int(fpr*sredina)  # red
        p.fillRect ( sredina - w1, 0, w1, h , QBrush(QColor(255, 55,55)) )
        label.setNum(fpr,'f',2)
        # integer division, as the original `/` on ints performed under Python 2
        p.drawText ( sredina // 2 - 7, 15, label )
        w1 = int(tpr*(w-sredina))  # green
        p.fillRect ( sredina+1, 0, w1, h , QBrush(QColor(5, 225, 55)) )
        label.setNum(tpr,'f',2)
        p.drawText (sredina + (w-sredina) // 2 - 7, 15, label )
        p.end()
        return pix

    def sendSubset(self):
        """Send the selected rules and the examples of the selected rules.

        List row 0 is the default rule and is never part of the subset, so
        list row i maps to rule index i-1. Does nothing if no rule set has
        been received.
        """
        if self.subgroups is None:
            return
        selected = []
        for i in range(1, self.lbBarRules.count() ):
            if self.lbBarRules.item(i).isSelected():
                selected.append(i-1)
                self.edtRules.appendPlainText(str(i-1))
        if not selected:
            ex = None
        else:
            # Start from a copy: extending the rule's own table in place would
            # permanently add other rules' examples to the first selected rule.
            ex = orange.ExampleTable(self.subgroups.rules[selected[0]].examples)
            for idx in selected[1:]:
                ex.extend(self.subgroups.rules[idx].examples)
            ex.removeDuplicates()
        subset = self.subgroups.makeSelection(selected)
        # Use the rule set's name; fall back to the object itself if it has none.
        subset.name = "%s subset" % (getattr(self.subgroups, "name", self.subgroups),)
        self.send("Subgroup Descriptions Subset", subset)
        self.send("Examples", ex)
        self.edtRules.appendPlainText("send")
if __name__=="__main__":
    # Manual smoke test: show the widget with rules learned on 'lenses'.
    application = QApplication(sys.argv)
    widget = OWSubgroupBARVizualizer()
    widget.show()

    # Data set location depends on the platform.
    if 'linux' in sys.platform:
        dataPath = "/usr/doc/orange/datasets/lenses.tab"
    else:
        dataPath = "..\\..\\doc\\datasets\\lenses.tab"
    lenses = orange.ExampleTable(dataPath)

    beamLearner = Beam_SD(minSupport=0.2, beamWidth=5, g=1)
    learnedRules = beamLearner(lenses, targetClass="soft", num_of_rules=5)
    widget.vizualize(learnedRules)
    application.exec_()
"""
<name>Build Subgroups</name>
<description>Runs subgroup discovery algorithms</description>
<icon>icons/SubgroupBuilder.png</icon>
<priority>100</priority>
"""
#
# OWSubgroupBuilder.py
#
from OWWidget import *
import OWGUI
import sys
import os
from SD_learner_classifier import *
###########################################################################################################
###########################################################################################################
class OWSubgroupBuilder(OWWidget):
    """Widget that configures and runs a subgroup discovery learner.

    Input: "Examples". Outputs: "Subgroup Descriptions" (rules for the
    chosen target class), "Learner" and "Classifier".
    """

    settingsList = [ "algorithm", "min_support", "g", "beam_width", "min_conf", "k", "max_rules"]

    def __init__(self, parent=None, signalManager = None, name='SubgroupBuilder'):
        OWWidget.__init__(self, parent, signalManager, name)
        self.inputs = [("Examples", ExampleTable, self.cdata)]
        self.outputs = [("Subgroup Descriptions", SDRules), ("Learner", orange.Learner),("Classifier", orange.Classifier) ]

        # Settings
        algorithms = ["SD", "SD-Preprocess", "Apriori-SD", "CN2-SD"]
        self.algorithm = algorithms[0]
        self.min_support = 5
        self.g = 5
        self.beam_width = 20
        self.min_conf = 80
        self.k = 5
        self.name = "Subgroup SD"
        self.max_rules = 0
        self.classes = ["No data"]
        self.data = None
        self.loadSettings()

        # GUI - controlArea (left)
        self.learnerName = OWGUI.lineEdit(self.controlArea, self, "name", tooltip="Name to be used by other widgets to identify the learner/classifier", box="Learner/classifier name")
        box = OWGUI.widgetBox(self.controlArea, "Options", addSpace = True)
        OWGUI.comboBox(box, self, "algorithm", box=None, label="Algorithm", labelWidth=None, orientation='horizontal', items=algorithms, tooltip="Choose an algorithem", sendSelectedValue = 1, callback = self.algorithmChanged)
        self.spin_support = OWGUI.spin(box, self, "min_support", 0, 100, 1, None, "Minimal support [%] ",labelWidth=200, orientation="horizontal", callback=self.settingChanged)
        self.spin_g = OWGUI.spin(box, self, "g", 1, 1000, 1, None, "Generalization parameter ",labelWidth=200, orientation="horizontal",callback=self.settingChanged)
        self.spin_beam_width = OWGUI.spin(box, self, "beam_width", 3, 100, 1, None, "Beam width ",labelWidth=200, orientation="horizontal",callback=self.settingChanged)
        self.spin_conf = OWGUI.spin(box, self, "min_conf", 0, 100, 1, None, "Minimal confidence [%]",labelWidth=200, orientation="horizontal",callback=self.settingChanged)
        self.spin_k = OWGUI.spin(box, self, "k", 3, 100, 1, None, "k - num of times covered before removed ", labelWidth=200,orientation="horizontal",callback=self.settingChanged)
        self.spin_max_rules = OWGUI.spin(box, self, "max_rules", 0, 20, 1, None, "Max. number of subgroups (0=no limitations) ", labelWidth=200,orientation="horizontal",callback=self.settingChanged)
        # Enables/disables the spin boxes for the current algorithm and also
        # creates and sends the initial learner (via settingChanged).
        self.algorithmChanged()
        OWGUI.separator(self.controlArea)

        # target class
        box = OWGUI.widgetBox(self.controlArea, "Target class", addSpace = True)
        self.listTargetValue = OWGUI.comboBox(box, self, "classes", box=None, label="Target class", labelWidth=None, orientation='horizontal', items=self.classes, tooltip="Choose the target class", sendSelectedValue = 1, callback = self.targetClassChanged)
        # Block signals until there are rules to send. The builtin True is
        # used instead of the undeclared lowercase name `true`.
        self.listTargetValue.blockSignals(True)
        OWGUI.separator(self.controlArea)
        OWGUI.button(self.controlArea, self, "Build subgroups", callback=self.build)
        OWGUI.button(self.controlArea, self, "Save ruleset to file in PMML format ...", callback=self.printToPmml)

        self.learner = self._makeLearner()
        self.classifier = None
        self.subgroups = None

        # GUI - main area (right)
        self.edtRules = QTextEdit(self.mainArea)  # the rules are printed in this text edit
        self.mainArea.layout().addWidget(self.edtRules)
        self.edtRules.setReadOnly(TRUE)
        OWGUI.rubber(box)
        self.resize(700,450)

    def _makeLearner(self):
        """Build an SD_learner from the current settings (percentages are scaled to 0..1)."""
        learner = SD_learner(name=self.name, algorithm=self.algorithm ,minSupport = self.min_support/100.0, minConfidence=self.min_conf/100.0 , beamWidth = self.beam_width, g = self.g, k=self.k, max_rules =self.max_rules)
        learner.name = self.name
        return learner

    def algorithmChanged(self):
        """Set which setting are enabled and disabled for each algorithm and set learner."""
        if self.algorithm in ("SD", "SD-Preprocess"):
            self.spin_support.setDisabled(False)
            self.spin_g.setDisabled(False)
            self.spin_beam_width.setDisabled(False)
            self.spin_conf.setDisabled(True)
            self.spin_k.setDisabled(True)
        elif self.algorithm == "Apriori-SD":
            self.spin_support.setDisabled(False)
            self.spin_g.setDisabled(True)
            self.spin_beam_width.setDisabled(True)
            self.spin_conf.setDisabled(False)
            self.spin_k.setDisabled(False)
        else:
            self.spin_support.setDisabled(True)
            self.spin_g.setDisabled(True)
            self.spin_beam_width.setDisabled(True)
            self.spin_conf.setDisabled(True)
            self.spin_k.setDisabled(False)
        self.settingChanged()

    def settingChanged(self):
        """Recreate the learner from the current settings and send it."""
        self.learner = self._makeLearner()
        self.send("Learner", self.learner)

    def cdata(self, dataset):
        """Input handler for "Examples".

        Resets classifier and rules (sending None for both), then fills the
        target-class combo box with the class values of the new data, or
        with 'No data' when the data is missing or has no discrete class.
        """
        self.classifier = None
        self.subgroups = None
        self.send("Subgroup Descriptions", self.subgroups)
        self.send("Classifier", self.classifier)
        self.listTargetValue.clear()
        if self.isDataWithClass(dataset, orange.VarTypes.Discrete) and dataset:
            self.data = dataset
            # fill combo box with all possible target class values
            valueObj = self.data.domain.classVar.firstvalue()
            while valueObj:
                self.listTargetValue.addItem(valueObj.value)
                valueObj = self.data.domain.classVar.nextvalue(valueObj)
            self.listTargetValue.setCurrentIndex(0)
        else:
            self.data = None
            self.listTargetValue.addItem('No data')
        # NOTE(review): SOURCE lost its indentation; this call is placed at
        # function level because the classifier was just reset - confirm.
        self.listTargetValue.blockSignals(True)

    def build(self):
        """Train the classifier on the current data and send classifier and rules."""
        if not self.data:
            return
        self.classifier = self.learner(self.data)
        self.edtRules.append(" %s %s supp=%d conf=%d bw=%0.2f g=%d k=%d rules=%d"%(self.name, self.algorithm , self.min_support, self.min_conf , self.beam_width, self.g, self.k, self.max_rules))
        # send classifier
        self.classifier.name = self.name
        self.send("Classifier", self.classifier)
        self.listTargetValue.blockSignals(False)  # enable signals, enable sending rules to other widgets
        if self.listTargetValue.currentIndex() != -1:
            self.subgroups = self.classifier.getRules(targetClass= str(self.listTargetValue.currentText() ))
            self.subgroups.name = self.name
            self.send("Subgroup Descriptions", self.subgroups)
            # debug
            for rule in self.subgroups.rules:
                self.edtRules.append(rule.ruleToString())
        self.edtRules.append("----------------------------------")

    def printToPmml(self):
        """Ask for a file name and write the classifier's rules to it as PMML.

        Builds the classifier first if needed. Nothing is written when there
        is still no classifier (no data) or the dialog was cancelled.
        """
        if self.classifier is None:
            self.build()
        if self.classifier is None:
            return
        self.browseFile()
        if not self.filename:
            return
        outfile = open(self.filename, 'w')
        try:
            self.classifier.toPMML(outfile)
        finally:
            # Close the file even if serialisation fails.
            outfile.close()

    def browseFile(self):
        """Show a save dialog and store the chosen path in self.filename.

        A '.xml' extension is appended when missing. self.filename is set to
        None when no name was chosen, so that a cancelled dialog does not
        turn into a file called '.xml'.
        """
        tmpfilename = str(QFileDialog.getSaveFileName(self, 'Save rules as XML', os.getcwd(), 'XML file (*.xml)'))
        if not tmpfilename:
            self.filename = None
            return
        if os.path.splitext(tmpfilename)[1].lower() != '.xml':
            tmpfilename += '.xml'
        self.filename = tmpfilename

    def targetClassChanged(self):
        """Combo-box callback: re-extract and send the rules for the newly chosen target class."""
        if self.classifier:
            self.subgroups = self.classifier.getRules(targetClass= str(self.listTargetValue.currentText()))
            self.edtRules.append(self.listTargetValue.currentText())
            self.subgroups.name = self.name
            self.send("Subgroup Descriptions", self.subgroups)
        # debug
        self.edtRules.append("target class changed")
        if self.subgroups:
            for rule in self.subgroups.rules:
                self.edtRules.append(rule.ruleToString())
        self.edtRules.append("----------------------------------")
if __name__=="__main__":
    # Manual smoke test: open the widget on the 'lenses' data set.
    application = QApplication(sys.argv)
    widget = OWSubgroupBuilder()
    widget.show()

    # Data set location depends on the platform.
    if 'linux' in sys.platform:
        dataPath = "/usr/doc/orange/datasets/lenses.tab"
    else:
        dataPath = "..\\..\\doc\\datasets\\lenses.tab"
    lenses = orange.ExampleTable(dataPath)

    widget.cdata(lenses)
    widget.saveSettings()
    application.exec_()
"""
<name>Subgroup Evaluation</name>
<description>Evaluates subgroup discovery algorithms with evaluation measures.</description>
<icon>icons/SubgroupEvaluation.png</icon>
<priority>200</priority>
"""
#
# OWSubgroupEvaluation.py
#
from PyQt4 import QtGui
from OWWidget import *
import OWGUI
from SD_learner_classifier import *
from math import log
from Beam_SD import *
from Apriori_SD import *
from CN2_SD import *
###########################################################################################################
###########################################################################################################
class OWSubgroupEvaluation(OWWidget):
settingsList = [ "nFolds", "stdevBtnStatus"]
eval_measures = [ ('Average coverage', ' avgCOV ', 'Coverage'),
('Target support', ' SUP ', 'Support'),
('Average ruleset size', ' SIZE ', 'Size'),
('Average complexity', ' COMPLEX ', 'Complexity'),
('Average rule significance',' SIG ', 'Significance'),
('Average rule unusualness', ' avgWRACC ', 'Unusualness'),
('Classification accuracy', ' CA ', 'Classification accuracy'),
('Area under ROC', ' AUC ', 'AUC') ]
def __init__(self, parent = None, signalManager = None):
OWWidget.__init__(self, parent, signalManager, 'SubgroupEvaluation')
# defining widget input ===========================================================================
self.inputs = [('Data', ExampleTable, self.setData, Default), ('Learner', orange.Learner, self.setLearner, Multiple)]
# Settings =======================================================================================
self.nFolds = 10 # cross validation folds
self.stdevBtnStatus = 0
self.usedMeasure = [1]*len(self.eval_measures)
# self.coverage = 1
# self.support = 1
# self.size = 1
# self.complexity = 1
# self.significance = 1
# self.unusualness = 1
# self.classification_accuracy = 1
# self.auc = 1