Commit 8057a4b4 authored by matjaz

Merge remote-tracking branch 'remotes/origin/dev' into dev_package_independance

parents c523cbdb 107f04ed
......
@@ -12,6 +12,7 @@
.project
.settings
atlassian-ide-plugin.xml
*.sublime-*
# Project
/mothra/local_settings.py
......
'''
Bioinformatics interaction views.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
from django.shortcuts import render
def insilico_search(request, input_dict, output_dict, widget):
    #TODO
return render(request, 'interactions/insilico_search.html', {'widget':widget})
'''
Bioinformatics library.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
def insilico_search(input_dict):
return input_dict
def insilico_finished(input_dict):
#TODO
return input_dict
\ No newline at end of file
def test(input_dict):
print 'great success'
return input_dict
\ No newline at end of file
'''
Decision support interaction views.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
from django.shortcuts import render
......
'''
Decision support library functions.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
def kepner_tregoe(input_dict):
......
'''
Decision support visualization views.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
from django.shortcuts import render
import json
......
@@ -8,7 +10,7 @@ def sensitivity_analysis_viewer(request, input_dict, output_dict, widget):
'''
Computes the sensitivity analysis graph.
-    @author: Anze Vavpeltic, 2012
+    @author: Anze Vavpetic, 2012
'''
model = input_dict['model']
attributes = [att.name for att in input_dict['model'].data.domain.features]
......
@@ -41,7 +43,7 @@ def ds_charts_viewer(request, input_dict, output_dict, widget):
'''
Decision support visualization.
-    @author: Anze Vavpeltic, 2012
+    @author: Anze Vavpetic, 2012
'''
model = input_dict['model']
norm_data = model()
......
import sys
from django.shortcuts import render
from decision_support.interaction import *
from subgroup_discovery.interaction import *
from workflows import packageLibImporter
def setattr_local(name, value, package):
......
@@ -71,20 +72,6 @@ def select_data(request, input_dict, output_dict, widget):
sorted_attrs = sorted(attrs.items())
input_dict = {'data': data, 'attrs':attrs, 'sorted_attrs':sorted_attrs}
return render(request, 'interactions/select_data.html',{'widget':widget, 'input_dict':input_dict})
def build_subgroups(request, input_dict, output_dict, widget):
import Orange
data = Orange.data.Table(input_dict['data'])
class_values = []
for v in data.domain.class_var.values:
class_values.append(v)
target = {'name':data.domain.class_var.name, 'values':class_values}
return render(request, 'interactions/build_subgroups.html', {'widget':widget, 'data':data, 'target':target})
def alter_table(request, input_dict, output_dict, widget):
from visualization_views import orng_table_to_dict
......
......
@@ -4,6 +4,7 @@ import cPickle
import json
import sys
from decision_support.library import *
from subgroup_discovery.library import *
from workflows import packageLibImporter
def setattr_local(name, value, package):
......
......
@@ -33,7 +33,7 @@ class Command(BaseCommand):
result = self.export_package_string(self.stdout.write, args[1:], options['newuid'], options['all'], int(options['verbosity']))
try:
-            f.write(result)
+            f.write(result.encode('utf-8'))
except:
raise CommandError('There was a problem with writing to the given output file')
......
@@ -136,4 +136,4 @@ class Command(BaseCommand):
objs.extend(inp.options.all())
objs.extend(wid.outputs.all())
-        return objs
\ No newline at end of file
+        return objs
......
@@ -233,3 +233,7 @@ html[xmlns] .dataTables_wrapper {
.dataTables_filter input {
margin-bottom: 5px;
}
.selected_subgroup {
font-weight: bold;
font-size: 30pt;
}
\ No newline at end of file
import orange
import sys
from SDRule import *
true = 1
false = 0
class Apriori_SD:
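    '''
    Apriori-SD subgroup discovery: association classification rules are induced
    Apriori-style, filtered to the target class and minimum confidence, then
    post-processed by weighted covering; k bounds how many times an example may
    be covered.
    '''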
def __init__(self, minSupport = 0.05, minConfidence = 0.8, k=3):
self.minSup = minSupport
self.minConf = minConfidence
self.weightID = orange.newmetaid()
self.k = k
def __call__(self, data, targetClass, max_rules=0):
'''Returns the Apriori-C classifier.'''
data_discretized = False
# If any of the attributes are continuous, discretize them
if data.domain.hasContinuousAttributes():
original_data = data
data_discretized = True
new_domain = []
discretize = orange.EntropyDiscretization(forceAttribute=True)
for attribute in data.domain.attributes:
if attribute.varType == orange.VarTypes.Continuous:
d_attribute = discretize(attribute, data)
# An attribute is irrelevant, if it is discretized into a single interval
# if len(d_attribute.getValueFrom.transformer.points) > 0:
new_domain.append(d_attribute)
else:
new_domain.append(attribute)
data = original_data.select(new_domain + [original_data.domain.classVar])
self.data = data
self.rulesSD = []
# build association classification rules
rules = orange.AssociationRulesInducer(data, support = self.minSup, classificationRules = 1, maxItemSets =10000000 )
#_______________________________ post-processing step 1
# select rules that classify in the target class
right= orange.Example(data.domain,[orange.Value(orange.VarTypes.Discrete, orange.ValueTypes.DK)]*len(data.domain))
right.setclass(targetClass)
rules = rules.filter(lambda rule: rule.right == right)
# select rules with confidence >= minConfidence
rules = rules.filter(lambda rule: rule.confidence >= self.minConf)
#________________________________ post processing step 2
# weighted covering
self.data.addMetaAttribute(self.weightID) # set weights of all examples to 1
bestRuleWRacc = 100
while len(rules)>0 and self.uncoveredExamples()>0 and bestRuleWRacc > 0 and (max_rules==0 or len(self.rulesSD)<max_rules):
(bestRule,bestRuleWRacc)= self.findBestRule(rules)
rules.remove(bestRule)
self.removeSimilarRules(bestRule, rules)
self.decreaseExampleWeights(bestRule)
self.rulesSD.append(bestRule)
#____________________________ transform rules to SD format
beam = []
targetClassRule = SDRule(data, targetClass, conditions=[], g =1)
for r in self.rulesSD:
cond = []
for i in range(len(r.left)):
if not orange.Value.is_DC(r.left[i]):
cond.append(orange.ValueFilter_discrete(
position = i,
values = [orange.Value(data.domain.attributes[i], r.left[i])]))
rSD = SDRule(data, targetClass, cond)
beam.append(rSD)
if data_discretized:
targetClassRule = SDRule(original_data, targetClass, conditions=[], g=1)
# change beam so the rules apply to original data
beam = [rule.getUndiscretized(original_data) for rule in beam]
else:
targetClassRule = SDRule(data, targetClass, conditions=[], g =1)
return SDRules(beam, targetClassRule,"Apriori-SD")
def removeSimilarRules(self, rule , rules):
for r in rules[:]:
if self.areSimilar(r,rule):
rules.remove(r)
def areSimilar(self, rule1, rule2):
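        '''Two rules are similar if they predict the same class and every
        condition of one is a don't-care or equals the corresponding condition
        of the other.'''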
b1 = b2 = false
if rule1.right == rule2.right: # classify in the same class
b1 = b2 = true
for i in range(len(rule1.left)):
if not(rule1.left[i].is_DC() or rule1.left[i] == rule2.left[i] ):
b1 = false
if not(rule2.left[i].is_DC() or rule1.left[i] == rule2.left[i] ):
b2 = false
return (b1 or b2)
def findBestRule(self,rules): # rules should not be empty
bestRule = rules[0]
bestRuleWRacc = self.wWRAccImp(bestRule)
for r in rules:
tmp = self.wWRAccImp(r)
if tmp > bestRuleWRacc:
bestRuleWRacc = tmp
bestRule = r
return (bestRule,bestRuleWRacc)
def uncoveredExamples(self):
"""Returns the number of examples that have not been covered more than k times."""
return len(map(lambda d: d.getweight(self.weightID) <= self.k, self.data))
def decreaseExampleWeights(self, rule):
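        '''Increments the stored weight value of every example the rule covers;
        wWRAccImp converts this to a multiplicative weight 1/(1 + stored value),
        which therefore shrinks with each rule that covers the example.'''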
for d in self.data:
if (rule.appliesBoth(d)):
d.setweight(self.weightID, d.getweight(self.weightID) + 1)
def wWRAccImp(self, rule):
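        '''Weighted relative accuracy of a rule X -> Y:
        n'(XY)/N' - n(Y) * n'(X) / (N' * N), where the primed quantities are
        sums of example weights 1/(1 + stored weight) over examples whose
        stored weight is still at most k.'''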
N = len(self.data)
ny = rule.nAppliesRight
N1 = n1x = n1xy = 0
for d in self.data:
if d.getweight(self.weightID) <= self.k:
tmp = 1 / (1 + d.getweight(self.weightID) )
N1 += tmp
if rule.appliesLeft(d):
n1x += tmp
if rule.appliesBoth(d):
n1xy += tmp
return n1xy/N1 - ny * n1x /(N1 * N)
if __name__=="__main__":
filename = "..\\..\\doc\\datasets\\lenses.tab"
if 'linux' in sys.platform:
filename= "/usr/doc/orange/datasets/lenses.tab"
data = orange.ExampleTable(filename)
print
learner = Apriori_SD(minSupport = 0.3, minConfidence = 0.3, k=4)
targetClass= orange.Value(data.domain.classVar, "none")
rules = learner(data,targetClass,0)
rules.printRules()
import orange
import sys
from SDRule import *
true = 1
false = 0
class Beam_SD:
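    '''
    Beam-search subgroup discovery (the SD algorithm); g is the generalization
    parameter passed to SDRule's rule quality measure.
    '''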
def __init__(self, minSupport = 0.2, beamWidth = 5, g = 1, **kwds):
self.minSupport = minSupport
self.beamWidth = beamWidth
self.g = g
def __call__(self, data, targetClass, num_of_rules ):
        if self.dataOK(data): # checks whether targetClass is discrete
data_discretized = False
# If any of the attributes are continuous, discretize them
if data.domain.hasContinuousAttributes():
original_data = data
data_discretized = True
new_domain = []
discretize = orange.EntropyDiscretization(forceAttribute=True)
for attribute in data.domain.attributes:
if attribute.varType == orange.VarTypes.Continuous:
d_attribute = discretize(attribute, data)
# An attribute is irrelevant, if it is discretized into a single interval
# if len(d_attribute.getValueFrom.transformer.points) > 0:
new_domain.append(d_attribute)
else:
new_domain.append(attribute)
data = original_data.select(new_domain + [original_data.domain.classVar])
# initialization of beams
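            # note: list multiplication fills the beam with references to one
            # default rule; this is safe here because beam slots are only ever
            # replaced, never mutated in place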
beam = [SDRule(data=data, targetClass=targetClass, g=self.g)] * self.beamWidth
newBeam = [SDRule(data=data, targetClass=targetClass, g=self.g)] * self.beamWidth
worstRuleIndex = 0
improvements = true
while improvements:
improvements = false
for rule in beam:
for attr in data.domain.attributes:
value = attr.firstvalue()
while(value):
newRule = rule.cloneAndAddCondition(attr,value)
if newRule.support > self.minSupport and self.betterThanWorstRule(newRule, newBeam, worstRuleIndex) and self.isRelevant(newRule, newBeam):
worstRuleIndex = self.replaceWorstRule(newRule, newBeam, worstRuleIndex)
improvements = true
value = attr.nextvalue(value)
beam = newBeam
# perform rule subset selection
if num_of_rules != 0:
beam = self.ruleSubsetSelection(beam, num_of_rules, data)
if data_discretized:
targetClassRule = SDRule(original_data, targetClass, conditions=[], g=1)
# change beam so the rules apply to original data
beam = [rule.getUndiscretized(original_data) for rule in beam]
else:
targetClassRule = SDRule(data, targetClass, conditions=[], g =1)
return SDRules(beam, targetClassRule, "SD")
def isRelevant(self, newRule, beam):
for rule in beam:
if newRule.isIrrelevant(rule):
return false
return true
def betterThanWorstRule(self, newRule, beam, worstRuleIndex):
if newRule.quality > beam[worstRuleIndex].quality: # better quality
return true
elif newRule.quality == beam[worstRuleIndex].quality and newRule.complexity < beam[worstRuleIndex].complexity: # same quality and smaller complexity
return true
else:
return false
def replaceWorstRule(self, rule, beam, worstRuleIndex):
beam[worstRuleIndex] = rule
wri = 0
for i in range(len(beam)):
if beam[i].quality < beam[wri].quality:
wri = i
return wri
def dataOK(self, data):
# if data.domain.hasContinuousAttributes():
# print "All attributes must be discrete."
# return false
if data.domain.classVar.varType != orange.VarTypes.Discrete:
print "Target Variable must be discrete"%(attr.name)
return false
return true
def ruleSubsetSelection(self, beam, num_of_rules, data):
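        '''Greedily selects num_of_rules rules from the beam, repeatedly taking
        the rule whose covered examples have the largest total weight
        1.0/(stored count), so rules covering still-uncovered examples are
        preferred.'''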
SS = []
c = orange.newmetaid()
data.addMetaAttribute(c) #initialize to 1
if num_of_rules <= len(beam):
for i in range(num_of_rules):
best_score = 0
best_rule_index = 0
                for j in range(len(beam)): # j, not i, to avoid shadowing the outer loop index
                    score = 0
                    for d in data: # calculate sum of weights of examples
                        if beam[j].filter(d):
                            score += 1.0/d.getweight(c)
                    if score>best_score:
                        best_score = score
                        best_rule_index = j
                for d in data: # increase example counter
if beam[best_rule_index].filter(d):
d.setweight(c, d.getweight(c)+1)
SS.append(beam[best_rule_index])
del beam[best_rule_index]
data.removeMetaAttribute(c)
return SS
#___________________________________________________________________________________
if __name__=="__main__":
filename = "..\\..\\doc\\datasets\\lenses.tab"
if 'linux' in sys.platform:
filename= "/usr/doc/orange/datasets/lenses.tab"
data = orange.ExampleTable(filename)
learner = Beam_SD( minSupport = 0.2, beamWidth = 5, g = 6)
targetClass= orange.Value(data.domain.classVar, "soft")
rules = learner (data , targetClass=targetClass, num_of_rules=3)
rules.printRules()
import orange
import sys
from SDRule import *
import Preprocessor
true = 1
false = 0
class Beam_SD_preprocessed:
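    '''
    Variant of Beam_SD that performs the beam search over binary features
    pre-generated by Preprocessor.generateFeatures instead of raw
    attribute-value conditions.
    '''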
def __init__(self, minSupport = 0.2, beamWidth = 5, g = 1, **kwds):
self.minSupport = minSupport
self.beamWidth = beamWidth
self.g = g
def __call__(self, data, targetClass, num_of_rules ):
        if self.dataOK(data): # checks whether targetClass is discrete
original_data = data
data = Preprocessor.generateFeatures(data, targetClass)
# initialization of beams
beam = [SDRule(data=data, targetClass=targetClass, g=self.g)] * self.beamWidth
newBeam = [SDRule(data=data, targetClass=targetClass, g=self.g)] * self.beamWidth
worstRuleIndex = 0
improvements = true
while improvements:
improvements = false
for rule in beam:
# for f in features:
for feature in data.domain.attributes:
newRule = rule.cloneAndAddCondition(feature, 'True')
if newRule.support > self.minSupport and self.betterThanWorstRule(newRule, newBeam, worstRuleIndex) and self.isRelevant(newRule, newBeam):
worstRuleIndex = self.replaceWorstRule(newRule, newBeam, worstRuleIndex)
improvements = true
beam = newBeam
# perform rule subset selection
if num_of_rules != 0:
beam = self.ruleSubsetSelection(beam, num_of_rules, data)
targetClassRule = SDRule(original_data, targetClass, conditions=[], g=1)
# change beam so the rules apply to original data
fixedBeam = [rule.getFixed(original_data) for rule in beam]
return SDRules(fixedBeam, targetClassRule)
def isRelevant(self, newRule, beam):
for rule in beam:
if newRule.isIrrelevant(rule):
return false
return true
def betterThanWorstRule(self, newRule, beam, worstRuleIndex):
if newRule.quality > beam[worstRuleIndex].quality: # better quality
return true
elif newRule.quality == beam[worstRuleIndex].quality and newRule.complexity < beam[worstRuleIndex].complexity: # same quality and smaller complexity
return true
else:
return false
def replaceWorstRule(self, rule, beam, worstRuleIndex):
beam[worstRuleIndex] = rule
wri = 0
for i in range(len(beam)):
if beam[i].quality < beam[wri].quality:
wri = i
return wri
def dataOK(self, data):
# if data.domain.hasContinuousAttributes():
# print "All attributes must be discrete."
# return false
if data.domain.classVar.varType != orange.VarTypes.Discrete:
print "Target Variable must be discrete"%(attr.name)
return false
return true
def ruleSubsetSelection(self, beam, num_of_rules, data):
SS = []
c = orange.newmetaid()
data.addMetaAttribute(c) #initialize to 1
if num_of_rules <= len(beam):
for i in range(num_of_rules):
best_score = 0
best_rule_index = 0
                for j in range(len(beam)): # j, not i, to avoid shadowing the outer loop index
                    score = 0
                    for d in data: # calculate sum of weights of examples
                        if beam[j].filter(d):
                            score += 1.0/d.getweight(c)
                    if score>best_score:
                        best_score = score
                        best_rule_index = j
                for d in data: # increase example counter
if beam[best_rule_index].filter(d):
d.setweight(c, d.getweight(c)+1)
SS.append(beam[best_rule_index])
del beam[best_rule_index]
data.removeMetaAttribute(c)
return SS
#___________________________________________________________________________________
if __name__=="__main__":
filename = "..\\..\\doc\\datasets\\lenses.tab"
if 'linux' in sys.platform:
filename= "/usr/doc/orange/datasets/lenses.tab"
data = orange.ExampleTable(filename)
learner = Beam_SD_preprocessed( minSupport = 0.2, beamWidth = 5, g = 6)
targetClass= orange.Value(data.domain.classVar, "soft")
beam = learner (data , targetClass=targetClass, num_of_rules=3)
beam.printRules()
import orange
import sys
from SDRule import *
true = 1
false = 0
class CN2_SD:
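    '''
    CN2-SD: rules are found with Orange's rule beam finder under a weighted
    relative accuracy (WRAcc) evaluator and combined by weighted covering.
    '''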
def __init__(self,k):
self.k = k
self.counter = orange.newmetaid()
self.weightID = orange.newmetaid()
self.rbf = orange.RuleBeamFinder()
self.rbf.evaluator = RuleEvaluator_WRAcc()
def __call__(self, data, targetClass, num_of_rules=0):
'''Returns CN2-SD rules by performing weighted covering algorithm.'''
data_discretized = False
# If any of the attributes are continuous, discretize them
if data.domain.hasContinuousAttributes():
original_data = data
data_discretized = True
new_domain = []
discretize = orange.EntropyDiscretization(forceAttribute=True)
for attribute in data.domain.attributes:
if attribute.varType == orange.VarTypes.Continuous:
d_attribute = discretize(attribute, data)
# An attribute is irrelevant, if it is discretized into a single interval
# if len(d_attribute.getValueFrom.transformer.points) > 0:
new_domain.append(d_attribute)
else:
new_domain.append(attribute)
data = original_data.select(new_domain + [original_data.domain.classVar])
self.data = data
self.max_rules = num_of_rules
rules = []
tc = orange.Value(data.domain.classVar, targetClass)
# weighted covering
self.data.addMetaAttribute(self.weightID) # set weights of all examples to 1
self.data.addMetaAttribute(self.counter) # set counters of all examples to 0
targetClassRule = SDRule(data, targetClass, conditions=[], g =1)