Commit 644470e6 authored by bogdan

Merge branch 'dev' of workflow.ijs.si:mothra into dev

parents f4df0be6 85f52434
This diff is collapsed.
from django.shortcuts import render
import json
def cforange_filter_integers(request, input_dict, output_dict, widget):
    """Render the interaction page of the integer-filter widget.

    The template receives the widget and the list stored under
    ``input_dict['intList']``. ``output_dict`` is accepted but not used.
    """
    context = {
        'widget': widget,
        'intList': input_dict['intList'],
    }
    return render(request, 'interactions/cforange_filter_integers.html', context)
def cforange_hierarchical_clustering(request, input_dict, output_dict, widget):
    """Render the interactive dendrogram of a hierarchical clustering.

    Clusters the distance matrix ``input_dict['dm']`` with the linkage whose
    index is ``input_dict['linkage']``, converts the resulting tree into
    nested dicts and hands it to the template as a JSON string.
    ``output_dict`` is accepted but not used.
    """
    import orange
    from library import Clustering

    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    root = Clustering.hierarchical_clustering(linkage, matrix)

    # If the matrix items expose a domain, each leaf is described by the
    # values of the domain's variables. Otherwise each leaf is described only
    # by the ``name`` of its item, under a single placeholder column.
    dm_examples = True
    try:
        attributes = [x.name for x in matrix.items.domain]
    except Exception:
        # Still a broad fallback, but unlike a bare ``except`` it does not
        # swallow KeyboardInterrupt or SystemExit.
        attributes = ['attribute']
        dm_examples = False

    def build_hierarchy(node, root=False):
        """Return ``node`` and its whole subtree as JSON-serializable dicts."""
        is_leaf = not node.branches
        values_dict = {}
        if is_leaf:
            item = matrix.items[node.first]
            if dm_examples:
                values_dict = dict((x, item[x].value) for x in attributes)
            else:
                values_dict = dict((x, item.name) for x in attributes)
        # Floats are shortened to three decimals for display.
        for attribute, value in list(values_dict.items()):
            if isinstance(value, float):
                values_dict[attribute] = "%.3f" % value
        return {
            'name': 'root' if root else '',
            'id': node.first if is_leaf else -1,
            'height': 0 if is_leaf else node.height,
            'children': [] if is_leaf else [build_hierarchy(node.left), build_hierarchy(node.right)],
            'values': values_dict,
            'leaf': is_leaf,
        }

    hierarchy = json.dumps(build_hierarchy(root, root=True))
    context = {
        'widget': widget,
        'hierarchy': hierarchy,
        'attributes': attributes,
        # The key spelling is part of the contract with the template.
        'vizualization': input_dict['visualization'],
    }
    return render(request, 'interactions/cforange_hierarchical_clustering.html', context)
......@@ -169,7 +169,6 @@ def cforange_confusion_matrix(input_dict):
cm = orngStat.confusionMatrices(results,classIndex=classIndex)
if len(cm)==1:
cm = cm[0]
print cm
output_dict = {}
output_dict['cm']=cm
return output_dict
......@@ -282,4 +281,160 @@ def cforange_prepare_results(input_dict):
newdict['fscore']=input_dict['f'][i]
newlist.append(newdict)
output_dict['alp']=newlist
return output_dict
\ No newline at end of file
return output_dict
def cforange_example_distance(input_dict):
    """Compute a symmetric distance matrix between the examples of a dataset.

    Reads the data from ``input_dict['dataset']``, the index of the distance
    measure from ``input_dict['distanceMetrics']`` and the normalization
    switch (the string ``'true'`` enables it) from
    ``input_dict['normalization']``. Returns ``{'dm': matrix}``, where the
    matrix carries the dataset in its ``items`` attribute.
    """
    import orange
    import random
    import orngClustering
    import orngMisc

    examples = input_dict['dataset']
    selected = int(input_dict['distanceMetrics'])
    # (label, constructor) pairs; ``selected`` is a position in this list.
    metrics = [
        ("Euclidean", orange.ExamplesDistanceConstructor_Euclidean),
        ("Pearson Correlation", orngClustering.ExamplesDistanceConstructor_PearsonR),
        ("Spearman Rank Correlation", orngClustering.ExamplesDistanceConstructor_SpearmanR),
        ("Manhattan", orange.ExamplesDistanceConstructor_Manhattan),
        ("Hamming", orange.ExamplesDistanceConstructor_Hamming),
        ("Relief", orange.ExamplesDistanceConstructor_Relief),
    ]
    use_normalization = input_dict['normalization'] == 'true'

    make_distance = metrics[selected][1]()
    make_distance.normalize = use_normalization
    distance = make_distance(examples)

    size = len(examples)
    matrix = orange.SymMatrix(size)
    matrix.setattr('items', examples)
    # Only the lower triangle (diagonal included) is filled in.
    for row in range(size):
        for col in range(row + 1):
            matrix[row, col] = distance(examples[row], examples[col])

    return {'dm': matrix}
def cforange_attribute_distance(input_dict):
    """Compute a symmetric distance matrix between the attributes of a dataset.

    ``input_dict['dataset']`` is the data table and
    ``input_dict['classInteractions']`` selects the measure:

    * 0 -- the matrix from ``InteractionMatrix.exportChi2Matrix()``
    * 1 -- ``depExportDissimilarityMatrix(jaccard=1)`` (2-interactions)
    * any other value below 3 -- ``exportDissimilarityMatrix(jaccard=1)``
      (3-interactions)
    * 3 -- ``(1 - r) / 2`` with ``r`` from ``orange.PearsonCorrelation``
    * above 3 -- ``(1 - rho) / 2`` with ``rho`` from ``statc.spearmanr``,
      computed after masked values are filled with the column average

    Returns ``{'dm': matrix}``; the matrix carries the attribute list in its
    ``items`` attribute. Returns ``None`` when the data has fewer than two
    attributes, when discretization raises ``orange.KernelException``, when
    the data has no class variable and a measure other than 0 (below 3) is
    requested, or when the Spearman measure is requested for fewer than
    three examples.
    """
    import orange
    import orngInteract

    inputdata = input_dict['dataset']
    classInteractions = int(input_dict['classInteractions'])
    atts = inputdata.domain.attributes
    if len(atts) < 2:
        return None

    matrix = orange.SymMatrix(len(atts))
    matrix.setattr('items', atts)

    if classInteractions < 3:
        if inputdata.domain.hasContinuousAttributes():
            # Continuous attributes are discretized into four intervals
            # before the interaction matrix is built.
            try:
                data = orange.Preprocessor_discretize(
                    inputdata,
                    method=orange.EquiNDiscretization(numberOfIntervals=4))
            except orange.KernelException:
                return None
        else:
            data = inputdata

        # Data without a class variable is only supported for measure 0; in
        # that case a placeholder class variable "foo" is attached.
        # NOTE(review): presumably InteractionMatrix needs a class variable
        # even for the chi-square export -- confirm against orngInteract.
        if not data.domain.classVar:
            if classInteractions == 0:
                classedDomain = orange.Domain(data.domain.attributes, orange.EnumVariable("foo", values=["0", "1"]))
                data = orange.ExampleTable(classedDomain, data)
            else:
                return None

        im = orngInteract.InteractionMatrix(data, dependencies_too=1)
        # ``off`` shifts the target row index: the chi-square export is
        # copied starting at row 0, the other two starting at row 1.
        off = 1
        if classInteractions == 0:
            diss, labels = im.exportChi2Matrix()
            off = 0
        elif classInteractions == 1:
            (diss, labels) = im.depExportDissimilarityMatrix(jaccard=1)  # 2-interactions
        else:
            (diss, labels) = im.exportDissimilarityMatrix(jaccard=1)  # 3-interactions

        for i in range(len(atts) - off):
            for j in range(i + 1):
                matrix[i + off, j] = diss[i][j]
    else:
        if classInteractions == 3:
            for a1 in range(len(atts)):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - orange.PearsonCorrelation(a1, a2, inputdata, 0).r) / 2.0
        else:
            if len(inputdata) < 3:
                return None
            import numpy
            import statc
            m = inputdata.toNumpyMA("A")[0]
            averages = numpy.ma.average(m, axis=0)
            # One plain list per attribute, masked entries replaced by the
            # attribute's average.
            filleds = [list(numpy.ma.filled(m[:, i], averages[i])) for i in range(len(atts))]
            for a1, f1 in enumerate(filleds):
                for a2 in range(a1):
                    matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0

    output_dict = {}
    output_dict['dm'] = matrix
    return output_dict
def cforange_hierarchical_clustering(input_dict):
    """Return the widget's three outputs, all set to ``None``.

    ``input_dict`` is not read here; the visible code fills these outputs in
    ``cforange_hierarchical_clustering_finished``.
    """
    outputs = {}
    for name in ('centroids', 'selected_examples', 'unselected_examples'):
        outputs[name] = None
    return outputs
class Clustering:
    """Clustering helpers shared by the widget functions."""

    @staticmethod
    def hierarchical_clustering(linkage, distance_matrix):
        """Cluster ``distance_matrix`` with the linkage at index ``linkage``.

        Index 0 is single, 1 average, 2 Ward's and 3 complete linkage.
        Returns the result of calling ``orange.HierarchicalClustering``.
        """
        import orange
        clustering = orange.HierarchicalClustering
        linkages = [
            ("Single linkage", clustering.Single),
            ("Average linkage", clustering.Average),
            ("Ward's linkage", clustering.Ward),
            ("Complete linkage", clustering.Complete),
        ]
        _label, method = linkages[linkage]
        return clustering(distance_matrix, linkage=method)
def cforange_hierarchical_clustering_finished(postdata, input_dict, output_dict):
    """Turn the user's dendrogram selection into the widget outputs.

    ``postdata['selected_nodes'][0]`` must be a JSON list of
    ``(example index, selected flag, cluster number)`` triples and
    ``input_dict['dm']`` the distance matrix that was clustered.
    ``output_dict`` is accepted but not used.

    Returns a dict with the keys ``'centroids'``, ``'selected_examples'`` and
    ``'unselected_examples'``. When the matrix items are an
    ``orange.ExampleTable`` these are example tables carrying an extra
    ``Cluster`` meta attribute; otherwise all three are ``None``.

    Raises ``Exception`` when the selection is missing or is not valid JSON.
    """
    import json

    # Validate the selection first so that a missing threshold is reported
    # before any heavier work is done.
    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except (TypeError, IndexError, ValueError):
        # TypeError: field missing; IndexError: field present but empty;
        # ValueError: the value is not valid JSON.
        raise Exception('Please select a threshold for determining clusters.')

    import orange
    matrix = input_dict['dm']

    if isinstance(matrix.items, orange.ExampleTable):
        cluster_ids = set(cluster for _, _, cluster in selected_nodes)
        selected_clusters = set(cluster for _, selected, cluster in selected_nodes if selected)
        # Sorted so that the order of the enum values is deterministic.
        clustVar = orange.EnumVariable(
            str('Cluster'),
            values=["Cluster %d" % i for i in sorted(cluster_ids)] + ["Other"])
        origDomain = matrix.items.domain
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        domain.addmeta(orange.newmetaid(), clustVar)
        domain.addmetas(origDomain.getmetas())

        # Split the examples into selected and unselected tables.
        selected_table, unselected_table = orange.ExampleTable(domain), orange.ExampleTable(domain)
        for node_id, selected, cluster in selected_nodes:
            new_ex = orange.Example(domain, matrix.items[node_id])
            if selected:
                new_ex[clustVar] = clustVar("Cluster %d" % cluster)
                selected_table.append(new_ex)
            else:
                new_ex[clustVar] = clustVar("Other")
                unselected_table.append(new_ex)

        # One centroid per selected cluster: the average where continuous
        # statistics exist, else the mode of the distribution, else "?".
        centroids = orange.ExampleTable(selected_table.domain)
        if len(selected_table) > 0:
            for cluster in sorted(selected_clusters):
                clusterEx = orange.ExampleTable([ex for ex in selected_table if ex[clustVar] == "Cluster %d" % cluster])
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                ex = [cs.avg if cs else (ds.modus() if ds else "?") for cs, ds in zip(contstat, discstat)]
                example = orange.Example(centroids.domain, ex)
                example[clustVar] = clustVar("Cluster %d" % cluster)
                centroids.append(example)
    else:  # Attribute distance
        centroids, selected_table, unselected_table = None, None, None

    return {'centroids': centroids, 'selected_examples': selected_table, 'unselected_examples': unselected_table}
This diff is collapsed.
:- use_module(library(myddas)).
:- db_open(mysql, 'localhost'/'test', 'root', '').
:- db_import(cars, tmp_cars).
:- db_import(trains, tmp_trains).
:- repeat, tmp_trains(A,B), (trains(A,B), !, fail ; assertz(trains(A,B)), fail).
:- use_module(library(myddas)).
:- db_open(mysql, 'localhost'/'test', 'root', '').
:- db_import(cars, cars).
:- db_import(trains, trains).
% Aleph mode declarations.
% modeh declares the head (target) literal: east/1 with a train as input.
:- modeh(1, east(+trains)).
% modeb declares the literals allowed in clause bodies; the recall '*' puts
% no bound on the number of solutions tried. In the argument templates '+'
% marks an input variable of the given type, '-' an output variable and '#'
% a constant of that type.
:- modeb(*, has_cars(+trains,-cars)).
:- modeb(*, cars_position(+cars,#position)).
:- modeb(*, cars_shape(+cars,#shape)).
:- modeb(*, cars_len(+cars,#len)).
:- modeb(*, cars_sides(+cars,#sides)).
:- modeb(*, cars_roof(+cars,#roof)).
:- modeb(*, cars_wheels(+cars,#wheels)).
:- modeb(*, cars_load_shape(+cars,#load_shape)).
:- modeb(*, cars_load_num(+cars,#load_num)).
% Determinations: each body predicate listed here may be used when
% constructing clauses for east/1.
:- determination(east/1, has_cars/2).
:- determination(east/1, cars_position/2).
:- determination(east/1, cars_shape/2).
:- determination(east/1, cars_len/2).
:- determination(east/1, cars_sides/2).
:- determination(east/1, cars_roof/2).
:- determination(east/1, cars_wheels/2).
:- determination(east/1, cars_load_shape/2).
:- determination(east/1, cars_load_num/2).
% Type predicates: one unary predicate per type named in the mode
% declarations. Each one enumerates the values found at one argument
% position of the trains/2 or cars/10 relation.
% trains/1 and cars/1 enumerate the first argument (the identifier).
trains(Id) :-
trains(Id,_).
cars(Id) :-
cars(Id,_,_,_,_,_,_,_,_,_).
% Constant types: arguments 3 to 10 of cars/10, in this order.
position(Position) :-
cars(_,_,Position,_,_,_,_,_,_,_).
shape(Shape) :-
cars(_,_,_,Shape,_,_,_,_,_,_).
len(Len) :-
cars(_,_,_,_,Len,_,_,_,_,_).
sides(Sides) :-
cars(_,_,_,_,_,Sides,_,_,_,_).
roof(Roof) :-
cars(_,_,_,_,_,_,Roof,_,_,_).
wheels(Wheels) :-
cars(_,_,_,_,_,_,_,Wheels,_,_).
load_shape(Load_shape) :-
cars(_,_,_,_,_,_,_,_,Load_shape,_).
load_num(Load_num) :-
cars(_,_,_,_,_,_,_,_,_,Load_num).
% Body predicates referenced by the modeb declarations.
% has_cars(Train, Car): Train is the first argument of a trains/2 row and
% Car is a cars/10 row whose second argument is that train.
has_cars(Trains, Cars) :-
trains(Trains,_),
cars(Cars,Trains,_,_,_,_,_,_,_,_).
% cars_<column>(Car, Value): Value is the named argument of the cars/10 row
% whose first argument is Car.
cars_position(Cars, Position) :-
cars(Cars,_,Position,_,_,_,_,_,_,_).
cars_shape(Cars, Shape) :-
cars(Cars,_,_,Shape,_,_,_,_,_,_).
cars_len(Cars, Len) :-
cars(Cars,_,_,_,Len,_,_,_,_,_).
cars_sides(Cars, Sides) :-
cars(Cars,_,_,_,_,Sides,_,_,_,_).
cars_roof(Cars, Roof) :-
cars(Cars,_,_,_,_,_,Roof,_,_,_).
cars_wheels(Cars, Wheels) :-
cars(Cars,_,_,_,_,_,_,Wheels,_,_).
cars_load_shape(Cars, Load_shape) :-
cars(Cars,_,_,_,_,_,_,_,Load_shape,_).
cars_load_num(Cars, Load_num) :-
cars(Cars,_,_,_,_,_,_,_,_,Load_num).
\ No newline at end of file
east(1).
east(2).
east(3).
east(4).
east(5).
east(6).
east(7).
east(8).
east(9).
east(10).
east(11).
east(12).
east(13).
east(14).
east(15).
east(16).
east(17).
east(18).
east(19).
east(20).
This diff is collapsed.
......@@ -24,6 +24,11 @@ class DBContext:
cursor.execute("SELECT column_name FROM information_schema.columns WHERE table_name = '%s'" % table)
self.cols[table] = [col for (col,) in cursor]
self.all_cols = dict(self.cols)
self.col_vals = {}
for table, cols in self.cols.items():
for col in cols:
cursor.execute("SELECT DISTINCT %s FROM %s" % (col, table))
self.col_vals[col] = [val for (val,) in cursor]
self.connected = {}
cursor.execute(
"SELECT table_name, column_name, referenced_table_name, referenced_column_name \
......@@ -43,12 +48,14 @@ class DBContext:
self.pkeys[table] = pk
self.target_table = self.tables[0]
self.target_att = None
self.target_att_val = None
con.close()
def update(self, postdata):
widget_id = postdata.get('widget_id')[0]
self.target_table = postdata.get('target_table%s' % widget_id)[0]
self.target_att = postdata.get('target_att%s' % widget_id)[0]
self.target_att_val = postdata.get('target_att_val%s' % widget_id)[0]
self.tables = postdata.get('tables%s' % widget_id, [])
# Propagate the selected tables
for table in self.cols.keys():
......@@ -61,9 +68,6 @@ class DBContext:
for table in self.tables:
self.cols[table] = postdata.get('%s_columns%s' % (table, widget_id), [])
import cPickle
cPickle.dump(self, open('context_example.pkl', 'w'))
def __repr__(self):
    """Return the text form of the context's main fields.

    The fields are, in order: target table, target attribute, selected
    tables, columns per table and table connections.
    """
    summary = (self.target_table, self.target_att, self.tables, self.cols, self.connected)
    return str(summary)
......@@ -24,34 +24,18 @@ class ILP_DBContext:
self.cursor.execute("SELECT %s FROM %s" % (','.join(cols), table))
return [cols for cols in self.cursor]
class RSD_DBContext(ILP_DBContext):
'''
Converts the database context to RSD inputs.
'''
def all_examples(self):
    """Return one Prolog fact per row of the target table.

    Each fact has the form ``<target table>(<target attribute value>,
    <primary key>).``; the facts are joined with newlines.
    """
    table = self.db.target_table
    columns = [self.db.target_att, self.db.pkeys[table]]
    facts = []
    for label, key in self.rows(table, columns):
        facts.append('%s(%s, %s).' % (table, label, key))
    return '\n'.join(facts)
def background_knowledge(self):
    """Return the background-knowledge text for the database context.

    The text consists, in order, of the database connection directives, the
    mode declarations, the getter clauses and the user settings, joined
    with newlines.
    """
    db = self.db
    target = db.target_table
    modeslist = [self.mode(target, [('+', target)], head=True)]
    getters = []

    # One has_<table> relation per connection that does not lead back to
    # the target table.
    for (table, ref_table) in db.connected.keys():
        if ref_table != target:
            modeslist.append(self.mode('has_%s' % ref_table, [('+', table), ('-', ref_table)]))
            getters.extend(self.connecting_clause(table, ref_table))

    # One <table>_<attribute> relation per selected column, leaving out the
    # target attribute and all key columns.
    for table, atts in db.cols.items():
        for att in atts:
            is_target = (att == db.target_att and table == target)
            if is_target or att in db.fkeys[table] or att == db.pkeys[table]:
                continue
            modeslist.append(self.mode('%s_%s' % (table, att), [('+', table), ('-', att)]))
            modeslist.append(self.mode('instantiate', [('+', att)]))
            getters.extend(self.attribute_clause(table, att))

    return '\n'.join(self.db_connection() + modeslist + getters + self.user_settings())
def user_settings(self):
    """Return one ``:- set(<key>,<value>).`` directive per user setting."""
    directives = []
    for key, val in self.settings.items():
        directives.append(':- set(%s,%s).' % (key, val))
    return directives
def mode(self, predicate, args, recall=1, head=False):
    """Return a mode declaration directive for ``predicate``.

    ``args`` is a sequence of ``(mode symbol, type name)`` pairs, e.g.
    ``[('+', 'trains'), ('-', 'cars')]``. ``recall`` may be a number or a
    string such as ``'*'``. ``head`` selects ``modeh`` instead of ``modeb``.
    """
    kind = 'h' if head else 'b'
    arguments = ','.join([symbol + arg for symbol, arg in args])
    # The recall goes through %s rather than %d so that the non-numeric
    # recall '*' can be formatted as well.
    return ':- mode%s(%s, %s(%s)).' % (kind, str(recall), predicate, arguments)
def db_connection(self):
    """Return the directives that connect to the database.

    The list loads the myddas library, opens the MySQL connection described
    by ``self.db.connection`` and imports every table in ``self.db.tables``
    under its own name.
    """
    con = self.db.connection
    directives = [
        ':- use_module(library(myddas)).',
        ':- db_open(mysql, \'%s\'/\'%s\', \'%s\', \'%s\').' % (con.host, con.database, con.user, con.password),
    ]
    for table in self.db.tables:
        directives.append(':- db_import(%s, %s).' % (table, table))
    return directives
def connecting_clause(self, table, ref_table):
var_table, var_ref_table = table.capitalize(), ref_table.capitalize()
......@@ -79,29 +63,107 @@ class RSD_DBContext(ILP_DBContext):
return ['%s_%s(%s, %s) :-' % (table, att, var_table, var_att),
'\t%s(%s).' % (table, ','.join([att.capitalize() if att!=pk else var_table for att in self.db.cols[table]]))]
def db_connection(self):
    """Return the myddas directives for the context's database.

    First the library import, then the ``db_open`` call built from
    ``self.db.connection``, then one ``db_import`` per selected table.
    """
    con = self.db.connection
    credentials = (con.host, con.database, con.user, con.password)
    header = [':- use_module(library(myddas)).']
    header.append(':- db_open(mysql, \'%s\'/\'%s\', \'%s\', \'%s\').' % credentials)
    imports = []
    for name in self.db.tables:
        imports.append(':- db_import(%s, %s).' % (name, name))
    return header + imports
class RSD_DBContext(ILP_DBContext):
    '''
    Converts the database context to RSD inputs.
    '''
    def all_examples(self):
        '''
        Returns one fact per row of the target table, in the form
        <target table>(<target attribute value>, <primary key>).
        '''
        table = self.db.target_table
        columns = [self.db.target_att, self.db.pkeys[table]]
        facts = []
        for label, key in self.rows(table, columns):
            facts.append('%s(%s, %s).' % (table, label, key))
        return '\n'.join(facts)

    def user_settings(self):
        '''
        Returns one ":- set(<key>,<value>)." directive per user setting.
        '''
        directives = []
        for key, val in self.settings.items():
            directives.append(':- set(%s,%s).' % (key, val))
        return directives

    def background_knowledge(self):
        '''
        Returns the connection directives, mode declarations, getter clauses
        and user settings, joined with newlines.
        '''
        db = self.db
        target = db.target_table
        modeslist = [self.mode(target, [('+', target)], head=True)]
        getters = []

        # Connections that lead back to the target table are left out.
        for (table, ref_table) in db.connected.keys():
            if ref_table != target:
                modeslist.append(self.mode('has_%s' % ref_table, [('+', table), ('-', ref_table)]))
                getters.extend(self.connecting_clause(table, ref_table))

        # The target attribute and all key columns get no getter.
        for table, atts in db.cols.items():
            for att in atts:
                is_target = (att == db.target_att and table == target)
                if is_target or att in db.fkeys[table] or att == db.pkeys[table]:
                    continue
                modeslist.append(self.mode('%s_%s' % (table, att), [('+', table), ('-', att)]))
                modeslist.append(self.mode('instantiate', [('+', att)]))
                getters.extend(self.attribute_clause(table, att))

        return '\n'.join(self.db_connection() + modeslist + getters + self.user_settings())
class Aleph_DBContext(ILP_DBContext):
    '''
    Converts the database context to Aleph inputs: the positive examples,
    the negative examples and the background knowledge.
    '''
    def __init__(self, *args, **kwargs):
        ILP_DBContext.__init__(self, *args, **kwargs)
        # Filled on first use by __examples(); None means "not computed yet".
        self.__pos_examples, self.__neg_examples = None, None

    def __examples(self):
        '''
        Returns the (positive, negative) example strings, computing them on
        the first call only. A row of the target table is positive when its
        target attribute equals the selected target value. Both strings hold
        facts of the form <target value>(<primary key>).
        '''
        # Compare against None: an empty string is a valid cached result
        # (no rows on one side) and must not trigger another query.
        if self.__pos_examples is None or self.__neg_examples is None:
            target, att, target_val = self.db.target_table, self.db.target_att, self.db.target_att_val
            rows = self.rows(target, [att, self.db.pkeys[target]])
            pos_keys, neg_keys = [], []
            for val, key in rows:
                if val == target_val:
                    pos_keys.append(key)
                else:
                    neg_keys.append(key)
            self.__pos_examples = '\n'.join(['%s(%s).' % (target_val, key) for key in pos_keys])
            self.__neg_examples = '\n'.join(['%s(%s).' % (target_val, key) for key in neg_keys])
        return self.__pos_examples, self.__neg_examples

    def positive_examples(self):
        '''
        Returns the positive examples, one fact per line.
        '''
        return self.__examples()[0]

    def negative_examples(self):
        '''
        Returns the negative examples, one fact per line.
        '''
        return self.__examples()[1]

    def background_knowledge(self):
        '''
        Returns the background knowledge text: connection directives, local
        table copies, user settings, mode declarations, determinations, type
        definitions and getter clauses, joined with newlines.
        '''
        modeslist, getters = [self.mode(self.db.target_att_val, [('+', self.db.target_table)], head=True)], []
        determinations, types = [], []
        for (table, ref_table) in self.db.connected.keys():
            if ref_table == self.db.target_table:
                continue # Skip backward connections
            modeslist.append(self.mode('has_%s' % ref_table, [('+', table), ('-', ref_table)], recall='*'))
            determinations.append(':- determination(%s/1, has_%s/2).' % (self.db.target_att_val, ref_table))
            types.extend(self.concept_type_def(table))
            types.extend(self.concept_type_def(ref_table))
            getters.extend(self.connecting_clause(table, ref_table))
        for table, atts in self.db.cols.items():
            for att in atts:
                # The target attribute and all key columns are left out.
                if att == self.db.target_att and table == self.db.target_table or \
                   att in self.db.fkeys[table] or att == self.db.pkeys[table]:
                    continue
                modeslist.append(self.mode('%s_%s' % (table, att), [('+', table), ('#', att)], recall='*'))
                determinations.append(':- determination(%s/1, %s_%s/2).' % (self.db.target_att_val, table, att))
                types.extend(self.constant_type_def(table, att))
                getters.extend(self.attribute_clause(table, att))
        local_copies = [self.local_copy(table) for table in self.db.tables]
        return '\n'.join(self.db_connection() + local_copies + self.user_settings() + modeslist + determinations + types + getters)

    def concept_type_def(self, table):
        '''
        Returns a clause defining the unary type <table>(Pk) through the
        table relation itself: the primary key column becomes the variable,
        every other column is anonymous.
        '''
        var_pk = self.db.pkeys[table].capitalize()
        variables = ','.join([var_pk if col.capitalize() == var_pk else '_' for col in self.db.cols[table]])
        return ['%s(%s) :-' % (table, var_pk),
                '\t%s(%s).' % (table, variables)]

    def constant_type_def(self, table, att):
        '''
        Returns a clause defining the unary type <att>(Value) through the
        table relation: the column att becomes the variable, every other
        column is anonymous.
        '''
        var_att = att.capitalize()
        variables = ','.join([var_att if col == att else '_' for col in self.db.cols[table]])
        return ['%s(%s) :-' % (att, var_att),
                '\t%s(%s).' % (table, variables)]

    def db_connection(self):
        '''
        Returns the connection directives. Unlike the base class version,
        every table is imported as tmp_<table>; local_copy() then builds the
        <table> predicates from these.
        '''
        con = self.db.connection
        host, db, user, pwd = con.host, con.database, con.user, con.password
        return [':- use_module(library(myddas)).', \
                ':- db_open(mysql, \'%s\'/\'%s\', \'%s\', \'%s\').' % (host, db, user, pwd)] + \
               [':- db_import(%s, tmp_%s).' % (table, table) for table in self.db.tables]

    def local_copy(self, table):
        '''
        Returns a directive that asserts the rows of tmp_<table> as local
        <table> facts, skipping rows that are already present.
        '''
        cols = ','.join([col.capitalize() for col in self.db.cols[table]])
        return ':- repeat, tmp_%s(%s), (%s(%s), !, fail ; assertz(%s(%s)), fail).' % (table, cols, table, cols, table, cols)
if __name__ == '__main__':
from context import DBConnection, DBContext
......@@ -109,16 +171,12 @@ if __name__ == '__main__':
context = DBContext(DBConnection('root','','localhost','test'))
context.target_table = 'trains'
context.target_att = 'direction'
context.target_att_val = 'east'
rsd = RSD_DBContext(context)
ex, bk = rsd.all_examples(), rsd.background_knowledge()
print ex
print bk
f = open('trains_mysql.pl','w')
f.write(ex)
f.close()
f = open('trains_mysql.b','w')
f.write(bk)
f.close()
\ No newline at end of file
aleph = Aleph_DBContext(context)
print aleph.positive_examples()
print aleph.negative_examples()
print aleph.background_knowledge()
\ No newline at end of file
......@@ -11,5 +11,7 @@ def mysql_db_context(request, input_dict, output_dict, widget):
con = input_dict['connection']
initial_context = DBContext(con)
initial_target_cols = initial_context.cols[initial_context.target_table]
initial_target_col_vals = initial_context.col_vals[initial_target_cols[0]]
cols_dump = json.dumps(initial_context.cols)
return render(request, 'interactions/db_context.html', {'widget':widget, 'context': initial_context, 'target_cols' : initial_target_cols, 'cols' : cols_dump})
col_vals_dump = json.dumps(initial_context.col_vals)
return render(request, 'interactions/db_context.html', {'widget':widget, 'context': initial_context, 'target_cols' : initial_target_cols, 'cols' : cols_dump, 'col_vals' : col_vals_dump, 'target_col_vals' : initial_target_col_vals})
......@@ -13,6 +13,12 @@
<option value="{{col}}" {% if forloop.first %}selected="selected"{% endif %}>{{col}}</option>
{% endfor %}
</select>
<label>Target attribute value:</label>
<select name="target_att_val{{widget.pk}}">
{% for val in target_col_vals %}
<option value="{{val}}" {% if forloop.first %}selected="selected"{% endif %}>{{val}}</option>
{% endfor %}
</select>
<label>Select tables to be used:</label>
<select multiple name="tables{{widget.pk}}">
{% for table in context.tables %}
......@@ -43,6 +49,7 @@
<script type="text/javascript">
// Columns data.
var cols = {{cols|safe}};
var col_vals = {{col_vals|safe}};
$('select[name="target_table{{widget.pk}}"]').change(function () {
var selected_table = $(this).first('option:selected').val();
var available_cols = cols[selected_table];
......@@ -54,6 +61,17 @@
att_select.append($('<option></option>').attr('value', col).text(col));
}
});
// Refill the "target attribute value" select whenever another target
// attribute is chosen.
$('select[name="target_att{{widget.pk}}"]').change(function () {
    // The handler is bound to the <select> itself, so .val() yields the
    // value of its selected option.
    var selected_att = $(this).val();
    // Fall back to an empty list when no values are known for the attribute.
    var available_vals = col_vals[selected_att] || [];
    var val_select = $('select[name="target_att_val{{widget.pk}}"]');
    val_select.empty();
    // $.each avoids the implicit global index of the former for-in loop.
    $.each(available_vals, function (idx, val) {
        val_select.append($('<option></option>').attr('value', val).text(val));
    });
});
</script>
</div>
</div>
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.