Commit f968b042 authored by bogdan

Merge branch 'dev' of workflow.ijs.si:mothra into dev

parents 644470e6 72e302c9
......@@ -3,6 +3,9 @@ from django import forms
import mysql.connector as sql
class DBConnection:
'''
Database credentials.
'''
def __init__(self, user, password, host, database):
self.user = user
self.password = password
......@@ -14,6 +17,19 @@ class DBConnection:
class DBContext:
def __init__(self, connection):
'''
Initializes the fields:
tables: list of selected tables
cols: dict of columns for each table
all_cols: dict of columns for each table (even unselected)
col_vals: available values for each table/column
connected: dict of table pairs and the connected columns
fkeys: foreign keys in a given table
pkeys: primary key for a given table
target_table: selected table for learning
target_att: selected column for learning
target_att_val: selected target att value
'''
self.connection = connection
con = connection.connect()
cursor = con.cursor()
......@@ -26,9 +42,10 @@ class DBContext:
self.all_cols = dict(self.cols)
self.col_vals = {}
for table, cols in self.cols.items():
self.col_vals[table] = {}
for col in cols:
cursor.execute("SELECT DISTINCT %s FROM %s" % (col, table))
self.col_vals[col] = [val for (val,) in cursor]
self.col_vals[table][col] = [val for (val,) in cursor]
self.connected = {}
cursor.execute(
"SELECT table_name, column_name, referenced_table_name, referenced_column_name \
......@@ -52,11 +69,16 @@ class DBContext:
con.close()
def update(self, postdata):
'''
Updates the default selections with user's selections.
'''
widget_id = postdata.get('widget_id')[0]
self.target_table = postdata.get('target_table%s' % widget_id)[0]
self.target_att = postdata.get('target_att%s' % widget_id)[0]
self.target_att_val = postdata.get('target_att_val%s' % widget_id)[0]
self.tables = postdata.get('tables%s' % widget_id, [])
if self.target_table not in self.tables:
raise Exception('The selected target table "%s" is not among the selected tables.' % self.target_table)
# Propagate the selected tables
for table in self.cols.keys():
if table not in self.tables:
......@@ -67,6 +89,8 @@ class DBContext:
del self.connected[pair]
for table in self.tables:
self.cols[table] = postdata.get('%s_columns%s' % (table, widget_id), [])
if table == self.target_table and self.target_att not in self.cols[table]:
raise Exception('The selected target attribute ("%s") is not among the columns selected for the target table ("%s").' % (self.target_att, self.target_table))
def __repr__(self):
return str((self.target_table, self.target_att, self.tables, self.cols, self.connected))
......
......@@ -4,26 +4,46 @@ Classes for handling DBContexts for ILP systems.
@author: Anze Vavpetic <anze.vavpetic@ijs.si>
'''
class ILP_DBContext:
class Converter:
'''
Base class for converting between a given database context (selected tables, columns, etc)
to inputs acceptable by a specific ILP system.
If possible, all subclasses should use lazy selects by forwarding the DB connection.
Base class for converters.
'''
def __init__(self, dbcontext, settings={}):
def __init__(self, dbcontext):
self.db = dbcontext
self.connection = dbcontext.connection.connect()
self.cursor = self.connection.cursor()
self.settings = settings
def __del__(self):
def __del__(self):
self.connection.close()
def rows(self, table, cols):
def rows(self, table, cols):
self.cursor.execute("SELECT %s FROM %s" % (','.join(cols), table))
return [cols for cols in self.cursor]
def fetch_types(self, table, cols):
    '''
    Returns a dictionary mapping each selected column name to the name of
    its MySQL field type, as reported by FieldType.get_info.

    table: name of the table to inspect
    cols: list of column names to look up
    '''
    from mysql.connector import FieldType
    cursor = self.cursor
    query = 'SELECT %s FROM %s LIMIT 1' % (','.join(cols), table)
    cursor.execute(query)
    # Only the column metadata is needed; the fetched row itself is discarded.
    # NOTE(review): presumably the fetch is required so the shared cursor can
    # be reused by later queries - confirm against the connector docs.
    cursor.fetchall()
    # Each description entry starts with (column name, type code, ...).
    return dict((desc[0], FieldType.get_info(desc[1])) for desc in cursor.description)
class ILP_Converter(Converter):
'''
Base class for converting between a given database context (selected tables, columns, etc)
to inputs acceptable by a specific ILP system.
If possible, all subclasses should use lazy selects by forwarding the DB connection.
'''
def __init__(self, *args, **kwargs):
    '''
    Stores the optional ``settings`` keyword argument on the instance and
    forwards all remaining arguments to Converter.__init__.

    settings: dict of user settings; defaults to an empty dict when omitted.
    '''
    # pop() with a default: the previous "pop('settings') if kwargs else {}"
    # raised KeyError whenever other keyword arguments were supplied
    # without 'settings'.
    self.settings = kwargs.pop('settings', {})
    Converter.__init__(self, *args, **kwargs)
def user_settings(self):
    '''
    Renders every entry of self.settings as a ':- set(key,value).' line
    and returns the lines as a list.
    '''
    directives = []
    for name in self.settings:
        directives.append(':- set(%s,%s).' % (name, self.settings[name]))
    return directives
......@@ -63,7 +83,7 @@ class ILP_DBContext:
return ['%s_%s(%s, %s) :-' % (table, att, var_table, var_att),
'\t%s(%s).' % (table, ','.join([att.capitalize() if att!=pk else var_table for att in self.db.cols[table]]))]
class RSD_DBContext(ILP_DBContext):
class RSD_Converter(ILP_Converter):
'''
Converts the database context to RSD inputs.
'''
......@@ -89,12 +109,12 @@ class RSD_DBContext(ILP_DBContext):
getters.extend(self.attribute_clause(table, att))
return '\n'.join(self.db_connection() + modeslist + getters + self.user_settings())
class Aleph_DBContext(ILP_DBContext):
class Aleph_Converter(ILP_Converter):
'''
Converts the database context to Aleph inputs.
'''
def __init__(self, *args, **kwargs):
ILP_DBContext.__init__(self, *args, **kwargs)
ILP_Converter.__init__(self, *args, **kwargs)
self.__pos_examples, self.__neg_examples = None, None
def __examples(self):
......@@ -141,14 +161,12 @@ class Aleph_DBContext(ILP_DBContext):
return '\n'.join(self.db_connection() + local_copies + self.user_settings() + modeslist + determinations + types + getters)
def concept_type_def(self, table):
    '''
    Returns the two lines of a clause of the form

        table(Pk) :-
            table(_,...,Pk,...,_).

    where Pk is the capitalized primary key of the table and every other
    selected column is replaced by the anonymous variable '_'.
    '''
    pk_var = self.db.pkeys[table].capitalize()
    slots = []
    for column in self.db.cols[table]:
        # The comparison is made on the capitalized forms of both names.
        if column.capitalize() == pk_var:
            slots.append(pk_var)
        else:
            slots.append('_')
    head = '%s(%s) :-' % (table, pk_var)
    body = '\t%s(%s).' % (table, ','.join(slots))
    return [head, body]
def constant_type_def(self, table, att):
# return ['%s(%s).' % (att, val) for val in self.db.col_vals[att]]
var_att = att.capitalize()
variables = ','.join([var_att if col == att else '_' for col in self.db.cols[table]])
return ['%s(%s) :-' % (att, var_att),
......@@ -165,6 +183,69 @@ class Aleph_DBContext(ILP_DBContext):
cols = ','.join([col.capitalize() for col in self.db.cols[table]])
return ':- repeat, tmp_%s(%s), (%s(%s), !, fail ; assertz(%s(%s)), fail).' % (table, cols, table, cols, table, cols)
class Orange_Converter(Converter):
    '''
    Converts the target table selected in the given context as an orange example table.
    '''
    # MySQL field type names that are always mapped to continuous attributes.
    continuous_types = ('FLOAT', 'DOUBLE', 'DECIMAL', 'NEWDECIMAL')
    # MySQL field type names that are mapped to discrete attributes, unless
    # the column has too many distinct values (see max_discrete_values).
    discrete_types = ('TINY', 'SHORT', 'LONG', 'LONGLONG', 'INT24', 'YEAR', 'VARCHAR', 'BIT', 'SET', 'VAR_STRING', 'STRING')
    # A discrete-typed column with at least this many distinct values is
    # treated as continuous.
    max_discrete_values = 50

    def __init__(self, *args, **kwargs):
        '''
        Initializes the base converter and caches the MySQL field types of
        the columns selected for the target table in self.types.
        '''
        Converter.__init__(self, *args, **kwargs)
        self.types = self.fetch_types(self.db.target_table, self.db.cols[self.db.target_table])

    def target_table(self):
        '''
        Returns the target table as an orange example table.

        Discrete and continuous columns become regular attributes, columns
        of any other type become meta attributes and the target attribute
        becomes the class variable.

        Raises an Exception if the target attribute has an unsuitable type
        or is not among the columns selected for the target table.
        '''
        import orange
        table, cls_att = self.db.target_table, self.db.target_att
        cols = self.db.cols[table]
        attributes, metas, class_var = [], [], None
        for col in cols:
            att_type = self.orng_type(col)
            if att_type == 'd':
                att_vals = self.db.col_vals[table][col]
                att_var = orange.EnumVariable(str(col), values=[str(val) for val in att_vals])
            elif att_type == 'c':
                att_var = orange.FloatVariable(str(col))
            else:
                att_var = orange.StringVariable(str(col))
            if col == cls_att:
                if att_type == 'string':
                    # att_type is a string, so it must be formatted with %s;
                    # %d raised a TypeError instead of this message.
                    raise Exception('Unsuitable data type for a target variable: %s' % att_type)
                class_var = att_var
            elif att_type == 'string':
                metas.append(att_var)
            else:
                attributes.append(att_var)
        if class_var is None:
            # Without this check the domain below would be built without a
            # valid class variable.
            raise Exception('The selected target attribute ("%s") is not among the columns selected for the target table ("%s").' % (cls_att, table))
        domain = orange.Domain(attributes + [class_var])
        for meta in metas:
            domain.addmeta(orange.newmetaid(), meta)
        dataset = orange.ExampleTable(domain)
        for row in self.rows(table, cols):
            example = orange.Example(domain)
            for col, val in zip(cols, row):
                example[str(col)] = str(val)
            dataset.append(example)
        return dataset

    def orng_type(self, col):
        '''
        Assigns a given mysql column an orange type.

        Returns 'c' (continuous), 'd' (discrete) or 'string'.
        '''
        mysql_type = self.types[col]
        n_vals = len(self.db.col_vals[self.db.target_table][col])
        is_discrete = mysql_type in Orange_Converter.discrete_types
        # NOTE(review): textual types (e.g. VARCHAR) with many distinct values
        # are also reported as continuous here - confirm this is intended.
        if mysql_type in Orange_Converter.continuous_types or (is_discrete and n_vals >= self.max_discrete_values):
            return 'c'
        elif is_discrete:
            return 'd'
        else:
            return 'string'
if __name__ == '__main__':
from context import DBConnection, DBContext
......@@ -173,10 +254,12 @@ if __name__ == '__main__':
context.target_att = 'direction'
context.target_att_val = 'east'
rsd = RSD_DBContext(context)
ex, bk = rsd.all_examples(), rsd.background_knowledge()
# rsd = RSD_Converter(context)
# ex, bk = rsd.all_examples(), rsd.background_knowledge()
aleph = Aleph_DBContext(context)
print aleph.positive_examples()
print aleph.negative_examples()
print aleph.background_knowledge()
\ No newline at end of file
# aleph = Aleph_Converter(context)
# print aleph.positive_examples()
# print aleph.negative_examples()
# print aleph.background_knowledge()
orange = Orange_Converter(context)
orange.target_table()
\ No newline at end of file
[
{
"pk": 75,
"model": "workflows.category",
"fields": {
"uid": "b66b71b1-99d6-4efc-b264-a914a3e42911",
"parent": null,
"workflow": null,
"user": null,
"order": 1,
"name": "MySQL"
}
},
{
"pk": 309,
"model": "workflows.abstractwidget",
"fields": {
"category": 75,
"treeview_image": "",
"name": "Database Context",
"is_streaming": false,
"uid": "649015ac-ca53-43bc-a36a-c603b52f8775",
"interaction_view": "mysql_db_context",
"image": "",
"package": "mysql",
"static_image": "",
"post_interact_action": "mysql_db_context_finished",
"user": null,
"visualization_view": "",
"action": "mysql_db_context",
"wsdl_method": "",
"wsdl": "",
"interactive": true,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 1036,
"model": "workflows.abstractinput",
"fields": {
"widget": 309,
"name": "connection",
"short_name": "con",
"uid": "6c23b4a2-a18e-498e-a22c-4de86932da3e",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "connection",
"parameter": false,
"order": 1,
"description": ""
}
},
{
"pk": 368,
"model": "workflows.abstractoutput",
"fields": {
"widget": 309,
"name": "context",
"short_name": "cxt",
"variable": "context",
"uid": "171aa481-0f93-4315-a476-ed3b0aa8c9e4",
"order": 1,
"description": ""
}
},
{
"pk": 311,
"model": "workflows.abstractwidget",
"fields": {
"category": 75,
"treeview_image": "",
"name": "Database To Aleph",
"is_streaming": false,
"uid": "3c5ac714-84ca-4c93-befb-ce0c16148193",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"action": "mysql_aleph_converter",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 1052,
"model": "workflows.abstractinput",
"fields": {
"widget": 311,
"name": "context",
"short_name": "cxt",
"uid": "474e9673-9c55-48c5-bae8-a7b986aa0287",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "context",
"parameter": false,
"order": 1,
"description": "Database context object"
}
},
{
"pk": 371,
"model": "workflows.abstractoutput",
"fields": {
"widget": 311,
"name": "pos_examples",
"short_name": "pex",
"variable": "pos_examples",
"uid": "48184527-16a5-446c-9181-48a6558430c2",
"order": 1,
"description": "positive examples file"
}
},
{
"pk": 372,
"model": "workflows.abstractoutput",
"fields": {
"widget": 311,
"name": "neg_examples",
"short_name": "nex",
"variable": "neg_examples",
"uid": "2c623064-95cd-48ac-8bff-617edf2b9468",
"order": 1,
"description": "negative examples file"
}
},
{
"pk": 373,
"model": "workflows.abstractoutput",
"fields": {
"widget": 311,
"name": "bk",
"short_name": "bk",
"variable": "bk",
"uid": "5e61e44e-cc52-4471-9a41-3afa52f58ab1",
"order": 1,
"description": "background knowledge"
}
},
{
"pk": 414,
"model": "workflows.abstractwidget",
"fields": {
"category": 75,
"treeview_image": "",
"name": "Database To Orange Table",
"is_streaming": false,
"uid": "baa32a60-5acb-42a1-ba83-23a6ab2160e7",
"interaction_view": "mysql_orange_converter",
"image": "",
"package": "mysql",
"static_image": "",
"post_interact_action": "mysql_orange_converter_finished",
"user": null,
"visualization_view": "",
"action": "mysql_orange_converter",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 1241,
"model": "workflows.abstractinput",
"fields": {
"widget": 414,
"name": "context",
"short_name": "cxt",
"uid": "1f9b5ccf-65c3-4ccc-818e-afb3a6ffee20",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "context",
"parameter": false,
"order": 1,
"description": "Database context object"
}
},
{
"pk": 489,
"model": "workflows.abstractoutput",
"fields": {
"widget": 414,
"name": "Data table",
"short_name": "odt",
"variable": "dataset",
"uid": "8dc434fd-3307-432f-b1d7-74638390a077",
"order": 1,
"description": "Orange data table"
}
},
{
"pk": 310,
"model": "workflows.abstractwidget",
"fields": {
"category": 75,
"treeview_image": "",
"name": "Database To RSD",
"is_streaming": false,
"uid": "e5b68e38-cb53-408a-a58f-26742d6ecb3c",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"action": "mysql_rsd_converter",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 1037,
"model": "workflows.abstractinput",
"fields": {
"widget": 310,
"name": "context",
"short_name": "cxt",
"uid": "4f1397a8-4e72-4b34-b31d-d09bf9a7e7d9",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "context",
"parameter": false,
"order": 1,
"description": "Database context object"
}
},
{
"pk": 369,
"model": "workflows.abstractoutput",
"fields": {
"widget": 310,
"name": "examples",
"short_name": "exm",
"variable": "examples",
"uid": "f17690db-636b-4874-8406-f57778550c27",
"order": 1,
"description": "examples"
}
},
{
"pk": 370,
"model": "workflows.abstractoutput",
"fields": {
"widget": 310,
"name": "bk",
"short_name": "bk",
"variable": "bk",
"uid": "89a386ef-3aa8-441f-99ff-4b64b1e3357f",
"order": 2,
"description": "background knowledge"
}
},
{
"pk": 308,
"model": "workflows.abstractwidget",
"fields": {
"category": 75,
"treeview_image": "",
"name": "MySQL Connect",
"is_streaming": false,
"uid": "41978deb-c245-4e1f-95a2-ceadcfe6898a",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"action": "mysql_connect",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 1032,
"model": "workflows.abstractinput",
"fields": {
"widget": 308,
"name": "user",
"short_name": "usr",
"uid": "0f36f10b-066f-4ad3-9fa8-39205440076a",
"default": "",
"required": true,
"multi": false,
"parameter_type": "text",
"variable": "user",
"parameter": true,
"order": 1,
"description": ""
}
},
{
"pk": 1033,
"model": "workflows.abstractinput",
"fields": {
"widget": 308,
"name": "password",
"short_name": "pwd",
"uid": "8ead911a-3d1b-4e50-b56a-64873b85d3cf",
"default": "",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "password",
"parameter": true,
"order": 2,
"description": ""
}
},
{
"pk": 1034,
"model": "workflows.abstractinput",
"fields": {
"widget": 308,
"name": "host",
"short_name": "hst",
"uid": "9812cc11-b21e-4444-8dfe-bc0c68fe79e8",
"default": "",
"required": true,
"multi": false,
"parameter_type": "text",
"variable": "host",
"parameter": true,
"order": 3,
"description": ""
}
},
{
"pk": 1035,
"model": "workflows.abstractinput",
"fields": {
"widget": 308,
"name": "database",
"short_name": "db",
"uid": "d132adce-c3ec-411a-8ee7-906be4a99f61",
"default": "",
"required": true,
"multi": false,
"parameter_type": "text",
"variable": "database",
"parameter": true,
"order": 4,
"description": ""
}
},
{
"pk": 367,
"model": "workflows.abstractoutput",
"fields": {
"widget":