Commit aedefbf5 authored by Matic Perovšek

- discretization widget

- wordification widget
- popravki v DBcontext
- database to orange converter deluje na več tabelah
parent b5ba9f92
This diff is collapsed.
......@@ -318,6 +318,59 @@ def cforange_example_distance(input_dict):
output_dict['dm']=matrix
return output_dict
def cforange_discretize(input_dict):
    """Discretize the continuous attributes of one or more Orange tables.

    Keys read from ``input_dict``:
      * ``dataset`` -- a single Orange example table or a list of them.
      * ``discretizer_id`` -- index (anything ``int()`` accepts) into the
        ``discretizers`` list below.
      * ``numberOfIntervals`` -- read only for discretizers 0 and 1.
      * ``points`` -- comma separated cut-off points, read only for
        discretizer 4.

    Returns a dict with the single key ``'odt'``: a list of discretized
    tables when ``dataset`` was a list, otherwise a single table.

    Raises ``ValueError`` when ``points`` contains no usable number or a
    value that cannot be parsed as a float.
    """
    import orange

    input_tables = input_dict['dataset']
    # Remember the input shape so that the output mirrors it.
    input_type_is_list = isinstance(input_tables, list)
    if not input_type_is_list:
        input_tables = [input_tables]

    discretizer_index = int(input_dict['discretizer_id'])
    discretizers = [
        ("Equi-distant discretization", orange.EquiDistDiscretization),  # numberOfIntervals
        ("Quantile-based discretization", orange.EquiNDiscretization),  # numberOfIntervals
        ("Entropy-based discretization", orange.EntropyDiscretization),  # no arguments
        ("Bi-modal discretization", orange.BiModalDiscretization),  # no arguments
        # NOTE(review): the original author left FixedDiscretization disabled
        # and mapped this entry to EquiNDiscretization -- confirm the intent.
        ("Fixed discretization", orange.EquiNDiscretization),  # points
    ]

    options = {}
    if discretizer_index == 4:
        # Tolerate blanks and stray commas ("1, 2,,3,") instead of failing
        # on float('') with an unhelpful message.
        tokens = input_dict['points'].replace(" ", "").split(",")
        points = sorted(float(token) for token in tokens if token)
        if not points:
            raise ValueError("No cut-off points given for fixed discretization")
        options['points'] = points
    elif discretizer_index in (0, 1):
        options['numberOfIntervals'] = int(input_dict['numberOfIntervals'])

    discretizer = discretizers[discretizer_index][1](**options)
    # Discretizers 0, 2 and 3 are called with (attribute, data); the others
    # go through constructVariable(attribute), exactly as before.
    called_with_data = discretizer_index in (0, 2, 3)

    output_tables = []
    for inputdata in input_tables:
        newattrs = []
        for attr in inputdata.domain.attributes:
            if attr.varType != orange.VarTypes.Continuous:
                # Non-continuous attributes are passed through untouched.
                newattrs.append(attr)
                continue
            if called_with_data:
                new_attr = discretizer(attr, inputdata)
            else:
                new_attr = discretizer.constructVariable(attr)
            # Strip the "D_" prefix only from freshly created attributes:
            # renaming pass-through attributes would mutate objects shared
            # with the input table's domain.
            if new_attr.name.startswith("D_"):
                new_attr.name = new_attr.name[2:]
            newattrs.append(new_attr)

        class_var = inputdata.domain.classVar
        if class_var is None:
            # Class-less input: build a class-less domain explicitly rather
            # than passing a None entry to select().
            new_t = orange.ExampleTable(orange.Domain(newattrs, False), inputdata)
        else:
            new_t = inputdata.select(newattrs + [class_var])
        new_t.name = inputdata.name
        output_tables.append(new_t)

    # Return a list only if the caller supplied a list.
    return {'odt': output_tables if input_type_is_list else output_tables[0]}
def cforange_attribute_distance(input_dict):
import orange
import orngInteract
......
This diff is collapsed.
......@@ -4,6 +4,7 @@ from random import choice
from aleph import Aleph
from rsd import RSD
from wordification import Wordification
from services.webservice import WebService
......@@ -60,4 +61,12 @@ def ilp_sdmaleph(input_dict):
clauseLen=input_dict.get('clauseLen') if input_dict.get('clauseLen') != '' else None,
dataFormat=input_dict.get('dataFormat') if input_dict.get('dataFormat') != '' else None
)
return {'theory' : response['theory']}
\ No newline at end of file
return {'theory' : response['theory']}
def ilp_wordification(input_dict):
    """Run Wordification on the widget inputs and return the result.

    Reads ``target_table``, ``other_tables`` and ``context`` from
    ``input_dict`` (each defaults to ``None`` when missing), passes them in
    that order to ``Wordification`` and returns ``{'corpus': ...}`` holding
    the value of its ``wordify()`` call.
    """
    argument_keys = ('target_table', 'other_tables', 'context')
    arguments = [input_dict.get(key) for key in argument_keys]
    return {'corpus': Wordification(*arguments).wordify()}
\ No newline at end of file
from wordification import Wordification
This diff is collapsed.
......@@ -45,7 +45,7 @@ class DBContext:
self.tables = [table for (table,) in cursor]
self.cols = {}
for table in self.tables:
cursor.execute("SELECT column_name FROM information_schema.columns WHERE table_name = '%s'" % table)
cursor.execute("SELECT column_name FROM information_schema.columns WHERE table_name = '%s' AND table_schema='%s'" % (table,connection.database))
self.cols[table] = [col for (col,) in cursor]
self.all_cols = dict(self.cols)
self.col_vals = {}
......@@ -62,15 +62,18 @@ class DBContext:
for col in self.cols[table]:
if col.endswith('_id'):
ref_table = (col[:-4] + 'ies') if col[-4] == 'y' else (col[:-3] + 's')
self.connected[(table, ref_table)] = (col, 'id')
self.connected[(ref_table, table)] = ('id', col)
self.fkeys[table].add(col)
if col == 'id':
self.pkeys[table] = col
if ref_table in self.tables:
self.connected[(table, ref_table)] = (col, 'id')
self.connected[(ref_table, table)] = ('id', col)
self.fkeys[table].add(col)
if col == 'id':
self.pkeys[table] = col
for (table, col, ref_table, ref_col) in cursor:
self.connected[(table, ref_table)] = (col, ref_col)
self.connected[(ref_table, table)] = (ref_col, col)
self.fkeys[table].add(col)
cursor.execute(
"SELECT table_name, column_name \
FROM information_schema.KEY_COLUMN_USAGE \
......
......@@ -172,7 +172,7 @@ class Aleph_Converter(ILP_Converter):
class Orange_Converter(Converter):
'''
Converts the target table selected in the given context as an orange example table.
Converts the selected tables in the given context to orange example tables.
'''
continuous_types = ('FLOAT','DOUBLE','DECIMAL','NEWDECIMAL')
integer_types = ('TINY','SHORT','LONG','LONGLONG','INT24')
......@@ -180,21 +180,33 @@ class Orange_Converter(Converter):
def __init__(self, *args, **kwargs):
Converter.__init__(self, *args, **kwargs)
self.types = self.db.fetch_types(self.db.target_table, self.db.cols[self.db.target_table])
self.types={}
for table in self.db.tables:
self.types[table]= self.db.fetch_types(table, self.db.cols[table])
self.db.compute_col_vals()
def target_table(self):
def target_Orange_table(self):
table, cls_att = self.db.target_table, self.db.target_att
return self.convert_table(table, cls_att)
def other_Orange_tables(self):
target_table = self.db.target_table
return[ self.convert_table(table,None) for table in self.db.tables if table!=target_table]
def convert_table(self,table_name, cls_att=None):
'''
Returns the target table as an orange example table.
'''
import orange
table, cls_att = self.db.target_table, self.db.target_att
cols = self.db.cols[table]
attributes, metas, classVar = [], [], None
cols = self.db.cols[table_name]
attributes, metas, class_var = [], [], []
for col in cols:
att_type = self.orng_type(col)
att_type = self.orng_type(table_name,col)
if att_type == 'd':
att_vals = self.db.col_vals[table][col]
att_vals = self.db.col_vals[table_name][col]
att_var = orange.EnumVariable(str(col), values=[str(val) for val in att_vals])
elif att_type == 'c':
att_var = orange.FloatVariable(str(col))
......@@ -202,33 +214,34 @@ class Orange_Converter(Converter):
att_var = orange.StringVariable(str(col))
if col == cls_att:
if att_type == 'string':
raise Exception('Unsuitable data type for a target variable: %d' % att_type)
class_var = att_var
raise Exception('Unsuitable data type for a target variable: %s' % att_type)
class_var.append(att_var)
continue
elif att_type == 'string':
metas.append(att_var)
else:
attributes.append(att_var)
domain = orange.Domain(attributes + [class_var])
domain = orange.Domain(attributes + class_var)
for meta in metas:
domain.addmeta(orange.newmetaid(), meta)
dataset = orange.ExampleTable(domain)
for row in self.db.rows(table, cols):
dataset.name=table_name
for row in self.db.rows(table_name, cols):
example = orange.Example(domain)
for col, val in zip(cols, row):
example[str(col)] = str(val)
dataset.append(example)
return dataset
def orng_type(self, col):
def orng_type(self, table_name,col):
'''
Assigns a given mysql column an orange type.
'''
mysql_type = self.types[col]
n_vals = len(self.db.col_vals[self.db.target_table][col])
mysql_type = self.types[table_name][col]
n_vals = len(self.db.col_vals[table_name][col])
if mysql_type in Orange_Converter.continuous_types or (n_vals >= 50 and mysql_type in Orange_Converter.integer_types):
return 'c'
elif mysql_type in Orange_Converter.ordinal_types:
elif mysql_type in Orange_Converter.ordinal_types+Orange_Converter.integer_types:
return 'd'
else:
return 'string'
......
......@@ -39,4 +39,4 @@ def mysql_query_to_odt(input_dict):
def mysql_orange_converter(input_dict):
context = input_dict['context']
orange = Orange_Converter(context)
return {'dataset' : orange.target_table()}
return {'target_table_dataset' : orange.target_Orange_table(),'other_table_datasets': orange.other_Orange_tables()}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment