Commit da885bbe authored by Anze Vavpetic

The database-to-RSD and database-to-Aleph converters now accept threshold values for discretizing attributes.

parent 48868d0a
......@@ -10,9 +10,9 @@ import tempfile
from stat import S_IREAD, S_IEXEC
from subprocess import PIPE
if __name__ != '__main__':
try:
from ..security import SafePopen
else:
except:
import os
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
os.sys.path.append(parent_dir)
......
......@@ -25,7 +25,8 @@ class ILP_Converter(Converter):
If possible, all subclasses should use lazy selects by forwarding the DB connection.
'''
def __init__(self, *args, **kwargs):
    '''
    Extracts the ILP-specific keyword arguments and forwards everything
    else to Converter.__init__.

        @param settings: optional settings object; defaults to an empty dict.
        @param discr_intervals: optional mapping
            table name -> attribute name -> list of threshold values,
            read by attribute_clause(); defaults to an empty dict.
    '''
    # A single pop with a default per option: popping 'settings' without a
    # default raised KeyError whenever other keyword arguments were given
    # without it, and popping it a second time discarded a supplied value.
    self.settings = kwargs.pop('settings', {})
    # Empty values (None, '') are normalised so that later dictionary
    # lookups on discr_intervals cannot fail.
    self.discr_intervals = kwargs.pop('discr_intervals', None) or {}
    Converter.__init__(self, *args, **kwargs)
def user_settings(self):
......@@ -64,8 +65,34 @@ class ILP_Converter(Converter):
def attribute_clause(self, table, att):
    '''
    Builds the Prolog clause that exposes attribute `att` of `table`.

    Returns a list of lines. Without thresholds for (table, att) the clause
    is

        table_att(Table, Att) :-
            table(<one variable per column>).

    With thresholds [t0, ..., tn] registered in self.discr_intervals the head
    variable becomes Discrete_Att and the body is extended with one
    disjunction that binds it to a label: '=< t0', '(ti, ti+1]' for each
    consecutive pair, and '> tn'.

        @param table: table name; must be a key of self.db.pkeys and self.db.cols.
        @param att: attribute (column) name of the table.
    '''
    var_table, var_att, pk = table.capitalize(), att.capitalize(), self.db.pkeys[table]

    # `or {}` / `or []` tolerate empty non-dict values and avoid the
    # Python 2-only dict.has_key().
    table_intervals = (self.discr_intervals or {}).get(table) or {}
    intervals = table_intervals.get(att) or []

    head_att = 'Discrete_%s' % var_att if intervals else var_att
    # The primary key column is represented by the table variable itself.
    args = ','.join(col.capitalize() if col != pk else var_table
                    for col in self.db.cols[table])
    clause = ['%s_%s(%s, %s) :-' % (table, att, var_table, head_att),
              '\t%s(%s)%s' % (table, args, ',' if intervals else '.')]
    if not intervals:
        return clause

    # (label, condition) per bin. %s rather than %d keeps non-integer
    # thresholds intact instead of truncating them.
    first, last = intervals[0], intervals[-1]
    bins = [('=< %s' % first, '%s =< %s' % (var_att, first))]
    for low, high in zip(intervals, intervals[1:]):
        bins.append(('(%s, %s]' % (low, high),
                     '%s > %s, %s =< %s' % (var_att, low, var_att, high)))
    bins.append(('> %s' % last, '%s > %s' % (var_att, last)))

    goals = ["(%s = '%s', %s)" % (head_att, label, cond) for label, cond in bins]
    # Wrap the whole disjunction in one pair of parentheses. There are always
    # at least two bins, so every goal but the last ends with ';' - this also
    # holds for a single threshold, where the first goal must not end the clause.
    goals[0] = '(' + goals[0]
    goals[-1] = goals[-1] + ')'
    return clause + ['\t%s;' % goal for goal in goals[:-1]] + ['\t%s.' % goals[-1]]
class RSD_Converter(ILP_Converter):
'''
......@@ -252,13 +279,12 @@ if __name__ == '__main__':
context = DBContext(DBConnection('root','','localhost','test'))
context.target_table = 'trains'
context.target_att = 'direction'
rsd = RSD_Converter(context)
ex, bk = rsd.all_examples(), rsd.background_knowledge()
aleph = Aleph_Converter(context, target_att_val='east')
print aleph.positive_examples()
print aleph.negative_examples()
intervals = {'cars': {'position' : [1, 3]}}
import cPickle
cPickle.dump(intervals, open('intervals.pkl','w'))
rsd = RSD_Converter(context, discr_intervals=intervals)
aleph = Aleph_Converter(context, target_att_val='east', discr_intervals=intervals)
print rsd.background_knowledge()
print aleph.background_knowledge()
orange = Orange_Converter(context)
orange.target_table()
\ No newline at end of file
#orange = Orange_Converter(context)
#orange.target_table()
\ No newline at end of file
......@@ -128,6 +128,24 @@
"description": "Target attribute value to be used as the positive class"
}
},
{
"pk": 644,
"model": "workflows.abstractinput",
"fields": {
"widget": 191,
"name": "discretization intervals",
"short_name": "itr",
"uid": "02c30ce3-482b-4cc7-a9f2-08cbdb925105",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "discr_intervals",
"parameter": false,
"order": 1,
"description": "dictionary of intervals for discretization"
}
},
{
"pk": 444,
"model": "workflows.abstractinput",
......@@ -297,6 +315,24 @@
"description": "Database context object"
}
},
{
"pk": 645,
"model": "workflows.abstractinput",
"fields": {
"widget": 193,
"name": "discretization intervals",
"short_name": "itr",
"uid": "1449645a-4e32-41eb-8c8b-7b494cefd04b",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "discr_intervals",
"parameter": false,
"order": 1,
"description": "dictionary of intervals for discretization"
}
},
{
"pk": 221,
"model": "workflows.abstractoutput",
......
......@@ -26,11 +26,11 @@ def mysql_db_context_finished(postdata, input_dict, output_dict):
return {'context' : context}
def mysql_rsd_converter(input_dict):
    '''
    Converts the database context into RSD input.

        @param input_dict: widget inputs; 'context' is required,
            'discr_intervals' (table -> attribute -> thresholds) is optional.
        @return: dict with the 'examples' and 'bk' (background knowledge) outputs.
    '''
    # `or {}` also covers an input that is present but empty; the widget
    # fixture declares "" as its default - presumably what an unconnected
    # input delivers, TODO confirm.
    discr_intervals = input_dict.get('discr_intervals') or {}
    # The converter is built once, with the intervals; a converter built
    # without them would be discarded immediately.
    rsd = RSD_Converter(input_dict['context'], discr_intervals=discr_intervals)
    return {'examples' : rsd.all_examples(), 'bk' : rsd.background_knowledge()}
def mysql_aleph_converter(input_dict):
    '''
    Converts the database context into Aleph input.

        @param input_dict: widget inputs; 'context' and 'target_att_val' are
            required, 'discr_intervals' (table -> attribute -> thresholds)
            is optional.
        @return: dict with the 'pos_examples', 'neg_examples' and 'bk'
            (background knowledge) outputs.
    '''
    # `or {}` also covers an input that is present but empty; the widget
    # fixture declares "" as its default - presumably what an unconnected
    # input delivers, TODO confirm.
    discr_intervals = input_dict.get('discr_intervals') or {}
    # The converter is built once, with the intervals; a converter built
    # without them would be discarded immediately.
    aleph = Aleph_Converter(input_dict['context'],
                            target_att_val=input_dict['target_att_val'],
                            discr_intervals=discr_intervals)
    return {'pos_examples' : aleph.positive_examples(),
            'neg_examples' : aleph.negative_examples(),
            'bk' : aleph.background_knowledge()}
def mysql_query_to_odt(input_dict):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment