Commit f383fa28 authored by Anze Vavpetic

added treeliker widgets

parent ced17d79
This diff is collapsed.
......@@ -7,6 +7,7 @@ from random import choice
from aleph import Aleph
from rsd import RSD
from wordification import Wordification
from treeliker import TreeLiker
from security import check_input
from services.webservice import WebService
......@@ -91,5 +92,20 @@ def ilp_wordification(input_dict):
context = input_dict.get('context', None)
word_att_length = int(input_dict.get('f_ngram_size', 1))
wordification = Wordification(target_table,other_tables,context,word_att_length)
return {'corpus' : wordification.wordify()}
def ilp_treeliker(input_dict):
    '''
    Widget entry point: runs TreeLiker on the given dataset and template.

    Reads the mandatory 'template' and 'dataset' entries from `input_dict`,
    collects the optional settings (missing ones are passed on as None) and
    returns the result of the run under the 'arff' key.
    '''
    template = input_dict['template']
    dataset = input_dict['dataset']

    # Optional settings forwarded to TreeLiker; absent keys map to None.
    option_names = (
        'algorithm',
        'minimum_frequency',
        'covered_class',
        'maximum_size',
        'use_sampling',
        'sample_size',
        'max_degree',
    )
    settings = dict((name, input_dict.get(name)) for name in option_names)

    learner = TreeLiker(dataset, template)
    return {'arff': learner.run(settings=settings)}
......@@ -40,7 +40,7 @@ allowed_mysql_predicates = [
'db_import',
'db_open',
]
default_timeout = 15 * 60
default_timeout = 1200 * 60
class SafePopen(threading.Thread):
'''
......
from treeliker import TreeLiker
import shutil
import tempfile
import os.path
from subprocess import Popen, PIPE
class TreeLiker:
    '''
    Wrapper that runs the TreeLiker java tool in a private temporary directory.

    The constructor creates the directory, writes the examples file into it and
    copies the `bin/` folder located next to this module. `run` writes a batch
    file, invokes java and returns the produced arff file contents; the
    temporary directory is removed afterwards.
    '''

    def __init__(self, dataset, template):
        '''
        Prepares the temporary working directory.

        :param dataset: text written verbatim to the examples file
        :param template: template text, inserted verbatim into the batch file
        '''
        self.basename = 'default'
        self.dataset = dataset
        self.template = template
        self.tmpdir = tempfile.mkdtemp()
        try:
            with open('%s/%s.txt' % (self.tmpdir, self.basename), 'w') as f:
                f.write(dataset)

            # Copy binaries to tmp folder
            cdir = os.path.dirname(os.path.abspath(__file__))
            shutil.copytree('%s/bin/' % cdir, '%s/bin/' % self.tmpdir)
        except Exception:
            # Do not leave a half-initialised working directory behind.
            self._cleanup()
            raise

    def run(self, settings=None):
        '''
        Runs TreeLiker with the given settings.

        :param settings: optional dict of `name -> value` pairs; entries whose
            value is None or '' are skipped. Defaults to no extra settings.
        :return: contents of the generated arff file as a string
        :raises IOError: if the arff file cannot be read after the run
        '''
        try:
            self._batch(settings or {})
            p = Popen(['java', '-Xmx1G', '-cp', 'bin/TreeLiker.jar',
                       'ida.ilp.treeLiker.TreeLikerMain', '-batch', self.batch],
                      cwd=self.tmpdir)
            # No pipes are attached, so this simply waits for the process.
            p.communicate()
            with open('%s/%s.arff' % (self.tmpdir, self.basename)) as f:
                return f.read()
        finally:
            # Remove the working directory on success and on failure alike.
            self._cleanup()

    def _batch(self, settings):
        '''
        Creates the batch file to run the experiment.

        The path of the written file is stored in `self.batch`.
        '''
        self.batch = '%s/%s.treeliker' % (self.tmpdir, self.basename)

        commands = []
        commands.append('set(output_type, single)')
        commands.append("set(examples, '%s.txt')" % self.basename)
        commands.append('set(template, %s)' % self.template)
        commands.append('set(output, %s.arff)' % self.basename)

        # Optional settings
        for key, val in settings.items():
            if val not in [None, '']:
                commands.append('set(%s, %s)' % (key, str(val)))

        commands.append('work(yes)')

        script = '\n'.join(commands)
        with open(self.batch, 'w') as f:
            f.write(script)
        # Single-argument call form: valid in both Python 2 and Python 3.
        print(script)

    def _cleanup(self):
        '''
        Cleans up all the temporary files.

        Best effort: a failure to remove the directory is logged, not raised.
        '''
        import logging

        try:
            shutil.rmtree(self.tmpdir)
        except OSError:
            logging.getLogger(__name__).info(
                'Problem removing temporary files. '
                'The files are probably in use.')
......@@ -313,8 +313,11 @@ class TreeLikerConverter(Converter):
'''
def __init__(self, *args, **kwargs):
    '''
    Accepts the base converter arguments plus an optional `discr_intervals`
    keyword, which is removed from `kwargs` before the base initialiser runs.
    '''
    # Template entries collected while facts are generated, plus the set of
    # predicates that already have an entry.
    self._predicates = set()
    self._template = []
    # pop() already falls back to {} when the key (or any kwarg) is missing.
    self.discr_intervals = kwargs.pop('discr_intervals', {})
    Converter.__init__(self, *args, **kwargs)
def _row_pk(self, target, cols, row):
row_pk = None
for idx, col in enumerate(row):
......@@ -323,6 +326,7 @@ class TreeLikerConverter(Converter):
break
return row_pk
def _facts(self, pk, pk_att, target, visited=set()):
'''
Returns the facts for the given entity with pk in `table`.
......@@ -355,14 +359,27 @@ class TreeLikerConverter(Converter):
else:
continue
elif attr_name == self.db.pkeys[target]:
facts.append('has_%s(%s)' % (target, row_pk_name))
predicate = 'has_%s' % target
facts.append('%s(%s)' % (predicate, row_pk_name))
if predicate not in self._predicates:
self._predicates.add(predicate)
self._template.append('%s(-%s)' % (predicate,
target))
# Constants
else:
predicate = 'has_%s' % attr_name
col = self._discretize_check(target, attr_name, col)
facts.append('has_%s(%s, %s)' % (attr_name,
row_pk_name,
str(col)))
facts.append('%s(%s, %s)' % (predicate,
row_pk_name,
str(col)))
if predicate not in self._predicates:
self._predicates.add(predicate)
self._template.append('%s(+%s, #%s)' % (predicate,
target,
attr_name))
# Recursively follow links to other tables
for table in self.db.tables:
......@@ -392,6 +409,7 @@ class TreeLikerConverter(Converter):
visited=visited))
return facts
def _discretize_check(self, table, att, col):
'''
Replaces the value with an appropriate interval symbol, if available.
......@@ -419,25 +437,6 @@ class TreeLikerConverter(Converter):
return label
n_intervals = len(intervals)
for i, value in enumerate(intervals):
punct = '.' if i == n_intervals-1 else ';'
if i == 0:
# Condition: att =< value_i
label = '=< %.2%f' % value
condition = '%s =< %d' % (att.capitalize(), value)
discretize_goals.append('\t((%s = \'%s\', %s)%s' % (var_att, label, condition, punct))
if i < n_intervals-1:
# Condition: att in (value_i, value_i+1]
value_next = intervals[i+1]
label = '(%d, %d]' % (value, value_next)
condition = '%s > %d, %s =< %d' % (att.capitalize(), value, att.capitalize(), value_next)
discretize_goals.append('\t(%s = \'%s\', %s)%s' % (var_att, label, condition, punct))
else:
# Condition: att > value_i
label = '> %d' % value
condition = '%s > %d' % (att.capitalize(), value)
discretize_goals.append('\t(%s = \'%s\', %s))%s' % (var_att, label, condition, punct))
def dataset(self):
'''
......@@ -454,8 +453,9 @@ class TreeLikerConverter(Converter):
return '\n'.join(examples)
def default_template(self):
    '''
    Returns the collected template entries as one string: the entries are
    joined with ', ' and wrapped in square brackets.
    '''
    joined = ', '.join(self._template)
    return '[%s]' % joined
if __name__ == '__main__':
......
This diff is collapsed.
......@@ -5,7 +5,7 @@ MySQL connectivity library.
'''
import mysql.connector as sql
from context import DBConnection, DBContext
from converters import RSD_Converter, Aleph_Converter, Orange_Converter
from converters import RSD_Converter, Aleph_Converter, Orange_Converter, TreeLikerConverter
def mysql_connect(input_dict):
user = str(input_dict['user'])
......@@ -35,6 +35,12 @@ def mysql_aleph_converter(input_dict):
aleph = Aleph_Converter(input_dict['context'], target_att_val=input_dict['target_att_val'], discr_intervals=input_dict['discr_intervals'] or {}, dump=dump)
return {'pos_examples' : aleph.positive_examples(), 'neg_examples' : aleph.negative_examples(), 'bk' : aleph.background_knowledge()}
def mysql_treeliker_converter(input_dict):
    '''
    Widget entry point: converts the given database context for TreeLiker.

    Builds a TreeLikerConverter from the 'context' entry (a falsy
    'discr_intervals' entry is replaced with an empty dict) and returns its
    dataset and default template.
    '''
    context = input_dict['context']
    intervals = input_dict['discr_intervals'] or {}
    converter = TreeLikerConverter(context, discr_intervals=intervals)

    # Keep this order: dataset() is evaluated before default_template().
    # NOTE(review): the template is presumably filled in as a side effect of
    # generating the dataset -- confirm before reordering.
    dataset = converter.dataset()
    template = converter.default_template()
    return {'dataset': dataset, 'template': template}
def mysql_query_to_odt(input_dict):
return {'dataset' : None}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment