Commit f383fa28 authored by Anze Vavpetic's avatar Anze Vavpetic
Browse files

added treeliker widgets

parent ced17d79
This diff is collapsed.
......@@ -7,6 +7,7 @@ from random import choice
from aleph import Aleph
from rsd import RSD
from wordification import Wordification
from treeliker import TreeLiker
from security import check_input
from services.webservice import WebService
......@@ -91,5 +92,20 @@ def ilp_wordification(input_dict):
context = input_dict.get('context', None)
word_att_length = int(input_dict.get('f_ngram_size', 1))
wordification = Wordification(target_table,other_tables,context,word_att_length)
return {'corpus' : wordification.wordify()}
def ilp_treeliker(input_dict):
    '''
    Runs TreeLiker on the given dataset and template.

    Reads the mandatory 'template' and 'dataset' entries of `input_dict`,
    forwards the optional TreeLiker options (missing ones are passed as
    None) and returns the result under the key 'arff'.
    '''
    template, dataset = input_dict['template'], input_dict['dataset']
    # Optional TreeLiker options, looked up under their own names.
    option_names = (
        'algorithm',
        'minimum_frequency',
        'covered_class',
        'maximum_size',
        'use_sampling',
        'sample_size',
        'max_degree',
    )
    settings = dict((name, input_dict.get(name)) for name in option_names)
    treeliker = TreeLiker(dataset, template)
    return {'arff': treeliker.run(settings=settings)}
......@@ -40,7 +40,7 @@ allowed_mysql_predicates = [
'db_import',
'db_open',
]
# NOTE(review): default_timeout is assigned twice in a row; this first value
# is immediately overwritten by the assignment below.
default_timeout = 15 * 60
# Effective value (1200 * 60); the unit is presumably seconds -- TODO confirm.
default_timeout = 1200 * 60
class SafePopen(threading.Thread):
'''
......
from treeliker import TreeLiker
import shutil
import tempfile
import os.path
from subprocess import Popen, PIPE
class TreeLiker:
    '''
    Wrapper that runs the TreeLiker java program on a dataset.

    The constructor creates a private temporary directory, writes the
    dataset into '<basename>.txt' inside it and copies the 'bin/' folder
    located next to this module there. `run` writes a batch file, executes
    TreeLiker in that directory and returns the contents of the produced
    arff file.
    '''

    def __init__(self, dataset, template):
        '''
        dataset  -- examples; written verbatim to the examples file
        template -- template; inserted verbatim into the batch file
        '''
        self.basename = 'default'
        self.dataset = dataset
        self.template = template
        self.tmpdir = tempfile.mkdtemp()
        try:
            with open('%s/%s.txt' % (self.tmpdir, self.basename), 'w') as f:
                f.write(dataset)

            # Copy binaries to tmp folder
            cdir = os.path.dirname(os.path.abspath(__file__))
            shutil.copytree('%s/bin/' % cdir, '%s/bin/' % self.tmpdir)
        except Exception:
            # Do not leave a half-initialised temporary directory behind.
            self._cleanup()
            raise

    def run(self, settings=None):
        '''
        Runs TreeLiker with the given settings.

        settings -- optional dict of option name -> value; entries whose
                    value is None or '' are skipped.

        Returns the contents of the generated arff file. The temporary
        directory is removed afterwards, also when the run fails.
        '''
        try:
            self._batch({} if settings is None else settings)
            p = Popen(['java', '-Xmx1G', '-cp', 'bin/TreeLiker.jar',
                       'ida.ilp.treeLiker.TreeLikerMain', '-batch', self.batch],
                      cwd=self.tmpdir)
            # No pipes are attached, so this only waits for the process.
            p.communicate()
            with open('%s/%s.arff' % (self.tmpdir, self.basename)) as f:
                return f.read()
        finally:
            self._cleanup()

    def _batch(self, settings):
        '''
        Creates the batch file to run the experiment.

        The path of the file is stored in `self.batch`.
        '''
        self.batch = '%s/%s.treeliker' % (self.tmpdir, self.basename)

        commands = [
            'set(output_type, single)',
            "set(examples, '%s.txt')" % self.basename,
            'set(template, %s)' % self.template,
            'set(output, %s.arff)' % self.basename,
        ]

        # Optional settings
        for key, val in settings.items():
            if val not in (None, ''):
                commands.append('set(%s, %s)' % (key, str(val)))

        commands.append('work(yes)')

        contents = '\n'.join(commands)
        with open(self.batch, 'w') as f:
            f.write(contents)
        # Parenthesised form prints the same text under Python 2 and 3.
        print(contents)

    def _cleanup(self):
        '''
        Cleans up all the temporary files.

        Best effort: a failure to remove the directory is logged, not raised.
        '''
        try:
            shutil.rmtree(self.tmpdir)
        except OSError:
            # Imported here because this module defines no logger.
            import logging
            logging.getLogger(__name__).info(
                'Problem removing temporary files. '
                'The files are probably in use.')
......@@ -313,8 +313,11 @@ class TreeLikerConverter(Converter):
'''
def __init__(self, *args, **kwargs):
    '''
    Takes the optional `discr_intervals` keyword argument (default: empty
    dict) and hands every other argument to Converter.__init__.
    '''
    # Remove our own option before the base class sees the kwargs.
    intervals = kwargs.pop('discr_intervals', {})
    self.discr_intervals = intervals
    # Predicate names that already have a template entry.
    self._predicates = set()
    # Template entries, in the order they are first encountered.
    self._template = []
    Converter.__init__(self, *args, **kwargs)
def _row_pk(self, target, cols, row):
row_pk = None
for idx, col in enumerate(row):
......@@ -323,6 +326,7 @@ class TreeLikerConverter(Converter):
break
return row_pk
def _facts(self, pk, pk_att, target, visited=set()):
'''
Returns the facts for the given entity with pk in `table`.
......@@ -355,14 +359,27 @@ class TreeLikerConverter(Converter):
else:
continue
elif attr_name == self.db.pkeys[target]:
facts.append('has_%s(%s)' % (target, row_pk_name))
predicate = 'has_%s' % target
facts.append('%s(%s)' % (predicate, row_pk_name))
if predicate not in self._predicates:
self._predicates.add(predicate)
self._template.append('%s(-%s)' % (predicate,
target))
# Constants
else:
predicate = 'has_%s' % attr_name
col = self._discretize_check(target, attr_name, col)
facts.append('has_%s(%s, %s)' % (attr_name,
row_pk_name,
str(col)))
facts.append('%s(%s, %s)' % (predicate,
row_pk_name,
str(col)))
if predicate not in self._predicates:
self._predicates.add(predicate)
self._template.append('%s(+%s, #%s)' % (predicate,
target,
attr_name))
# Recursively follow links to other tables
for table in self.db.tables:
......@@ -392,6 +409,7 @@ class TreeLikerConverter(Converter):
visited=visited))
return facts
def _discretize_check(self, table, att, col):
'''
Replaces the value with an appropriate interval symbol, if available.
......@@ -419,25 +437,6 @@ class TreeLikerConverter(Converter):
return label
n_intervals = len(intervals)
for i, value in enumerate(intervals):
punct = '.' if i == n_intervals-1 else ';'
if i == 0:
# Condition: att =< value_i
label = '=< %.2%f' % value
condition = '%s =< %d' % (att.capitalize(), value)
discretize_goals.append('\t((%s = \'%s\', %s)%s' % (var_att, label, condition, punct))
if i < n_intervals-1:
# Condition: att in (value_i, value_i+1]
value_next = intervals[i+1]
label = '(%d, %d]' % (value, value_next)
condition = '%s > %d, %s =< %d' % (att.capitalize(), value, att.capitalize(), value_next)
discretize_goals.append('\t(%s = \'%s\', %s)%s' % (var_att, label, condition, punct))
else:
# Condition: att > value_i
label = '> %d' % value
condition = '%s > %d' % (att.capitalize(), value)
discretize_goals.append('\t(%s = \'%s\', %s))%s' % (var_att, label, condition, punct))
def dataset(self):
'''
......@@ -454,8 +453,9 @@ class TreeLikerConverter(Converter):
return '\n'.join(examples)
def default_template(self):
    '''
    Returns the collected template entries as one string: the entries of
    `self._template` joined by ', ' and wrapped in square brackets.
    '''
    return '[%s]' % ', '.join(self._template)
if __name__ == '__main__':
......
[
{
"pk": 20,
"pk": 25,
"model": "workflows.category",
"fields": {
"uid": "b66b71b1-99d6-4efc-b264-a914a3e42911",
......@@ -12,10 +12,10 @@
}
},
{
"pk": 104,
"pk": 140,
"model": "workflows.abstractwidget",
"fields": {
"category": 20,
"category": 25,
"treeview_image": "",
"name": "Database Context",
"is_streaming": false,
......@@ -38,10 +38,10 @@
}
},
{
"pk": 213,
"pk": 336,
"model": "workflows.abstractinput",
"fields": {
"widget": 104,
"widget": 140,
"name": "connection",
"short_name": "con",
"uid": "6c23b4a2-a18e-498e-a22c-4de86932da3e",
......@@ -56,10 +56,10 @@
}
},
{
"pk": 477,
"pk": 337,
"model": "workflows.abstractinput",
"fields": {
"widget": 104,
"widget": 140,
"name": "Table connection from names",
"short_name": "tbc",
"uid": "be3718eb-062e-48f2-af2a-8c31e66524fe",
......@@ -74,10 +74,10 @@
}
},
{
"pk": 117,
"pk": 154,
"model": "workflows.abstractoutput",
"fields": {
"widget": 104,
"widget": 140,
"name": "context",
"short_name": "cxt",
"variable": "context",
......@@ -87,10 +87,10 @@
}
},
{
"pk": 105,
"pk": 141,
"model": "workflows.abstractwidget",
"fields": {
"category": 20,
"category": 25,
"treeview_image": "",
"name": "Database To Aleph",
"is_streaming": false,
......@@ -113,10 +113,10 @@
}
},
{
"pk": 235,
"pk": 338,
"model": "workflows.abstractinput",
"fields": {
"widget": 105,
"widget": 141,
"name": "Target attribute value",
"short_name": "tgt",
"uid": "5b7354f1-35d8-41c8-a0ae-c3848855f29d",
......@@ -131,10 +131,10 @@
}
},
{
"pk": 493,
"pk": 339,
"model": "workflows.abstractinput",
"fields": {
"widget": 105,
"widget": 141,
"name": "dump full database",
"short_name": "dmp",
"uid": "fb5fc5f5-aa8f-4667-b96a-2b3f9a7672c7",
......@@ -149,10 +149,10 @@
}
},
{
"pk": 494,
"pk": 340,
"model": "workflows.abstractinput",
"fields": {
"widget": 105,
"widget": 141,
"name": "discretization intervals",
"short_name": "itr",
"uid": "e20c7a7c-d2f5-4ab6-9ef1-b3d244140d88",
......@@ -167,10 +167,10 @@
}
},
{
"pk": 214,
"pk": 341,
"model": "workflows.abstractinput",
"fields": {
"widget": 105,
"widget": 141,
"name": "context",
"short_name": "cxt",
"uid": "474e9673-9c55-48c5-bae8-a7b986aa0287",
......@@ -185,10 +185,10 @@
}
},
{
"pk": 118,
"pk": 155,
"model": "workflows.abstractoutput",
"fields": {
"widget": 105,
"widget": 141,
"name": "positive examples",
"short_name": "pos",
"variable": "pos_examples",
......@@ -198,10 +198,10 @@
}
},
{
"pk": 119,
"pk": 156,
"model": "workflows.abstractoutput",
"fields": {
"widget": 105,
"widget": 141,
"name": "negative examples",
"short_name": "neg",
"variable": "neg_examples",
......@@ -211,10 +211,10 @@
}
},
{
"pk": 120,
"pk": 157,
"model": "workflows.abstractoutput",
"fields": {
"widget": 105,
"widget": 141,
"name": "background knowledge",
"short_name": "b",
"variable": "bk",
......@@ -224,10 +224,10 @@
}
},
{
"pk": 106,
"pk": 142,
"model": "workflows.abstractwidget",
"fields": {
"category": 20,
"category": 25,
"treeview_image": "",
"name": "Database To Orange Table",
"is_streaming": false,
......@@ -250,10 +250,10 @@
}
},
{
"pk": 215,
"pk": 342,
"model": "workflows.abstractinput",
"fields": {
"widget": 106,
"widget": 142,
"name": "context",
"short_name": "cxt",
"uid": "1f9b5ccf-65c3-4ccc-818e-afb3a6ffee20",
......@@ -268,10 +268,10 @@
}
},
{
"pk": 215,
"pk": 158,
"model": "workflows.abstractoutput",
"fields": {
"widget": 106,
"widget": 142,
"name": "Data table",
"short_name": "odt",
"variable": "target_table_dataset",
......@@ -281,10 +281,10 @@
}
},
{
"pk": 216,
"pk": 159,
"model": "workflows.abstractoutput",
"fields": {
"widget": 106,
"widget": 142,
"name": "List of Data tables",
"short_name": "lot",
"variable": "other_table_datasets",
......@@ -294,10 +294,10 @@
}
},
{
"pk": 107,
"pk": 143,
"model": "workflows.abstractwidget",
"fields": {
"category": 20,
"category": 25,
"treeview_image": "",
"name": "Database To RSD",
"is_streaming": false,
......@@ -320,10 +320,10 @@
}
},
{
"pk": 216,
"pk": 343,
"model": "workflows.abstractinput",
"fields": {
"widget": 107,
"widget": 143,
"name": "context",
"short_name": "cxt",
"uid": "4f1397a8-4e72-4b34-b31d-d09bf9a7e7d9",
......@@ -338,10 +338,10 @@
}
},
{
"pk": 495,
"pk": 344,
"model": "workflows.abstractinput",
"fields": {
"widget": 107,
"widget": 143,
"name": "dump full database",
"short_name": "dmp",
"uid": "3c1375b1-10ed-400d-abfe-261115723c2f",
......@@ -356,10 +356,10 @@
}
},
{
"pk": 500,
"pk": 345,
"model": "workflows.abstractinput",
"fields": {
"widget": 107,
"widget": 143,
"name": "discretization intervals",
"short_name": "itr",
"uid": "de698509-3bdc-482f-b5f9-9c6b3916426b",
......@@ -374,10 +374,10 @@
}
},
{
"pk": 122,
"pk": 160,
"model": "workflows.abstractoutput",
"fields": {
"widget": 107,
"widget": 143,
"name": "examples",
"short_name": "ex",
"variable": "examples",
......@@ -387,10 +387,10 @@
}
},
{
"pk": 123,
"pk": 161,
"model": "workflows.abstractoutput",
"fields": {
"widget": 107,
"widget": 143,
"name": "background knowledge",
"short_name": "b",
"variable": "bk",
......@@ -400,10 +400,98 @@
}
},
{
"pk": 108,
"pk": 180,
"model": "workflows.abstractwidget",
"fields": {
"category": 20,
"category": 25,
"treeview_image": "",
"name": "Database To TreeLiker",
"is_streaming": false,
"uid": "b07e0349-3d50-42b2-9678-dc7752b44d7e",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "mysql.png",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "mysql_treeliker_converter",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 452,
"model": "workflows.abstractinput",
"fields": {
"widget": 180,
"name": "context",
"short_name": "cxt",
"uid": "9056fe24-2a8a-40a6-a320-66889d89050c",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "context",
"parameter": false,
"order": 1,
"description": "Database context object"
}
},
{
"pk": 453,
"model": "workflows.abstractinput",
"fields": {
"widget": 180,
"name": "discretization intervals",
"short_name": "itr",
"uid": "ebda708e-1ef8-4326-8788-1c98611c7817",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "discr_intervals",
"parameter": false,
"order": 2,
"description": "dictionary of intervals for discretization"
}
},
{
"pk": 204,
"model": "workflows.abstractoutput",
"fields": {
"widget": 180,
"name": "dataset",
"short_name": "dat",
"variable": "dataset",
"uid": "65c5c174-104b-4a48-b61d-55192c73fbe7",
"order": 1,
"description": "dataset"
}
},
{
"pk": 205,
"model": "workflows.abstractoutput",
"fields": {
"widget": 180,
"name": "template",
"short_name": "tpl",
"variable": "template",
"uid": "550272b4-d505-4764-b7af-019a87394198",
"order": 1,
"description": "template"
}
},
{
"pk": 144,
"model": "workflows.abstractwidget",
"fields": {
"category": 25,
"treeview_image": "",
"name": "MySQL Connect",
"is_streaming": false,
......@@ -426,10 +514,10 @@
}
},
{
"pk": 217,
"pk": 346,
"model": "workflows.abstractinput",
"fields": {
"widget": 108,
"widget": 144,
"name": "user",
"short_name": "usr",
"uid": "0f36f10b-066f-4ad3-9fa8-39205440076a",
......@@ -444,10 +532,10 @@
}