Commit 33667643 authored by Anze Vavpetic's avatar Anze Vavpetic
Browse files

added widgets that enable mapping unseen _relational_ examples to a...

added widgets that enable mapping unseen _relational_ examples to a precalculated _propositional_ domain for RSD, TreeLiker and Aleph-featurize
parent 15129f91
......@@ -21,7 +21,7 @@ else:
os.sys.path.append(parent_dir)
from security import SafePopen
DEBUG = True
DEBUG = False
# Setup a logger
logger = logging.getLogger("Aleph [Python]")
......@@ -117,8 +117,6 @@ class Aleph(object):
# Run the aleph script.
p = SafePopen(['yap', '-s50000', '-h200000', '-L', Aleph.SCRIPT], cwd=self.tmpdir).safe_run()
stdout_str, stderr_str = p.communicate()
logger.debug(stdout_str)
logger.debug(stderr_str)
logger.info("Done.")
......@@ -127,6 +125,7 @@ class Aleph(object):
# Return the rules written in the output file.
rules_fn = filestem + Aleph.RULES_SUFFIX
result = open('%s/%s' % (self.tmpdir, rules_fn)).read()
features = None
else:
features_fn = filestem + Aleph.FEATURES_SUFFIX
features = open('%s/%s' % (self.tmpdir, features_fn)).read()
......@@ -137,7 +136,7 @@ class Aleph(object):
# Cleanup.
self.__cleanup()
return result
return (result, features)
def __prepare(self, filestem, pos, neg, b):
"""
......@@ -208,7 +207,7 @@ class Aleph(object):
features = re.findall(r"feature\((\d+),\((.*)\)\).", features)
for fid, feature in sorted(features, key=lambda e: e[0]):
cat('%% f%s: %s' % (fid, feature))
cat('@ATTRIBUTE f%s {0,1}' % fid)
cat('@ATTRIBUTE f%s {+,-}' % fid)
# Class attribute
class_id = len(features)
......@@ -219,7 +218,7 @@ class Aleph(object):
for _, features, cls in examples:
vals = []
for i in range(0, class_id):
vals.append('1' if i in json.loads(features) else '0')
vals.append('+' if i in json.loads(features) else '-')
vals.append(cls)
cat('%s' % ','.join(vals))
return arff.getvalue()
......
This diff is collapsed.
......@@ -29,8 +29,8 @@ def ilp_aleph(input_dict):
for pl_script in [b, pos, neg]:
check_input(pl_script)
# Run aleph
result = aleph.induce(mode, pos, neg, b)
return {'theory': result}
results = aleph.induce(mode, pos, neg, b)
return {'theory': results[0], 'features': results[1]}
def ilp_rsd(input_dict):
rsd = RSD()
......@@ -53,6 +53,7 @@ def ilp_rsd(input_dict):
features, arff, rules = rsd.induce(b, examples=examples, pos=pos, neg=neg, cn2sd=subgroups)
return {'features' : features, 'arff' : arff, 'rules' : rules}
def ilp_sdmsegs_rule_viewer(input_dict):
    """
    Widget action that produces no outputs.

    :param input_dict: widget inputs (not used)
    :return: an empty dict
    """
    return {}
......@@ -119,5 +120,6 @@ def ilp_treeliker(input_dict):
'sample_size': input_dict.get('sample_size'),
'max_degree': input_dict.get('max_degree')
}
arff = TreeLiker(dataset, template).run(settings=settings)
return {'arff': arff}
treeliker = TreeLiker(dataset, template, settings=settings)
arff_train, arff_test = treeliker.run()
return {'arff': arff_train, 'treeliker': treeliker}
......@@ -1059,12 +1059,30 @@ write_with_vars(Head,BodyList,Substitutions,C,Negation):-
nl,
!.
%%
%% Escape atoms that contain particular characters that would
%% cause syntax errors when loaded.
%%
%% Added 20.1.2015
%% anze.vavpetic@ijs.si
% must_escape(Atom): succeeds when Atom is an atom and is not a number.
% NOTE(review): atom/1 already fails for numbers in standard Prolog, so the
% number/1 goal looks redundant -- confirm for the Prolog system in use.
must_escape(Atom):-
atom(Atom),
not number(Atom).
% Disabled alternative: escape only atoms that contain one of the listed
% special characters. The clause above ends with '.', so these lines are
% not part of it.
%atom_chars(Atom, Chars),
%my_member(Char, Chars),
%my_member(Char, ['[', ']', '(', ')', ' ', ',', '.', '-', '<', '>', '=']).
write_with_vars1([],_).
write_with_vars1([A|B],Substitutions):-
my_member([A,Sub],Substitutions),
!,
write(Sub),
(must_escape(Sub) ->
writeq(Sub)
;
write(Sub)
),
!,
(B = [_|_] ->
write(',')
;
......
......@@ -5,49 +5,74 @@ from subprocess import Popen, PIPE
class TreeLiker:
def __init__(self, dataset, template):
def __init__(self, dataset, template, test_dataset=None, settings={}):
self.basename = 'default'
self.dataset = dataset
self.test_dataset = test_dataset
self.template = template
self.settings = settings
def _copy_data(self):
    '''
    Prepares the working directory for a run.

    Creates a new temporary directory (stored in self.tmpdir), writes the
    training examples and, if present, the test examples into it, and
    copies the bin/ folder located next to this module.
    '''
    self.tmpdir = tempfile.mkdtemp()
    # The data lives on the instance; a bare `dataset` name is not
    # defined in this scope.
    with open('%s/%s.txt' % (self.tmpdir, self.basename), 'w') as f:
        f.write(self.dataset)
    if self.test_dataset:
        with open('%s/%s_test.txt' % (self.tmpdir, self.basename), 'w') as f:
            f.write(self.test_dataset)
    # Copy binaries to tmp folder
    cdir = os.path.dirname(os.path.abspath(__file__))
    shutil.copytree('%s/bin/' % cdir, '%s/bin/' % self.tmpdir)
def run(self, cleanup=True):
    '''
    Runs TreeLiker with the settings given to the constructor.

    :param cleanup: when True, self._cleanup() is called once the run is
        over, also when the run fails
    :return: tuple (arff, arff_test) with the contents of the generated
        ARFF files; arff_test is None when no test dataset was given
    '''
    self._copy_data()
    try:
        self._batch()
        p = Popen(['java', '-Xmx1G', '-cp', 'bin/TreeLiker.jar',
                   'ida.ilp.treeLiker.TreeLikerMain', '-batch', self.batch],
                  cwd=self.tmpdir)
        # No pipes are attached, so communicate() only waits for the
        # process to finish; there is no output to capture.
        p.communicate()
        # The output files must be read before the working directory is
        # cleaned up.
        if not self.test_dataset:
            with open('%s/%s.arff' % (self.tmpdir, self.basename)) as f:
                arff = f.read()
            arff_test = None
        else:
            with open('%s/conversion/train.arff' % self.tmpdir) as f:
                arff = f.read()
            with open('%s/conversion/test.arff' % self.tmpdir) as f:
                arff_test = f.read()
    finally:
        if cleanup:
            self._cleanup()
    return (arff, arff_test)
def _batch(self, settings):
def _batch(self):
'''
Creates the batch file to run the experiment.
'''
self.batch = '%s/%s.treeliker' % (self.tmpdir, self.basename)
commands = []
commands.append('set(output_type, single)')
commands.append("set(examples, '%s.txt')" % self.basename)
if not self.test_dataset:
commands.append('set(output_type, single)')
commands.append("set(examples, '%s.txt')" % self.basename)
else:
commands.append('set(output_type, train_test)')
commands.append("set(train_set, '%s.txt')" % self.basename)
commands.append("set(test_set, '%s_test.txt')" % self.basename)
commands.append('set(template, %s)' % self.template)
commands.append('set(output, %s.arff)' % self.basename)
if not self.test_dataset:
commands.append('set(output, %s.arff)' % self.basename)
else:
commands.append('set(output, conversion)')
# Optional settings
for key, val in settings.items():
for key, val in self.settings.items():
if val not in [None, '']:
commands.append('set(%s, %s)' % (key, str(val)))
......
......@@ -109,6 +109,7 @@ class ILP_Converter(Converter):
fmt_cols = lambda cols: ','.join([("%s" % col) if ILP_Converter.numeric(col) else ("'%s'" % col) for col in cols])
for table in self.db.tables:
attributes = self.db.cols[table]
dump.append(':- table %s/%d.' % (table, len(attributes)))
dump.append('\n'.join(["%s(%s)." % (table, fmt_cols(cols)) for cols in self.db.rows(table, attributes)]))
return dump
......
......@@ -17,6 +17,7 @@
"fields": {
"category": 25,
"treeview_image": "",
"windows_queue": false,
"name": "Database Context",
"is_streaming": false,
"uid": "649015ac-ca53-43bc-a36a-c603b52f8775",
......@@ -92,6 +93,7 @@
"fields": {
"category": 25,
"treeview_image": "",
"windows_queue": false,
"name": "Database To Aleph",
"is_streaming": false,
"uid": "3c5ac714-84ca-4c93-befb-ce0c16148193",
......@@ -229,6 +231,7 @@
"fields": {
"category": 25,
"treeview_image": "",
"windows_queue": false,
"name": "Database To Orange Table",
"is_streaming": false,
"uid": "baa32a60-5acb-42a1-ba83-23a6ab2160e7",
......@@ -299,6 +302,7 @@
"fields": {
"category": 25,
"treeview_image": "",
"windows_queue": false,
"name": "Database To RSD",
"is_streaming": false,
"uid": "e5b68e38-cb53-408a-a58f-26742d6ecb3c",
......@@ -405,6 +409,7 @@
"fields": {
"category": 25,
"treeview_image": "",
"windows_queue": false,
"name": "Database To TreeLiker",
"is_streaming": false,
"uid": "b07e0349-3d50-42b2-9678-dc7752b44d7e",
......@@ -493,6 +498,7 @@
"fields": {
"category": 25,
"treeview_image": "",
"windows_queue": false,
"name": "MySQL Connect",
"is_streaming": false,
"uid": "41978deb-c245-4e1f-95a2-ceadcfe6898a",
......@@ -597,5 +603,411 @@
"order": 1,
"description": ""
}
},
{
"pk": 66,
"model": "workflows.category",
"fields": {
"uid": "b349a089-0af7-4d86-838b-e8c93d9d6ca5",
"parent": 25,
"workflow": null,
"user": null,
"order": 1,
"name": "Domain mapping"
}
},
{
"pk": 364,
"model": "workflows.abstractwidget",
"fields": {
"category": 66,
"treeview_image": "",
"windows_queue": false,
"name": "Map examples to domain (Aleph features)",
"is_streaming": false,
"uid": "804af24c-cecf-4296-85f5-798a45b7a0a8",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "mysql.png",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "ilp_map_aleph",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 945,
"model": "workflows.abstractinput",
"fields": {
"widget": 364,
"name": "training data (context)",
"short_name": "trn",
"uid": "e13530d2-96a1-4e8e-a5aa-12ad686f7384",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "train_ctx",
"parameter": false,
"order": 1,
"description": "training context"
}
},
{
"pk": 951,
"model": "workflows.abstractinput",
"fields": {
"widget": 364,
"name": "positive class",
"short_name": "pos",
"uid": "bdba04d8-e4cc-4590-b807-dc2581bd9e42",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "positive_class",
"parameter": false,
"order": 1,
"description": "positive class"
}
},
{
"pk": 946,
"model": "workflows.abstractinput",
"fields": {
"widget": 364,
"name": "format",
"short_name": "fmt",
"uid": "5765110e-ead7-463b-8af4-c710dd0e56f4",
"default": "arff",
"required": true,
"multi": false,
"parameter_type": "select",
"variable": "output_format",
"parameter": true,
"order": 2,
"description": "output format"
}
},
{
"pk": 947,
"model": "workflows.abstractinput",
"fields": {
"widget": 364,
"name": "test data (context)",
"short_name": "tst",
"uid": "cb5e9036-e0a0-4de8-9d41-d4f7e418e415",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "test_ctx",
"parameter": false,
"order": 3,
"description": "new examples' context"
}
},
{
"pk": 948,
"model": "workflows.abstractinput",
"fields": {
"widget": 364,
"name": "features",
"short_name": "frs",
"uid": "9c245a66-c419-44aa-a077-d133c8b2e5f5",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "features",
"parameter": false,
"order": 4,
"description": "domain features"
}
},
{
"pk": 410,
"model": "workflows.abstractoutput",
"fields": {
"widget": 364,
"name": "evaluations",
"short_name": "evs",
"variable": "evaluations",
"uid": "90f445ab-f8a2-4960-8d4a-e98edbdb9f39",
"order": 1,
"description": "features evaluated on the test data"
}
},
{
"pk": 362,
"model": "workflows.abstractwidget",
"fields": {
"category": 66,
"treeview_image": "",
"windows_queue": false,
"name": "Map examples to domain (RSD)",
"is_streaming": false,
"uid": "50f8a9cf-046c-43e9-9681-d700cbbbb689",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "mysql.png",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "ilp_map_rsd",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 938,
"model": "workflows.abstractinput",
"fields": {
"widget": 362,
"name": "training data (context)",
"short_name": "trn",
"uid": "fe7667f7-9e36-46a7-9b4a-fabe69aa628e",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "train_ctx",
"parameter": false,
"order": 1,
"description": "training context"
}
},
{
"pk": 939,
"model": "workflows.abstractinput",
"fields": {
"widget": 362,
"name": "format",
"short_name": "fmt",
"uid": "6990c6fd-615e-41b7-912c-ad31dd169065",
"default": "arff",
"required": true,
"multi": false,
"parameter_type": "select",
"variable": "output_format",
"parameter": true,
"order": 2,
"description": "output format"
}
},
{
"pk": 235,
"model": "workflows.abstractoption",
"fields": {
"uid": "a29d6d54-b9ad-4e93-9ea8-d988e18fce74",
"abstract_input": 939,
"value": "csv",
"name": "CSV"
}
},
{
"pk": 234,
"model": "workflows.abstractoption",
"fields": {
"uid": "bfdf4683-2afa-4696-85a9-3be4557fe847",
"abstract_input": 939,
"value": "arff",
"name": "Weka ARFF"
}
},
{
"pk": 935,
"model": "workflows.abstractinput",
"fields": {
"widget": 362,
"name": "test data (context)",
"short_name": "tst",
"uid": "328957c9-0cf6-46da-bd9c-d89aa3a88842",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "test_ctx",
"parameter": false,
"order": 3,
"description": "new examples' context"
}
},
{
"pk": 936,
"model": "workflows.abstractinput",
"fields": {
"widget": 362,
"name": "features",
"short_name": "frs",
"uid": "d96f84a3-69d9-45f5-9f1e-5041e736bb2c",
"default": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "features",
"parameter": false,
"order": 4,
"description": "domain features"
}
},
{
"pk": 408,
"model": "workflows.abstractoutput",
"fields": {
"widget": 362,
"name": "evaluations",
"short_name": "evs",
"variable": "evaluations",
"uid": "58f8bc1b-de75-404c-aca6-7b0e14a05e14",
"order": 1,
"description": "features evaluated on the test data"
}
},
{
"pk": 363,
"model": "workflows.abstractwidget",
"fields": {
"category": 66,
"treeview_image": "",
"windows_queue": false,
"name": "Map examples to domain (TreeLiker)",
"is_streaming": false,
"uid": "b8dcd7bf-cfed-42e0-8bf9-02149c499f1e",
"interaction_view": "",
"image": "",
"package": "mysql",
"static_image": "mysql.png",
"post_interact_action": "",
"user": null,
"visualization_view": "",
"streaming_visualization_view": "",
"action": "ilp_map_treeliker",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 940,
"model": "workflows.abstractinput",
"fields": {
"widget": 363,
"name": "training data (context)",
"short_name": "trn",
"uid": "e6a08ff3-39c5-40e2-8257-36107bd117cc",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "train_ctx",
"parameter": false,
"order": 1,
"description": "training context"
}
},
{
"pk": 941,
"model": "workflows.abstractinput",
"fields": {
"widget": 363,
"name": "format",
"short_name": "fmt",
"uid": "1b119724-6e2f-48f6-9cea-7712da9bd57f",
"default": "arff",
"required": true,
"multi": false,
"parameter_type": "select",
"variable": "output_format",
"parameter": true,
"order": 2,
"description": "output format"
}
},