Commit 33667643 authored by Anze Vavpetic's avatar Anze Vavpetic

added widgets that enable mapping unseen _relational_ examples to a...

added widgets that enable mapping unseen _relational_ examples to a precalculated _propositional_ domain for RSD, TreeLiker and Aleph-featurize
parent 15129f91
......@@ -21,7 +21,7 @@ else:
os.sys.path.append(parent_dir)
from security import SafePopen
DEBUG = True
DEBUG = False
# Setup a logger
logger = logging.getLogger("Aleph [Python]")
......@@ -117,8 +117,6 @@ class Aleph(object):
# Run the aleph script.
p = SafePopen(['yap', '-s50000', '-h200000', '-L', Aleph.SCRIPT], cwd=self.tmpdir).safe_run()
stdout_str, stderr_str = p.communicate()
logger.debug(stdout_str)
logger.debug(stderr_str)
logger.info("Done.")
......@@ -127,6 +125,7 @@ class Aleph(object):
# Return the rules written in the output file.
rules_fn = filestem + Aleph.RULES_SUFFIX
result = open('%s/%s' % (self.tmpdir, rules_fn)).read()
features = None
else:
features_fn = filestem + Aleph.FEATURES_SUFFIX
features = open('%s/%s' % (self.tmpdir, features_fn)).read()
......@@ -137,7 +136,7 @@ class Aleph(object):
# Cleanup.
self.__cleanup()
return result
return (result, features)
def __prepare(self, filestem, pos, neg, b):
"""
......@@ -208,7 +207,7 @@ class Aleph(object):
features = re.findall(r"feature\((\d+),\((.*)\)\).", features)
for fid, feature in sorted(features, key=lambda e: e[0]):
cat('%% f%s: %s' % (fid, feature))
cat('@ATTRIBUTE f%s {0,1}' % fid)
cat('@ATTRIBUTE f%s {+,-}' % fid)
# Class attribute
class_id = len(features)
......@@ -219,7 +218,7 @@ class Aleph(object):
for _, features, cls in examples:
vals = []
for i in range(0, class_id):
vals.append('1' if i in json.loads(features) else '0')
vals.append('+' if i in json.loads(features) else '-')
vals.append(cls)
cat('%s' % ','.join(vals))
return arff.getvalue()
......
This diff is collapsed.
......@@ -29,8 +29,8 @@ def ilp_aleph(input_dict):
for pl_script in [b, pos, neg]:
check_input(pl_script)
# Run aleph
result = aleph.induce(mode, pos, neg, b)
return {'theory': result}
results = aleph.induce(mode, pos, neg, b)
return {'theory': results[0], 'features': results[1]}
def ilp_rsd(input_dict):
rsd = RSD()
......@@ -53,6 +53,7 @@ def ilp_rsd(input_dict):
features, arff, rules = rsd.induce(b, examples=examples, pos=pos, neg=neg, cn2sd=subgroups)
return {'features' : features, 'arff' : arff, 'rules' : rules}
def ilp_sdmsegs_rule_viewer(input_dict):
return {}
......@@ -119,5 +120,6 @@ def ilp_treeliker(input_dict):
'sample_size': input_dict.get('sample_size'),
'max_degree': input_dict.get('max_degree')
}
arff = TreeLiker(dataset, template).run(settings=settings)
return {'arff': arff}
treeliker = TreeLiker(dataset, template, settings=settings)
arff_train, arff_test = treeliker.run()
return {'arff': arff_train, 'treeliker': treeliker}
......@@ -1059,12 +1059,30 @@ write_with_vars(Head,BodyList,Substitutions,C,Negation):-
nl,
!.
%%
%% Escape atoms that contain particular characters that would
%% cause syntax errors when loaded.
%%
%% Added 20.1.2015
%% anze.vavpetic@ijs.si
must_escape(Atom):-
atom(Atom),
not number(Atom).
%atom_chars(Atom, Chars),
%my_member(Char, Chars),
%my_member(Char, ['[', ']', '(', ')', ' ', ',', '.', '-', '<', '>', '=']).
write_with_vars1([],_).
write_with_vars1([A|B],Substitutions):-
my_member([A,Sub],Substitutions),
!,
write(Sub),
(must_escape(Sub) ->
writeq(Sub)
;
write(Sub)
),
!,
(B = [_|_] ->
write(',')
;
......
......@@ -5,49 +5,74 @@ from subprocess import Popen, PIPE
class TreeLiker:
def __init__(self, dataset, template):
def __init__(self, dataset, template, test_dataset=None, settings={}):
self.basename = 'default'
self.dataset = dataset
self.test_dataset = test_dataset
self.template = template
self.settings = settings
def _copy_data(self):
self.tmpdir = tempfile.mkdtemp()
with open('%s/%s.txt' % (self.tmpdir, self.basename), 'w') as f:
f.write(dataset)
f.write(self.dataset)
if self.test_dataset:
with open('%s/%s_test.txt' % (self.tmpdir, self.basename), 'w') as f:
f.write(self.test_dataset)
# Copy binaries to tmp folder
cdir = os.path.dirname(os.path.abspath(__file__))
shutil.copytree('%s/bin/' % cdir, '%s/bin/' % self.tmpdir)
def run(self, settings={}):
def run(self, cleanup=True):
'''
Runs TreeLiker with the given settings.
'''
self._batch(settings)
self._copy_data()
self._batch()
p = Popen(['java', '-Xmx1G', '-cp', 'bin/TreeLiker.jar',
'ida.ilp.treeLiker.TreeLikerMain', '-batch', self.batch],
cwd=self.tmpdir)
stdout_str, stderr_str = p.communicate()
arff = open('%s/%s.arff' % (self.tmpdir, self.basename)).read()
self._cleanup()
if not self.test_dataset:
arff = open('%s/%s.arff' % (self.tmpdir, self.basename)).read()
arff_test = None
else:
arff = open('%s/conversion/train.arff' % self.tmpdir).read()
arff_test = open('%s/conversion/test.arff' % self.tmpdir).read()
return arff
if cleanup:
self._cleanup()
return (arff, arff_test)
def _batch(self, settings):
def _batch(self):
'''
Creates the batch file to run the experiment.
'''
self.batch = '%s/%s.treeliker' % (self.tmpdir, self.basename)
commands = []
commands.append('set(output_type, single)')
commands.append("set(examples, '%s.txt')" % self.basename)
if not self.test_dataset:
commands.append('set(output_type, single)')
commands.append("set(examples, '%s.txt')" % self.basename)
else:
commands.append('set(output_type, train_test)')
commands.append("set(train_set, '%s.txt')" % self.basename)
commands.append("set(test_set, '%s_test.txt')" % self.basename)
commands.append('set(template, %s)' % self.template)
commands.append('set(output, %s.arff)' % self.basename)
if not self.test_dataset:
commands.append('set(output, %s.arff)' % self.basename)
else:
commands.append('set(output, conversion)')
# Optional settings
for key, val in settings.items():
for key, val in self.settings.items():
if val not in [None, '']:
commands.append('set(%s, %s)' % (key, str(val)))
......
......@@ -109,6 +109,7 @@ class ILP_Converter(Converter):
fmt_cols = lambda cols: ','.join([("%s" % col) if ILP_Converter.numeric(col) else ("'%s'" % col) for col in cols])
for table in self.db.tables:
attributes = self.db.cols[table]
dump.append(':- table %s/%d.' % (table, len(attributes)))
dump.append('\n'.join(["%s(%s)." % (table, fmt_cols(cols)) for cols in self.db.rows(table, attributes)]))
return dump
......
This diff is collapsed.
......@@ -6,6 +6,8 @@ MySQL connectivity library.
import mysql.connector as sql
from context import DBConnection, DBContext
from converters import RSD_Converter, Aleph_Converter, Orange_Converter, TreeLikerConverter
from mapper import domain_map
def mysql_connect(input_dict):
user = str(input_dict['user'])
......@@ -53,3 +55,30 @@ def mysql_orange_converter(input_dict):
context = input_dict['context']
orange = Orange_Converter(context)
return {'target_table_dataset' : orange.target_Orange_table(),'other_table_datasets': orange.other_Orange_tables()}
def ilp_map_rsd(input_dict):
    '''Widget entry point: maps unseen examples onto precomputed RSD features.'''
    return do_map(input_dict, 'rsd')
def ilp_map_treeliker(input_dict):
    '''Widget entry point: maps unseen examples onto precomputed TreeLiker features.'''
    return do_map(input_dict, 'treeliker')
def ilp_map_aleph(input_dict):
    '''Widget entry point: maps unseen examples onto precomputed Aleph features.'''
    return do_map(input_dict, 'aleph',
                  positive_class=input_dict['positive_class'])
def do_map(input_dict, feature_format, positive_class=None):
    '''
    Maps a new example to a set of features.

    Expects `input_dict` to provide 'train_ctx', 'test_ctx', 'features'
    and 'output_format'; returns a dict with the feature 'evaluations'.
    '''
    # Context of the unseen example(s) vs. the currently known ones.
    train_context = input_dict['train_ctx']
    test_context = input_dict['test_ctx']
    evaluations = domain_map(
        input_dict['features'],
        feature_format,
        train_context,
        test_context,
        format=input_dict['output_format'],
        positive_class=positive_class)
    return {'evaluations': evaluations}
#!/usr/local/bin/yap -L --
#.
%% Maps unseen relational examples onto an existing propositional feature
%% space: for each test example, prints a space-separated '+'/'-' value per
%% feature, followed by the example's class value, one example per line.
%%
%% The consulted KB is expected to define:
%%   <MainPred>(Class, Id)   - the example facts,
%%   testExampleIDs/1        - the list of example ids to evaluate,
%%   featureIDs/1            - the list of feature numbers,
%%   f(FeatureID, ExampleID) - provable when a feature covers an example.

%% Collect every test example of MainPred and print its feature vector.
eval_features(MainPred):-
ExamplePred =.. [MainPred, A, B],
all(example(A, B), (ExamplePred, testExampleIDs(TestExampleIDs), memberchk(B, TestExampleIDs)), Examples),
featureIDs(Features),
map_examples(Examples, Features),
!.

%% map_examples(+Examples, +Features): one output line per example.
map_examples([Example], Features):-
write_example(Example, Features),
write_class(Example),
nl,
!.
map_examples([Example|ExampleTail], Features):-
write_example(Example, Features),
write_class(Example),
nl,
map_examples(ExampleTail, Features),
!.

%% write_example(+Example, +Features): evaluate each feature in turn.
write_example(Example, [Feature]) :-
write_example_feature(Example, Feature),
!.
write_example(Example, [Feature|FeatureTail]):-
write_example_feature(Example, Feature), !,
write_example(Example, FeatureTail),
!.

%% '+' when f(Feature, Id) is provable for this example, '-' otherwise.
write_example_feature(example(Class, Id), Feature):-
( f(Feature, Id), !, write('+')
;
write('-')
),
!,
write(' '),
!.

%% Emit the class value of the example.
write_class(example(Class, _)):-
write(Class).

%% Entry point: consult the KB file, then evaluate all features.
main([KBFile, MainPred]):-
consult(KBFile),
eval_features(MainPred),
!.
main(_):-
write('Call with: <kbfile> <mainpred>'), nl.

:- use_module(library(lists)).
:- unix(argv(AllArgs)), main(AllArgs).
# Mapping unseen relational examples to an existing propositionalized domain
import tempfile
import subprocess
import os
import re
import arff
from converters import RSD_Converter, TreeLikerConverter
def _feature_numbers(features):
n = len(features.splitlines())
featureIDs = map(lambda id: str(id), range(1, n+1))
return 'featureIDs([%s]).' % (','.join(featureIDs))
example_id_pattern = re.compile(r', (?P<id>.+)\)\.')
def _example_ids(pred, examples):
exampleIDs = example_id_pattern.findall(examples, re.M)
return '%s([%s]).' % (pred, ','.join(exampleIDs))
def domain_map(features, feature_format, train_context, test_context,
               intervals=None,
               format='arff',
               positive_class=None):
    '''
    Maps unseen relational examples (from `test_context`) onto a
    precalculated propositional domain described by `features`.

    :param features: the feature definitions -- RSD/Aleph Prolog clauses,
        or (for 'treeliker') a TreeLiker instance with a stored template.
    :param feature_format: one of 'rsd', 'aleph' or 'treeliker'.
    :param train_context: DB context of the known (training) examples.
    :param test_context: DB context of the unseen examples.
    :param intervals: optional discretization intervals for the test data.
    :param format: output format, 'arff' or 'csv'.
    :param positive_class: class value treated as positive (Aleph only).
    :return: the propositionalized dataset, or None if `feature_format`
        is not recognized.
    '''
    if intervals is None:
        # Fix: avoid the shared mutable-default-argument pitfall.
        intervals = {}
    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSD_Converter(train_context)
        # NOTE(review): only the test converter receives discr_intervals --
        # confirm the train side is discretized upstream.
        test_rsd = RSD_Converter(test_context, discr_intervals=intervals)
        train_examples = train_rsd.all_examples()
        test_examples = test_rsd.all_examples()
        if feature_format == 'aleph':
            # Aleph features are rewritten to RSD-style f/2 clauses first.
            features = aleph_to_rsd_features(features)
        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        THIS_DIR = os.path.dirname(__file__) if os.path.dirname(__file__) else '.'
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(prolog_bk)
            f.close()
            # mapper.pl consults the knowledge base and prints one
            # '+'/'-' vector (plus the class value) per test example.
            cmd_args = ['%s/mapper.pl' % THIS_DIR, f.name,
                        train_context.target_table]
            evaluations = subprocess.check_output(cmd_args)
        finally:
            # Fix: the temporary knowledge base was created with
            # delete=False and previously leaked on disk.
            os.remove(f.name)
        dataset = dump_dataset(features, feature_format, evaluations,
                               train_context,
                               format=format,
                               positive_class=positive_class)
    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features.
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()
        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'
    return dataset
def dump_dataset(features, feature_format, evaluations, train_context,
                 format='arff',
                 positive_class=None):
    '''
    Serializes the feature `evaluations` produced by the Prolog mapper.

    :param features: feature definitions; only used to count the features
        (one per line).
    :param feature_format: e.g. 'rsd' or 'aleph'; for 'aleph' the class
        column is collapsed to 'positive'/'negative'.
    :param evaluations: mapper output -- one whitespace-separated '+'/'-'
        vector, ending with the class value, per line.
    :param train_context: training DB context; its target table must be
        preloaded in `orng_tables` for the 'arff' format.
    :param format: 'arff' or 'csv'.
    :param positive_class: class value mapped to 'positive' (Aleph only).
    :return: the serialized dataset, or the string 'unsupported format'.
    '''
    if format == 'arff':
        data = {
            'attributes': [],
            'data': [],
            'description': '',
            'relation': 'default'
        }
        # One binary '+'/'-' attribute per feature.
        n_features = len(features.splitlines())
        for i in range(1, n_features + 1):
            data['attributes'].append(('f%d' % i, ['+', '-']))
        target = train_context.target_table
        # Fix: idiomatic `not in` (was `if not target in ...`).
        if target not in train_context.orng_tables:
            raise Exception('Target table is not preloaded in memory! Please select the `dump data` parameter in the converter widget.')
        if feature_format == 'aleph':
            target_vals = ('negative', 'positive')
        else:
            orng_target = train_context.orng_tables[target]
            target_vals = tuple(orng_target.domain.classVar.values)
        data['attributes'].append(('class', target_vals))
        for line in evaluations.splitlines():
            values = line.strip().split()
            if feature_format == 'aleph':
                # Collapse the concrete class value to positive/negative.
                values[-1] = 'positive' if values[-1] == positive_class else 'negative'
            data['data'].append(values)
        return arff.dumps(data)
    elif format == 'csv':
        # Fix: build with join instead of quadratic string concatenation.
        return ''.join(','.join(line.strip().split()) + '\n'
                       for line in evaluations.splitlines())
    return 'unsupported format'
def aleph_to_rsd_features(features):
    '''
    Rewrites Aleph `feature(N,(Head:-Body)).` definitions as RSD-style
    `f(N, A):-Body.` clauses, renumbering them consecutively from 1.
    Lines that do not start with 'feature' are ignored.
    '''
    rsd_clauses = []
    for raw_line in features.splitlines():
        if not raw_line.startswith('feature'):
            continue
        # Keep everything from ':-' up to the closing ')).' as the body.
        clause_body = raw_line[raw_line.find(':-'):raw_line.find(')).')] + '.'
        rsd_clauses.append('f(%d, A)%s' % (len(rsd_clauses) + 1, clause_body))
    return '\n'.join(rsd_clauses)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment