Commit b072fde1 authored by Anze Vavpetic

Added support for the *induce_features* mode for Aleph; in this mode it now returns the propositional form of the dataset in ARFF.

parent 26a52a92
#
# Python interface to Aleph.
#
#
# author: Anze Vavpetic <anze.vavpetic@ijs.si>, 2011
#
import os.path
......@@ -8,6 +8,8 @@ import shutil
import logging
import re
import tempfile
import json
from StringIO import StringIO
from stat import S_IREAD, S_IEXEC
from subprocess import PIPE
......@@ -29,29 +31,34 @@ formatter = logging.Formatter("%(name)s %(levelname)s: %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)
class Aleph(object):
# The aleph source file is presumed to be in the same dir as this file.
THIS_DIR = os.path.dirname(__file__) if os.path.dirname(__file__) else '.'
ALEPH_FN = 'aleph.pl'
FEATURES_FN = 'features.pl'
RULES_SUFFIX = 'Rules'
FEATURES_SUFFIX = 'Features'
PROP_DATASET_SUFFIX = 'Propositional'
SCRIPT = 'run_aleph.pl'
ESSENTIAL_PARAMS = {
'depth' : 10,
'evalfn' : 'coverage',
'i' : 2,
'language' : 'inf',
'm' : 0.0,
'max_features' : 'inf',
'minpos' : 1,
'noise' : 0
'depth': 10,
'evalfn': 'coverage',
'i': 2,
'language': 'inf',
'm': 0.0,
'max_features': 'inf',
'minpos': 1,
'noise': 0
}
def __init__(self, verbosity=logging.NOTSET):
"""
Creates an Aleph object.
@param logging can be DEBUG, INFO or NOTSET (default). This controls the verbosity of the output.
@param logging Can be DEBUG, INFO or NOTSET (default).
This controls the verbosity of the output.
"""
self.tmpdir = tempfile.mkdtemp()
self.aleph_script = '%s/%s' % (self.tmpdir, Aleph.ALEPH_FN)
......@@ -60,9 +67,10 @@ class Aleph(object):
# Dictionary of non-default settings
self.settings = dict()
logger.setLevel(verbosity)
shutil.copy("%s/%s" % (Aleph.THIS_DIR, Aleph.ALEPH_FN), self.aleph_script)
shutil.copy("%s/%s" % (Aleph.THIS_DIR, Aleph.ALEPH_FN), self.tmpdir)
shutil.copy("%s/%s" % (Aleph.THIS_DIR, Aleph.FEATURES_FN), self.tmpdir)
def set(self, name, value):
"""
Sets the value of setting 'name' to 'value'.
......@@ -71,7 +79,8 @@ class Aleph(object):
def settingsAsFacts(self, settings):
"""
Parses a string of settings in the form set(name1, val1), set(name2, val2)...
Parses a string of settings in the form:
set(name1, val1), set(name2, val2)...
"""
pattern = re.compile('set\(([a-zA-Z0-9_]+),(\[a-zA-Z0-9_]+)\)')
pairs = pattern.findall(settings)
......@@ -84,43 +93,51 @@ class Aleph(object):
"""
self.postGoal = goal
self.postScript = script
def induce(self, mode, pos, neg, b, filestem='default'):
    """
    Induce a theory or features in 'mode'.

    @param mode In which mode to induce rules/features.
    @param pos String of positive examples.
    @param neg String of negative examples.
    @param b String with background knowledge.
    @param filestem The base name of this experiment.
    @return The theory as a string or an arff dataset in induce_features mode.
    """
    def read_output(name):
        # Read one of the files the aleph script wrote into the temp dir,
        # closing the handle deterministically.
        with open('%s/%s' % (self.tmpdir, name)) as handle:
            return handle.read()

    try:
        # Write the inputs to appropriate files.
        self.__prepare(filestem, pos, neg, b)

        # Make a script to run aleph (with appropriate settings).
        self.__script(mode, filestem)

        logger.info("Running aleph...")

        # Run the aleph script.
        # NOTE(review): no pipes are requested here, so the two values
        # returned by communicate() are presumably None unless SafePopen
        # sets up pipes on its own -- confirm.
        p = SafePopen(['yap', '-s50000', '-h200000', '-L', Aleph.SCRIPT],
                      cwd=self.tmpdir).safe_run()
        stdout_str, stderr_str = p.communicate()

        logger.debug(stdout_str)
        logger.debug(stderr_str)

        logger.info("Done.")

        if mode != 'induce_features':
            # Return the rules written in the output file.
            result = read_output(filestem + Aleph.RULES_SUFFIX)
        else:
            features = read_output(filestem + Aleph.FEATURES_SUFFIX)
            pl_dataset = read_output(filestem + Aleph.PROP_DATASET_SUFFIX)
            result = self.__to_arff(features, pl_dataset, filestem)
    finally:
        # Cleanup, also when one of the steps above raised, so that the
        # temporary directory is not left behind.
        self.__cleanup()

    return result
def __prepare(self, filestem, pos, neg, b):
"""
......@@ -133,11 +150,11 @@ class Aleph(object):
posFile.write(pos)
negFile.write(neg)
bFile.write(b)
posFile.close()
negFile.close()
bFile.close()
def __cleanup(self):
"""
Cleans up all the temporary files.
......@@ -145,7 +162,8 @@ class Aleph(object):
try:
shutil.rmtree(self.tmpdir)
except:
logger.info('Problem removing temporary files. The files are probably in use.')
logger.info('Problem removing temporary files. \
The files are probably in use.')
def __script(self, mode, filestem):
"""
......@@ -153,10 +171,10 @@ class Aleph(object):
"""
scriptPath = '%s/%s' % (self.tmpdir, Aleph.SCRIPT)
script = open(scriptPath, 'w')
# Permit the owner to execute and read this script
os.chmod(scriptPath, S_IREAD | S_IEXEC)
cat = lambda x: script.write(x + '\n')
cat(":- initialization(run_aleph).")
cat("run_aleph :- ")
......@@ -166,8 +184,50 @@ class Aleph(object):
for setting, value in self.settings.items():
cat("set(%s, %s)," % (setting, value))
cat("%s," % mode)
cat("write_rules('%s')%s" % (filestem + Aleph.RULES_SUFFIX, ',' if self.postScript else '.'))
eof = ',' if self.postScript else '.'
if mode == 'induce_features':
cat("consult(features),")
features_fn = filestem + Aleph.FEATURES_SUFFIX
dataset_fn = filestem + Aleph.PROP_DATASET_SUFFIX
cat('save_features(%s),' % features_fn)
cat('save_dataset(%s)%s' % (dataset_fn, eof))
else:
rules_fn = filestem + Aleph.RULES_SUFFIX
cat("write_rules('%s')%s" % (rules_fn, eof))
if self.postScript:
cat(self.postGoal + ".")
cat(self.postScript)
script.close()
\ No newline at end of file
script.close()
def __to_arff(self, features, pl_dataset, filestem):
    """
    Converts the induced features and the propositionalized examples
    into a sparse ARFF dataset.

    @param features Contents of the features file; entries of the form
                    feature(Id,(Clause)). are picked up.
    @param pl_dataset Contents of the dataset file; entries of the form
                      example(Id,[FeatureId,...],Class). are picked up.
    @param filestem Used as the name of the ARFF relation.
    @return The ARFF dataset as a string.
    """
    lines = ['@RELATION "%s"' % filestem]

    parsed = re.findall(r"feature\((\d+),\((.*)\)\).", features)
    # The ids are captured as strings; order them numerically so that
    # f10 is not declared before f2. The data rows below address the
    # attribute of feature N by position N-1, so the declaration order
    # has to follow the numeric ids.
    for fid, feature in sorted(parsed, key=lambda e: int(e[0])):
        lines.append('%% f%s: %s' % (fid, feature))
        lines.append('@ATTRIBUTE f%s {0,1}' % fid)

    # Class attribute; it is declared last, so its index equals the
    # number of feature attributes.
    class_id = len(parsed)
    lines.append('@ATTRIBUTE class {negative,positive}')

    lines.append('@DATA')
    examples = re.findall(r"example\((\w+),(\[[\d,]*\]),(\w+)\)\.", pl_dataset)
    for _, fids, cls in examples:
        # Sparse rows list their attribute indices in ascending order
        # (Weka's sparse ARFF reader expects this), whatever order the
        # ids were written in.
        indices = sorted(set(int(fid) - 1
                             for fid in fids[1:-1].split(',') if fid))
        vals = ['%d 1' % idx for idx in indices]
        vals.append('%d %s' % (class_id, cls))
        lines.append('{%s}' % ', '.join(vals))

    return '\n'.join(lines) + '\n'
if __name__ == '__main__':
    # Small manual demo: induce features on the example files under test/
    # and print the resulting dataset.
    def _read(path):
        # Read a whole input file and close it again right away.
        with open(path) as handle:
            return handle.read()

    aleph = Aleph()
    print(aleph.induce('induce_features',
                       _read('test/train.f'),
                       _read('test/train.n'),
                       _read('test/train.b'),
                       filestem='trains_test'))
% Helper predicates for saving induced features and the examples, described
% by the features that hold for them, to files.
%
% Example session:
% [aleph], read_all(train), induce_features, [show_features], save_features('ficrji'), save_dataset('dataset').

% NOTE(review): presumably this makes show(train_pos) / show(train_neg) hand
% over to the aleph_portray/1 clauses defined in this file -- confirm against
% the Aleph manual.
:- set(portray_examples, true).
% save_features(+Path)
% Redirects the current output to the file Path, runs show(features) and
% closes the file again.
save_features(Path) :-
    open(Path, write, Out),
    set_output(Out),
    show(features),
    close(Out).
% save_dataset(+Path)
% Redirects the current output to the file Path, runs show(train_pos)
% followed by show(train_neg), and closes the file again.
save_dataset(Path) :-
    open(Path, write, Out),
    set_output(Out),
    show(train_pos),
    show(train_neg),
    close(Out).
% aleph_portray(+Which)
% Looks up the examples file stored under the setting Which (train_pos or
% train_neg) and prints its examples via show_features/2, labelled
% positive or negative respectively.
aleph_portray(train_pos) :-
    setting(train_pos, PosFile),
    show_features(PosFile, positive).
aleph_portray(train_neg) :-
    setting(train_neg, NegFile),
    show_features(NegFile, negative).
% show_features(+File, +Class)
% Reads the terms in File one by one and, for every term with exactly one
% argument, prints a line starting with 'example(' and that argument,
% followed by whatever write_features/2 prints for the term and Class.
% Terms of any other shape are skipped.
show_features(File, Class) :-
    open(File, read, Stream),
    % Failure-driven loop: every pass through the else-branch ends in fail,
    % which returns here to read the next term.
    repeat,
    read(Stream, Example),
    (   Example = end_of_file
    ->  close(Stream),
        % Drop the choice point left by repeat; otherwise a later failure
        % in the caller would re-enter the loop and read from the stream
        % that was just closed.
        !
    ;   Example =.. [_|[Example_id]],
        write('example('),
        write(Example_id),
        % write_features/2 has two clauses; the fail below makes both run
        % before control returns to repeat.
        write_features(Example, Class),
        fail
    ).
% feature_holds(?Id, +Example)
% Succeeds when there is a feature/2 fact numbered Id whose clause head
% unifies with Example and whose body can be proved.
feature_holds(Id, Example) :-
    feature(Id, (Example :- Condition)),
    call(Condition).
% write_features(+Example, +Class)
% Prints the remainder of an example line in two steps, one per clause.
% The caller (show_features/2) fails after the call, so that backtracking
% runs the second clause after the first one.
%
% First clause: prints a comma and the list of ids of the features for which
% feature_holds/2 succeeds on Example.
% NOTE(review): all/3 presumably fails when there is no solution, which is
% why '[]' is written in the else-branch -- confirm against the YAP manual.
write_features(Example,_):-
write(','),
(all(Feature_id, feature_holds(Feature_id, Example), Features)
-> write(Features)
; write('[]')
).
% Second clause: prints a comma, the class label and the closing ').',
% then ends the line.
write_features(_,Class):-
write(','), write(Class), write(').'), nl.
......@@ -28,8 +28,8 @@ def ilp_aleph(input_dict):
for pl_script in [b, pos, neg]:
check_input(pl_script)
# Run aleph
theory = aleph.induce(mode, pos, neg, b)
return {'theory' : theory}
result = aleph.induce(mode, pos, neg, b)
return {'theory': result}
def ilp_rsd(input_dict):
rsd = RSD()
......
......@@ -212,4 +212,4 @@ if __name__ == '__main__':
features, weka, rules = rsd.induce(b, examples=examples)
print features
print weka
print rules
\ No newline at end of file
print rules
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment