Commit 65fe43c5 authored by Anze Vavpetic's avatar Anze Vavpetic
Browse files

Added aleph and rsd widgets. Fixed a bug in reading/writing when using aleph/rsd as singletons.

parent 2fdf6402
from aleph import aleph
\ No newline at end of file
from aleph import Aleph
\ No newline at end of file
#
# Python interface to Aleph.
#
# author: Anze Vavpetic <anze.vavpetic@ijs.si>
# author: Anze Vavpetic <anze.vavpetic@ijs.si>, 2011
#
import os.path
import shutil
import logging
import re
import tempfile
from stat import *
from stat import S_IREAD, S_IEXEC
from subprocess import Popen, PIPE
DEBUG = True
......@@ -24,9 +24,7 @@ logger.addHandler(ch)
class Aleph(object):
# The aleph source file is presumed to be in the same dir as this file.
THIS_DIR = os.path.dirname(__file__) if os.path.dirname(__file__) else '.'
DIR = tempfile.mkdtemp()
ALEPH_FN = 'aleph.pl'
ALEPH = DIR + '/' + ALEPH_FN
YAP = '/usr/local/bin/yap'
RULES_SUFFIX = 'Rules'
SCRIPT = 'run_aleph.pl'
......@@ -37,13 +35,15 @@ class Aleph(object):
@param logging can be DEBUG, INFO or NOTSET (default). This controls the verbosity of the output.
"""
self.tmpdir = tempfile.mkdtemp()
self.aleph_script = '%s/%s' % (self.tmpdir, Aleph.ALEPH_FN)
self.postGoal = None
self.postScript = None
# Dictionary of non-default settings
self.settings = dict()
logger.setLevel(verbosity)
shutil.copy("%s/%s" % (Aleph.THIS_DIR, Aleph.ALEPH_FN), Aleph.ALEPH)
shutil.copy("%s/%s" % (Aleph.THIS_DIR, Aleph.ALEPH_FN), self.aleph_script)
def set(self, name, value):
"""
......@@ -67,7 +67,7 @@ class Aleph(object):
self.postGoal = goal
self.postScript = script
def induce(self, mode, filestem, pos, neg, b):
def induce(self, mode, pos, neg, b, filestem='default'):
"""
Induce a theory in 'mode'.
......@@ -86,7 +86,7 @@ class Aleph(object):
logger.info("Running aleph...")
# Run the aleph script.
p = Popen(['./' + Aleph.SCRIPT], cwd=Aleph.DIR, stdout=PIPE)
p = Popen(['./' + Aleph.SCRIPT], cwd=self.tmpdir, stdout=PIPE)
stdout_str, stderr_str = p.communicate()
logger.debug(stdout_str)
......@@ -95,12 +95,12 @@ class Aleph(object):
logger.info("Done.")
# Return the rules written in the output file.
rules = open('%s/%s' % (Aleph.DIR, filestem + Aleph.RULES_SUFFIX)).read()
rules = open('%s/%s' % (self.tmpdir, filestem + Aleph.RULES_SUFFIX)).read()
#shutil.copy('%s/%s.py' % (Aleph.DIR, filestem), '/home/anzev/programiranje/sdm/results/')
#shutil.copy('%s/%s.py' % (self.tmpdir, filestem), '/home/anzev/programiranje/sdm/results/')
# Cleanup.
self.__cleanup(filestem)
self.__cleanup()
return rules
......@@ -108,9 +108,9 @@ class Aleph(object):
"""
Prepares the needed files.
"""
posFile = open('%s/%s.f' % (Aleph.DIR, filestem), 'w')
negFile = open('%s/%s.n' % (Aleph.DIR, filestem), 'w')
bFile = open('%s/%s.b' % (Aleph.DIR, filestem), 'w')
posFile = open('%s/%s.f' % (self.tmpdir, filestem), 'w')
negFile = open('%s/%s.n' % (self.tmpdir, filestem), 'w')
bFile = open('%s/%s.b' % (self.tmpdir, filestem), 'w')
posFile.write(pos)
negFile.write(neg)
......@@ -120,16 +120,12 @@ class Aleph(object):
negFile.close()
bFile.close()
def __cleanup(self, filestem):
def __cleanup(self):
"""
Cleans up all the temporary files.
"""
try:
os.remove('%s/%s.f' % (Aleph.DIR, filestem))
os.remove('%s/%s.n' % (Aleph.DIR, filestem))
os.remove('%s/%s.b' % (Aleph.DIR, filestem))
os.remove('%s/%s' % (Aleph.DIR, filestem + Aleph.RULES_SUFFIX))
os.remove('%s/%s' % (Aleph.DIR, Aleph.SCRIPT))
shutil.rmtree(self.tmpdir)
except:
logger.info('Problem removing temporary files. The files are probably in use.')
......@@ -137,7 +133,7 @@ class Aleph(object):
"""
Makes the script file to be run by yap.
"""
scriptPath = '%s/%s' % (Aleph.DIR, Aleph.SCRIPT)
scriptPath = '%s/%s' % (self.tmpdir, Aleph.SCRIPT)
script = open(scriptPath, 'w')
#print scriptPath
......@@ -159,6 +155,4 @@ class Aleph(object):
if self.postScript:
cat(self.postGoal + ".")
cat(self.postScript)
script.close()
aleph = Aleph()
\ No newline at end of file
script.close()
\ No newline at end of file
import re
from string import ascii_uppercase as chars, digits
from string import ascii_lowercase as chars
from random import choice
from aleph import aleph
from aleph import Aleph
from rsd import RSD
def ilp_pre_aleph(input_dict):
    """Hand the widget inputs back untouched; no preprocessing is done."""
    return input_dict
def ilp_post_aleph(postdata, input_dict, output_dict):
def ilp_aleph(input_dict):
    """
    Widget entry point: induce a theory with Aleph.

    @param input_dict Must contain the keys 'settings', 'mode', 'pos',
           'neg' and 'b'; a falsy 'settings' value means no settings are
           applied.

    Returns a dict with the single key 'theory' holding whatever
    Aleph.induce returns.
    """
    # A fresh Aleph object per call, rather than a shared module-level one.
    aleph = Aleph()
    settings = input_dict['settings']
    mode = input_dict['mode']
    pos = input_dict['pos']
    neg = input_dict['neg']
    b = input_dict['b']
    # Parse settings
    if settings:
        aleph.settingsAsFacts(settings)
    # Run aleph; filestem is left at induce()'s default.
    theory = aleph.induce(mode, pos, neg, b)
    return {'theory': theory}
def ilp_rsd(input_dict):
    """
    Widget entry point: run RSD on the given inputs.

    @param input_dict 'b' and 'subgroups' are required keys; 'settings',
           'pos', 'neg' and 'examples' are optional and default to None.
           Subgroup discovery is requested only when 'subgroups' equals
           the string 'true'.

    Returns a dict with the keys 'features', 'arff' and 'rules'.
    """
    rsd = RSD()
    optional = ('settings', 'pos', 'neg', 'examples')
    settings, pos, neg, examples = [input_dict.get(key, None) for key in optional]
    b = input_dict['b']
    subgroups = input_dict['subgroups'] == 'true'
    # Apply any user-supplied settings before the run.
    if settings:
        rsd.settingsAsFacts(settings)
    # Run rsd and repackage its result tuple under the widget's output names.
    features, arff, rules = rsd.induce(b, examples=examples, pos=pos, neg=neg, cn2sd=subgroups)
    return {'features': features, 'arff': arff, 'rules': rules}
from rsd import RSD
\ No newline at end of file
:-modeh(1,train(+train)). % main key
:-modeb(1,hasCar(+train,-car)).
:-modeb(1,carshape(+car,-car_shape)).
:-modeb(1,instantiate(+car_shape)). % a constant value should be considered for this var when generating features
:-modeb(1,carlength(+car,-car_length)).
:-modeb(1,instantiate(+car_length)). % a constant value should be considered for this var when generating features
:-modeb(1,has_sides(+car,-sides)).
:-modeb(1,instantiate(+sides)). % a constant value should be considered for this var when generating features
:-modeb(1,has_roof(+car,-roof)).
:-modeb(1,instantiate(+roof)). % a constant value should be considered for this var when generating features
:-modeb(1,has_wheels(+car,-wheels)).
:-modeb(1,instantiate(+wheels)). % a constant value should be considered for this var when generating features
:-modeb(1,has_load(+car,-load)).
:-modeb(1,loadshape(+load,-load_shape)).
:-modeb(1,instantiate(+load_shape)). % a constant value should be considered for this var when generating features
:-modeb(1,loadnum(+load,-load_num)).
:-modeb(1,instantiate(+load_num)). % a constant value should be considered for this var when generating features
%:-modeb(1,notSame(+car,+car)).
% settings considered by "featurize.pl":
% ... none - all default
% settings considered by "process.pl"
% ... none - all default
% settings considered by "rules.pl"
:-set(eval_threshold,0.01).
:-set(sig_threshold,0). % chi^2 value threshold does not make sense with 20 train instances -> therefore 0
% hasCar(Train, Car): holds when my_member(Car, Train) succeeds.
% NOTE(review): my_member/2 is not defined in this file; presumably list
% membership over the train's list of cars - confirm.
hasCar(Train,Car):-
my_member(Car,Train).
% Accessors for the 7-argument car term c/7. Each one unifies its second
% argument with a single position of the term and ignores the rest; the
% first position of c/7 is not read by any of them.
%   carshape -> 2nd, carlength -> 3rd, has_sides -> 4th,
%   has_roof -> 5th, has_wheels -> 6th, has_load -> 7th.
carshape(c(_,S,_,_,_,_,_),S).
carlength(c(_,_,LE,_,_,_,_),LE).
has_sides(c(_,_,_,SD,_,_,_),SD).
has_roof(c(_,_,_,_,R,_,_),R).
has_wheels(c(_,_,_,_,_,W,_),W).
has_load(c(_,_,_,_,_,_,L),L).
% Accessors for the 2-argument load term l/2: loadshape reads the 1st
% argument, loadnum the 2nd.
loadshape(l(LS,_),LS).
loadnum(l(_,LN),LN).
% 20 trains: 10 eastbound followed by 10 westbound
train(east,[c(1,rectangle,short,not_double,none,2,l(circle,1)),c(2,rectangle,
long,not_double,none,3,l(hexagon,1)),c(3,rectangle,short,
not_double,peaked,2,l(triangle,1)),c(4,rectangle,long,
not_double,none,2,l(rectangle,3))]).
train(east,[c(1,rectangle,short,not_double,flat,2,l(circle,2)),c(2,bucket,
short,not_double,none,2,l(rectangle,1)),c(3,u_shaped,
short,not_double,none,2,l(triangle,1))]).
train(east,[c(1,rectangle,long,not_double,flat,3,l(utriangle,1)),c(2,hexagon,
short,not_double,flat,2,l(triangle,1)),c(3,rectangle,
short,not_double,none,2,l(circle,1))]).
train(east,[c(1,rectangle,short,not_double,none,2,l(rectangle,1)),c(2,ellipse,
short,not_double,arc,2,l(diamond,1)),c(3,rectangle,short,
double,none,2,l(triangle,1)),c(4,bucket,short,not_double,
none,2,l(triangle,1))]).
train(east,[c(1,rectangle,short,not_double,flat,2,l(circle,1)),c(2,rectangle,
long,not_double,flat,3,l(rectangle,1)),c(3,rectangle,
short,double,none,2,l(triangle,1))]).
train(east,[c(1,rectangle,long,not_double,jagged,3,l(rectangle,1)),c(2,hexagon,
short,not_double,flat,2,l(circle,1)),c(3,rectangle,short,
not_double,none,2,l(triangle,1)),c(4,rectangle,long,not_double,
jagged,2,l(rectangle,0))]).
train(east,[c(1,rectangle,long,not_double,none,2,l(hexagon,1)),c(2,rectangle,
short,not_double,none,2,l(rectangle,1)),c(3,rectangle,
short,not_double,flat,2,l(triangle,1))]).
train(east,[c(1,rectangle,short,not_double,peaked,2,l(rectangle,1)),c(2,
bucket,short,not_double,none,2,l(rectangle,1)),c(3,rectangle,
long,not_double,flat,2,l(circle,1)),c(4,rectangle,short,
not_double,none,2,l(rectangle,1))]).
train(east,[c(1,rectangle,long,not_double,none,2,l(rectangle,3)),c(2,rectangle,
short,not_double,none,2,l(circle,1)),c(3,rectangle,long,
not_double,jagged,3,l(hexagon,1)),c(4,u_shaped,short,
not_double,none,2,l(triangle,1))]).
train(east,[c(1,bucket,short,not_double,none,2,l(triangle,1)),c(2,u_shaped,
short,not_double,none,2,l(circle,1)),c(3,rectangle,short,
not_double,none,2,l(triangle,1)),c(4,rectangle,short,
not_double,none,2,l(triangle,1))]).
train(west,[c(1,rectangle,short,not_double,none,2,l(triangle,1)),
c(2,rectangle, long,not_double,flat,2,l(circle,3))]).
train(west,[c(1,rectangle,long,not_double,jagged,2,l(circle,0)),c(2,u_shaped,
short,not_double,none,2,l(triangle,1)),c(3,rectangle,short,
double,none,2,l(circle,1))]).
train(west,[c(1,u_shaped,short,not_double,none,2,l(circle,1)),c(2,rectangle,
long,not_double,flat,3,l(rectangle,1))]).
train(west,[c(1,bucket,short,not_double,none,2,l(circle,1)),c(2,rectangle,
short,not_double,none,2,l(rectangle,1)),c(3,rectangle,
long,not_double,jagged,3,l(rectangle,1)),c(4,bucket,short,
not_double,none,2,l(circle,1))]).
train(west,[c(1,rectangle,long,not_double,none,2,l(rectangle,2)),c(2,u_shaped,
short,not_double,none,2,l(rectangle,1))]).
train(west,[c(1,bucket,short,not_double,none,2,l(rectangle,1)),c(2,rectangle,
long,not_double,flat,2,l(utriangle,3))]).
train(west,[c(1,rectangle,long,not_double,none,2,l(hexagon,1)),c(2,rectangle,
short,not_double,none,2,l(circle,1)),c(3,rectangle,short,
double,none,2,l(circle,1)),c(4,rectangle,long,not_double,
none,2,l(rectangle,3))]).
train(west,[c(1,u_shaped,short,not_double,none,2,l(triangle,1)),c(2,rectangle,
long,not_double,none,3,l(rectangle,3))]).
train(west,[c(1,rectangle,long,not_double,flat,3,l(rectangle,3)),
c(2,rectangle, long,not_double,flat,2,l(rectangle,3)),c(3,rectangle,
long,not_double,none,2,l(rectangle,0)),c(4,u_shaped,short,
not_double,none,2,l(triangle,1))]).
train(west,[c(1,rectangle,long,not_double,flat,3,l(hexagon,1)),c(2,u_shaped,
short,not_double,none,2,l(triangle,1))]).
This diff is collapsed.
This diff is collapsed.
# Python interface to RSD.
#
# author: Anze Vavpetic <anze.vavpetic@ijs.si>, 2012
#
import os.path
import shutil
import logging
import re
import tempfile
from stat import S_IREAD, S_IEXEC
from subprocess import Popen, PIPE
# Switch to False to have the module logger start at INFO instead of DEBUG.
DEBUG = True

# Stream handler and message layout shared by everything logged from here.
formatter = logging.Formatter("%(name)s %(levelname)s: %(message)s")
ch = logging.StreamHandler()
ch.setFormatter(formatter)

# Module-wide logger; its initial level follows the DEBUG switch above.
logger = logging.getLogger("RSD [Python]")
if DEBUG:
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.INFO)
logger.addHandler(ch)
class RSD(object):
    """
    Runs the RSD prolog scripts through yap inside a private temporary
    directory.

    Every instance creates its own working directory and copies the RSD
    sources into it. The directory is removed at the end of induce(), so
    one instance serves a single run.
    """
    # The RSD prolog sources are copied from the directory of this file.
    THIS_DIR = os.path.dirname(__file__) or '.'
    RSD_FILES = ['featurize.pl', 'process.pl', 'rules.pl']
    YAP = '/usr/local/bin/yap'

    # Generated scripts filenames
    CONSTRUCT = '_construct.pl'
    SAVE = '_save.pl'
    SUBGROUPS = '_subgroups.pl'
    SCRIPTS = [CONSTRUCT, SAVE, SUBGROUPS]

    # One 'set(name,value)' fact. Whitespace around the arguments is
    # tolerated and values may contain dots, so 'set(x, 0.01)' is accepted.
    _SETTING_PATTERN = re.compile(
        r'set\(\s*([a-zA-Z0-9_]+)\s*,\s*([a-zA-Z0-9_.]+)\s*\)')

    _NO_EXAMPLES_MSG = ('You need to provide either a single file of '
                        'classified examples or two files, positive and '
                        'negative examples.')

    def __init__(self, verbosity=logging.NOTSET):
        """
        Creates an RSD object.

        @param verbosity Level passed to the module logger's setLevel.
        """
        self.tmpdir = tempfile.mkdtemp()
        # Dictionary of non-default settings
        self.settings = dict()
        logger.setLevel(verbosity)

        # Copy needed files to tmp dir
        for fn in RSD.RSD_FILES:
            shutil.copy("%s/%s" % (RSD.THIS_DIR, fn), self.tmpdir)

    def set(self, name, value):
        """
        Sets the value of setting 'name' to 'value'.
        """
        self.settings[name] = value

    def settingsAsFacts(self, settings):
        """
        Parses a string of settings in the form
        set(name1, val1), set(name2, val2)... and stores every pair found.

        Values are stored as strings. Text that does not match the
        set(name, value) shape is ignored.
        """
        for name, val in RSD._SETTING_PATTERN.findall(settings):
            self.set(name, val)

    def induce(self, b, filestem='default', examples=None, pos=None, neg=None, cn2sd=True):
        """
        Generate features and find subgroups.

        @param filestem The base name of this experiment.
        @param examples Classified examples; can be used instead of separate pos / neg files below.
        @param pos      String of positive examples.
        @param neg      String of negative examples.
        @param b        String with background knowledge.
        @param cn2sd    Find subgroups after feature construction?

        Returns a tuple (features, weka, rules), where:
            - features is a set of prolog clauses of generated features,
            - weka is the propositional form of the input data,
            - rules is a set of generated cn2sd subgroup descriptions;
              this will be an empty string if cn2sd is set to False.

        Raises ValueError when neither 'examples' nor both 'pos' and 'neg'
        are given. This is checked before anything is written, so the
        working directory is left intact in that case.
        """
        if not (examples or (pos and neg)):
            raise ValueError(RSD._NO_EXAMPLES_MSG)

        try:
            # Write the inputs
            self.__prepare(filestem, b, examples=examples, pos=pos, neg=neg)

            # Write scripts
            self.__scripts(filestem)

            # Run the script
            logger.info("Running RSD...")
            for script in RSD.SCRIPTS:
                # Skip subgroup discovery part?
                if script == RSD.SUBGROUPS and not cn2sd:
                    continue
                # stderr is piped as well; otherwise communicate() returns
                # None for it and there is nothing to log.
                p = Popen(['./' + script], cwd=self.tmpdir, stdout=PIPE, stderr=PIPE)
                stdout_str, stderr_str = p.communicate()
                logger.debug(stdout_str)
                logger.debug(stderr_str)
            logger.info("Done.")

            # Return the rules written in the output file.
            features = self.__read(filestem + '_frs.pl')
            weka = self.__read(filestem + '.arff')
            rules = self.__read(filestem + '.rules') if cn2sd else ''
        finally:
            # Remove the working directory even when the run failed.
            self.__cleanup()
        return (features, weka, rules)

    def __read(self, filename):
        """
        Returns the contents of 'filename' from the working directory.
        """
        with open('%s/%s' % (self.tmpdir, filename)) as f:
            return f.read()

    def __prepare(self, filestem, b, examples=None, pos=None, neg=None):
        """
        Prepares the needed files: the examples (either one classified
        file or a positive/negative pair) and the background knowledge
        file, which is prefixed with the current settings.

        Raises ValueError when no usable examples are given.
        """
        if examples:
            with open('%s/%s.pl' % (self.tmpdir, filestem), 'w') as examplesFile:
                examplesFile.write(examples)
        elif pos and neg:
            with open('%s/%s.f' % (self.tmpdir, filestem), 'w') as posFile:
                posFile.write(pos)
            with open('%s/%s.n' % (self.tmpdir, filestem), 'w') as negFile:
                negFile.write(neg)
        else:
            raise ValueError(RSD._NO_EXAMPLES_MSG)

        with open('%s/%s.b' % (self.tmpdir, filestem), 'w') as bFile:
            # Write settings.
            for setting, val in self.settings.items():
                bFile.write(':- set(%s,%s).\n' % (setting, val))
            bFile.write(b)

    def __cleanup(self):
        """
        Cleans up all the temporary files. Failure to remove them is only
        logged, never raised.
        """
        try:
            shutil.rmtree(self.tmpdir)
        except (IOError, OSError):
            logger.info('Problem removing temporary files. The files are probably in use.')

    def __scripts(self, filestem):
        """
        Generates the required scripts: one yap script each for feature
        construction, saving and subgroup discovery.
        """
        # Lines shared by all three scripts.
        header = [
            "#!%s -L -s50000 -h200000\n#." % RSD.YAP,
            ':- initialization(main).',
            'main :-',
        ]
        # Goals that make up the body of main/0 in each script.
        goals = {
            RSD.CONSTRUCT: ['[featurize],', 'r(%s),' % filestem, 'w.'],
            RSD.SAVE: ['[process],', 'r(%s),' % filestem, 'w,',
                       'w(weka, %s),' % filestem, 'w(rsd, %s).' % filestem],
            RSD.SUBGROUPS: ['[rules],', 'r(%s),' % filestem, 'i,', 'w.'],
        }
        for fn in RSD.SCRIPTS:
            path = '%s/%s' % (self.tmpdir, fn)
            with open(path, 'w') as script:
                script.write('\n'.join(header + goals[fn]) + '\n')
            # Permit the owner to execute and read this script
            os.chmod(path, S_IREAD | S_IEXEC)
if __name__ == '__main__':
    # Demo run on the trains example; both paths are relative to the
    # current working directory.
    with open('example/trains.pl') as f:
        examples = f.read()
    with open('example/trains.b') as f:
        b = f.read()
    rsd = RSD()
    features, weka, rules = rsd.induce(b, examples=examples)
    # Single-argument print calls behave the same on Python 2 and 3.
    print(features)
    print(weka)
    print(rules)
\ No newline at end of file
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment