library.py 4.84 KB
Newer Older
Anze Vavpetic's avatar
Anze Vavpetic committed
1
import re
2 3
import json
import tempfile
Anze Vavpetic's avatar
Anze Vavpetic committed
4 5 6 7 8
from string import ascii_lowercase as chars
from random import choice

from aleph import Aleph
from rsd import RSD
9
from wordification import Wordification
Anze Vavpetic's avatar
Anze Vavpetic committed
10
from treeliker import TreeLiker
11
from security import check_input
Anze Vavpetic's avatar
Anze Vavpetic committed
12

Anze Vavpetic's avatar
Anze Vavpetic committed
13 14
from services.webservice import WebService

Anze Vavpetic's avatar
Anze Vavpetic committed
15 16 17 18 19 20 21
def ilp_aleph(input_dict):
    """Induce a theory with Aleph from the scripts supplied in ``input_dict``.

    Required keys: 'settings', 'mode', 'pos', 'neg' and 'b'.  Any key named
    in ``Aleph.ESSENTIAL_PARAMS`` may also be present; when it is missing the
    default stored in ``Aleph.ESSENTIAL_PARAMS`` is used.

    Returns a dict with 'theory' and 'features', taken from the first and
    second element of the value returned by ``Aleph.induce``.
    """
    engine = Aleph()
    settings, mode, pos, neg, b = (
        input_dict[key] for key in ('settings', 'mode', 'pos', 'neg', 'b')
    )

    # File-based settings go in first ...
    if settings:
        engine.settingsAsFacts(settings)

    # ... and are then overridden by the individually supplied parameters.
    for param_name, fallback in Aleph.ESSENTIAL_PARAMS.items():
        engine.set(param_name, input_dict.get(param_name, fallback))

    # Every script is passed through check_input before Aleph is run.
    for script in (b, pos, neg):
        check_input(script)

    outcome = engine.induce(mode, pos, neg, b)
    return {'theory': outcome[0], 'features': outcome[1]}
Anze Vavpetic's avatar
Anze Vavpetic committed
34 35 36 37 38 39 40 41

def ilp_rsd(input_dict):
    """Run RSD on the scripts supplied in ``input_dict``.

    Required keys: 'b' and 'subgroups'.  Optional keys ('settings', 'pos',
    'neg', 'examples') default to ``None``.  Any key named in
    ``RSD.ESSENTIAL_PARAMS`` may also be present; when it is missing the
    default stored in ``RSD.ESSENTIAL_PARAMS`` is used.

    Returns a dict with the 'features', 'arff' and 'rules' values produced
    by ``RSD.induce``.
    """
    engine = RSD()
    settings = input_dict.get('settings')
    pos = input_dict.get('pos')
    neg = input_dict.get('neg')
    examples = input_dict.get('examples')
    b = input_dict['b']
    # The flag arrives as text; only the exact string 'true' switches it on.
    use_cn2sd = input_dict['subgroups'] == 'true'

    # File-based settings go in first ...
    if settings:
        engine.settingsAsFacts(settings)

    # ... and are then overridden by the individually supplied parameters.
    for param_name, fallback in RSD.ESSENTIAL_PARAMS.items():
        engine.set(param_name, input_dict.get(param_name, fallback))

    # Every script is passed through check_input before RSD is run.
    for script in (b, pos, neg, examples):
        check_input(script)

    features, arff, rules = engine.induce(
        b, examples=examples, pos=pos, neg=neg, cn2sd=use_cn2sd
    )
    return dict(features=features, arff=arff, rules=rules)
55

56

57
def ilp_sdmsegs_rule_viewer(input_dict):
    """Return an empty dict; ``input_dict`` is not read."""
    return dict()

def ilp_sdmaleph(input_dict):
    """Call the remote ``sdmaleph`` web service and return its theory.

    Keys read from ``input_dict``:
        examples: an ``orange.ExampleTable`` (saved to a temporary ``.tab``
            file and read back), a list (sent as JSON) or a ``str`` (sent
            unchanged).
        mapping: forwarded unchanged.
        ontology, relation: iterables whose items are wrapped as
            ``{'ontology': item}`` / ``{'relation': item}``.  A missing or
            ``None`` value is sent as an empty list.
        posClassVal, cutoff, minPos, noise, clauseLen, dataFormat: optional
            parameters; a missing key or an empty string is sent as ``None``.

    Returns:
        dict with the single key 'theory', taken from the service response.

    Raises:
        Exception: if ``examples`` is not one of the supported types.
    """
    import orange

    def optional(key):
        # An empty string means "not set" and is sent as None.
        value = input_dict.get(key)
        return value if value != '' else None

    ws = WebService('http://vihar.ijs.si:8097', 3600)

    data = input_dict.get('examples')
    if isinstance(data, orange.ExampleTable):
        # The table is written by file name, then read back through the
        # still-open handle before the temporary file is deleted.
        with tempfile.NamedTemporaryFile(suffix='.tab', delete=True) as f:
            data.save(f.name)
            examples = f.read()
    elif isinstance(data, list):
        examples = json.dumps(data)
    elif isinstance(data, str):
        examples = data
    else:
        # Adjacent literals instead of a backslash continuation, which used
        # to embed the source indentation into the message.
        raise Exception('Illegal examples format. '
                        'Supported formats: str, list or Orange')

    # Treat missing ontology/relation entries as empty rather than failing
    # with a TypeError while iterating None.
    ontologies = input_dict.get('ontology') or []
    relations = input_dict.get('relation') or []

    response = ws.client.sdmaleph(
        examples=examples,
        mapping=input_dict.get('mapping'),
        ontologies=[{'ontology': ontology} for ontology in ontologies],
        relations=[{'relation': relation} for relation in relations],
        posClassVal=optional('posClassVal'),
        cutoff=optional('cutoff'),
        minPos=optional('minPos'),
        noise=optional('noise'),
        clauseLen=optional('clauseLen'),
        dataFormat=optional('dataFormat')
    )
    return {'theory': response['theory']}


def ilp_wordification(input_dict):
    """Run Wordification on the given tables and return its outputs.

    Keys read from ``input_dict`` (all optional):
        target_table, other_tables, context, idf: passed to the
            ``Wordification`` constructor; default ``None``.
        f_ngram_size: converted with ``int`` and passed to the constructor;
            default 1.
        weighting_measure: passed to ``calculate_tf_idfs``; default 'tfidf'.

    Returns:
        dict with 'arff' (``to_arff()``), 'corpus' (``wordify()``) and 'idf'
        (the ``idf`` attribute of the ``Wordification`` instance).
    """
    target_table = input_dict.get('target_table')
    other_tables = input_dict.get('other_tables')
    weighting_measure = input_dict.get('weighting_measure', 'tfidf')
    context = input_dict.get('context')
    word_att_length = int(input_dict.get('f_ngram_size', 1))
    idf = input_dict.get('idf')

    # The former single-iteration loop and the unreachable ``if 1==0``
    # debugging branch were removed; the executed statements are unchanged.
    wordification = Wordification(
        target_table, other_tables, context, word_att_length, idf
    )
    wordification.run(1)
    wordification.calculate_tf_idfs(weighting_measure)

    return {
        'arff': wordification.to_arff(),
        'corpus': wordification.wordify(),
        'idf': wordification.idf,
    }
Anze Vavpetic's avatar
Anze Vavpetic committed
110 111 112 113 114 115 116 117 118 119 120 121 122 123


def ilp_treeliker(input_dict):
    """Run TreeLiker on a dataset/template pair.

    Required keys: 'template' and 'dataset'.  The optional keys listed in
    ``option_names`` are collected into the settings dict handed to
    ``TreeLiker``; a missing key is passed as ``None``.

    Returns a dict with 'arff' (the first of the two values returned by
    ``TreeLiker.run``) and 'treeliker' (the ``TreeLiker`` instance itself).
    """
    template = input_dict['template']
    dataset = input_dict['dataset']

    option_names = (
        'algorithm',
        'minimum_frequency',
        'covered_class',
        'maximum_size',
        'use_sampling',
        'sample_size',
        'max_degree',
    )
    settings = {name: input_dict.get(name) for name in option_names}

    learner = TreeLiker(dataset, template, settings=settings)
    # run() yields two values; only the first is returned to the caller.
    arff_train, _arff_test = learner.run()
    return dict(arff=arff_train, treeliker=learner)