Commit 340951b7 authored by borut
Browse files

Merge branch 'dev' of source.ijs.si:mothra into dev

Conflicts:
	requirements.txt
parents 8cac1c12 0b389dbd
......@@ -190,7 +190,7 @@ INSTALLED_APPS_WORKFLOWS_SUB = (
#'workflows.latino',
'workflows.decision_support',
'workflows.segmine',
#'workflows.subgroup_discovery',
'workflows.subgroup_discovery',
'workflows.nlp',
'workflows.nl_toolkit',
'workflows.ilp',
......
......@@ -8,7 +8,7 @@ django-orderable-inlines==0.0.6
django-picklefield==0.2.1
httplib2==0.7.5
wsgiref==0.1.2
mysql-connector-python>=1.0.9
mysql-connector-python==1.0.12
numpy==1.7.1
pydot==1.0.28
pyparsing==1.5.7
......
This diff is collapsed.
......@@ -89,5 +89,7 @@ def ilp_wordification(input_dict):
target_table = input_dict.get('target_table',None)
other_tables = input_dict.get('other_tables', None)
context = input_dict.get('context', None)
wordification = Wordification(target_table,other_tables,context)
word_att_length = int(input_dict.get('f_ngram_size', 1))
wordification = Wordification(target_table,other_tables,context,word_att_length)
return {'corpus' : wordification.wordify()}
......@@ -2,7 +2,7 @@ from collections import defaultdict
import string
class Wordification(object):
def __init__(self,target_table,other_tables,context):
def __init__(self,target_table,other_tables,context,word_att_length):
"""
Wordification object constructor.
......@@ -12,57 +12,133 @@ class Wordification(object):
self.target_table=target_table
self.other_tables=other_tables
self.context=context
self.word_att_length=word_att_length
self.connecting_tables=defaultdict(list)
self.cached_sentences=defaultdict(dict)
self.lll=defaultdict(int)
#finds table connections
for primary_table in [target_table]+other_tables:
for secondary_table in [target_table]+other_tables:
if (primary_table.name,secondary_table.name) in self.context.connected:
for primary_key,foreign_key in self.context.connected[(primary_table.name,secondary_table.name)]:
if self.context.pkeys[primary_table.name] == primary_key:
self.connecting_tables[primary_table].append((secondary_table,foreign_key))
self.connecting_tables[primary_table].append((secondary_table,foreign_key,None))
#else:
# self.connecting_tables[primary_table].append((secondary_table,primary_key,foreign_key))
self.index_by_value={}
for table in [target_table]+other_tables:
self.index_by_value[table.name]={}
for sec_t,sec_fkey,prim_fkey in [item for sublist in self.connecting_tables.values() for item in sublist]:
#if sec_t==table:
if not prim_fkey:
self.index_by_value[sec_t.name][sec_fkey]=defaultdict(list)
for ex in sec_t:
self.index_by_value[sec_t.name][sec_fkey][str(ex[str(sec_fkey)])].append(ex)
else:
if not prim_fkey in self.index_by_value[sec_t.name]:
self.index_by_value[sec_t.name][prim_fkey]=defaultdict(list)
for ex in sec_t:
self.index_by_value[sec_t.name][prim_fkey][str(ex[str(prim_fkey)])].append(ex)
print self.connecting_tables
def wordify(self):
"""
Applies the wordification methodology on the target table
"""
s=string.join(["!"+str(ex.get_class())+" "+string.join(self.wordify_example(self.target_table,ex)," ") for ex in self.target_table],"\n")
print sorted(self.lll.items(),key=lambda k: [k[1],k[0]],reverse=True)
return s
def wordify_example(self,data,ex):
#class + wordification on every example of the main table
a=[]
for i,ex in enumerate(self.target_table):
a.append("!"+str(ex.get_class())+" "+string.join(self.wordify_example(self.target_table,ex)," "))
s=string.join(a,"\n")
#print sorted(self.lll.items(),key=lambda k: [k[1],k[0]],reverse=True)
#print a
#print "s"
return s#[0:10000000]
def wordify_example(self,data,ex,searched_connections=set([])):
"""
Recursively constructs the 'wordification' document for the given example.
@param data The given examples ExampleTable
@param ex Example for which the document is constructed
"""
debug=False
data_name=str(data.name)
if data_name=="ring_strucs":
print data_name
if debug:
print "======================================"
print "example:",ex
print "table name:", data_name
print "searched_connections:",len(searched_connections),searched_connections
print "connecting_tables:",len(self.connecting_tables[data]),self.connecting_tables[data]
ex_pkey_value=data.name in self.context.pkeys and ex[str(self.context.pkeys[data.name])]
self.lll[data_name+" "+str(ex_pkey_value)]+=1
if not data_name in self.cached_sentences or not str(ex_pkey_value) in self.cached_sentences[data.name]:
#else:
print data_name,str(ex_pkey_value)
#print data_name,str(ex_pkey_value)
words=[] #word list for every example
if debug:
print "words:",len(words)
#Construct words (tableName_attributeName_attributeValue) from the given table
for att in data.domain.attributes:
if not str(att.name) in self.context.pkeys[data.name] and not str(att.name) in self.context.fkeys[data.name]:
words.append(self.att_to_s(data.name)+"_"+self.att_to_s(att.name)+"_"+self.att_to_s(ex[att]))
#Apply the wordification methodology recursively on all connecting tables
for sec_t,sec_fkey in self.connecting_tables[data]:
for sec_ex in sec_t:
if ex_pkey_value and sec_ex[str(sec_fkey)]==ex_pkey_value:
words+=self.wordify_example(sec_t,sec_ex)
#print words
#words from pairs of attributes
single_words=words[:]
if self.word_att_length>1:
for i,att1 in enumerate(single_words):
for j,att2 in enumerate(single_words):
if i<j:
words.append(att1+"__"+att2)
#print "2",words[-1]
if self.word_att_length>2:
for i,att1 in enumerate(single_words):
for j,att2 in enumerate(single_words):
for k,att3 in enumerate(single_words):
if i<j and j<k:
words.append(att1+"__"+att2+"__"+att3)
#print "3",words[-1]
#Apply the wordification methodology recursively on all connecting tables
for sec_t,sec_fkey,prim_fkey in self.connecting_tables[data]:
#for sec_ex in sec_t:
# if ex_pkey_value and sec_ex[str(sec_fkey)]==ex_pkey_value:
# words+=self.wordify_example(sec_t,sec_ex)
#print sec_t,sec_fkey,prim_fkey
if debug:
print "------------------"
print "(sec_t,sec_fkey,prim):",(sec_t.name,sec_fkey,prim_fkey)
print "search this table:",not (sec_t,sec_fkey) in searched_connections and sec_t!=self.target_table
print "search this table:",not prim_fkey or not (data,sec_fkey) in searched_connections# and sec_t!=self.target_table
if not (sec_t,sec_fkey) in searched_connections and sec_t!=self.target_table and (not prim_fkey or not (data,sec_fkey) in searched_connections):
by_value=self.index_by_value[sec_t.name][str(sec_fkey)][str(ex_pkey_value)] if not prim_fkey else self.index_by_value[sec_t.name][str(prim_fkey)][str(ex[str(sec_fkey)])]
for sec_ex in by_value:
words+=self.wordify_example(sec_t,sec_ex,searched_connections | set([(sec_t,sec_fkey),prim_fkey and (data,prim_fkey)]))
self.cached_sentences[data_name][str(ex_pkey_value)]=words
else:
print data_name,str(ex_pkey_value), "cache: hit"
return self.cached_sentences[data_name][str(ex_pkey_value)]
def att_to_s(self,att):
......@@ -71,5 +147,4 @@ class Wordification(object):
@param att Orange attribute
"""
return str(att).title().replace(' ','').replace('_','')
......@@ -63,8 +63,8 @@ class DBContext:
if col.endswith('_id'):
ref_table = (col[:-4] + 'ies') if col[-4] == 'y' else (col[:-3] + 's')
if ref_table in self.tables:
self.connected[(table, ref_table)] = (col, 'id')
self.connected[(ref_table, table)] = ('id', col)
self.connected[(table, ref_table)].append((col, 'id'))
self.connected[(ref_table, table)].append(('id', col))
self.fkeys[table].add(col)
if col == 'id':
self.pkeys[table] = col
......
......@@ -259,7 +259,7 @@ class Orange_Converter(Converter):
import orange
cols = self.db.cols[table_name]
attributes, metas, class_var = [], [], []
attributes, metas, class_var = [], [], None
for col in cols:
att_type = self.orng_type(table_name,col)
if att_type == 'd':
......@@ -272,13 +272,13 @@ class Orange_Converter(Converter):
if col == cls_att:
if att_type == 'string':
raise Exception('Unsuitable data type for a target variable: %s' % att_type)
class_var.append(att_var)
class_var=att_var
continue
elif att_type == 'string' or col in self.db.pkeys[table_name] or col in self.db.fkeys[table_name]:
elif att_type == 'string' or table_name in self.db.pkeys and col in self.db.pkeys[table_name] or table_name in self.db.fkeys and col in self.db.fkeys[table_name]:
metas.append(att_var)
else:
attributes.append(att_var)
domain = orange.Domain(attributes + class_var)
domain = orange.Domain(attributes, class_var)
for meta in metas:
domain.addmeta(orange.newmetaid(), meta)
dataset = orange.ExampleTable(domain)
......@@ -286,7 +286,7 @@ class Orange_Converter(Converter):
for row in self.db.rows(table_name, cols):
example = orange.Example(domain)
for col, val in zip(cols, row):
example[str(col)] = str(val)
example[str(col)] = str(val) if val!=None else '?'
dataset.append(example)
return dataset
......
This diff is collapsed.
......@@ -13,6 +13,10 @@ def mysql_db_context(request, input_dict, output_dict, widget):
initial_context = DBContext(con, find_connections=find_con)
initial_target_cols = initial_context.cols[initial_context.target_table]
cols_dump = json.dumps(initial_context.cols)
return render(request, 'interactions/db_context.html', {'widget':widget, 'context': initial_context, 'target_cols' : initial_target_cols, 'cols' : cols_dump})
return render(request, 'interactions/db_context.html', {'widget':widget,
'context': initial_context,
'connections' : dict(initial_context.connected),
'target_cols' : initial_target_cols,
'cols' : cols_dump})
......@@ -33,7 +33,7 @@
<tr><th>Table</th><th>Referenced table</th><th>Column</th><th>Referenced column</th></tr>
</thead>
<tbody>
{% for tables, cols in context.connected.items %}
{% for tables, cols in connections.items %}
{% for cols_inner in cols %}
<tr><td>{{tables.0}}</td><td>{{tables.1}}</td><td>{{cols_inner.0}}</td><td>{{cols_inner.1}}</td></tr>
{% endfor %}
......
This diff is collapsed.
def benchmark(input_dict):
    """
    Pass a value through unchanged and report a timing measurement.

    @param input_dict Dictionary that may hold 'in_att' (any value, forwarded
           as-is) and 'start_time' (a number comparable with time.time()).
    @return Dictionary with 'out_att' set to the forwarded 'in_att' value and
            'time_diff' set to the current time minus 'start_time'; when
            'start_time' is missing or falsy, 'time_diff' is simply the
            current time.time() value.
    """
    import time

    forwarded = input_dict.get('in_att')
    started = input_dict.get('start_time')

    now = time.time()
    if started:
        elapsed = now - started
    else:
        # No usable start mark: report the raw current timestamp instead.
        elapsed = now

    return {'time_diff': elapsed, 'out_att': forwarded}
......@@ -123,7 +123,7 @@ def orng_table_to_dict(data):
metas.append(data.domain.get_meta(m).name)
for a in data.domain.attributes:
attrs.append(a.name)
pretty_float = lambda x, a: '%.3f' % x if a.var_type == Orange.feature.Type.Continuous else x
pretty_float = lambda x, a: '%.3f' % x if a.var_type == Orange.feature.Type.Continuous and x!='?' else x
for inst in xrange(len(data)):
inst_new = []
for a in data.domain.variables:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment