Commit ba6bac4e authored by Matic Perovšek

popravek izgube metapodatkov wordification

wordification "sentence" caching
dbcontext bug ko sta dve entiti imeli lahko le eno relacijo med njima
parent c5bf5686
......@@ -356,7 +356,7 @@ def cforange_discretize(input_dict):
for attr in inputdata.domain.attributes:
if attr.varType == orange.VarTypes.Continuous:
newattr=d(attr,inputdata) if discretizerIndex in [0,2,3] else d.constructVariable(attr)
print newattr.name
newattr.name=attr.name
#newattr.name=attr.name[2:] if newattr.name.startswith("D_"):
newattrs.append(newattr)
......@@ -367,7 +367,10 @@ def cforange_discretize(input_dict):
#for attr in newattrs: #TODO
# if attr.name.startswith("D_"):
# attr.name=attr.name[2:]
new_t=inputdata.select(newattrs + [inputdata.domain.classVar])
#new_t=inputdata.select(newattrs + [inputdata.domain.classVar])
newdomain = orange.Domain(newattrs, inputdata.domain.classVar)
newdomain.addmetas(inputdata.domain.getmetas())
new_t = orange.ExampleTable(newdomain, inputdata)
new_t.name=name
output_tables.append(new_t)
......@@ -380,7 +383,7 @@ def cforange_discretize(input_dict):
#newdomain = orange.Domain(data.domain.attributes, newclass)
#data_v = orange.ExampleTable(newdomain, data)
output_dict = {'odt': output_tables if input_type_is_list else output_tables[0]} #returns list if input is list
output_dict = {'odt': output_tables if input_type_is_list else output_tables[0],'discr_intervals':points} #returns list if input is list
return output_dict
def cforange_attribute_distance(input_dict):
......
......@@ -14,22 +14,24 @@ class Wordification(object):
self.context=context
self.connecting_tables=defaultdict(list)
self.cached_sentences=defaultdict(dict)
self.lll=defaultdict(int)
#finds table connections
for primary_table in [target_table]+other_tables:
for secondary_table in [target_table]+other_tables:
if (primary_table.name,secondary_table.name) in self.context.connected:
primary_key,foreign_key=self.context.connected[(primary_table.name,secondary_table.name)]
if self.context.pkeys[primary_table.name] == primary_key:
self.connecting_tables[primary_table].append((secondary_table,foreign_key))
for primary_key,foreign_key in self.context.connected[(primary_table.name,secondary_table.name)]:
if self.context.pkeys[primary_table.name] == primary_key:
self.connecting_tables[primary_table].append((secondary_table,foreign_key))
def wordify(self):
"""
Applies the wordification methodology on the target table
"""
return string.join(["!"+str(ex.get_class())+" "+string.join(self.wordify_example(self.target_table,ex)," ") for ex in self.target_table],"\n")
s=string.join(["!"+str(ex.get_class())+" "+string.join(self.wordify_example(self.target_table,ex)," ") for ex in self.target_table],"\n")
print sorted(self.lll.items(),key=lambda k: [k[1],k[0]],reverse=True)
return s
def wordify_example(self,data,ex):
"""
......@@ -38,21 +40,30 @@ class Wordification(object):
@param data The given examples ExampleTable
@param ex Example for which the document is constructed
"""
words=[] #word list for every example
data_name=str(data.name)
ex_pkey_value=data.name in self.context.pkeys and ex[str(self.context.pkeys[data.name])]
self.lll[data_name+" "+str(ex_pkey_value)]+=1
if not data_name in self.cached_sentences or not str(ex_pkey_value) in self.cached_sentences[data.name]:
#else:
print data_name,str(ex_pkey_value)
words=[] #word list for every example
#Construct words (tableName_attributeName_attributeValue) from the given table
for att in data.domain.attributes:
if not str(att.name) in self.context.pkeys[data.name] and not str(att.name) in self.context.fkeys[data.name]:
words.append(self.att_to_s(data.name)+"_"+self.att_to_s(att.name)+"_"+self.att_to_s(ex[att]))
#Construct words (tableName_attributeName_attributeValue) from the given table
for att in data.domain.attributes:
if not str(att.name) in self.context.pkeys[data.name] and not str(att.name) in self.context.fkeys[data.name]:
words.append(self.att_to_s(data.name)+"_"+self.att_to_s(att.name)+"_"+self.att_to_s(ex[att]))
#Apply the wordification methodology recursively on all connecting tables
for sec_t,sec_fkey in self.connecting_tables[data]:
for sec_ex in sec_t:
if ex_pkey_value and sec_ex[str(sec_fkey)]==ex_pkey_value:
words+=self.wordify_example(sec_t,sec_ex)
return words
#Apply the wordification methodology recursively on all connecting tables
for sec_t,sec_fkey in self.connecting_tables[data]:
for sec_ex in sec_t:
if ex_pkey_value and sec_ex[str(sec_fkey)]==ex_pkey_value:
words+=self.wordify_example(sec_t,sec_ex)
#print words
self.cached_sentences[data_name][str(ex_pkey_value)]=words
else:
print data_name,str(ex_pkey_value), "cache: hit"
return self.cached_sentences[data_name][str(ex_pkey_value)]
def att_to_s(self,att):
"""
......
......@@ -14,7 +14,6 @@ try:
import System
import Latino
from LatinoClowdFlows import *
import LatinoClowdFlows
except Exception:
logging.warning("DotNet assemblies could not be loaded! Probable reasons: missing dlls or wrong interpreter (see http://pythonnet.sourceforge.net). "
......
......@@ -50,7 +50,7 @@ class DBContext:
self.all_cols = dict(self.cols)
self.col_vals = {}
self.connected = {}
self.connected = defaultdict(list)
cursor.execute(
"SELECT table_name, column_name, referenced_table_name, referenced_column_name \
FROM information_schema.KEY_COLUMN_USAGE \
......@@ -69,8 +69,9 @@ class DBContext:
if col == 'id':
self.pkeys[table] = col
for (table, col, ref_table, ref_col) in cursor:
self.connected[(table, ref_table)] = (col, ref_col)
self.connected[(ref_table, table)] = (ref_col, col)
print table,col,ref_table, ref_col
self.connected[(table, ref_table)].append((col, ref_col))
self.connected[(ref_table, table)].append((ref_col, col))
self.fkeys[table].add(col)
......
......@@ -45,24 +45,27 @@ class ILP_Converter(Converter):
def connecting_clause(self, table, ref_table):
var_table, var_ref_table = table.capitalize(), ref_table.capitalize()
pk, fk = self.db.connected[(table, ref_table)]
ref_pk = self.db.pkeys[ref_table]
table_args, ref_table_args = [], []
for col in self.db.cols[table]:
if col == pk:
col = var_table
elif col == fk:
col = var_ref_table
table_args.append(col.capitalize())
for col in self.db.cols[ref_table]:
if col == ref_pk:
col = var_ref_table
if col == fk:
col = var_table
ref_table_args.append(col.capitalize())
return ['has_%s(%s, %s) :-' % (ref_table, var_table.capitalize(), var_ref_table.capitalize()),
'\t%s(%s),' % (table, ','.join(table_args)),
'\t%s(%s).' % (ref_table, ','.join(ref_table_args))]
result=[]
for pk,fk in self.db.connected[(table, ref_table)]:
ref_pk = self.db.pkeys[ref_table]
table_args, ref_table_args = [], []
for col in self.db.cols[table]:
if col == pk:
col = var_table
elif col == fk:
col = var_ref_table
table_args.append(col.capitalize())
for col in self.db.cols[ref_table]:
if col == ref_pk:
col = var_ref_table
if col == fk:
col = var_table
ref_table_args.append(col.capitalize())
result.extend(['has_%s(%s, %s) :-' % (ref_table, var_table.capitalize(), var_ref_table.capitalize()),
'\t%s(%s),' % (table, ','.join(table_args)),
'\t%s(%s).' % (ref_table, ','.join(ref_table_args))])
return result
def attribute_clause(self, table, att):
var_table, var_att, pk = table.capitalize(), att.capitalize(), self.db.pkeys[table]
......
......@@ -33,10 +33,12 @@
<tr><th>Table</th><th>Referenced table</th><th>Column</th><th>Referenced column</th></tr>
</thead>
<tbody>
{% for tables, cols in context.connected.items %}
<tr><td>{{tables.0}}</td><td>{{tables.1}}</td><td>{{cols.0}}</td><td>{{cols.1}}</td></tr>
{% endfor %}
</tbody>
{% for tables, cols in context.connected.items %}
{% for cols_inner in cols %}
<tr><td>{{tables.0}}</td><td>{{tables.1}}</td><td>{{cols_inner.0}}</td><td>{{cols_inner.1}}</td></tr>
{% endfor %}
{% endfor %}
</tbody>
</table>
<input type="hidden" name="widget_id" value="{{widget.pk}}"/>
</form>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment