Commit cbb780d7 authored by Matic Perovšek

wrdf binary weighting

parent e109589f
......@@ -162,6 +162,15 @@
"uid": "703593f1-1cae-4dd0-96c0-47211d3f4dc1"
}
},
{
"model": "workflows.abstractoption",
"fields": {
"name": "Binary",
"uid": "",
"value": "binary",
"abstract_input": "cc2a208c-bf6f-4f64-b51e-221c80366f8e"
}
},
{
"model": "workflows.abstractoption",
"fields": {
......
......@@ -195,10 +195,13 @@ class Wordification(object):
train_word_count[word]+=1
for word in document:
tf=train_word_count[word]
idf = 1 if measure=="tf" else (self.idf[word] if word in self.idf else None)
if word=='Cars_Position_3':
idf+=100
if measure=="binary":
tf=1
idf=1
else:
tf=train_word_count[word]
idf = 1 if measure=="tf" else (self.idf[word] if word in self.idf else None)
if idf!=None:
self.tf_idfs[doc_idx][word] = tf * idf
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment