Commit cbb780d7 authored by Matic Perovšek

wrdf binary weighting

parent e109589f
...@@ -162,6 +162,15 @@ ...@@ -162,6 +162,15 @@
"uid": "703593f1-1cae-4dd0-96c0-47211d3f4dc1" "uid": "703593f1-1cae-4dd0-96c0-47211d3f4dc1"
} }
}, },
{
"model": "workflows.abstractoption",
"fields": {
"name": "Binary",
"uid": "",
"value": "binary",
"abstract_input": "cc2a208c-bf6f-4f64-b51e-221c80366f8e"
}
},
{ {
"model": "workflows.abstractoption", "model": "workflows.abstractoption",
"fields": { "fields": {
......
...@@ -195,10 +195,13 @@ class Wordification(object): ...@@ -195,10 +195,13 @@ class Wordification(object):
train_word_count[word]+=1 train_word_count[word]+=1
for word in document: for word in document:
tf=train_word_count[word] if measure=="binary":
idf = 1 if measure=="tf" else (self.idf[word] if word in self.idf else None) tf=1
if word=='Cars_Position_3': idf=1
idf+=100 else:
tf=train_word_count[word]
idf = 1 if measure=="tf" else (self.idf[word] if word in self.idf else None)
if idf!=None: if idf!=None:
self.tf_idfs[doc_idx][word] = tf * idf self.tf_idfs[doc_idx][word] = tf * idf
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment