Commit ad2e75d3 authored by Imène Lajili

Add calibration widgets in context-aware package

parent e9045e61
from math import floor
from numpy import size
def ca_set_binary_threshold_from_skew(input_dict):
import math
cost_false_pos = input_dict['cost_false_pos']
......@@ -73,9 +73,11 @@ def ca_rank_driven_binary_threshold_selection(input_dict):
def ca_optimal_binary_threshold_selection(input_dict):
from collections import Counter
performance = input_dict['score']
method = input_dict['method']
#print method
cost_fn=input_dict['cost_false_neg']
cost_fp=input_dict['cost_false_pos']
list_score = []
labels = ''
n = len(performance['actual'])
......@@ -84,39 +86,54 @@ def ca_optimal_binary_threshold_selection(input_dict):
output_dict = {}
sorted_score = sorted(list_score, key=lambda scr: scr[1],reverse=True)
counter_neg = len([score for score in list_score if score[0] == 0])
#print counter_neg
counter_pos = len([score for score in list_score if score[0] == 1])
output_dict['bin_thres'] = find_best_roc_weight(method,sorted_score,counter_pos,counter_neg)
#print counter_pos
output_dict['bin_thres'] = find_best_roc_weight(method,sorted_score,counter_pos,counter_neg,cost_fn,cost_fp)
return output_dict
def find_best_roc_weight(method,a_list,a_num_positives,a_num_negatives,cost_fn,cost_fp):
    """Return the score threshold whose get_value() result is the largest.

    a_list holds (label, score) pairs that the caller has sorted by score in
    descending order; a label equal to 1 is counted as a positive, any other
    label as a negative.  The threshold starts at +inf (nothing predicted
    positive) and is lowered past one distinct score at a time.  A candidate
    threshold is the midpoint between two neighbouring distinct scores.

    method, cost_fn and cost_fp are forwarded unchanged to get_value().
    Returns +inf when no candidate beats the initial value and -inf when
    predicting everything positive is the best option.

    NOTE(review): the search always maximises get_value(); confirm that every
    supported method (in particular 'Cost') is "higher is better".
    """
    previous = float('inf')
    # Counts for the current threshold: xpos is passed to get_value() as TP,
    # xneg as TN.
    xpos = 0
    xneg = a_num_negatives
    the_best_value = get_value(method, xpos, xneg, a_num_positives,
                               a_num_negatives, cost_fn, cost_fp)
    # At the beginning the best threshold is +inf.
    best = previous
    for the_roc in a_list:
        current = the_roc[1]
        # Only evaluate between distinct scores; tied scores cannot be split
        # by a threshold.
        if current != previous:
            possible_best_value = get_value(method, xpos, xneg,
                                            a_num_positives, a_num_negatives,
                                            cost_fn, cost_fp)
            if possible_best_value > the_best_value:
                the_best_value = possible_best_value
                best = (previous + current) / float(2)
        # Lowering the threshold past this element predicts it positive:
        # a real positive becomes a true positive, anything else stops being
        # a true negative.
        if the_roc[0] == 1:
            xpos += 1
        else:
            xneg -= 1
        previous = current
    # Finally consider the threshold below every score.
    possible_best_value = get_value(method, xpos, xneg, a_num_positives,
                                    a_num_negatives, cost_fn, cost_fp)
    if possible_best_value > the_best_value:
        best = float('-inf')
    return best
def get_value(method, TP, TN, P, N):
def get_value(method, TP, TN, P, N,CN,CP):
if method == 'accuracy':
accuracy = (TP + TN) / float(P+N)
return accuracy
......@@ -128,6 +145,17 @@ def get_value(method, TP, TN, P, N):
recall = TP / float(P)
if method == 'recall':
return recall
Cn=N/(P+N)
Cp=P/(P+N)
'''print FP
print TN
print FN
print TP'''
#print TP
#print type (FP/(FP+TN))
Cost=((FP/(FP+TN))*float(CN))+((FN/(TP+FN))*float(CP))
if method=='Cost':
return Cost
if TP + FP > 0:
precision = TP / float(TP + FP)
if method == 'precision':
......@@ -138,4 +166,201 @@ def get_value(method, TP, TN, P, N):
F_measure = 0
else:
F_measure = 0
return F_measure
\ No newline at end of file
return F_measure
def Context_Bcalibration(input_dict):
    """Build a calibration map from non-calibrated scores.

    Reads input_dict['non_calibrated_scores'], a dict with the parallel lists
    'actual' and 'predicted'.  The pairs are ordered by ascending predicted
    score with sort_list2(), and a copy of the ordered 'actual' values is
    handed to search_for_position() to obtain the calibrated scores.

    Returns {'builded_scores': {'noncalibrated_scr': ordered predicted scores,
    'class': ordered actual values, 'calibrated_scr': calibrated scores}}.

    The 'learner' entry of input_dict is not used by this function.
    """
    non_calibrated_scores = input_dict['non_calibrated_scores']
    # sort_list2() reorders its arguments in place, so work on copies and
    # leave the caller's score lists untouched.
    predicted = list(non_calibrated_scores['predicted'])
    actual = list(non_calibrated_scores['actual'])
    ordered = sort_list2(predicted, actual)
    sorted_actual = ordered['actual']
    sorted_predicted = ordered['predicted']
    # search_for_position() modifies the list it receives; pass a copy so the
    # ordered class labels are preserved in the output.
    calibrated = search_for_position(list(sorted_actual))
    output_dict = {}
    output_dict['builded_scores'] = {
        'noncalibrated_scr': sorted_predicted,
        'class': sorted_actual,
        'calibrated_scr': calibrated,
    }
    return output_dict
def Context_Acalibration(input_dict):
    """Apply a calibration map to test scores and return log10 odds.

    input_dict['builded_scores'] supplies the parallel lists
    'noncalibrated_scr' (raw scores) and 'calibrated_scr' (calibrated
    values); input_dict['test_scores'] supplies 'actual' and 'predicted'.

    Consecutive equal calibrated values form a bin spanning the raw scores
    from the first to the last element of the run.  A test score inside a bin
    gets the bin's calibrated value; a score between two bins is linearly
    interpolated between their values; a score below the first bin or above
    the last bin gets the value of the nearest bin.  Values equal to 0 or 1
    are replaced by 0.1 and 0.98 so the odds stay finite, then each value p
    is converted to log10(p / (1 - p)).

    Returns {'calibrated_scores': {'actual': the test 'actual' list,
    'predicted': list of log10 odds}}.

    Raises ValueError when the calibration map is empty.

    NOTE(review): assumes 'noncalibrated_scr' is ascending and
    'calibrated_scr' is non-decreasing within [0, 1] -- confirm against the
    widget that builds the map.
    """
    import math

    test_scores = input_dict['test_scores']
    calibration = input_dict['builded_scores']
    calibrated_train = calibration['calibrated_scr']
    raw_train = calibration['noncalibrated_scr']
    if len(raw_train) == 0 or len(calibrated_train) == 0:
        raise ValueError("calibration map in 'builded_scores' is empty")

    # One bin per run of equal calibrated values.  The bin value is taken
    # from the run itself so the three lists always have the same length.
    bin_start = [raw_train[0]]
    bin_end = []
    bin_value = [calibrated_train[0]]
    for pos in range(1, len(calibrated_train)):
        if calibrated_train[pos] != calibrated_train[pos - 1]:
            bin_end.append(raw_train[pos - 1])
            bin_start.append(raw_train[pos])
            bin_value.append(calibrated_train[pos])
    bin_end.append(raw_train[len(raw_train) - 1])
    bin_count = len(bin_start)

    probabilities = []
    for score in test_scores['predicted']:
        calibrated = None
        # Every bin is inspected and a later match overrides an earlier one,
        # which decides scores that sit exactly on a shared boundary.
        for idx in range(bin_count):
            if bin_start[idx] <= score <= bin_end[idx]:
                calibrated = bin_value[idx]
            elif idx + 1 < bin_count and bin_end[idx] < score < bin_start[idx + 1]:
                gap = bin_start[idx + 1] - bin_end[idx]
                # float() keeps the ratio exact under Python 2 integer inputs.
                fraction = float(score - bin_end[idx]) / gap
                calibrated = bin_value[idx] + (bin_value[idx + 1] - bin_value[idx]) * fraction
        if calibrated is None:
            # Score lies outside the range seen when the map was built.
            if score < bin_start[0]:
                calibrated = bin_value[0]
            else:
                calibrated = bin_value[-1]
        # Keep the odds finite; the two constants are the ones this widget
        # has always used.
        if calibrated == 0:
            calibrated = 0.1
        elif calibrated == 1:
            calibrated = 0.98
        probabilities.append(calibrated)

    log_odds = [math.log10(float(p) / (1 - p)) for p in probabilities]

    output_dict = {}
    output_dict['calibrated_scores'] = {'actual': test_scores['actual'], 'predicted': log_odds}
    return output_dict
def search_for_position(list):
    """Make a sequence non-decreasing by pooling adjacent violators.

    Walks the values from left to right while keeping a stack of blocks
    (running total, element count).  Whenever the mean of the previous block
    is strictly greater than the mean of the newest block the two are merged;
    every element of a merged block is replaced by the block mean.  Elements
    that never take part in a merge keep their original value and type.

    The argument is modified in place and the same list object is returned.
    The parameter keeps its historical name, which shadows the builtin.
    """
    values = list
    blocks = []  # each entry is [total, count]
    for value in values:
        blocks.append([value, 1])
        # Compare means by cross-multiplication (counts are positive) so no
        # rounding is introduced while deciding whether to merge.
        while (len(blocks) > 1 and
               blocks[-2][0] * blocks[-1][1] > blocks[-1][0] * blocks[-2][1]):
            total, count = blocks.pop()
            blocks[-1][0] += total
            blocks[-1][1] += count

    pooled = []
    for total, count in blocks:
        if count == 1:
            pooled.append(total)
        else:
            pooled.extend([float(total) / count] * count)
    values[:] = pooled
    return values
def sort_list2(list1,list2):
    """Sort list1 ascending and reorder list2 with the same permutation.

    Both lists are updated in place (only the first len(list1) entries of
    list2 are touched) and are also returned as
    {'actual': list2, 'predicted': list1}.  Elements of list1 that compare
    equal keep their relative order.

    Raises IndexError, before anything is modified, when list2 is shorter
    than list1.
    """
    n = len(list1)
    # Sort positions rather than values so the permutation can be applied to
    # both lists.
    order = sorted(range(n), key=list1.__getitem__)
    sorted_first = [list1[i] for i in order]
    sorted_second = [list2[i] for i in order]
    list1[:] = sorted_first
    list2[:n] = sorted_second
    return {'actual': list2, 'predicted': list1}
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "43303106-bb42-467f-8ffe-6ac1f5a011e3",
"treeview_image": "",
"uid": "9b5eb554-a663-11e6-80f5-76304dec7eb7",
"windows_queue": false,
"package": "context-aware",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "",
"static_image": "",
"action": "Context_Acalibration",
"visualization_view": "",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"is_streaming": false,
"order": 1,
"name": "Apply calibration"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "9b5eb554-a663-11e6-80f5-76304dec7eb7",
"name": "builded scores",
"short_name": "scr",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "builded_scores",
"parameter": false,
"order": 1,
"uid": "9b5eb842-a663-11e6-80f5-76304dec7eb7"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "9b5eb554-a663-11e6-80f5-76304dec7eb7",
"name": "test scores",
"short_name": "tst",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "test_scores",
"parameter": false,
"order": 2,
"uid": "9b5eb982-a663-11e6-80f5-76304dec7eb7"
}
},
{
"model": "workflows.abstractoutput",
"fields": {
"widget": "9b5eb554-a663-11e6-80f5-76304dec7eb7",
"name": "calibrated scores",
"short_name": "scr",
"description": "",
"variable": "calibrated_scores",
"order": 1,
"uid": "9b5ebaae-a663-11e6-80f5-76304dec7eb7"
}
}
]
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "43303106-bb42-467f-8ffe-6ac1f5a011e3",
"treeview_image": "",
"uid": "b45e2a96-a666-11e6-80f5-76304dec7eb7",
"windows_queue": false,
"package": "context-aware",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "",
"static_image": "",
"action": "Context_Bcalibration",
"visualization_view": "",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"is_streaming": false,
"order": 1,
"name": "Build calibration"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "b45e2a96-a666-11e6-80f5-76304dec7eb7",
"name": "non calibrated scores",
"short_name": "ncs",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "non_calibrated_scores",
"parameter": false,
"order": 1,
"uid": "b45e2cbc-a666-11e6-80f5-76304dec7eb7"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "b45e2a96-a666-11e6-80f5-76304dec7eb7",
"name": "learner",
"short_name": "lrn",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "learner",
"parameter": false,
"order": 2,
"uid": "b45e2dac-a666-11e6-80f5-76304dec7eb7"
}
},
{
"model": "workflows.abstractoutput",
"fields": {
"widget": "b45e2a96-a666-11e6-80f5-76304dec7eb7",
"name": "builded scores",
"short_name": "scr",
"description": "",
"variable": "builded_scores",
"order": 1,
"uid": "b45e2e7e-a666-11e6-80f5-76304dec7eb7"
}
}
]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment