Commit eea95973 authored by Anze Vavpetic

Merge branch 'clustering' into dev

parents e566b7c3 2f42d512
......@@ -895,6 +895,184 @@
"description": ""
}
},
{
"pk": 103,
"model": "workflows.abstractwidget",
"fields": {
"category": 19,
"treeview_image": "",
"name": "Hierarchical clustering",
"is_streaming": false,
"uid": "12b0d57d-1cbc-4e9d-8087-2dcd8ced2a27",
"interaction_view": "cforange_hierarchical_clustering",
"image": "",
"package": "cforange",
"static_image": "",
"post_interact_action": "cforange_hierarchical_clustering_finished",
"user": null,
"visualization_view": "",
"action": "cforange_hierarchical_clustering",
"wsdl_method": "",
"wsdl": "",
"interactive": true,
"has_progress_bar": false,
"order": 1,
"description": ""
}
},
{
"pk": 210,
"model": "workflows.abstractinput",
"fields": {
"widget": 103,
"name": "Distance Matrix",
"short_name": "dm",
"uid": "bc238f40-be7b-48c4-a526-f17d461a1e10",
"default": "",
"required": false,
"multi": false,
"parameter_type": null,
"variable": "dm",
"parameter": false,
"order": 1,
"description": ""
}
},
{
"pk": 211,
"model": "workflows.abstractinput",
"fields": {
"widget": 103,
"name": "Linkage",
"short_name": "lin",
"uid": "9804f098-f6b6-4858-9d83-df2a45e6174a",
"default": "0",
"required": false,
"multi": false,
"parameter_type": "select",
"variable": "linkage",
"parameter": true,
"order": 2,
"description": ""
}
},
{
"pk": 64,
"model": "workflows.abstractoption",
"fields": {
"uid": "e5a0e0cd-aefe-4d2f-b2dd-26c54aecda41",
"abstract_input": 211,
"value": "1",
"name": "Average linkage"
}
},
{
"pk": 66,
"model": "workflows.abstractoption",
"fields": {
"uid": "e7ab085c-59a2-469b-b4e0-aeef40685a1f",
"abstract_input": 211,
"value": "3",
"name": "Complete linkage"
}
},
{
"pk": 63,
"model": "workflows.abstractoption",
"fields": {
"uid": "e751309c-0f24-409e-b21e-94692c3fa5cf",
"abstract_input": 211,
"value": "0",
"name": "Single linkage"
}
},
{
"pk": 65,
"model": "workflows.abstractoption",
"fields": {
"uid": "16d7b4ca-b14d-4a93-bda3-ca3478efd316",
"abstract_input": 211,
"value": "2",
"name": "Ward's linkage"
}
},
{
"pk": 212,
"model": "workflows.abstractinput",
"fields": {
"widget": 103,
"name": "Visualization type",
"short_name": "viz",
"uid": "f0c1c618-f7c4-4b9c-8780-d03e1ab29834",
"default": "circle",
"required": false,
"multi": false,
"parameter_type": "select",
"variable": "visualization",
"parameter": true,
"order": 3,
"description": ""
}
},
{
"pk": 67,
"model": "workflows.abstractoption",
"fields": {
"uid": "ea7c03eb-b596-4b7f-82ea-840ca44daaad",
"abstract_input": 212,
"value": "circle",
"name": "Circle"
}
},
{
"pk": 68,
"model": "workflows.abstractoption",
"fields": {
"uid": "4bd2311e-3051-451e-8c47-5bb526a885e6",
"abstract_input": 212,
"value": "tree",
"name": "Tree"
}
},
{
"pk": 114,
"model": "workflows.abstractoutput",
"fields": {
"widget": 103,
"name": "Centroids",
"short_name": "ctr",
"variable": "centroids",
"uid": "da9b8816-0ed4-4159-8a7e-ff915d5eb2d2",
"order": 1,
"description": ""
}
},
{
"pk": 115,
"model": "workflows.abstractoutput",
"fields": {
"widget": 103,
"name": "Selected examples",
"short_name": "sel",
"variable": "selected_examples",
"uid": "2055c99d-b2cd-4270-94e7-688eb86ae00b",
"order": 2,
"description": ""
}
},
{
"pk": 116,
"model": "workflows.abstractoutput",
"fields": {
"widget": 103,
"name": "Unselected examples",
"short_name": "uns",
"variable": "unselected_examples",
"uid": "d23df6ab-e4ec-4c09-8d4b-9457f666efca",
"order": 3,
"description": ""
}
},
{
"pk": 12,
"model": "workflows.category",
......
from django.shortcuts import render
import json
def cforange_filter_integers(request,input_dict,output_dict,widget):
    """Render the interaction page of the "filter integers" widget.

    The template receives the widget itself and the ``intList`` entry of
    ``input_dict``; ``output_dict`` is accepted but not used here.
    """
    context = {
        'widget': widget,
        'intList': input_dict['intList'],
    }
    return render(request, 'interactions/cforange_filter_integers.html', context)
def cforange_hierarchical_clustering(request,input_dict,output_dict,widget):
    """Render the interactive dendrogram page for the clustering widget.

    Reads the distance matrix from ``input_dict['dm']``, the linkage index
    from ``input_dict['linkage']`` and the visualization type from
    ``input_dict['visualization']``.  The clustering is converted to a nested
    dict (one dict per node) and handed to the template as a JSON string.

    ``output_dict`` is accepted but not used here.
    """
    import orange
    from library import Clustering

    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    root = Clustering.hierarchical_clustering(linkage, matrix)

    # When the matrix items expose a domain, every domain entry becomes a
    # selectable label; otherwise each item contributes only its ``name``.
    try:
        attributes = [x.name for x in matrix.items.domain]
        dm_examples = True
    except Exception:
        # Deliberate best-effort fallback (any failure means "no domain"),
        # but without swallowing KeyboardInterrupt/SystemExit.
        attributes = ['attribute']
        dm_examples = False

    def leaf_values(node):
        """Return the label values of a leaf, floats formatted to 3 decimals."""
        item = matrix.items[node.first]
        values = {}
        for attribute in attributes:
            value = item[attribute].value if dm_examples else item.name
            if isinstance(value, float):
                value = "%.3f" % value
            values[attribute] = value
        return values

    def build_hierarchy(node, is_root=False):
        """Convert a cluster node and its two subtrees into a plain dict."""
        is_leaf = not node.branches
        if is_leaf:
            children = []
        else:
            children = [build_hierarchy(node.left), build_hierarchy(node.right)]
        return {
            'name' : 'root' if is_root else '',
            'id' : node.first if is_leaf else -1,
            'height' : 0 if is_leaf else node.height,
            'children' : children,
            'values' : leaf_values(node) if is_leaf else {},
            'leaf' : is_leaf
        }

    # The template writes this string verbatim into a <script> element, so a
    # data value containing "</script>" would terminate the script early.
    # "<\/" is an equivalent JSON/JavaScript escape for "</".
    hierarchy = json.dumps(build_hierarchy(root, is_root=True)).replace('</', '<\\/')
    # NOTE: the context key is spelled 'vizualization' because the template
    # reads it under that name.
    return render(request, 'interactions/cforange_hierarchical_clustering.html', {'widget' : widget, 'hierarchy' : hierarchy, 'attributes':attributes,'vizualization':input_dict['visualization']})
......@@ -169,7 +169,6 @@ def cforange_confusion_matrix(input_dict):
cm = orngStat.confusionMatrices(results,classIndex=classIndex)
if len(cm)==1:
cm = cm[0]
print cm
output_dict = {}
output_dict['cm']=cm
return output_dict
......@@ -380,4 +379,62 @@ def cforange_attribute_distance(input_dict):
matrix[a1, a2] = (1.0 - statc.spearmanr(f1, filleds[a2])[0]) / 2.0
output_dict = {}
output_dict['dm']=matrix
return output_dict
\ No newline at end of file
return output_dict
def cforange_hierarchical_clustering(input_dict):
    """Return the widget's three outputs, each set to ``None``.

    ``input_dict`` is not inspected.  Presumably the real values are produced
    later by ``cforange_hierarchical_clustering_finished`` -- confirm against
    the widget's post-interaction configuration.
    """
    output_names = ('centroids', 'selected_examples', 'unselected_examples')
    return dict.fromkeys(output_names)
class Clustering:
    """Clustering helpers shared by the widget view and library code."""

    # Attribute names on ``orange.HierarchicalClustering``; the position in
    # this tuple is the integer linkage index accepted below
    # (0 = single, 1 = average, 2 = Ward's, 3 = complete).
    _LINKAGES = ("Single", "Average", "Ward", "Complete")

    @staticmethod
    def hierarchical_clustering(linkage, distance_matrix):
        """Run Orange's hierarchical clustering on ``distance_matrix``.

        ``linkage`` is an index into ``_LINKAGES``.  Returns whatever
        ``orange.HierarchicalClustering(...)`` returns.

        Raises ``IndexError`` when ``linkage`` is out of range.  Negative
        values are rejected as well, so that -1 cannot silently pick the last
        linkage through Python's negative indexing.
        """
        names = Clustering._LINKAGES
        if not 0 <= linkage < len(names):
            raise IndexError("linkage must be between 0 and %d, got %r"
                             % (len(names) - 1, linkage))
        # Imported only after validation so a bad index fails fast.
        import orange
        method = getattr(orange.HierarchicalClustering, names[linkage])
        return orange.HierarchicalClustering(distance_matrix, linkage=method)
def cforange_hierarchical_clustering_finished(postdata, input_dict, output_dict):
    """Build the widget outputs from the clusters picked in the dialog.

    ``postdata['selected_nodes'][0]`` must hold a JSON list of
    ``[id, selected, cluster]`` triples; ``id`` indexes the items of the
    distance matrix in ``input_dict['dm']``.  ``output_dict`` is not used.

    Returns a dict with the keys ``centroids``, ``selected_examples`` and
    ``unselected_examples``.  All three are ``None`` when the matrix items
    are not an ``orange.ExampleTable``.

    Raises ``Exception`` with a user-facing message when no usable selection
    was posted.
    """
    import json

    matrix = input_dict['dm']
    # Missing field -> TypeError, empty list -> IndexError, bad JSON -> ValueError.
    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except (TypeError, ValueError, IndexError):
        raise Exception('Please select a threshold for determining clusters.')

    import orange
    if not isinstance(matrix.items, orange.ExampleTable):
        # Attribute distance
        return {'centroids' : None, 'selected_examples' : None, 'unselected_examples' : None}

    # Sorted so the enum values and the centroid rows come out in a stable order.
    cluster_ids = sorted(set(cluster for _, _, cluster in selected_nodes))
    selected_clusters = sorted(set(cluster for _, selected, cluster in selected_nodes if selected))

    clustVar = orange.EnumVariable(str('Cluster'), values=["Cluster %d" % i for i in cluster_ids] + ["Other"])
    origDomain = matrix.items.domain
    domain = orange.Domain(origDomain.attributes, origDomain.classVar)
    domain.addmeta(orange.newmetaid(), clustVar)
    domain.addmetas(origDomain.getmetas())

    # Build tables with selected / unselected examples
    selected_table, unselected_table = orange.ExampleTable(domain), orange.ExampleTable(domain)
    for node_id, selected, cluster in selected_nodes:
        new_ex = orange.Example(domain, matrix.items[node_id])
        if selected:
            new_ex[clustVar] = clustVar("Cluster %d" % cluster)
            selected_table.append(new_ex)
        else:
            new_ex[clustVar] = clustVar("Other")
            unselected_table.append(new_ex)

    # Build table of centroids: one row per selected cluster.  The loop is
    # empty when nothing was selected, leaving an empty table.
    centroids = orange.ExampleTable(selected_table.domain)
    for cluster in selected_clusters:
        label = "Cluster %d" % cluster
        clusterEx = orange.ExampleTable([ex for ex in selected_table if ex[clustVar] == label])
        # Attribute statistics
        contstat = orange.DomainBasicAttrStat(clusterEx)
        discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
        # Per attribute: the average if a basic statistic exists, else the
        # distribution's modus, else the unknown marker "?".
        values = [cs.avg if cs else (ds.modus() if ds else "?") for cs, ds in zip(contstat, discstat)]
        example = orange.Example(centroids.domain, values)
        example[clustVar] = clustVar(label)
        centroids.append(example)

    return {'centroids' : centroids, 'selected_examples' : selected_table, 'unselected_examples' : unselected_table}
<div id="widgetinteract-{{widget.pk}}" rel="{{widget.pk}}" class="widgetinteractdialog" title="{{widget.name}} interaction" width="1050" height="800">
<div id="clustering_canvas{{widget.pk}}"></div>
<form>
<input type="hidden" name="widget_id" value="{{widget.pk}}" style="display:none;" />
<input type="hidden" name="selected_height" value="0" style="display:none;"/>
<input type="hidden" class="selected_nodes{{widget.pk}}" name="selected_nodes" value="" style="display:none;"/>
</form>
<div>
Display label:
<select class="attributeselect">
<option value="-1">None</option>
{% for attribute in attributes %}
<option value="{{forloop.counter0}}">{{attribute}}</option>
{% endfor %}
</select>
</div>
<div id="legend{{widget.pk}}">
</div>
</div>
<style type="text/css">
/* Ring path drawn around the dendrogram in the "circle" visualization. */
path.arc {
/* cursor: move;*/
fill: #fff;
}
/* Highlighted variant of the ring; presumably toggled by the script when a
   threshold is picked (not visible in this part of the file). */
path.selectedarc {
fill: red;
}
/* Marker circle appended to every node group. */
.node circle {
fill: #fff;
stroke: black;
stroke-width: 1.5px;
}
/* Node groups carry the labels; they do not receive pointer events. */
.node {
font-size: 10px;
pointer-events: none;
}
/* Paths connecting the nodes of the hierarchy. */
.link {
fill: none;
stroke: #ccc;
stroke-width: 1.5px;
}
</style>
<script type="text/javascript">
var hierarchy = {{hierarchy|safe}};
draw(1000, 600, hierarchy, "#clustering_canvas{{widget.pk}}");
function draw(w, h, hierarchy, target) {
var vizualization = '{{vizualization}}';
var colors = ['red','blue','yellow','green','magenta','cyan','black'];
var attributes = [];
{% for attribute in attributes %}
attributes.push('{{attribute}}');
{% endfor %}
var rx = w/2,
ry = h/2,
m0,
rotate = 0,
delta = 30;
var root = [rx, ry];
var width = ry - delta;
if (vizualization == "tree") {
width=w-100;
height=width*2;
ry=1000;
}
if (vizualization == "circle") {
var cluster = d3.layout.cluster()
.size([360, ry - delta])
.sort(null);
} else {
var cluster = d3.layout.cluster()
.size([h-50, ry - delta])
.sort(null);
}
if (vizualization=="circle") {
var diagonal = d3.svg.diagonal.radial()
.projection(function(d) { return [d.y, d.x / 180 * Math.PI]; });
} else {
var diagonal = d3.svg.diagonal()
.projection(function(d) { return [d.y, d.x]; });
}
var svg = d3.select(target).append("div")
// .style("width", w + "px")
// .style("height", (h+50) + "px")
.on("click", select_clusters);
if (vizualization=="circle") {
var vis = svg.append("svg:svg")
.attr("width", w)
.attr("height", h+50)
.append("svg:g")
.attr("transform", "translate(" + rx + "," + ry + ")");
} else {
var vis = svg.append("svg:svg")
.attr("width", w)
.attr("height", h)
.append("svg:g")
.attr("transform", "translate(40,10)");
}
if (vizualization=="circle") {
vis.append("svg:path")
.attr("class", "arc")
.attr("d", d3.svg.arc().innerRadius(ry - delta).outerRadius(ry).startAngle(0).endAngle(2 * Math.PI))
.on("mousedown", mousedown);
}
var nodes = cluster.nodes(hierarchy);
var max_height = 0;
for (node in nodes) {
if (nodes[node].height>max_height) {
max_height=nodes[node].height;
}
}
for (node in nodes) {
nodes[node].y = width-(nodes[node].height/max_height) * width;
}
var link = vis.selectAll("path.link")
.data(cluster.links(nodes))
.enter().append("svg:path")
.attr("class", "link")
.attr("d", diagonal);
if (vizualization=="circle") {
var node = vis.selectAll("g.node")
.data(nodes)
.enter().append("svg:g")
.attr("class", "node")
.attr("transform", function(d) { return "rotate(" + (d.x - 90) + ")translate(" + d.y + ")"; });
node.append("svg:circle")
.attr("r", 3);
} else {
var node = vis.selectAll("g.node")
.data(nodes)
.enter().append("svg:g")
.attr("class", "node")
.attr("transform", function(d) { return "translate(" + d.y + "," + d.x + ")"; })
node.append("svg:circle")
.attr("r", 3);
}
$(".node").mouseenter(function() {
alert("test");
});
if (vizualization=="circle") {
for (a in attributes) {
node.append("svg:text")
.attr("dx", function(d) { return d.x < 180 ? 8 : -8; })
.attr("dy", ".31em")
.attr("text-anchor", function(d) { return d.x < 180 ? "start" : "end"; })
.attr("transform", function(d) { return d.x < 180 ? null : "rotate(180)"; })
.attr("class", "attribute"+a+" attributes")
.text(function(d) { return d.values[attributes[a]]; });
}
} else {
for (a in attributes) {
node.append("svg:text")
.attr("dx", -10)
.attr("dy", 3)
.style("text-anchor", function(d) { return d.children ? "end" : "start"; })
.attr("class", "attribute"+a+" attributes")
.text(function(d) { return d.values[attributes[a]]; });
}
}
$(".attributes").hide();
$(".attribute"+$(".attributeselect").val()).show();
$(".attributeselect").change(function () {
$(".attributes").hide();
$(".attribute"+$(".attributeselect").val()).show();
})
d3.select(window)
.on("mousemove", mousemove)
.on("mouseup", mouseup);
var cluster_index=0;
var clusters = [];
var counters = [];
/**
 * Recursively assign a cluster id to every node of the hierarchy.
 *
 * A node whose height is below the threshold (or is 0) joins `cluster`, and
 * so does its whole subtree. Any other node gets cluster -1 and each of its
 * children starts a new cluster numbered from the shared `cluster_index`.
 *
 * The first time a cluster id is seen it is registered in the enclosing
 * `clusters`, `counters` and `selected_clusters` arrays.
 */
function color_nodes(node, threshold, cluster) {
    // Declared locally: an undeclared loop variable would be an implicit
    // global shared by every level of the recursion.
    var children = node.children || [];
    var i;
    if (node.height < threshold || node.height == 0) {
        node.cluster = cluster;
        if (clusters.indexOf(cluster) == -1) {
            clusters.push(cluster);
            counters.push(0);
            selected_clusters.push(true);
        }
        for (i = 0; i < children.length; i++) {
            color_nodes(children[i], threshold, cluster);
        }
    } else {
        node.cluster = -1;
        for (i = 0; i < children.length; i++) {
            color_nodes(children[i], threshold, ++cluster_index);
        }
    }
}
hsv2rgb = function(h,s,v) {
var rgb, i, data = [];
if (s === 0) {
rgb = [v,v,v];
} else {
h = h / 60;
i = Math.floor(h);
data = [v*(1-s), v*(1-s*(h-i)), v*(1-s*(1-(h-i)))];
switch(i) {
case 0:
rgb = [v, data[2], data[0]];
break;
case 1:
rgb = [data[1], v, data[0]];
break;
case 2:
rgb = [data[0], v, data[2]];
break;
case 3:
rgb = [data[0], data[1], v];
break;
case 4:
rgb = [data[2], data[0], v];
break;
default:
rgb = [v, data[0], data[1]];
break;
}
}
return '#' + rgb.map(function(x){
return ("0" + Math.round(x*255).toString(16)).slice(-2);
}).join('');
};
var threshold_arc = null;
var root_node = nodes[0];
function display_nodes() {
return_nodes = [];
node.selectAll("circle")
.style('fill', function(d) {
if (d.cluster==-1) {
return 'white';
} else {
if (d.children.length==0) {
counters[clusters.indexOf(d.cluster)]++;
}
var sat = 1;
if (!selected_clusters[clusters.indexOf(d.cluster)]) {