Commit c6edaa9a authored by lafabregue

modification of samarah

parent 9a666520
......@@ -5,6 +5,7 @@ import jcl.data.DataObject;
import jcl.data.mask.IntArrayMask;
import jcl.data.mask.IntArrayMask.IntMaskIterator;
import jcl.data.mask.Mask;
import jcl.weights.Weights;
import jj2000.j2k.NotImplementedError;
public class LightCluster extends Cluster {
......@@ -48,6 +49,7 @@ public class LightCluster extends Cluster {
this.id = id;
this.clusterCenter = center;
this.cardinal = card;
this.weights = new Weights(center.getNbAttributes());
}
/**
......@@ -68,6 +70,8 @@ public class LightCluster extends Cluster {
this.result = result;
this.id = id;
this.clusterCenter = center;
this.cardinal = IntArrayMask.objectInMask(result.getClusterMap(), id);
this.weights = new Weights(center.getNbAttributes());
}
/**
......@@ -85,6 +89,8 @@ public class LightCluster extends Cluster {
public LightCluster(final ClusteringResult result, int id) {
this.result = result;
this.id = id;
this.cardinal = IntArrayMask.objectInMask(result.getClusterMap(), id);
this.weights = new Weights(result.getData().getOneDataObject().getNbAttributes());
}
/**
......@@ -101,6 +107,7 @@ public class LightCluster extends Cluster {
this.result = cluster.getResult();
this.id = cluster.getId();
this.clusterCenter = cluster.getCenter();
this.cardinal = cluster.cardinal;
}
......
package jcl.clustering.constraints;
import jcl.clustering.ClusteringResult;
/**
* Class that represents a cannot-link constraint.
* It specifies that two objects should not be assigned to the same class.
......@@ -54,4 +56,12 @@ public class CannotLinkConstraint extends Constraint {
public int getType() {
return Constraint.CANNOT_LINK_TYPE;
}
@Override
public double evaluate(ClusteringResult result) {
if(result.getClusterMap()[firstIndex] == result.getClusterMap()[secondIndex]) {
return 0;
}
return 1;
}
}
package jcl.clustering.constraints;
import jcl.clustering.ClusteringResult;
/**
* Class that represents the maximum cluster diameter constraint.
* It specifies the maximum diameter allowed for a cluster.
......@@ -42,4 +44,9 @@ public class ClusterDiameterConstraint extends Constraint {
return Constraint.CLUSTER_DIAMETER_TYPE;
}
@Override
public double evaluate(ClusteringResult result) {
return 0;
}
}
package jcl.clustering.constraints;
import jcl.clustering.ClusteringResult;
/**
* Class that represents a generic constraint.
*
......@@ -26,11 +28,33 @@ public abstract class Constraint {
@Override
public abstract String toString();
/**
* Returns the type of the constraint
*
* @return the type of the constraint (e.g. Constraint.LABEL_TYPE)
*/
public abstract int getType();
public boolean isPixelRelated() {
/**
* Evaluates a clustering result against this constraint
*
* @param result
* the result to evaluate
* @return a value between 0 and 1,
* 1 meaning the result fully satisfies the constraint,
* -1 if the constraint cannot be evaluated
*/
public abstract double evaluate(ClusteringResult result);
/**
* Indicates whether the constraint measures internal or external cluster quality
*
* @return true if internal, false if external
*/
public boolean isInternal() {
if (getType() > 100)
return false;
return true;
}
}
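As a usage note on the evaluate() contract above, here is a minimal sketch, not part of the library, of how a caller could aggregate weighted per-constraint scores while skipping constraints that return a negative value. Only Constraint.evaluate(ClusteringResult) and its 0..1 / -1 contract come from the class above; the helper class, its method name and the weight handling are illustrative assumptions.

// Illustrative sketch only: weighted aggregation of constraint scores.
// The helper class itself is hypothetical; only evaluate() comes from jcl.
import java.util.Vector;

import jcl.clustering.ClusteringResult;
import jcl.clustering.constraints.Constraint;

public class ConstraintScoreSketch {

    /** Returns the weighted average of the evaluable constraint scores, in [0, 1]. */
    public static double weightedScore(Vector<Constraint> constraints,
            Vector<Double> weights, ClusteringResult result) {
        double sum = 0.0;
        double totalWeight = 0.0;
        for (int i = 0; i < constraints.size(); i++) {
            double value = constraints.get(i).evaluate(result);
            // a negative value means the constraint cannot be evaluated on its own
            if (value >= 0) {
                double w = (weights == null || i >= weights.size()) ? 1.0 : weights.get(i);
                sum += value * w;
                totalWeight += w;
            }
        }
        return totalWeight > 0 ? sum / totalWeight : 0.0;
    }
}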
package jcl.clustering.constraints;
import java.util.Vector;
import jcl.clustering.ClusteringResult;
/**
* Class that represents a label constraint.
* It indicates that an object should be assigned to a specified class.
......@@ -48,4 +52,55 @@ public class LabelConstraint extends Constraint {
return Constraint.LABEL_TYPE;
}
@Override
public double evaluate(ClusteringResult result) {
// this constraint cannot be evaluated individually
return -1;
}
/**
* Computes the Rand index between two partitions: the first is given by
* the clustering result, the second is built from a set of labeled indexes
* @param constraints
* the set of constraints used to build the second partition
* @param weights
* the set of weights associated with the labels
* @param result
* the clustering result forming the first partition
* @return the quality evaluation, between 0 and 1
*/
public static double computeRandIndex(Vector<LabelConstraint> constraints,
Vector<Double> weights, ClusteringResult result) {
double totalWeights = 0.0;
for(Double d : weights) {
totalWeights += d;
}
return computeRandIndex(constraints, weights, totalWeights, result);
}
/**
* Computes the Rand index between two partitions: the first is given by
* the clustering result, the second is built from a set of labeled indexes
* @param constraints
* the set of constraints used to build the second partition
* @param weights
* the set of weights associated with the labels
* @param totalWeights
* the sum of the associated weights
* @param result
* the clustering result forming the first partition
* @return the quality evaluation, between 0 and 1
*/
public static double computeRandIndex(Vector<LabelConstraint> constraints,
Vector<Double> weights, double totalWeights, ClusteringResult result) {
double quality = 0.0;
// First, we build the second partition from the labels by grouping indexes by label
// (the pairwise Rand index computation itself is not implemented yet; see the sketch after this class)
return quality;
}
}
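Since computeRandIndex is left as a stub above, here is a hedged sketch of how the pairwise Rand index between the clustering partition and the label partition could be computed, ignoring the per-label weights for brevity. The accessors getIndex() and getLabel() on LabelConstraint are assumptions, as the real field names are not visible in this diff; getClusterMap() is assumed to return an int[] of cluster ids, as suggested by the array accesses in the constraint classes above.

// Sketch only: unweighted pairwise Rand index restricted to the labeled objects.
// LabelConstraint.getIndex() and getLabel() are hypothetical accessors.
import java.util.Vector;

import jcl.clustering.ClusteringResult;
import jcl.clustering.constraints.LabelConstraint;

public class RandIndexSketch {

    public static double randIndex(Vector<LabelConstraint> constraints, ClusteringResult result) {
        int[] clusterMap = result.getClusterMap();
        long agreements = 0;
        long pairs = 0;
        for (int i = 0; i < constraints.size(); i++) {
            for (int j = i + 1; j < constraints.size(); j++) {
                boolean sameCluster = clusterMap[constraints.get(i).getIndex()]
                        == clusterMap[constraints.get(j).getIndex()];
                boolean sameLabel = constraints.get(i).getLabel() == constraints.get(j).getLabel();
                // a pair agrees when both partitions group (or separate) it the same way
                if (sameCluster == sameLabel) {
                    agreements++;
                }
                pairs++;
            }
        }
        return pairs == 0 ? 1.0 : (double) agreements / pairs;
    }
}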
package jcl.clustering.constraints;
import jcl.clustering.ClusteringResult;
/**
* Class that represents a must-link constraint.
* It specifies that two objects should be assigned to the same class.
......@@ -54,4 +56,12 @@ public class MustLinkConstraint extends Constraint {
public int getType() {
return Constraint.MUST_LINK_TYPE;
}
@Override
public double evaluate(ClusteringResult result) {
if(result.getClusterMap()[firstIndex] == result.getClusterMap()[secondIndex]) {
return 1;
}
return 0;
}
}
package jcl.clustering.constraints;
import jcl.clustering.ClusteringResult;
/**
* Class that represents a number-of-clusters constraint.
* It specifies that the number of clusters should lie in a specific range.
......@@ -57,4 +59,11 @@ public class NbClusterConstraint extends Constraint {
return Constraint.NB_CLUSTER_TYPE;
}
@Override
public double evaluate(ClusteringResult result) {
return (maxBoundry - minBoundry) / (
Math.abs(result.getNbClusters() - minBoundry) +
Math.abs(maxBoundry - result.getNbClusters()));
}
}
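As a quick worked example of the formula above (numbers are illustrative): whenever the number of clusters lies inside [minBoundry, maxBoundry], the two absolute differences in the denominator sum to exactly maxBoundry - minBoundry and the score is 1; outside the range the score decays toward 0. For instance, with minBoundry = 3 and maxBoundry = 5, a result with 8 clusters scores (5 - 3) / (|8 - 3| + |5 - 8|) = 2 / 8 = 0.25. Note that when minBoundry == maxBoundry and the result hits that value exactly, the expression is 0 / 0, so a caller may want to guard that degenerate case.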
......@@ -38,7 +38,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
private static final long serialVersionUID = 1L;
/** files corresponding to the data */
protected Vector<String> dataFilesNames = null;
protected Vector<String> dataFilesNames = new Vector<String>();
// Description of the data and its attributes
/** set of data objects in the current view */
......@@ -96,11 +96,15 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
/** a mask to filter the Data */
private Mask mask = null;
/** the weight associated with each constraint */
private Vector<Double> constraintsWeights = null;
/** types to define the source represented by the Data */
static public int DEFAULT_TYPE = 0;
static public int NOT_IMAGE_FILE_TYPE = 1;
static public int IMAGE_TYPE = 2;
/**
* <p>
......@@ -1761,5 +1765,37 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
* @return the set of constraints
*/
public abstract Vector<Constraint> getConstraints();
/**
* Returns the weights associated with the constraints
*
* @return the constraintsWeights, or null if all constraints have the same weight
*/
public Vector<Double> getConstraintsWeights() {
return constraintsWeights;
}
/**
* Returns the weight associated with a constraint
*
* @param index
* the index of the constraint
* @return the constraint weight, 1 is returned if no weight is set
*/
public double getConstraintWeight(int index) {
if (constraintsWeights == null || index < 0 || index >= constraintsWeights.size()
|| constraintsWeights.get(index) == null) {
return 1;
}
return constraintsWeights.get(index);
}
/**
* Sets the weights associated with the constraints
*
* @param constraintsWeights
* the constraint weights to set, or null to disable weights
*/
public void setConstraintsWeights(Vector<Double> constraintsWeights) {
this.constraintsWeights = constraintsWeights;
}
}
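A brief usage sketch of the new constraint-weight accessors follows. It is a hypothetical example: it assumes SimpleData lives in jcl.data, relies on the SimpleData(List<DataObject>) and DataObject(int) constructors that appear later in this commit, and the weight values are made up.

// Hypothetical usage of Data.setConstraintsWeights / getConstraintWeight.
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import jcl.data.DataObject;
import jcl.data.SimpleData;

public class ConstraintWeightsUsage {

    public static void main(String[] args) {
        List<DataObject> objects = new ArrayList<DataObject>();
        objects.add(new DataObject(2)); // a 2-attribute object, value is arbitrary
        SimpleData data = new SimpleData(objects);

        // no weights set yet: every constraint falls back to a weight of 1
        System.out.println(data.getConstraintWeight(0)); // 1.0

        Vector<Double> weights = new Vector<Double>();
        weights.add(2.0); // give the first constraint twice the default importance
        data.setConstraintsWeights(weights);
        System.out.println(data.getConstraintWeight(0)); // 2.0
        System.out.println(data.getConstraintWeight(5)); // out of range: falls back to 1.0
    }
}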
......@@ -27,7 +27,7 @@ import jcl.weights.Weights;
/**
* <p>
* Basic implementation of the abstract class Data
* It is performent in access time but has a very high memory consumption
* It is performant in access time but has a very high memory consumption
* <p>
*
* @author Baptiste Lafabregue
......@@ -70,6 +70,7 @@ public class SimpleData extends Data {
for (int i = 0; i < data.getNbObjects(); i++) {
this.currentView.add((DataObject) data.getDataObject(i).clone());
}
this.wholeDataNbObjects = data.getNbObjects();
if (data.locked != null) {
this.locked = new boolean[data.locked.length];
......@@ -86,9 +87,11 @@ public class SimpleData extends Data {
}
this.inertia = data.inertia;
this.attributesNames = new String[data.attributesNames.length];
for (int i = 0; i < data.attributesNames.length; i++) {
this.attributesNames[i] = new String(data.attributesNames[i]);
if (data.attributesNames != null) {
this.attributesNames = new String[data.attributesNames.length];
for (int i = 0; i < data.attributesNames.length; i++) {
this.attributesNames[i] = new String(data.attributesNames[i]);
}
}
this.comments = new String(data.comments);
this.dataName = new String(data.dataName);
......@@ -123,6 +126,7 @@ public class SimpleData extends Data {
*/
public SimpleData(final Data data, List<DataObject> newView) {
this.currentView = newView;
this.wholeDataNbObjects = newView.size();
if (data.locked != null) {
this.locked = new boolean[data.locked.length];
......@@ -139,9 +143,11 @@ public class SimpleData extends Data {
}
this.inertia = data.inertia;
this.attributesNames = new String[data.attributesNames.length];
for (int i = 0; i < data.attributesNames.length; i++) {
this.attributesNames[i] = new String(data.attributesNames[i]);
if (data.attributesNames != null) {
this.attributesNames = new String[data.attributesNames.length];
for (int i = 0; i < data.attributesNames.length; i++) {
this.attributesNames[i] = new String(data.attributesNames[i]);
}
}
this.comments = new String(data.comments);
this.dataName = new String(data.dataName);
......@@ -176,6 +182,7 @@ public class SimpleData extends Data {
nbObjects += data[i].getNbObjects();
}
this.currentView = new ArrayList<DataObject>(nbObjects);
this.wholeDataNbObjects = nbObjects;
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < data[i].getNbObjects(); j++) {
......@@ -237,6 +244,7 @@ public class SimpleData extends Data {
} else {
data.locked = null;
}
this.wholeDataNbObjects = currentViewSize;
final boolean done[] = new boolean[data.getNbObjects()];
for (int i = 0; i < done.length; i++) {
......@@ -293,6 +301,7 @@ public class SimpleData extends Data {
nbAttributes++;
}
}
this.wholeDataNbObjects = data.getWholeDataNbObjects();
this.currentView = new ArrayList<DataObject>(data.getNbObjects());
for (int i = 0; i < data.getNbObjects(); i++) {
this.currentView.add(new DataObject(nbAttributes));
......@@ -343,6 +352,7 @@ public class SimpleData extends Data {
*/
public SimpleData(final List<DataObject> data) {
this.currentView = data;
this.wholeDataNbObjects = data.size();
}
/**
......@@ -353,6 +363,7 @@ public class SimpleData extends Data {
public SimpleData(final List<DataObject> data,Model model) {
this.currentView = data;
this.model = model;
this.wholeDataNbObjects = data.size();
}
/**
......@@ -366,6 +377,7 @@ public class SimpleData extends Data {
this.currentView = data;
this.knownResult = ClusteringResult.gerenerateDefaultClusteringResult(null, knownResult,
new GlobalWeights(this), nbClusters, this, null);
this.wholeDataNbObjects = data.size();
}
/**
......@@ -379,6 +391,7 @@ public class SimpleData extends Data {
this.knownResult = ClusteringResult.gerenerateDefaultClusteringResult(null, knownResult,
new GlobalWeights(this), nbClusters, this, null);
this.model = model;
this.wholeDataNbObjects = data.size();
}
/**
......@@ -389,6 +402,7 @@ public class SimpleData extends Data {
public SimpleData(final int nbObjects) {
this.currentView = new ArrayList<DataObject>(nbObjects);
this.locked = new boolean[nbObjects];
this.wholeDataNbObjects = nbObjects;
}
/**
......@@ -400,6 +414,7 @@ public class SimpleData extends Data {
this.currentView = new ArrayList<DataObject>(nbObjects);
this.locked = new boolean[nbObjects];
this.model = model;
this.wholeDataNbObjects = nbObjects;
}
@Override
......@@ -410,10 +425,10 @@ public class SimpleData extends Data {
@Override
public Data getMaskedData(Mask mask, boolean fromSample) {
SimpleData data = null;
if (fromSample & getSampler() != null) {
if(mask.getOverallCarinality() != getSampler().getSizeByCount()) {
throw new IndexOutOfBoundsException("The Mask does not match the underlying Data");
}
if (fromSample || getSampler() != null) {
// if(mask.getOverallCarinality() != getSampler().getSizeByCount()) {
// throw new IndexOutOfBoundsException("The Mask does not match the underlying Data");
// }
data = new SimpleData(this, getDataObjects(mask));
} else {
if(mask.getOverallCarinality() != this.wholeDataNbObjects) {
......
......@@ -80,7 +80,7 @@ public class AttributeMultiDimSequence extends Attribute {
/**
* these attributes and the distopt method are declared static for optimization reasons (to avoid memory reallocations at each call)
*/
private final static int MAX_SEQ_LENGTH = 100;
private final static int MAX_SEQ_LENGTH = 300;
private static double[][][] matriceW = new double[NB_THREADS][AttributeMultiDimSequence.MAX_SEQ_LENGTH][AttributeMultiDimSequence.MAX_SEQ_LENGTH];
......
package jcl.data.mask;
import java.util.Iterator;
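/**
 * A Mask implementation that masks nothing: every index in [0, cardinality)
 * is reported as included. It can be used as a neutral placeholder when a
 * Mask instance is required but no actual filtering should be applied.
 */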
public class DummyMask implements Mask {
private int cardinality;
public DummyMask(int cardinality) {
this.cardinality = cardinality;
}
@Override
public boolean isMasked(int index) {
return false;
}
@Override
public Iterator<Integer> getIncludedIndexes() {
return new DummyMaskIterator(cardinality);
}
@Override
public Iterator<Integer> getIncludedIndexes(int start, int end) {
return new DummyMaskIterator(start, end);
}
@Override
public int getCarinality() {
return this.cardinality;
}
@Override
public int getOverallCarinality() {
return this.cardinality;
}
@Override
public int getLastIndex(int start, int end) {
return end;
}
@Override
public int getCardinalityInInterval(int start, int end) {
return end - start;
}
public static final class DummyMaskIterator implements Iterator<Integer> {
private int cursor;
private final int end;
public DummyMaskIterator(final int cardinality) {
this.cursor = 0;
this.end = cardinality;
}
public DummyMaskIterator(final int start, final int end) {
this.cursor = start;
this.end = end;
}
public boolean hasNext() {
return this.cursor < end;
}
public Integer next() {
int next = -1;
if(cursor < end) {
next = cursor;
cursor++;
}
return next;
}
}
}
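A small usage sketch of DummyMask (the values are illustrative only):

// Hypothetical usage: a DummyMask behaves like an all-inclusive mask.
import java.util.Iterator;

import jcl.data.mask.DummyMask;
import jcl.data.mask.Mask;

public class DummyMaskUsage {

    public static void main(String[] args) {
        Mask mask = new DummyMask(5);
        // nothing is masked, so every index from 0 to 4 is included
        Iterator<Integer> it = mask.getIncludedIndexes();
        while (it.hasNext()) {
            System.out.print(it.next() + " "); // prints: 0 1 2 3 4
        }
        System.out.println();
        System.out.println(mask.getCarinality());                // 5
        System.out.println(mask.getCardinalityInInterval(1, 4)); // 4 - 1 = 3
    }
}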
......@@ -508,8 +508,8 @@ public class ImageSampler extends Sampler {
} else {
index = getIndex(i);
}
x = getStartX() + ((int) index) % (imageWidth+1);
y = getStartY() + ((int) index) / (imageWidth+1);
x = getStartX() + ((int) index) % imageWidth;
y = getStartY() + ((int) index) / imageWidth;
result.add(new DataObject(1));
List<double[]> values = new ArrayList<double[]>();
......
package jcl.evaluation.clustering;
import java.util.Iterator;
import java.util.Vector;
import jcl.Classification;
import jcl.clustering.Cluster;
import jcl.clustering.ClusteringResult;
import jcl.clustering.constraints.Constraint;
import jcl.clustering.constraints.LabelConstraint;
import jcl.data.Data;
import jcl.data.DataObject;
import jcl.evaluation.QualityIndex;
import jcl.utils.MathTools;
import jcl.weights.Weights;
/**
......@@ -64,12 +68,15 @@ public class ClusteringEvaluation {
/** Constant for the Jarque-Bera criterion */
public static final int JARQUE_BERA = 15;
/** Constant for the background knowledge criterion */
public static final int BACKGROUND_KNOWLEDGE = 16;
/** Set of the quality criteria names */
public static final String NAME[] = { "WG", "Square error", "Compactness",
"Fuzzy square error", "Predictivity", "Log-Likelihood Sum", "XB",
"Fuzzy XB", "Dunn", "Bezdek", "DB", "PE", "Modified Hubert",
"Tradeoff", "Rand from knowledge", "Jarque-Bera" };
"Tradeoff", "Rand from knowledge", "Jarque-Bera", "Background Knowledge"};
/**
* <p>
......@@ -164,6 +171,10 @@ public class ClusteringEvaluation {
quality = ClusteringEvaluation.getJarqueBera(clusteringResult);
optimization = QualityIndex.MIN;
break;
case ClusteringEvaluation.BACKGROUND_KNOWLEDGE:
quality = ClusteringEvaluation.getBackgroundKnowledge(clusteringResult, data);
optimization = QualityIndex.MAX;
break;
}
return new QualityIndex(name, quality, optimization);
......@@ -925,6 +936,60 @@ public class ClusteringEvaluation {
return 1 - compare.quality[ResultsComparison.RAND];
}
/**
* Computes an index that denotes the quality of the clustering with respect to
* the set of constraints associated with the data
*
* @param clusteringResult
* the clustering result to evaluate
* @param data
* the associated data
* @return the index value between 0 and 1, 1 meaning all constraints are fulfilled
*/
private static double getBackgroundKnowledge(ClusteringResult clusteringResult, Data data) {
double labelGlobalWeight = 0.0;
double totalWeight = 0.0;
double qualityValue = 0.0;
Vector<LabelConstraint> labelConstraints = new Vector<LabelConstraint>();
Vector<Double> labelConstraintsWeights = new Vector<Double>();
for (int i = 0 ; i < data.getConstraints().size() ; i++) {
// Label constraints are evaluated separately: they are not scored one by one
// but compared to the clustering as a whole, as a similarity between the
// clustering and the label classification, using the Rand index
if (data.getConstraints().get(i) instanceof LabelConstraint) {
labelConstraints.add((LabelConstraint) data.getConstraints().get(i));
labelConstraintsWeights.add(data.getConstraintWeight(i));
labelGlobalWeight += data.getConstraintWeight(i);
} else {
// other constraints are evaluated individually as a weighted average;
// constraints that cannot be evaluated (negative value) are ignored
double value = data.getConstraints().get(i).evaluate(clusteringResult);
if (value >= 0) {
totalWeight += data.getConstraintWeight(i);
qualityValue += value * data.getConstraintWeight(i);
}
}
}
// compute Rand index for label constraints
//labelGlobalWeight +=
qualityValue /= totalWeight;
// we readjust the index to ignore the first 0.3:
// scores below 0.3 map to 0, and [0.3, 1] is rescaled to [0, 1]
if (qualityValue < 0.3) {
return 0.0;
}
qualityValue = (qualityValue - 0.3) / 0.7;
return qualityValue;
}
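To make the rescaling at the end of getBackgroundKnowledge concrete (worked numbers only): a raw weighted score of 0.65 is reported as (0.65 - 0.3) / 0.7 = 0.5, a raw score of 1.0 stays at 1.0, and anything at or below 0.3 is reported as 0. Note also that in this version the label constraints collected in the loop are not yet folded into the returned value, since the Rand-index part is still commented out.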
/**
* <p>
* Computation of the index.
......@@ -1027,24 +1092,45 @@ public class ClusteringEvaluation {
final ClusteringResult clusteringResult, final int nInf,
final int nSup) {
double qe = 1.0;
final double inM = ((((double) (nSup - nInf)) / 2) + nInf);
final double inTaille = (nSup - nInf) / 2.0;
final double inM = inTaille + nInf;
if (nInf > 0 || nSup > 0) {
final int inTaille = nSup - nInf;
int diff = 0;
// old version
// int diff = 0;
// if ((nInf < 0) || (nSup < 0)) {
// qe = 1;
// } else if (inTaille == 0) {
// diff = Math.abs(clusteringResult.getNbClusters() - nInf);
// qe = 1 - 0.10 * diff;
// } else {
// diff = (int) Math.abs(clusteringResult.getNbClusters() - inM);
// if (diff <= inTaille / 2) {
// qe = 1 - 0.05 * diff;
// } else {
// qe = 1 - 0.05 * (inTaille / 2);
// diff = diff - (inTaille / 2);
// qe = qe - 0.1 * diff;
// }
// }
double diff = 0;
if ((nInf < 0) || (nSup < 0)) {
qe = 1;
} else if (inTaille == 0) {
diff = Math.abs(clusteringResult.getNbClusters() - nInf);
qe = 1 - 0.10 * diff;
qe = 1.0;
} else {
diff = (int) Math.abs(clusteringResult.getNbClusters() - inM);
if (diff <= inTaille / 2) {
qe = 1 - 0.05 * diff;
diff = Math.abs(clusteringResult.getNbClusters() - inM);
if (diff <= inTaille) {
qe = 1;
} else {
qe = 1 - 0.05 * (inTaille / 2);
diff = diff - (inTaille / 2);
qe = qe - 0.1 * diff;
diff = diff - inTaille;
double variance = inTaille;
if (variance == 0) {
variance = nInf * 0.05;
}
// we want a probability under 10% when the difference is outside the variance
double z = diff/variance/2;
if (z < 0)
z = 1.0;
qe = 1 - MathTools.normalApproximation(z);
}
}
}
......@@ -1148,4 +1234,6 @@ public class ClusteringEvaluation {
return qualities;
}
}
package jcl.io.results;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import jcl.Classification;
import jcl.clustering.ClusteringResult;
import jcl.io.arff.DataFileWriter;
/**
* <p>
* Description: An instance of this class allows writing a result (from a
* classification) to a CSV file.
* </p>
*
* @author Baptiste LAFABREGUE
*
*/
public class CSVResultWriter extends DataFileWriter {