Commit f944eb14 authored by lafabregue's avatar lafabregue

integration of cluster constraints

parent a10fbd98
......@@ -29,7 +29,7 @@ public class CannotLinkConstraint extends Constraint {
@Override
public String toString() {
return Constraint.CANOT_LINK_TYPE+";"+firstIndex+";"+secondIndex;
return Constraint.CANNOT_LINK_TYPE+";"+firstIndex+";"+secondIndex;
}
/**
......@@ -49,4 +49,9 @@ public class CannotLinkConstraint extends Constraint {
public int getSecondIndex() {
return secondIndex;
}
/**
 * Gives the type id of this constraint.
 *
 * @return {@link Constraint#CANNOT_LINK_TYPE}
 */
@Override
public int getType() {
return Constraint.CANNOT_LINK_TYPE;
}
}
package jcl.clustering.constraints;
/**
 * Constraint that bounds the diameter of a cluster.
 * It stores the largest diameter a cluster is allowed to have.
 *
 * @author Baptiste LAFABREGUE
 *
 */
public class ClusterDiameterConstraint extends Constraint {

    /** Upper bound on the diameter of a cluster. */
    private final double diameter;

    /**
     * Builds the constraint from its bound.
     *
     * @param diameter
     *            the largest cluster diameter allowed
     */
    public ClusterDiameterConstraint(double diameter) {
        this.diameter = diameter;
    }

    /**
     * Gives the largest cluster diameter allowed.
     *
     * @return the maximum diameter
     */
    public double getDiameter() {
        return this.diameter;
    }

    /**
     * Gives the type id of this constraint.
     *
     * @return {@link Constraint#CLUSTER_DIAMETER_TYPE}
     */
    @Override
    public int getType() {
        return Constraint.CLUSTER_DIAMETER_TYPE;
    }

    /**
     * Export form of the constraint: the type id followed by the diameter,
     * separated by a semicolon.
     */
    @Override
    public String toString() {
        StringBuilder exported = new StringBuilder();
        exported.append(getType()).append(';').append(this.diameter);
        return exported.toString();
    }
}
......@@ -11,7 +11,9 @@ public abstract class Constraint {
/** All the types of possible constraints */
static final public int LABEL_TYPE = 1;
static final public int MUST_LINK_TYPE = 2;
static final public int CANOT_LINK_TYPE = 3;
static final public int CANNOT_LINK_TYPE = 3;
static final public int NB_CLUSTER_TYPE = 101;
static final public int CLUSTER_DIAMETER_TYPE = 102;
public Constraint() {
}
......@@ -19,8 +21,16 @@ public abstract class Constraint {
/**
* Retrieve the string representation of the constraints.
* This method is used for export purpose, so it should respect the following pattern :
* <type id of constraints>;<index(es) of elements>;<other properties>
* <type id of constraints>;<index(es) of concerned elements>;<other properties>
*/
@Override
public abstract String toString();
/**
 * Gives the type id of the constraint.
 * The implementations visible in this change return one of the
 * {@code *_TYPE} constants declared in this class.
 *
 * @return the type id
 */
public abstract int getType();
/**
 * Tells whether the constraint is flagged as pixel related, based on its
 * type id only.
 * Type ids up to 100 are reported as pixel related; greater ids (such as
 * {@link #NB_CLUSTER_TYPE} and {@link #CLUSTER_DIAMETER_TYPE}) are not.
 *
 * @return {@code true} if the type id is at most 100
 */
public boolean isPixelRelated() {
    return getType() <= 100;
}
}
......@@ -34,4 +34,9 @@ public class LabelConstraint extends Constraint {
return Constraint.LABEL_TYPE+";"+index+";"+classID;
}
/**
 * Gives the type id of this constraint.
 *
 * @return {@link Constraint#LABEL_TYPE}
 */
@Override
public int getType() {
return Constraint.LABEL_TYPE;
}
}
......@@ -49,4 +49,9 @@ public class MustLinkConstraint extends Constraint {
public int getSecondIndex() {
return secondIndex;
}
/**
 * Gives the type id of this constraint.
 *
 * @return {@link Constraint#MUST_LINK_TYPE}
 */
@Override
public int getType() {
return Constraint.MUST_LINK_TYPE;
}
}
package jcl.clustering.constraints;
/**
 * Constraint on the number of clusters.
 * It states that the number of clusters should lie within a given range.
 *
 * @author Baptiste LAFABREGUE
 *
 */
public class NbClusterConstraint extends Constraint {

    /** Lower bound of the range: the minimum number of clusters. */
    private final int minBoundry;

    /** Upper bound of the range: the maximum number of clusters. */
    private final int maxBoundry;

    /**
     * Builds the constraint from both bounds of the range.
     * The bounds are stored as given; no ordering check is performed.
     *
     * @param minBoundry
     *            the minimum number of clusters allowed
     * @param maxBoundry
     *            the maximum number of clusters allowed
     */
    public NbClusterConstraint(int minBoundry, int maxBoundry) {
        this.maxBoundry = maxBoundry;
        this.minBoundry = minBoundry;
    }

    /**
     * Gives the minimum number of clusters allowed.
     *
     * @return the minimum
     */
    public int getMinBoundry() {
        return this.minBoundry;
    }

    /**
     * Gives the maximum number of clusters allowed.
     *
     * @return the maximum
     */
    public int getMaxBoundry() {
        return this.maxBoundry;
    }

    /**
     * Gives the type id of this constraint.
     *
     * @return {@link Constraint#NB_CLUSTER_TYPE}
     */
    @Override
    public int getType() {
        return Constraint.NB_CLUSTER_TYPE;
    }

    /**
     * Export form of the constraint: the type id, the minimum and the
     * maximum, separated by semicolons.
     */
    @Override
    public String toString() {
        StringBuilder exported = new StringBuilder();
        exported.append(getType());
        exported.append(';').append(this.minBoundry);
        exported.append(';').append(this.maxBoundry);
        return exported.toString();
    }
}
......@@ -1707,4 +1707,22 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
}
return sampler.getWholeData(start, end);
}
/**
 * Return an estimation of the maximum distance between any pair of
 * elements in this set.
 * This is only an estimate: to get an accurate measure you have to
 * compute it yourself.
 *
 * @return the estimated maximum distance
 */
public abstract double getEstimatedMaxDistance();
/**
 * Return an estimation of the minimum distance between any pair of
 * elements in this set.
 * This is only an estimate: to get an accurate measure you have to
 * compute it yourself.
 *
 * @return the estimated minimum distance
 */
public abstract double getEstimatedMinDistance();
}
package jcl.data;
import java.util.Iterator;
import java.util.Random;
import jcl.clustering.ClusteringResult;
import jcl.data.attribute.Attribute;
......@@ -13,6 +14,7 @@ import jcl.data.attribute.AttributeNumerical;
import jcl.data.attribute.AttributeSequence;
import jcl.data.attribute.AttributeSymbolicCategorial;
import jcl.data.mask.Mask;
import jcl.utils.RandomizeTools;
import jcl.weights.GlobalWeights;
import jcl.weights.Weights;
......@@ -27,13 +29,16 @@ import jcl.weights.Weights;
*/
public class SimpleData extends Data {
/**
*
*/
private static final long serialVersionUID = 1L;
/** */
private static final long serialVersionUID = 1L;
/** The number of objects in the source data */
protected int wholeDataNbObjects;
protected int wholeDataNbObjects;
/** the estimated distance extrema in the Data */
private double estimatedMaxDistance = 0;
private double estimatedMinDistance = 0;
private boolean extremaComputed = false;
/**
* <p>
......@@ -90,6 +95,10 @@ public class SimpleData extends Data {
*/
this.model = data.model;
this.estimatedMinDistance = data.getEstimatedMinDistance();
this.estimatedMaxDistance = data.getEstimatedMaxDistance();
this.extremaComputed = true;
setSampler(data.getSampler());
setMask(data.getMask());
}
......@@ -138,6 +147,10 @@ public class SimpleData extends Data {
}
*/
this.model = data.model;
this.estimatedMinDistance = data.getEstimatedMinDistance();
this.estimatedMaxDistance = data.getEstimatedMaxDistance();
this.extremaComputed = true;
setSampler(data.getSampler());
setMask(data.getMask());
......@@ -187,6 +200,16 @@ public class SimpleData extends Data {
} else {
this.knownResult = null;
}
this.estimatedMinDistance = data[0].getEstimatedMinDistance();
this.estimatedMaxDistance = data[0].getEstimatedMaxDistance();
for(int i = 1 ; i < data.length ; i++) {
if (this.estimatedMinDistance > data[i].getEstimatedMinDistance())
this.estimatedMinDistance = data[i].getEstimatedMinDistance();
if (this.estimatedMaxDistance < data[i].getEstimatedMaxDistance())
this.estimatedMaxDistance = data[i].getEstimatedMaxDistance();
}
this.extremaComputed = true;
this.calcStats();
}
......@@ -239,6 +262,10 @@ public class SimpleData extends Data {
this.setKnownResult(ClusteringResult.gerenerateDefaultClusteringResult(null, clusterMap,
new GlobalWeights(this), data.getKnownResult().getNbClusters(), this, new int[0]));
}
this.estimatedMinDistance = data.getEstimatedMinDistance();
this.estimatedMaxDistance = data.getEstimatedMaxDistance();
this.extremaComputed = true;
this.calcStats();
......@@ -520,7 +547,9 @@ public class SimpleData extends Data {
public void setDataObject(int index, DataObject data) {
    // store the object in the current view, then align its id with its slot
    currentView[index] = data;
    data.setId(index);
    // the cached distance extrema may no longer hold: force a new estimation
    this.extremaComputed = false;
}
@Override
......@@ -533,6 +562,9 @@ public class SimpleData extends Data {
}
this.currentView[index] = data;
data.setId(index);
// it modifies the extrema values
extremaComputed = false;
}
@Override
......@@ -754,6 +786,52 @@ public class SimpleData extends Data {
return this.currentView[0];
}
/**
 * Gives the cached estimation of the maximum pairwise distance,
 * running the estimation first when the cache is flagged as stale.
 *
 * @return the estimated maximum distance
 */
@Override
public double getEstimatedMaxDistance() {
    if (extremaComputed) {
        return estimatedMaxDistance;
    }
    computeEstimatedExtrema();
    return estimatedMaxDistance;
}
/**
 * Gives the cached estimation of the minimum pairwise distance,
 * running the estimation first when the cache is flagged as stale.
 *
 * @return the estimated minimum distance
 */
@Override
public double getEstimatedMinDistance() {
    if (extremaComputed) {
        return estimatedMinDistance;
    }
    computeEstimatedExtrema();
    return estimatedMinDistance;
}
/**
 * Estimates the minimum and maximum pairwise distances of the current view
 * and stores them in {@code estimatedMinDistance} and
 * {@code estimatedMaxDistance}.
 * <p>
 * The distances are not computed over every element but over a random
 * subset of at most 300 elements. When the view is {@code null} or holds
 * fewer than two elements no distance can be measured, and the stored
 * extrema are left untouched.
 */
private void computeEstimatedExtrema() {
    // flag first, so the estimation is not re-run on every getter call
    extremaComputed = true;
    // at least two elements are required to measure a distance;
    // '||' is needed here: with '&&' a null view throws and a too short
    // view falls through to currentView[1]
    if (currentView == null || currentView.length < 2) {
        return;
    }

    // we don't compute the distance on the overall elements in this Data but on a subset
    int subSetSize = Math.min(300, currentView.length);
    int[] indexes = RandomizeTools.getReservoirSamplingIndex(currentView.length, subSetSize);

    // seed both extrema with the distance between the first two elements
    double seed = currentView[0].distance(currentView[1]);
    estimatedMaxDistance = seed;
    estimatedMinDistance = seed;

    // NOTE(review): both (i, j) and (j, i) are evaluated; if distance() is
    // symmetric, starting j at i + 1 would halve the work - confirm first
    for (int i = 0; i < indexes.length; i++) {
        for (int j = 0; j < indexes.length; j++) {
            if (j == i) {
                continue;
            }
            double distance = currentView[indexes[i]].distance(currentView[indexes[j]]);
            if (distance < estimatedMinDistance) {
                estimatedMinDistance = distance;
            }
            if (distance > estimatedMaxDistance) {
                estimatedMaxDistance = distance;
            }
        }
    }
}
private static final class StdDataIterator implements Iterator<DataObject> {
private int cursor;
private final int end;
......@@ -825,5 +903,4 @@ public class SimpleData extends Data {
throw new UnsupportedOperationException();
}
}
}
......@@ -24,7 +24,7 @@ import loci.formats.FormatException;
public class ImageSampler extends Sampler {
/** */
private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 1L;
/** Height of the images */
private int imageHeight;
......@@ -33,19 +33,22 @@ public class ImageSampler extends Sampler {
private int imageWidth;
/** The offset between two elements to pick */
private double offset = 1;
private double offset = 1;
/** The list of indexes used to build the sample */
private int[] selectedIndexes = null;
/** Object to read the image */
private Vector<StreamedImageReaderWrapper> readers = null;
private Vector<StreamedImageReaderWrapper> readers = null;
/** Allow to match the images with different geo references */
private Vector<double[]> geoTranslateVectors = null;
private Vector<double[]> geoTranslateVectors = null;
/** start coordinates */
private int startX = 0;
private int startY = 0;
private int startX = 0;
private int startY = 0;
private int[] mandatoriesIndexes = null;
private int[] mandatoriesIndexes = null;
/**
......@@ -177,7 +180,7 @@ public class ImageSampler extends Sampler {
@Override
public DataObject[] getDataObjects() {
if (mandatoriesIndexes.length > sizeByCount) {
if (mandatoriesIndexes != null && mandatoriesIndexes.length > sizeByCount) {
throw new IndexOutOfBoundsException("The number of mandatory pixels"
+ " is bigger than the expeted sample size");
}
......@@ -227,10 +230,12 @@ public class ImageSampler extends Sampler {
int x = 0;
int y = 0;
int channelCount = readers.get(0).getChannelCount();
selectedIndexes = new int[sizeByCount];
for(int i = 0; i < sizeByCount ; i++) {
x = ((int) index) % imageWidth;
y = ((int) index) / imageWidth;
selectedIndexes[i] = (int) index;
result[i] = new DataObject(channelCount);
try {
double [] pixel = readers.get(0).getPixel(x, y);
......@@ -266,11 +271,14 @@ public class ImageSampler extends Sampler {
int iterPos = 0;
Iterator<Integer> iter = mask.getIncludedIndexes();
double index = iter.next();
selectedIndexes = new int[sizeByCount];
for(int i = 0; i < sizeByCount ; i++) {
x = ((int) index) % imageWidth;
y = ((int) index) / imageWidth;
result[i] = new DataObject(channelCount);
selectedIndexes[i] = (int) index;
try {
double [] pixel = readers.get(0).getPixel(x, y);
for(int c = 0 ; c < readers.get(0).getChannelCount() ; c++) {
......@@ -299,11 +307,14 @@ public class ImageSampler extends Sampler {
int x = 0;
int y = 0;
int channelCount = readers.get(0).getChannelCount();
selectedIndexes = new int[sizeByCount];
for(int i = 0; i < sizeByCount ; i++) {
x = getStartX() + ((int) index) % imageWidth;
y = getStartY() + ((int) index) / imageWidth;
result[i] = new DataObject(1);
selectedIndexes[i] = x + y * imageWidth;
try {
if (geoTranslateVectors != null) {
// compute geo coordinate from first image
......@@ -360,11 +371,14 @@ public class ImageSampler extends Sampler {
int iterPos = 0;
Iterator<Integer> iter = mask.getIncludedIndexes();
double index = iter.next();
selectedIndexes = new int[sizeByCount];
for(int i = 0; i < sizeByCount ; i++) {
x = getStartX() + ((int) index) % imageWidth;
y = getStartY() + ((int) index) / imageWidth;
result[i] = new DataObject(1);
selectedIndexes[i] = x + y * imageWidth;
try {
if (geoTranslateVectors != null) {
// compute geo coordinate from first image
......@@ -966,4 +980,9 @@ public class ImageSampler extends Sampler {
Arrays.sort(mandatoriesIndexes);
}
/**
 * Gives the indexes recorded while the sample was read.
 * The internal array is returned as is (no copy is made). The field is
 * initialised to {@code null}, so {@code null} is presumably returned
 * when no sample has been read yet - verify against callers.
 *
 * @return the recorded indexes
 */
@Override
public int[] getSampleIndexes() {
return selectedIndexes;
}
}
......@@ -186,4 +186,12 @@ public abstract class Sampler implements MemoryFlush, Cloneable, Serializable {
* the set of indexes
*/
abstract public void setMandatoriesIndex(int[] indexes);
/**
 * Return the ordered list of indexes selected to compose the sample.
 * Indexes match the whole data set indexes.
 *
 * @return the indexes of the sampled elements, expressed as indexes of
 *         the whole data set
 */
abstract public int[] getSampleIndexes();
}
package jcl.utils;
/**
 * Helpers to draw random samples.
 */
public class RandomizeTools {

    /**
     * Create a set of sampled indexes based on the Reservoir algorithm,
     * it implements the Algorithm R by Jeffrey Vitter.
     * <p>
     * Every index of {@code [0, setSize)} has the same probability of being
     * part of the result, and no index appears twice.
     *
     * @param setSize
     *            the size of the original set
     * @param sampleSize
     *            the size of the desired sample; when it exceeds
     *            {@code setSize} the whole set is returned, so that no
     *            index outside of the set is ever produced
     *
     * @return the indexes of the sample's elements
     */
    public static int[] getReservoirSamplingIndex(int setSize, int sampleSize) {
        // a sample can not hold more elements than the set itself
        int reservoirSize = Math.min(sampleSize, Math.max(setSize, 0));
        int[] result = new int[reservoirSize];

        // Initialise with the reservoirSize first elements
        for (int i = 0; i < reservoirSize; i++) {
            result[i] = i;
        }

        for (int i = reservoirSize; i < setSize; i++) {
            // Algorithm R: the slot is drawn uniformly in [0, i], so that
            // element i enters the reservoir with probability
            // reservoirSize / (i + 1). Drawing in [0, setSize) instead
            // favours the first elements of the set.
            int assignedIndex = (int) (Math.random() * (i + 1));
            if (assignedIndex < reservoirSize) {
                result[assignedIndex] = i;
            }
        }
        return result;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment