Commit 4792ecc9 authored by balanche's avatar balanche

Ajout de la gestion de moyennes dans le DistanceModel

parent a7fef5f8
...@@ -33,9 +33,11 @@ import jcl.utils.exceptions.MethodNotImplementedException; ...@@ -33,9 +33,11 @@ import jcl.utils.exceptions.MethodNotImplementedException;
* hybride (methode Samarah). * hybride (methode Samarah).
* </p> * </p>
* *
* Implementation progressable avec liste de sous-processus progressables à suivre par Jean-Noël Balanche
*
* @author WEMMERT Cedric * @author WEMMERT Cedric
* *
* Implementation progressable avec liste de sous-processus progressable à suivre par Jean-Noël Balanche *
*/ */
public abstract class Classification extends Observable implements public abstract class Classification extends Observable implements
......
This diff is collapsed.
...@@ -868,7 +868,7 @@ public class AttributeSequence extends Attribute { ...@@ -868,7 +868,7 @@ public class AttributeSequence extends Attribute {
} else { } else {
return 2; return 2;
} }
} }
} }
public static int getMode() { public static int getMode() {
......
package jcl.data.distance.average; package jcl.data.distance.average;
public interface Average { import jcl.data.attribute.Attribute;
/**
} * Generic Average interface
* @author Jean-Noël Balanche
*
* @param <A> Attribute
* @param <P> Parameters
*/
public interface Average<A extends Attribute, P extends AverageParameter> {

    /**
     * Computes the representative ("mean") attribute of a set of attributes.
     *
     * @param attributesTab the attributes to average; implementations cast each
     *                      entry to the concrete attribute type they handle
     * @param oldCenter     the previous center; whether it is used (or may be
     *                      {@code null}) depends on the implementation
     * @param p             the implementation-specific averaging parameters
     * @return the averaged attribute
     */
    A mean(Attribute[] attributesTab, A oldCenter, P p);
}
\ No newline at end of file
package jcl.data.distance.average;
import jcl.data.attribute.Attribute;
import jcl.data.attribute.AttributeNumerical;
/**
 * Averaging method for numerical attributes: the arithmetic mean of their values.
 *
 * @author Jean-Noël Balanche
 */
public class AverageEuclidean implements Average<AttributeNumerical, EmptyAverageParameter> {

    /** The single shared instance of this class. */
    private static AverageEuclidean instance = new AverageEuclidean();

    /** Private so that the class can only be obtained through {@link #getInstance()}. */
    private AverageEuclidean() {
    }

    /**
     * Gives access to the singleton.
     *
     * @return the unique instance
     */
    public static AverageEuclidean getInstance() {
        return instance;
    }

    /**
     * Returns a new attribute holding the arithmetic mean of the given values.
     * Neither {@code oldCenter} nor {@code p} is read. There is no guard for an
     * empty array: the division is then 0/0 in floating point.
     *
     * @param attributesTab attributes to average; each entry is cast to
     *                      {@link AttributeNumerical}
     * @param oldCenter     unused
     * @param p             unused
     * @return a new attribute built from sum / number of entries
     */
    @Override
    public AttributeNumerical mean(Attribute[] attributesTab, AttributeNumerical oldCenter,
            EmptyAverageParameter p) {
        double total = 0;
        for (int idx = 0; idx < attributesTab.length; idx++) {
            final AttributeNumerical current = (AttributeNumerical) attributesTab[idx];
            total += current.getValue();
        }
        // The divisor is a double, exactly as a floating-point counter would be.
        final double count = attributesTab.length;
        return new AttributeNumerical(total / count);
    }
}
package jcl.data.distance.average;
/**
 * Marker interface (no members) for the parameter objects handed to an
 * averaging method; implemented by {@code EmptyAverageParameter} and
 * {@code ParameterDBAMean}.
 */
public interface AverageParameter {}
package jcl.data.distance.average;
/**
 * Parameter object carrying no data, used by the Euclidean means, which need
 * no configuration.
 *
 * @author balanche
 */
public class EmptyAverageParameter implements AverageParameter {

    /** The single shared instance. */
    private static EmptyAverageParameter instance = new EmptyAverageParameter();

    /** Private so that the class can only be obtained through {@link #getInstance()}. */
    private EmptyAverageParameter() {
    }

    /**
     * Gives access to the singleton.
     *
     * @return the unique instance
     */
    public static EmptyAverageParameter getInstance() {
        return instance;
    }
}
package jcl.data.distance.average;
/**
 * Parameters used by the DBA mean: the number of refinement iterations and the
 * working matrices that the averaging code fills while aligning sequences.
 * <p>
 * All matrices are allocated once, as squares of side {@code maxSeqLength},
 * so that they can be reused from one alignment to the next.
 *
 * @author Jean-Noël Balanche
 */
public class ParameterDBAMean implements AverageParameter {

    /** Direction code: no predecessor. */
    public static final int RIEN = -1;

    /** Direction code: the best predecessor is the diagonal cell (i-1, j-1). */
    public static final int DIAGONALE = 0;

    /** Direction code: the best predecessor is the left cell (i, j-1). */
    public static final int GAUCHE = 1;

    /** Direction code: the best predecessor is the upper cell (i-1, j). */
    public static final int HAUT = 2;

    /** Number of refinement iterations. */
    private int nbIterations;

    /** Cumulated-cost matrix. */
    private double[][] matriceW;

    /** Matrix storing the best direction for each cell. */
    private int[][] matriceChoix;

    /** For each cell, the length of the optimal path associated with that cell. */
    private int[][] optimalPathLength;

    /**
     * Cumulated deviation of the path from the diagonal. Useful for
     * DTWDistanceToDiagonal.
     */
    private double[][] distanceToDiagonalMatrix;

    /**
     * Allocates every working matrix with {@code maxSeqLength} rows and columns.
     * <p>
     * Fields are assigned directly rather than through the public setters, so
     * that a subclass overriding a setter is never called on a partially
     * constructed object.
     *
     * @param maxSeqLength side of the square working matrices
     * @param nbIterations number of refinement iterations
     */
    public ParameterDBAMean(int maxSeqLength, int nbIterations) {
        this.matriceW = new double[maxSeqLength][maxSeqLength];
        this.matriceChoix = new int[maxSeqLength][maxSeqLength];
        this.optimalPathLength = new int[maxSeqLength][maxSeqLength];
        this.distanceToDiagonalMatrix = new double[maxSeqLength][maxSeqLength];
        this.nbIterations = nbIterations;
    }

    /**
     * @return the number of refinement iterations
     */
    public int getNbIterations() {
        return nbIterations;
    }

    /**
     * @param nbIterations the number of refinement iterations to set
     */
    public void setNbIterations(int nbIterations) {
        this.nbIterations = nbIterations;
    }

    /**
     * @return the matriceChoix
     */
    public int[][] getMatriceChoix() {
        return matriceChoix;
    }

    /**
     * @param matriceChoix the matriceChoix to set
     */
    public void setMatriceChoix(int[][] matriceChoix) {
        this.matriceChoix = matriceChoix;
    }

    /**
     * @return the optimalPathLength
     */
    public int[][] getOptimalPathLength() {
        return optimalPathLength;
    }

    /**
     * @param optimalPathLength the optimalPathLength to set
     */
    public void setOptimalPathLength(int[][] optimalPathLength) {
        this.optimalPathLength = optimalPathLength;
    }

    /**
     * @return the distanceToDiagonalMatrix
     */
    public double[][] getDistanceToDiagonalMatrix() {
        return distanceToDiagonalMatrix;
    }

    /**
     * @param distanceToDiagonalMatrix the distanceToDiagonalMatrix to set
     */
    public void setDistanceToDiagonalMatrix(double[][] distanceToDiagonalMatrix) {
        this.distanceToDiagonalMatrix = distanceToDiagonalMatrix;
    }

    /**
     * @return the matriceW
     */
    public double[][] getMatriceW() {
        return matriceW;
    }

    /**
     * @param matriceW the matriceW to set
     */
    public void setMatriceW(double[][] matriceW) {
        this.matriceW = matriceW;
    }
}
package jcl.data.distance.average.sequential;
import static jcl.data.distance.Tools.distanceTo;
import jcl.data.attribute.Attribute;
import jcl.data.attribute.AttributeMultiDimSequence;
import jcl.data.distance.average.Average;
import jcl.data.distance.average.ParameterDBAMean;
import jcl.data.sequence.DoubleTabArrayList;
/**
* Class implementing the averaging method to use for sequential attributes.
* @author Jean-Noël Balanche
*
*/
public class AverageMDDBAMean implements Average<AttributeMultiDimSequence,ParameterDBAMean> {
/**
* Singleton
*/
private static AverageMDDBAMean instance = new AverageMDDBAMean();
/**
* Empty constructor
*/
private AverageMDDBAMean(){}
/**
* To get the singleton
* @return the unique instance
*/
public static AverageMDDBAMean getInstance(){
return instance;
}
@Override
public AttributeMultiDimSequence mean(Attribute[] attributesTab,
AttributeMultiDimSequence oldCenter, ParameterDBAMean p) {
AttributeMultiDimSequence res = null;
if (oldCenter != null) {
res = DBAMean(attributesTab, oldCenter, p);
} else {
int alea = (int) (Math.round(Math.random() * (attributesTab.length - 1)));
res = DBAMean(attributesTab, (AttributeMultiDimSequence)attributesTab[alea], p);
}
for (int i = 0; i < p.getNbIterations(); i++) {
res = DBAMean(attributesTab, res, p);
}
return res;
}
/**
* L'idée de cette méthode est de calculer la moyenne d'un ensemble de séquences, comme la mise-à-jour de l'ancienne moyenne, en calculant le
* barycentre des points ayant été associé à chaque point de cette dernière.
*
* @param oldCenter ancien centre du cluster
* @param attributesTab séquences associées au cluster
* @param p paramètres utilisés pour le calcul
* @return le nouveau centre, comme l'approximation des séquences
*/
public AttributeMultiDimSequence DBAMean(Attribute[] attributesTab,
AttributeMultiDimSequence oldCenter, ParameterDBAMean p) {
/**
* Cette liste contiendra la liste des tuples associés à chaque point de l'ancien centre
*/
final DoubleTabArrayList[] tupleAssociation = new DoubleTabArrayList[oldCenter.getNbTuples()];
for (int i = 0; i < tupleAssociation.length; i++) {
tupleAssociation[i] = new DoubleTabArrayList(attributesTab.length);
}
int nbTuplesAverageSeq, i, j, indiceRes;
double res = 0.0;
final int tailleCenter = oldCenter.getNbTuples();
int tailleT;
/**
* on construit les associations
*/
for (final Attribute TC : attributesTab) {
final AttributeMultiDimSequence T = (AttributeMultiDimSequence) TC;
if(T == null) {
break;
}
tailleT = T.getNbTuples();
// Remplissage première colonne et première ligne de la
// matrice
p.getMatriceW()[0][0] = distanceTo(oldCenter.sequence[0], T.sequence[0]);
p.getMatriceChoix()[0][0] = ParameterDBAMean.RIEN;
p.getOptimalPathLength()[0][0] = 0;
for (i = 1; i < tailleCenter; i++) {
p.getMatriceW()[i][0] = p.getMatriceW()[i - 1][0] + distanceTo(oldCenter.sequence[i], T.sequence[0]);
p.getMatriceChoix()[i][0] = ParameterDBAMean.HAUT;
p.getOptimalPathLength()[i][0] = i;
}
for (j = 1; j < tailleT; j++) {
p.getMatriceW()[0][j] = p.getMatriceW()[0][j - 1] + distanceTo(T.sequence[j], oldCenter.sequence[0]);
p.getMatriceChoix()[0][j] = ParameterDBAMean.GAUCHE;
p.getOptimalPathLength()[0][j] = j;
}
// Calcul de la matrice
for (i = 1; i < tailleCenter; i++) {
for (j = 1; j < tailleT; j++) {
indiceRes = AttributeMultiDimSequence.ArgMin3(p.getMatriceW()[i - 1][j - 1], p.getMatriceW()[i][j - 1], p.getMatriceW()[i - 1][j]);
p.getMatriceChoix()[i][j] = indiceRes;
switch (indiceRes) {
case ParameterDBAMean.DIAGONALE:
res = p.getMatriceW()[i - 1][j - 1];
p.getOptimalPathLength()[i][j] = p.getOptimalPathLength()[i - 1][j - 1] + 1;
break;
case ParameterDBAMean.GAUCHE:
res = p.getMatriceW()[i][j - 1];
p.getOptimalPathLength()[i][j] = p.getOptimalPathLength()[i][j - 1] + 1;
break;
case ParameterDBAMean.HAUT:
res = p.getMatriceW()[i - 1][j];
p.getOptimalPathLength()[i][j] = p.getOptimalPathLength()[i - 1][j] + 1;
break;
}
p.getMatriceW()[i][j] = res + distanceTo(oldCenter.sequence[i], T.sequence[j]);
}
}
/*
* +1 car la case contient le nb de 'flêches' dans la matrice, il faut ajouter 1 pour le nb d'éléments
*/
nbTuplesAverageSeq = p.getOptimalPathLength()[tailleCenter - 1][tailleT - 1] + 1;
i = tailleCenter - 1;
j = tailleT - 1;
for (int t = nbTuplesAverageSeq - 1; t >= 0; t--) {
tupleAssociation[i].add(T.sequence[j]);
switch (p.getMatriceChoix()[i][j]) {
case ParameterDBAMean.DIAGONALE:
i = i - 1;
j = j - 1;
break;
case ParameterDBAMean.GAUCHE:
j = j - 1;
break;
case ParameterDBAMean.HAUT:
i = i - 1;
break;
}
}
}
final double[][] tuplesAverageSeq = new double[tailleCenter][oldCenter.getTupleDimension()];
for (int t = 0; t < tailleCenter; t++) {
tuplesAverageSeq[t] = moyenne(tupleAssociation[t].toArray());
}
final AttributeMultiDimSequence newCenter = new AttributeMultiDimSequence(tuplesAverageSeq);
return newCenter;
}
public static double[] moyenne(final double[]... tab) {
double[] res = new double[tab[0].length];
for (int j = 0; j < tab[0].length; j++) {
res[j] = 0.0;
}
for (int i = 0; i < tab.length; i++) {
for (int j = 0; j < tab[0].length; j++) {
res[j] += tab[i][j];
}
}
for (int j = 0; j < tab[0].length; j++) {
res[j] /= tab.length;
}
return res;
}
}
package jcl.data.distance.average.sequential;
import java.util.Arrays;
import jcl.data.attribute.Attribute;
import jcl.data.attribute.AttributeMultiDimSequence;
import jcl.data.distance.average.Average;
import jcl.data.distance.average.EmptyAverageParameter;
/**
 * Euclidean mean for multi-dimensional sequential attributes: every component
 * of every tuple is averaged position by position across the sequences.
 *
 * @author Jean-Noël Balanche
 */
public class AverageMDeuclideanMean implements Average<AttributeMultiDimSequence, EmptyAverageParameter> {

    /** The single shared instance. */
    private static AverageMDeuclideanMean instance = new AverageMDeuclideanMean();

    /** Private so that the class can only be obtained through {@link #getInstance()}. */
    private AverageMDeuclideanMean() {
    }

    /**
     * Gives access to the singleton.
     *
     * @return the unique instance
     */
    public static AverageMDeuclideanMean getInstance() {
        return instance;
    }

    /**
     * Returns a new sequence whose value at (tuple, dimension) is the mean of
     * the values found at that position in every input sequence.
     * <p>
     * The number of tuples and the tuple dimension are read from the first
     * sequence. Neither {@code oldCenter} nor {@code p} is read.
     *
     * @param attributesTab sequences to average; each entry is cast to
     *                      {@link AttributeMultiDimSequence}
     * @param oldCenter     unused
     * @param p             unused
     * @return a new sequence built from the position-wise means
     */
    @Override
    public AttributeMultiDimSequence mean(Attribute[] attributesTab,
            AttributeMultiDimSequence oldCenter, EmptyAverageParameter p) {
        final int count = attributesTab.length;

        // Phase 1: extract the raw data of every sequence.
        final double[][][] sequences = new double[count][][];
        int next = 0;
        for (final Attribute attribute : attributesTab) {
            sequences[next++] = ((AttributeMultiDimSequence) attribute).getSequence();
        }

        final int nbTuples = sequences[0].length;
        final int nbDim = sequences[0][0].length;

        // Phase 2: accumulate position by position (the array starts at 0.0).
        final double[][] sums = new double[nbTuples][nbDim];
        for (final double[][] sequence : sequences) {
            for (int tuple = 0; tuple < nbTuples; tuple++) {
                for (int dim = 0; dim < nbDim; dim++) {
                    sums[tuple][dim] += sequence[tuple][dim];
                }
            }
        }

        // Phase 3: turn the sums into means.
        for (final double[] row : sums) {
            for (int dim = 0; dim < nbDim; dim++) {
                row[dim] /= count;
            }
        }
        return new AttributeMultiDimSequence(sums);
    }
}
...@@ -4,6 +4,8 @@ import java.io.Serializable; ...@@ -4,6 +4,8 @@ import java.io.Serializable;
import jcl.data.Data; import jcl.data.Data;
import jcl.data.DataObject; import jcl.data.DataObject;
import jcl.data.DistanceModel;
import jcl.data.distance.average.AverageParameter;
import jcl.weights.Weights; import jcl.weights.Weights;
/** /**
...@@ -104,8 +106,10 @@ public abstract class KmeansSeed implements Serializable { ...@@ -104,8 +106,10 @@ public abstract class KmeansSeed implements Serializable {
* <p> * <p>
* Redefinition du centre de la classe (barycentre des objets). * Redefinition du centre de la classe (barycentre des objets).
* </p> * </p>
* @param model model used for the classification.
* @param averageParameters averaging parameters
*/ */
public abstract void redefine(); public abstract void redefine(DistanceModel model, AverageParameter[] averageParameters);
/** /**
* Return the related Data * Return the related Data
......
...@@ -329,13 +329,11 @@ public class LearningResultKmeans extends LearningResult { ...@@ -329,13 +329,11 @@ public class LearningResultKmeans extends LearningResult {
if (onSample) { if (onSample) {
tabThreads[th] = new ThreadedAffectation(clusterMap, tabThreads[th] = new ThreadedAffectation(clusterMap,
data.iterator(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread ), data.iterator(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread ),
// data.getModel(), params.parameters[th],
params.getModel(), params.parameters[th], params.getModel(), params.parameters[th],
th * nbObjectsPerThread); th * nbObjectsPerThread);
} else { } else {
tabThreads[th] = new ThreadedAffectation(clusterMap, tabThreads[th] = new ThreadedAffectation(clusterMap,
data.getWholeSourceDataObjects(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread ), data.getWholeSourceDataObjects(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread ),
// data.getModel(), params.parameters[th],
params.getModel(), params.parameters[th], params.getModel(), params.parameters[th],
th * nbObjectsPerThread); th * nbObjectsPerThread);
} }
...@@ -344,13 +342,11 @@ public class LearningResultKmeans extends LearningResult { ...@@ -344,13 +342,11 @@ public class LearningResultKmeans extends LearningResult {
if (onSample) { if (onSample) {
tabThreads[nbThreads - 1] = new ThreadedAffectation(clusterMap, tabThreads[nbThreads - 1] = new ThreadedAffectation(clusterMap,
data.iterator((nbThreads - 1) * nbObjectsPerThread, nbObjects ), data.iterator((nbThreads - 1) * nbObjectsPerThread, nbObjects ),
// data.getModel(), params.parameters[nbThreads - 1],
params.getModel(), params.parameters[nbThreads - 1], params.getModel(), params.parameters[nbThreads - 1],
(nbThreads - 1) * nbObjectsPerThread); (nbThreads - 1) * nbObjectsPerThread);
} else { } else {
tabThreads[nbThreads - 1] = new ThreadedAffectation(clusterMap, tabThreads[nbThreads - 1] = new ThreadedAffectation(clusterMap,
data.getWholeSourceDataObjects((nbThreads - 1) * nbObjectsPerThread, nbObjects), data.getWholeSourceDataObjects((nbThreads - 1) * nbObjectsPerThread, nbObjects),
// data.getModel(), params.parameters[nbThreads - 1],
params.getModel(), params.parameters[nbThreads - 1], params.getModel(), params.parameters[nbThreads - 1],
(nbThreads - 1) * nbObjectsPerThread); (nbThreads - 1) * nbObjectsPerThread);
} }
...@@ -862,7 +858,7 @@ public class LearningResultKmeans extends LearningResult { ...@@ -862,7 +858,7 @@ public class LearningResultKmeans extends LearningResult {
incProgress(); incProgress();
i++; i++;
} }
System.out.println("Fin thread progressM="+progressM+" progress="+progress+" i="+i); // System.out.println("Fin thread progressM="+progressM+" progress="+progress+" i="+i);
} }
public int getThreadGlobalDistance() { public int getThreadGlobalDistance() {
...@@ -877,8 +873,9 @@ public class LearningResultKmeans extends LearningResult { ...@@ -877,8 +873,9 @@ public class LearningResultKmeans extends LearningResult {
public void run() { public void run() {
KmeansSeed s = nextSeed(); KmeansSeed s = nextSeed();
ParametersKmeans paramsk = (ParametersKmeans) params;
while (s != null) { while (s != null) {
s.redefine(); s.redefine(paramsk.getModel(),paramsk.getAverageParameters());
s = nextSeed(); s = nextSeed();
} }
} }
......
...@@ -5,6 +5,7 @@ import java.util.Vector; ...@@ -5,6 +5,7 @@ import java.util.Vector;
import jcl.data.DataObject; import jcl.data.DataObject;
import jcl.data.distance.Distance; import jcl.data.distance.Distance;
import jcl.data.distance.DistanceParameter; import jcl.data.distance.DistanceParameter;
import jcl.data.distance.average.AverageParameter;
import jcl.learning.LearningMethod; import jcl.learning.LearningMethod;
import jcl.learning.LearningParameters; import jcl.learning.LearningParameters;
import jcl.weights.ClassificationWeights; import jcl.weights.ClassificationWeights;
...@@ -64,6 +65,13 @@ public class ParametersKmeans extends LearningParameters { ...@@ -64,6 +65,13 @@ public class ParametersKmeans extends LearningParameters {
* (see the {@link Distance} framework) * (see the {@link Distance} framework)
*/ */
public DistanceParameter[][] parameters=null; public DistanceParameter[][] parameters=null;
/**
* Parameters used for averaging seeds.
* Dimension 1: #attributes.
* (see the {@link Average} framework)
*/
public AverageParameter[] averageParameters = null;
/** /**
...@@ -103,7 +111,7 @@ public class ParametersKmeans extends LearningParameters { ...@@ -103,7 +111,7 @@ public class ParametersKmeans extends LearningParameters {
} }
public ParametersKmeans(final int nbClusters, final int nbIters, int nbThreads, public ParametersKmeans(final int nbClusters, final int nbIters, int nbThreads,
final ClassificationWeights weights,DistanceParameter[][] parameters) { final ClassificationWeights weights,DistanceParameter[][] parameters, AverageParameter[] averageParameters) {
super(weights); super(weights);
this.nbClusters = nbClusters; this.nbClusters = nbClusters;
this.nbIters = nbIters; this.nbIters = nbIters;
...@@ -111,6 +119,7 @@ public class ParametersKmeans extends LearningParameters { ...@@ -111,6 +119,7 @@ public class ParametersKmeans extends LearningParameters {
this.expBeta = 1. / (this.beta - 1); this.expBeta = 1. / (this.beta - 1);
this.nbThreads = nbThreads; this.nbThreads = nbThreads;
this.parameters = parameters; this.parameters = parameters;
this.averageParameters = averageParameters;
} }
...@@ -295,4 +304,8 @@ public class ParametersKmeans extends LearningParameters { ...@@ -295,4 +304,8 @@ public class ParametersKmeans extends LearningParameters {
public int getNbThreads() { public int getNbThreads() {
return nbThreads; return nbThreads;
} }
public AverageParameter[] getAverageParameters() {
return averageParameters;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment