Commit aa6c4d76 authored by lafabregue's avatar lafabregue

Add Import of clustering from KMeans

parent 6443dc91
......@@ -36,6 +36,9 @@ import jcl.utils.exceptions.MethodNotImplementedException;
public abstract class Classification extends Observable implements
Serializable, Cloneable {
/** */
private static final long serialVersionUID = 1L;
/** whether remote (RMI) computation is enabled */
transient private static boolean use_Rmi = false;
......
......@@ -3,6 +3,7 @@ package jcl.learning;
import java.io.Serializable;
import java.util.Vector;
import jcl.data.DataObject;
import jcl.weights.ClassificationWeights;
/**
......@@ -14,11 +15,16 @@ import jcl.weights.ClassificationWeights;
*/
public abstract class LearningParameters implements Serializable {
/** */
private static final long serialVersionUID = 1L;
/** attribute weighting */
public ClassificationWeights weights = null;
public ClassificationWeights weights = null;
// whether hierarchical clustering is used
public boolean hClustering = false;
public boolean hClustering = false;
private Vector<DataObject> samples = null;
/**
*
......@@ -69,7 +75,7 @@ public abstract class LearningParameters implements Serializable {
* for the learning method
* @return an instance of the learning method for these parameters
*/
public abstract LearningMethod getLearningMethodInstance(Vector samples);
public abstract LearningMethod getLearningMethodInstance(Vector<DataObject> samples);
/**
* <p>
......@@ -108,4 +114,24 @@ public abstract class LearningParameters implements Serializable {
public void setHClustering(final boolean clustering) {
this.hClustering = clustering;
}
/**
* Returns the samples that should be used to initialize the learning method
*
* @return the samples, or null if none
*/
public Vector<DataObject> getSamples() {
return samples;
}
/**
*
* Sets the samples that should be used to initialize the learning method
*
* @param samples
* the samples, or null to use none
*/
public void setSamples(Vector<DataObject> samples) {
this.samples = samples;
}
}
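A minimal usage sketch of the new accessors, outside the diff: params stands for any already-constructed LearningParameters subclass and data for an existing jcl.data.Data instance (both are assumptions, not part of this commit).

// Pre-select the DataObjects that should seed the learning method.
Vector<DataObject> chosen = new Vector<DataObject>();
chosen.add(data.getDataObject(0));
chosen.add(data.getDataObject(42));

params.setSamples(chosen);                 // stored until the learning method is built
LearningMethod method = params.getLearningMethodInstance(params.getSamples());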
......@@ -12,120 +12,132 @@ import java.util.Vector;
public class ClassifierUtils {
/** Constant representing a classifier of unknown type */
public final static int UNKNOWN = -1;
public final static int UNKNOWN = -1;
/** Constant representing a vote-type classifier */
// mmm
// public final static int vote = 305;
/** Constant representing a Kmeans classifier */
public final static int KMEANS = 0;
public final static int KMEANS = 0;
/** Constant representing a seeded Kmeans classifier */
public final static int SEEDEDKMEANS = 007;
public final static int SEEDEDKMEANS = 007;
/** Constant representing a GAW-Kmeans classifier */
public final static int GWKMEANS = 1;
public final static int GWKMEANS = 1;
/** Constant representing a LAW-Kmeans classifier */
public final static int LWKMEANS = 2;
public final static int LWKMEANS = 2;
/** Constant representing a Fuzzy-Cmeans classifier */
public final static int FCM = 3;
public final static int FCM = 3;
/** Constant representing a GAW-Fuzzy-Cmeans classifier */
public final static int GWFCM = 4;
public final static int GWFCM = 4;
/** Constant representing a LAW-Fuzzy-Cmeans classifier */
public final static int LWFCM = 5;
public final static int LWFCM = 5;
/** Constant representing a Cobweb classifier */
public final static int COBWEB = 6;
public final static int COBWEB = 6;
/** Constant representing a SOM classifier */
public final static int SOM = 7;
public final static int SOM = 7;
/** Constant representing a PERCEPTRON classifier */
public final static int PERCEPTRON = 8;
public final static int PERCEPTRON = 8;
/** Constant representing an EM classifier */
public final static int EM = 9;
public final static int EM = 9;
/** Constant representing a GNG classifier */
public final static int GNG = 10;
public final static int GNG = 10;
/** Constant representing a K-medoids classifier */
public final static int KMEDOIDS = 11;
public final static int KMEDOIDS = 11;
/**
* Constant representing a SAMARAH classifier: do not modify, it corresponds to the index of
* the SAMARAH description in the methodDescriptions array
*/
public final static int SAMARAH = 20;
public final static int SAMARAH = 20;
/**
* Constant representing a SAMARAH classifier, new version: do not modify, it corresponds to
* the index of the SAMARAH description in the methodDescriptions array
*/
public final static int SAMARAHV4 = 21;
public final static int SAMARAHV4 = 21;
public final static int MACLAW = 25;
public final static int MACLAW = 25;
/**
* Constant representing a SAMARAH classifier: do not modify, it corresponds to the index of
* the SAMARAH description in the methodDescriptions array
*/
public final static int VOTING = 30;
public final static int VOTING = 30;
/** Constant representing a RANDOM classifier */
public final static int RANDOM = 100;
public final static int RANDOM = 100;
/** Constant for evidence */
public final static int EVIDENCE = 122;
public final static int EVIDENCE = 122;
/** Constant representing an ENTROPY-type wrapper */
public final static int FEATURE_RANKING = 201;
public final static int FEATURE_RANKING = 201;
/** Constant representing a hybrid classifier */
public final static int HYBRID = 300;
public final static int HYBRID = 300;
/** Constant representing a MACMM genetic classifier */
public final static int MACMM = 401;
public final static int MACMM = 401;
/** Constant representing a CAWGA genetic classifier */
public final static int CAWGA = 402;
public final static int CAWGA = 402;
/** Constant representing a MACLAE genetic classifier */
public final static int MACLAE = 403;
public final static int MACLAE = 403;
/** Constant representing a MACLAC genetic classifier */
public final static int MACLAC = 404;
public final static int MACLAC = 404;
/** Constant representing a genetic classifier of any kind */
public final static int GENETIC = 499;
public final static int GENETIC = 499;
/** Constant representing a unified classifier */
public final static int UNIMULTI_STRUCTURALFIED = 300;
public final static int UNIMULTI_STRUCTURALFIED = 300;
/**
* Constant representing an AHC classifier
*/
public final static int AHC = 602;
public final static int AHC = 602;
/** * */
public final static int MULTI_STRUCTURAL = 601;
public final static int MULTI_STRUCTURAL = 601;
/**
* OTB methods - the following methods use the applications provided by the
* OTB library
*/
/** Constant representing a KMeans classifier via OTB */
public final static int OTB_KMEANS = 900;
/** KMeans classifier from OTB library */
public final static int OTB_KMEANS = 900;
/**
* Import Methodes - the following methods are importing models of classifier
*/
/** KMeans imported from a file containing the centroids representation */
public final static int IMPORT_KMEANS = 1000;
/** KMeans computed from a clustering contained in an imported file */
public final static int IMPORT_CLUSTERING_TO_KMEANS = 1001;
public final static String FILE_PARAMETER = "File path";
/**
* Constant holding the default parameters for the various implemented classification
* methods. WARNING: any change in this array must be propagated to the
* IND_* constants
*/
public final static String[][] methods = {
public final static String[][] METHODS = {
{ "K-MEANS", "#seeds", "10", "#steps", "10" },
......@@ -156,7 +168,7 @@ public class ClassifierUtils {
{ "AHC", "#clusters", "5" },
{ "OTB K-MEANS", "#seeds", "5", "#steps", "10", "Convergence Threshold", "0.0001" },
{ "OTB K-MEANS", "#seeds", "5", "#steps", "10", "Convergence Threshold", "0.0001" }
// { "MULTI_STRUCTURAL", "", "10" },
......@@ -169,7 +181,7 @@ public class ClassifierUtils {
* array mapping the list of methods given as text to the respective
* identifiers of the classification methods
*/
public static final int[] correspondanceIndiceId = { ClassifierUtils./*
public static final int[] correspondanceIndiceId = { ClassifierUtils./*
* index 0, id = 0
* (kmeans)
*/
......@@ -182,7 +194,7 @@ public class ClassifierUtils {
ClassifierUtils.KMEDOIDS, };
/** constant holding the descriptions of the classification methods */
private final static String[] methodDescriptions = {
private final static String[] METHOD_DESCRIPTIONS = {
"<h2>Methode de partitionnement statistique K-means</h2><p>La methode K-means demande deux parametres :</p><ul><li><strong>#seeds</strong> : le nombre de noyaux, c\'est-a-dire le nombre de classes desirees</li> <li><strong>#steps</strong> : le nombre de tours a effectuer (il est admis que 10 sont suffisant pour faire converger l\'algorithme efficacement</li></ul>",
"<h2>Fuzzy-C-means, methode de partitionnement floue derivee de K-means</h2><p>La methode Fuzzy-C-Means demande trois parametres :</p><ul><li><strong>#seeds</strong> : le nombre de noyaux, c\'est-a-dire le nombre de classes desirees</li> <li><strong>#steps</strong> : le nombre de tours a effectuer (il est admis que 10 sont suffisant pour faire converger l\'algorithme efficacement</li> <li><strong>m</strong> : coefficient de flou</li></ul>",
"<h2>COBWEB : formation de concepts (ne fonctionne actuellement que sur des donnees reelles)</h2><p>La methode Cobwed demande cinq parametres :</p><ul><li><strong>acuity</strong> : valeur minimal de la variance sur un attribut</li> <li><strong>mapi</strong> : cuteoff : seuil d\'arret de la specialisation d\'un concept (0 < mapi <= 1)</li> <li><strong>Max depth</strong> : profondeur maximal de la hierarchie de concepts</li> <li><strong>Nombre de passes</strong> : nombre de fois ou les objets sont presentes a l\'apprentissage</li> <li><strong>minimal card</strong> : nombre d\'objet minimal ... dont un concept a la fin de l\'apprentissage. Les concepts n\'ayant pas atteint ce minimum sont supprimes et leurs objets sont repartis dans les concepts restants.</li></ul>",
......@@ -235,8 +247,32 @@ public class ClassifierUtils {
"<h2></h2>",
"<h2></h2>",
"<h2>Voting</h2><p> Methode de classificaion non supervisee basee sur la co-occurrence des objets </p><br><p>Parametres </p><p><ul>"
+ "<li><strong> Nombre de classification : </strong> Nombre de Kmeans executes </li><li><strong> Pourcentage : </strong> Seuil de regroupement des objets dans le clustering hierarchique (calculé en fonction du nombre de fois ou deux objets ont ete mis ensemble dans des clustering differents) </li><li><strong> Nombre de clusters mini : </strong> Initialisation aleatoire du nombre de clusters de chacun de Kmeans -> borne inferieur </li><li><strong> Nombre de clusters mini : </strong> Initialisation aleatoire du nombre de clusters de chacun de Kmeans -> borne superieure </li></ul></p>",
+ "<li><strong> Nombre de classification : </strong> Nombre de Kmeans executes </li><li><strong> Pourcentage : </strong> Seuil de regroupement des objets dans le clustering hierarchique (calculé en fonction du nombre de fois ou deux objets ont ete mis ensemble dans des clustering differents) </li><li><strong> Nombre de clusters mini : </strong> Initialisation aleatoire du nombre de clusters de chacun de Kmeans -> borne inferieur </li><li><strong> Nombre de clusters mini : </strong> Initialisation aleatoire du nombre de clusters de chacun de Kmeans -> borne superieure </li></ul></p>"
};
/**
* same elements as METHODS, but for imported clusterings
*/
public final static String[][] IMPORT_METHODS = {
{ "K-MEANS", FILE_PARAMETER, ""},
{ "Clustering to K-MEANS", FILE_PARAMETER, ""}
};
private final static String[] IMPORT_METHOD_DESCRIPTIONS = {
"<h2>Import d'une methode de partitionnement statistique K-means</h2><p>La methode d'import demande un parametre :</p><ul><li><strong>file path</strong> : le chemin d'accès du fichier contenant une liste de centroïdes</li></ul>",
"<h2>Import d'une methode de partitionnement statistique K-means par reconstuction</h2><p>La methode d'import demande un parametre :</p><ul><li><strong>file path</strong> : le chemin d'accès du fichier contenant un clustering sur le jeu de donnée sélectionné, qui servira au calcul des centroïdes</li></ul>"
};
public static final int[] IMPORT_corresIndiceId = {
ClassifierUtils.IMPORT_KMEANS,
ClassifierUtils.IMPORT_CLUSTERING_TO_KMEANS
};
/**
* <p>
......@@ -245,8 +281,10 @@ public class ClassifierUtils {
* @param i the method number
* @param isImport true to get the description of an imported-model method, false for a regular method
* @return a description of that method
*/
public static String getMethodDescription(final int i) {
return ClassifierUtils.methodDescriptions[i];
public static String getMethodDescription(final int i, boolean isImport) {
if (isImport)
return ClassifierUtils.IMPORT_METHOD_DESCRIPTIONS[i];
return ClassifierUtils.METHOD_DESCRIPTIONS[i];
}
/**
......
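A hedged example of how the new import tables and the extended getMethodDescription are meant to line up (the index 1 below is simply the position of the "Clustering to K-MEANS" entry in IMPORT_METHODS):

// Position 1 in IMPORT_METHODS describes the "Clustering to K-MEANS" import.
String[] entry = ClassifierUtils.IMPORT_METHODS[1];        // { "Clustering to K-MEANS", FILE_PARAMETER, "" }
int methodId = ClassifierUtils.IMPORT_corresIndiceId[1];   // == IMPORT_CLUSTERING_TO_KMEANS
String importHelp = ClassifierUtils.getMethodDescription(1, true);

// Regular methods keep using the METHODS / METHOD_DESCRIPTIONS tables.
String kmeansHelp = ClassifierUtils.getMethodDescription(0, false);  // index 0 = K-MEANS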
package jcl.learning.methods.monostrategy.kmeans;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import jcl.clustering.ClusteringResult;
import jcl.data.Data;
import jcl.data.DataObject;
import jcl.data.attribute.AttributeHardConstrainedMultiDimSequence;
import jcl.data.attribute.AttributeMultiDimSequence;
import jcl.data.attribute.AttributeNumerical;
......@@ -396,4 +401,45 @@ public class ClassifierKmeans extends LearningMethod {
}
return null;
}
/**
* Returns a list of centroids computed from a given clustering over a Data instance
* @param data
* the Data classified
* @param clustering
* the clustering :
* - [x][0] the index of the x-th element
* - [x][1] the class label of the x-th element
* @return the centroid list
*/
public static List<DataObject> getCentroidsFromClustering(Data data, int[][] clustering) {
Map<Integer, Integer> labelMap = new HashMap<Integer, Integer>();
List<List<DataObject>> clusters = new ArrayList<List<DataObject>>();
// we populate the clusters list with corresponding DataObjects
for (int i = 0 ; i < clustering.length ; i++) {
int index = -1;
// test whether the label is already in the labelMap
if (labelMap.containsKey(clustering[i][1])) {
// if so we reuse the corresponding index
index = labelMap.get(clustering[i][1]);
} else {
// otherwise we add it
labelMap.put(clustering[i][1], clusters.size());
index = clusters.size();
clusters.add(new ArrayList<DataObject>());
}
clusters.get(index).add(data.getDataObject(clustering[i][0]));
}
List<DataObject> result = new ArrayList<DataObject>();
for (List<DataObject> c : clusters) {
HardSeed hs = new HardSeed((DataObject[]) c.toArray(new DataObject[c.size()]));
hs.redefine();
result.add(hs.getCenter());
}
return result;
}
}
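A small usage sketch of the new helper; data is assumed to be an existing jcl.data.Data instance with at least four objects, and the label values are arbitrary.

// Each row pairs an object index with its class label: two clusters, labelled 7 and 3.
int[][] clustering = {
        { 0, 7 },
        { 1, 3 },
        { 2, 7 },
        { 3, 3 }
};
List<DataObject> centroids = ClassifierKmeans.getCentroidsFromClustering(data, clustering);
// One centroid per distinct label, in order of first appearance: centroids.size() == 2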
......@@ -471,7 +471,7 @@ public class LearningResultKmeans extends LearningResult {
*/
private void initSeeds(Data data, Vector samples, int nbClusters) {
int[] seeds = new int[nbClusters];
DataObject[] seeds = new DataObject[nbClusters];
int nbClustersLeft = nbClusters;
int remaining = data.getNbObjects();
int index = 0;
......@@ -479,26 +479,33 @@ public class LearningResultKmeans extends LearningResult {
if (samples == null || samples.contains(new Integer(-1))) {
for (int i = 0; i < data.getNbObjects(); i++) {
if (Math.random() < ((double) nbClustersLeft / (double) remaining)) {
seeds[index] = i;
seeds[index] = data.getDataObject(i);
index++;
nbClustersLeft--;
}
remaining--;
}
} else { // when the initial centers are provided
for (int i = 0; i < seeds.length; i++) {
seeds[i] = (Integer) samples.get(i);
// if centers are given by index
if (samples.size() > 0 && samples.get(0) instanceof Integer) {
for (int i = 0; i < seeds.length; i++) {
seeds[i] = data.getDataObject((Integer) samples.get(i));
}
// if centers are given by value
} else {
for (int i = 0; i < seeds.length; i++) {
seeds[i] = (DataObject) samples.get(i);
}
}
}
this.seeds = new Vector<KmeansSeed>();
if (((ParametersKmeans) this.params).fuzzy)
for (int i = 0; i < nbClusters; i++)
this.seeds.add(new FuzzySeed(data.getDataObject(seeds[i]), data));
this.seeds.add(new FuzzySeed(seeds[i], data));
else
for (int i = 0; i < nbClusters; i++)
this.seeds.add(new LightHardSeed(data.getDataObject(seeds[i]), data, i));
this.seeds.add(new LightHardSeed(seeds[i], data, i));
}
/*
......
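With this change initSeeds accepts the samples Vector in either of two forms; a hedged sketch of both follows (initSeeds itself is private, so in practice the Vector reaches it through the learning parameters; data and clustering are the assumed inputs from the previous example).

// Form 1: seeds given by object index.
Vector<Integer> byIndex = new Vector<Integer>();
byIndex.add(3);
byIndex.add(17);

// Form 2: seeds given by value, e.g. centroids rebuilt from an imported clustering.
Vector<DataObject> byValue = new Vector<DataObject>(
        ClassifierKmeans.getCentroidsFromClustering(data, clustering));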
......@@ -92,7 +92,8 @@ public class ParametersKmeans extends LearningParameters {
* @param weights initial weighting
* @deprecated use {@link #ParametersKmeans(int, int, int, ClassificationWeights, DistanceParameter[][])} instead
*/
public ParametersKmeans(final int nbClusters, final int nbIters, int nbThreads, final ClassificationWeights weights) {
public ParametersKmeans(final int nbClusters, final int nbIters, int nbThreads,
final ClassificationWeights weights) {
super(weights);
this.nbClusters = nbClusters;
this.nbIters = nbIters;
......@@ -101,7 +102,8 @@ public class ParametersKmeans extends LearningParameters {
this.nbThreads = nbThreads;
}
public ParametersKmeans(final int nbClusters, final int nbIters, int nbThreads, final ClassificationWeights weights,DistanceParameter[][] parameters) {
public ParametersKmeans(final int nbClusters, final int nbIters, int nbThreads,
final ClassificationWeights weights, DistanceParameter[][] parameters) {
super(weights);
this.nbClusters = nbClusters;
this.nbIters = nbIters;
......@@ -122,7 +124,8 @@ public class ParametersKmeans extends LearningParameters {
* @param m Discriminative exponent for Fuzzy-C-means
* @param weights Initial weights
*/
public ParametersKmeans(final int nbClusters, final int nbIters, final double m, final ClassificationWeights weights) {
public ParametersKmeans(final int nbClusters, final int nbIters, final double m,
final ClassificationWeights weights) {
super(weights);
this.nbClusters = nbClusters;
this.nbIters = nbIters;
......@@ -144,7 +147,8 @@ public class ParametersKmeans extends LearningParameters {
* @param featureWeighting Type of attribute weighting
* @param weights Initial weights
*/
public ParametersKmeans(final int nbClusters, final int nbIters, final double m, final double beta, final int featureWeighting, final ClassificationWeights weights) {
public ParametersKmeans(final int nbClusters, final int nbIters, final double m,
final double beta, final int featureWeighting, final ClassificationWeights weights) {
super(weights);
this.nbClusters = nbClusters;
this.nbIters = nbIters;
......@@ -168,7 +172,8 @@ public class ParametersKmeans extends LearningParameters {
* @param featureWeighting Type of attribute weighting
* @param weights Initial weights
*/
public ParametersKmeans(final int nbClusters, final int nbIters, final double beta, final int featureWeighting, final ClassificationWeights weights) {
public ParametersKmeans(final int nbClusters, final int nbIters, final double beta,
final int featureWeighting, final ClassificationWeights weights) {
super(weights);
this.nbClusters = nbClusters;
this.nbIters = nbIters;
......
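Since the four-argument constructor is now marked deprecated, a hedged sketch of the preferred call; the null DistanceParameter matrix is purely a placeholder, real callers supply the per-attribute distance parameters.

ClassificationWeights weights = null;       // default weighting
DistanceParameter[][] distParams = null;    // placeholder only; supply real distance parameters
ParametersKmeans params =
        new ParametersKmeans(5, 10, 1, weights, distParams);  // 5 clusters, 10 iterations, 1 thread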