Commit a7fef5f8 authored by balanche

[Classe Model renommé en DistanceModel] Correction d'un bug de classification,...

[Classe Model renommé en DistanceModel] Correction d'un bug de classification, Déplacement du DistanceModel de Data vers Parameters, Correction d'un bug avec les attributs séquentiels
parent 9d02fac8
......@@ -89,7 +89,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
private Data sequence = null;
protected Model model = null;
protected DistanceModel model = null;
/** the sampler used to generate the Data */
private Sampler sampler = null;
......@@ -329,7 +329,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
* Constructeur a partir d'un nombre d'objets.
* @param data les objets contenus dans cet ensemble de donnees
* @param structure la structure des donnees
* @deprecated should specify a model;use rather {@link #Data(DataObject[], Model)}
* @deprecated should specify a model; use rather {@link #Data(List, DistanceModel)}
*/
public Data(final List<DataObject> data) {
this.currentView = data;
......@@ -340,7 +340,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
* @param data the dataset
* @param model the model of the desired dataset
*/
public Data(final List<DataObject> data,Model model) {
public Data(final List<DataObject> data,DistanceModel model) {
this.currentView = data;
this.model = model;
......@@ -351,7 +351,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
* @param structure
* @param knownResult
* @param nbClusters
* @deprecated should specify a model;use rather {@link #Data(DataObject[], int[], int, Model)}
* @deprecated should specify a model; use rather {@link #Data(List, int[], int, DistanceModel)}
*/
public Data(final List<DataObject> data, final int[] knownResult, final int nbClusters) {
this.currentView = data;
......@@ -365,7 +365,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
* @param knownResult
* @param nbClusters
*/
public Data(final List<DataObject> data, final int[] knownResult, final int nbClusters,Model model) {
public Data(final List<DataObject> data, final int[] knownResult, final int nbClusters,DistanceModel model) {
this.currentView = data;
this.knownResult = ClusteringResult.gerenerateDefaultClusteringResult(null, knownResult,
new GlobalWeights(this), nbClusters, this, null, null);
......@@ -390,7 +390,7 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
* @param nbObjects #nbObjects
* @param model the model to be used for the dataset
*/
public Data(final int nbObjects,Model model) {
public Data(final int nbObjects,DistanceModel model) {
this.currentView = new ArrayList<DataObject>(nbObjects);
for(int i = 0 ; i < nbObjects ; i++) {
currentView.add(null);
......@@ -1649,12 +1649,12 @@ public abstract class Data implements Cloneable, Serializable, Iterable<DataObje
abstract public DataObject mean();
public Model getModel() {
public DistanceModel getModel() {
return model;
}
public void setModel(Model model) {
public void setModel(DistanceModel model) {
this.model = model;
}
......
package jcl.data;
import jcl.data.attribute.Attribute;
import jcl.data.attribute.AttributeMultiDimSequence;
import jcl.data.attribute.AttributeSequence;
import jcl.data.distance.Distance;
import jcl.data.distance.DistanceParameter;
import jcl.data.distance.MetaDistance;
import jcl.data.distance.MetaDistanceEuclidean;
import jcl.data.distance.NumericalEuclideanDistance;
import jcl.data.distance.sequential.DistanceDTW;
import jcl.data.distance.sequential.DistanceDTWMD;
import jcl.data.sampling.Sampler;
/**
......@@ -16,7 +19,7 @@ import jcl.data.sampling.Sampler;
* TODO It should also contain the averaging method to summarize a set of attributes.
* @author Francois Petitjean
*/
public class Model {
public class DistanceModel {
/**
* Table of the attributes (cell i of the table = type of the i^th attribute)
*/
......@@ -27,9 +30,9 @@ public class Model {
/**
* @deprecated rather use {@link #DistanceModel(Distance[], MetaDistance)}
*/
public Model(){}
public DistanceModel(){}
public Model(Distance<Attribute, DistanceParameter>[] distances, MetaDistance metaDistance) {
public DistanceModel(Distance<Attribute, DistanceParameter>[] distances, MetaDistance metaDistance) {
this.distances = distances;
this.metaDistance = metaDistance;
}
......@@ -63,16 +66,18 @@ public class Model {
* @param dataObject the DataObject from which it will create the default model
* @return the default model
*/
public static Model generateDefaultModel(DataObject dataObject) {
public static DistanceModel generateDefaultModel(DataObject dataObject) {
Distance[] distances=new Distance[dataObject.getNbAttributes()];
for (int i=0; i<distances.length;i++) {
if (dataObject.getAttribute(i).getTypeAttribute() == Attribute.SEQUENCE_ATTRIBUTE)
if (dataObject.getAttribute(i) instanceof AttributeSequence)
distances[i] = DistanceDTW.getInstance();//uses DTW distances for sequential attributes ...
else if (dataObject.getAttribute(i) instanceof AttributeMultiDimSequence)
distances[i] = DistanceDTWMD.getInstance();//uses DTWMD distances for sequential attributes ...
else
distances[i]=NumericalEuclideanDistance.getInstance();//... and euclidian distances for numerical attributes
}
MetaDistance metaDistance = MetaDistanceEuclidean.getInstance(); //defines the way the two scores are combined, by default it is Euclidian
Model model = new Model(distances, metaDistance);
DistanceModel model = new DistanceModel(distances, metaDistance);
return model;
}
......@@ -81,13 +86,13 @@ public class Model {
* @param dataObject the DataObject from which it will create a model
* @return the naive model
*/
public static Model generateNaiveModel(DataObject dataObject) {
public static DistanceModel generateNaiveModel(DataObject dataObject) {
Distance[] distances=new Distance[dataObject.getNbAttributes()];
for (int i=0; i<distances.length;i++) {
distances[i]=NumericalEuclideanDistance.getInstance();// uses euclidian distance for every attribute
}
MetaDistance metaDistance = MetaDistanceEuclidean.getInstance(); // defines the way the two scores are combined, by default it is Euclidian
Model model = new Model(distances, metaDistance);
DistanceModel model = new DistanceModel(distances, metaDistance);
return model;
}
}
......@@ -345,7 +345,7 @@ public class SimpleData extends Data {
/**
* Constructor from a List of DataObjects
* @param data les objets contenus dans cet ensemble de donnees
* @deprecated should specify a model;use rather {@link #Data(DataObject[], Model)}
* @deprecated should specify a model; use rather {@link #SimpleData(List, DistanceModel)}
*/
public SimpleData(final List<DataObject> data) {
this.currentView = data;
......@@ -357,7 +357,7 @@ public class SimpleData extends Data {
* @param data the dataset
* @param model the model of the desired dataset
*/
public SimpleData(final List<DataObject> data,Model model) {
public SimpleData(final List<DataObject> data,DistanceModel model) {
this.currentView = data;
this.model = model;
this.wholeDataNbObjects = data.size();
......@@ -366,7 +366,7 @@ public class SimpleData extends Data {
/**
* Constructor from a Sampler
* @param sampler the sampler to use to generate the list of DataObjects
* @deprecated should specify a model;use rather {@link #Data(DataObject[], Model)}
* @deprecated should specify a model; use rather {@link #SimpleData(Sampler, DistanceModel)}
*/
public SimpleData(final Sampler sampler) {
this.currentView = sampler.getDataObjects();
......@@ -379,7 +379,7 @@ public class SimpleData extends Data {
* @param sampler the sampler to use to generate the list of DataObjects
* @param model the model of the desired dataset
*/
public SimpleData(final Sampler sampler, Model model) {
public SimpleData(final Sampler sampler, DistanceModel model) {
this.currentView = sampler.getDataObjects();
this.wholeDataNbObjects = sampler.getDataSize();
this.model = model;
......@@ -406,7 +406,7 @@ public class SimpleData extends Data {
* @param knownResult
* @param nbClusters
*/
public SimpleData(final List<DataObject> data, final int[] knownResult, final int nbClusters,Model model) {
public SimpleData(final List<DataObject> data, final int[] knownResult, final int nbClusters,DistanceModel model) {
this.currentView = data;
this.knownResult = ClusteringResult.gerenerateDefaultClusteringResult(null, knownResult,
new GlobalWeights(this), nbClusters, this, null, null);
......@@ -430,7 +430,7 @@ public class SimpleData extends Data {
* @param nbObjects #nbObjects
* @param model the model to be used for the dataset
*/
public SimpleData(final int nbObjects,Model model) {
public SimpleData(final int nbObjects,DistanceModel model) {
this.currentView = new ArrayList<DataObject>(nbObjects);
this.locked = new boolean[nbObjects];
this.model = model;
......
package jcl.data.distance.average;
/**
 * Empty interface declared in the {@code jcl.data.distance.average} package;
 * it currently has no methods or constants.
 * <p>
 * NOTE(review): presumably a placeholder for the averaging method that the
 * DistanceModel TODO says should summarize a set of attributes - confirm
 * the intended contract before implementing it.
 */
public interface Average {
}
package jcl.data.distance.sequential;
package jcl.data.distance.sequential;
import static jcl.data.distance.Tools.Min3;
......
......@@ -13,7 +13,7 @@ import java.util.Random;
import jcl.clustering.ClusteringResult;
import jcl.data.Data;
import jcl.data.DataObject;
import jcl.data.Model;
import jcl.data.DistanceModel;
import jcl.data.SimpleData;
import jcl.data.attribute.Attribute;
import jcl.data.attribute.AttributeNumerical;
......@@ -83,7 +83,7 @@ public class Kmeans {
distances[0] = NumericalEuclideanDistance.getInstance(); // first attribute compared with an euclidean distance between numericals
distances[1] = jcl.data.distance.sequential.DistanceDTW.getInstance(); // second attribute (sequential) compared with the DTW distance
MetaDistance metaDistance = MetaDistanceEuclidean.getInstance(); // defines the way the two scores are combined (possibility to weight)
Model model = new Model(distances, metaDistance);
DistanceModel model = new DistanceModel(distances, metaDistance);
//~ The dataset can be created with the model
dataset = new SimpleData(objects, model);
......
......@@ -4,6 +4,7 @@ import java.io.Serializable;
import java.util.Vector;
import jcl.data.DataObject;
import jcl.data.DistanceModel;
import jcl.weights.ClassificationWeights;
/**
......@@ -26,6 +27,9 @@ public abstract class LearningParameters implements Serializable {
private Vector<DataObject> samples = null;
protected DistanceModel model = null;
/**
*
* @param weights
......@@ -134,4 +138,13 @@ public abstract class LearningParameters implements Serializable {
public void setSamples(Vector<DataObject> samples) {
this.samples = samples;
}
/**
 * Returns the distance model stored in these learning parameters.
 *
 * @return the current value of the {@code model} field; {@code null} if it
 *         has not been set (the field is initialized to {@code null})
 */
public DistanceModel getModel() {
return model;
}
/**
 * Replaces the distance model stored in these learning parameters.
 *
 * @param model the distance model to store; assigned to the {@code model}
 *              field as-is, without any validation or copy
 */
public void setModel(DistanceModel model) {
this.model = model;
}
}
......@@ -247,7 +247,7 @@ public abstract class LearningResult extends Object implements Progressable,Seri
* </p>
*/
@Override
public void incProgress() {
public synchronized void incProgress() {
this.progress++;
}
......
......@@ -20,7 +20,7 @@ import jcl.clustering.Cluster;
import jcl.clustering.ClusteringResult;
import jcl.data.Data;
import jcl.data.DataObject;
import jcl.data.Model;
import jcl.data.DistanceModel;
import jcl.data.SimpleData;
import jcl.data.attribute.Attribute;
import jcl.data.distance.Distance;
......
......@@ -16,7 +16,7 @@ import jcl.Classification;
import jcl.clustering.ClusteringResult;
import jcl.data.Data;
import jcl.data.DataObject;
import jcl.data.Model;
import jcl.data.DistanceModel;
import jcl.data.attribute.Attribute;
import jcl.data.distance.Distance;
import jcl.data.distance.DistanceParameter;
......@@ -151,7 +151,7 @@ public class LearningResultKmeans extends LearningResult {
}
// long startT = System.nanoTime();
// System.out.println("start at "+startT);
int clusterMap[] = this.clusterAffectation(data,fromSample);// !!!
int clusterMap[] = this.clusterAffectation(data,fromSample);
// long endT = System.nanoTime();
// System.out.println("endat "+ endT + " total of "+ (endT - startT)/1000000000l);
ClusteringResult result = null;
......@@ -201,7 +201,7 @@ public class LearningResultKmeans extends LearningResult {
return closestSeed;
}
private int closestSeed(DataObject c, Model model, DistanceParameter[] param, DataObject[] seeds) {
private int closestSeed(DataObject c, DistanceModel model, DistanceParameter[] param, DataObject[] seeds) {
MetaDistance metaDistance = model.getMetaDistance();
Distance<Attribute, DistanceParameter>[] distances = model.getDistances();
......@@ -252,6 +252,8 @@ public class LearningResultKmeans extends LearningResult {
* @return the index of the assigned class for each object
*/
public int[] clusterAffectation(Data data, boolean onSample) {
this.resetProgress();
distanceGlobale = 0.0;
int nbObjects = 0;
if (onSample) {
......@@ -262,7 +264,6 @@ public class LearningResultKmeans extends LearningResult {
}
this.progressM=nbObjects;
this.resetProgress();
int clusterMap[] = new int[nbObjects];
int nbThreads = ((ParametersKmeans) this.params).nbThreads;
......@@ -323,31 +324,34 @@ public class LearningResultKmeans extends LearningResult {
System.err.println("Parameters should not be null");
} else {
ThreadedAffectation[] tabThreads = new ThreadedAffectation[nbThreads];
int nbObjectsPerThread = nbObjects / nbThreads;
Iterator<DataObject> it;
int nbObjectsPerThread = (int) Math.ceil((double)nbObjects / nbThreads);
for (int th = 0; th < nbThreads - 1; th++) {
if (onSample) {
tabThreads[th] = new ThreadedAffectation(clusterMap,
data.iterator(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread - 1),
data.getModel(), params.parameters[th],
data.iterator(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread ),
// data.getModel(), params.parameters[th],
params.getModel(), params.parameters[th],
th * nbObjectsPerThread);
} else {
tabThreads[th] = new ThreadedAffectation(clusterMap,
data.getWholeSourceDataObjects(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread - 1),
data.getModel(), params.parameters[th],
data.getWholeSourceDataObjects(th * nbObjectsPerThread, (th + 1) * nbObjectsPerThread ),
// data.getModel(), params.parameters[th],
params.getModel(), params.parameters[th],
th * nbObjectsPerThread);
}
tabThreads[th].start();
}
if (onSample) {
tabThreads[nbThreads - 1] = new ThreadedAffectation(clusterMap,
data.iterator((nbThreads - 1) * nbObjectsPerThread, nbObjects - 1),
data.getModel(), params.parameters[nbThreads - 1],
data.iterator((nbThreads - 1) * nbObjectsPerThread, nbObjects ),
// data.getModel(), params.parameters[nbThreads - 1],
params.getModel(), params.parameters[nbThreads - 1],
(nbThreads - 1) * nbObjectsPerThread);
} else {
tabThreads[nbThreads - 1] = new ThreadedAffectation(clusterMap,
data.getWholeSourceDataObjects((nbThreads - 1) * nbObjectsPerThread, nbObjects - 1),
data.getModel(), params.parameters[nbThreads - 1],
data.getWholeSourceDataObjects((nbThreads - 1) * nbObjectsPerThread, nbObjects),
// data.getModel(), params.parameters[nbThreads - 1],
params.getModel(), params.parameters[nbThreads - 1],
(nbThreads - 1) * nbObjectsPerThread);
}
tabThreads[nbThreads - 1].start();
......@@ -374,7 +378,7 @@ public class LearningResultKmeans extends LearningResult {
}
}
endProgress();
//this.endProgress();
return clusterMap;
}
......@@ -818,13 +822,13 @@ public class LearningResultKmeans extends LearningResult {
int[] clusterMap;
Iterator<DataObject> dataset;
Model model;
DistanceModel model;
DistanceParameter[] parameters;
DataObject[] seedsCopy;
int threadGlobalDistance = 0;
int startI = 0;
public ThreadedAffectation(int[] clusterMap, Iterator<DataObject> iterator, Model model,
public ThreadedAffectation(int[] clusterMap, Iterator<DataObject> iterator, DistanceModel model,
DistanceParameter[] parameters, int startI) {
this.clusterMap = clusterMap;
this.dataset = iterator;
......@@ -858,7 +862,7 @@ public class LearningResultKmeans extends LearningResult {
incProgress();
i++;
}
System.out.println("Fin thread progressM="+progressM+" progress="+progress+" i="+i);
}
public int getThreadGlobalDistance() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment