Commit faabf922 authored by kruger

Modification de la nouvelle fonction de repartition dans CUDA.tpl

parent eb7e004a
......@@ -89,9 +89,9 @@ float gauss()
\end
\GenomeClass::display:
for(int i=0; i<SIZE; i++){
/*for(int i=0; i<SIZE; i++){
cout << Genome.x[i] << ":" << Genome.sigma[i] << "|";
}
}*/
\end
\Before everything else function:
......
#include <math.h>
#include <stdlib.h>
#include "include/CCuda.h"
#define NB_MP 16
#include <stdio.h>
// Builds a CCuda object and reserves its three malloc'ed buffers.
//   parentSize         - number of individuals the parent buffer must hold
//   offSize            - number of individuals the offspring buffer must hold
//   individualImplSize - size in bytes of one individual
// cudaBuffer is sized for the larger of the two populations.
// The malloc results are stored as-is; no failure check is performed.
CCuda::CCuda(size_t parentSize, size_t offSize, size_t individualImplSize){
  sizeOfIndividualImpl = individualImplSize;

  const size_t parentBytes    = sizeOfIndividualImpl * parentSize;
  const size_t offspringBytes = sizeOfIndividualImpl * offSize;

  cudaParentBuffer    = malloc(parentBytes);
  cudaOffspringBuffer = malloc(offspringBytes);

  // Shared buffer: large enough for whichever population is bigger.
  size_t largestPopulation = offSize;
  if(parentSize > offSize)
    largestPopulation = parentSize;
  cudaBuffer = malloc(sizeOfIndividualImpl * largestPopulation);
}
// Destructor: the body is empty, so nothing is released here.
// NOTE(review): the constructor mallocs cudaParentBuffer, cudaOffspringBuffer
// and cudaBuffer; confirm they are freed elsewhere, otherwise they leak.
CCuda::~CCuda(){
}
// Returns the ceiling of E (the smallest integer not less than E) as a size_t.
//   E - value to round up; expected to be non-negative because the result is
//       converted to an unsigned type.
// An integral E is returned unchanged; any fractional E is rounded up.
inline size_t partieEntiereSup(float E){
  // The previous fractional branch evaluated floor(E=1): the assignment
  // overwrote E with 1, so every non-integral input returned 1.
  // ceilf handles both the integral and the fractional case.
  return (size_t)ceilf(E);
}
bool repartition(struct my_struct_gpu* gpu_infos){
//There is an implied minimum number of threads for each block
if(gpu_infos->num_Warp > gpu_infos->num_thread_max){
printf("You need to authorized at least %d threads on each block!\n",gpu_infos->num_Warp);
exit(1);
}
gpu_infos->dimGrid = gpu_infos->num_MP;
gpu_infos->dimBlock = gpu_infos->num_Warp;;
//While each element of the population can't be placed on the card
while(gpu_infos->dimBlock * gpu_infos->dimGrid < gpu_infos->sh_pop_size) {
//Every time we add the number of Warp to the value of dimBlock
if( (gpu_infos->dimBlock += gpu_infos->num_Warp) > gpu_infos->num_thread_max ) {
//If the number of dimBlock exceeds the number of threads max, we add the number of MP to the value of dimGrid and we reset the value of dimBlock with the number of Warp
gpu_infos->dimGrid += gpu_infos->num_MP;
gpu_infos->dimBlock = gpu_infos->num_Warp;
}
}
// Returns the smallest power of two that is >= n, never less than 2.
//   n - threshold value; any n <= 2 (including negatives) yields 2.
// The search doubles an int, so n must stay below the largest power of two
// an int can represent.
inline int puissanceDeuxSup(float n){
  int power = 2;
  for(;;){
    if(!(power < n))
      break;
    power *= 2;
  }
  return power;
}
bool repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThreadLB, size_t nbMP, size_t maxBlockSize){
(*nbThreadLB) = 0;
//DEBUG_PRT("repartition : %d",popSize);
if( ((float)popSize / (float)nbMP) <= maxBlockSize ){
//la population repartie sur les MP tient dans une bloc par MP
(*nbThreadPB) = partieEntiereSup( (float)popSize/(float)nbMP);
(*nbBlock) = popSize/(*nbThreadPB);
if( popSize%nbMP != 0 ){
//on fait MP-1 block de equivalent et un plus petit
(*nbThreadLB) = popSize - (*nbThreadPB)*(*nbBlock);
}
}
else{
//la population est trop grande pour etre repartie sur les MP
//directement
//(*nbBlock) = partieEntiereSup( (float)popSize/((float)maxBlockSize*NB_MP));
(*nbBlock) = puissanceDeuxSup( (float)popSize/((float)maxBlockSize*NB_MP));
(*nbBlock) *= NB_MP;
(*nbThreadPB) = popSize/(*nbBlock);
if( popSize%maxBlockSize!=0){
(*nbThreadLB) = popSize - (*nbThreadPB)*(*nbBlock);
// Le rest est trop grand pour etre place dans un seul block (c'est possible uniquement qd
// le nombre de block depasse maxBlockSize
while( (*nbThreadLB) > maxBlockSize ){
//on augmente le nombre de blocs principaux jusqu'a ce que nbthreadLB retombe en dessous de maxBlockSize
//(*nbBlock) += nbMP;
(*nbBlock) *= 2;
(*nbThreadPB) = popSize/(*nbBlock);
(*nbThreadLB) = popSize - (*nbThreadPB)*(*nbBlock);
}
}
}
if((((*nbBlock)*(*nbThreadPB) + (*nbThreadLB)) == popSize) && ((*nbThreadLB) <= maxBlockSize) && ((*nbThreadPB) <= maxBlockSize))
//Verification that we have enough place for all the population and that every constraints are respected
if( (gpu_infos->dimBlock*gpu_infos->dimGrid >= gpu_infos->sh_pop_size) && (gpu_infos->dimBlock <= gpu_infos->num_thread_max))
return true;
else
return false;
}
......@@ -415,8 +415,8 @@ void CPopulation::weakElitism(size_t elitismSize, CIndividual** parentsPopulatio
}
void CPopulation::addIndividualParentPopulation(CIndividual* indiv){
parents[actualParentPopulationSize++] = indiv;
// Stores indiv in slot id of the parents array.
//   indiv - individual to place in the parent population
//   id    - index in parents to write; no bounds check is done here
// This overload does not modify actualParentPopulationSize.
void CPopulation::addIndividualParentPopulation(CIndividual* indiv, size_t id){
parents[id] = indiv;
}
std::ostream& operator << (std::ostream& O, const CPopulation& B)
......
......@@ -9,13 +9,35 @@
#define CCUDA_H_
#include <iostream>
#include <semaphore.h>
// Empty structure; it is the type of the CCuda::initOpts member.
struct gpuOptions{};
// Launch-configuration record read and filled in by
// repartition(struct my_struct_gpu*).
struct my_struct_gpu{
int indiv_start;     // NOTE(review): not used by the visible code; presumably the index of the first individual handled — confirm
int sh_pop_size;     // number of individuals that dimBlock*dimGrid must cover
int num_MP;          // initial value of dimGrid and the step by which dimGrid grows
int num_thread_max;  // upper bound allowed for dimBlock
int num_Warp;        // initial value of dimBlock and the step by which dimBlock grows
int dimGrid;         // output of repartition: computed grid dimension
int dimBlock;        // output of repartition: computed block dimension
};
// Argument bundle; its users are not visible in this file.
// NOTE(review): presumably one instance is handed to each GPU worker thread — confirm.
struct gpuArg{
int threadId;        // identifier of the thread this record belongs to (presumed — confirm)
sem_t sem_in;        // POSIX semaphore; presumably signals incoming work — confirm
sem_t sem_out;       // POSIX semaphore; presumably signals completed work — confirm
void* d_population;  // NOTE(review): the d_ prefix suggests a device pointer to the population — confirm
float* d_fitness;    // NOTE(review): the d_ prefix suggests a device pointer to fitness values — confirm
};
class CCuda {
public:
void* cudaParentBuffer;
void* cudaOffspringBuffer;
void* cudaBuffer;
size_t sizeOfIndividualImpl;
struct gpuOptions initOpts;
public:
......@@ -23,8 +45,6 @@ public:
~CCuda();
};
size_t partieEntiereSup(float E);
int puissanceDeuxSup(float n);
bool repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThreadLB, size_t nbMP, size_t maxBlockSize);
bool repartition(struct my_struct_gpu* gpu_infos);
#endif /* CCUDA_H_ */
......@@ -63,7 +63,7 @@ public:
virtual ~CPopulation();
//virtual void initializeParentPopulation() = 0;
void addIndividualParentPopulation(CIndividual* indiv);
void addIndividualParentPopulation(CIndividual* indiv, size_t id);
void evaluatePopulation(CIndividual** population, size_t populationSize);
virtual void optimisePopulation(CIndividual** population, size_t populationSize);
virtual void evaluateParentPopulation();
......
......@@ -318,8 +318,9 @@ CEvolutionaryAlgorithm* ParametersImpl::newEvolutionaryAlgorithm(){
void EvolutionaryAlgorithmImpl::initializeParentPopulation(){
for( unsigned int i=0 ; i< this->params->parentPopulationSize ; i++){
this->population->addIndividualParentPopulation(new IndividualImpl());
}
this->population->addIndividualParentPopulation(new IndividualImpl(),i);
}
this->population->actualParentPopulationSize = this->params->parentPopulationSize;
}
......
......@@ -470,7 +470,7 @@ void EvolutionaryAlgorithmImpl::initializeParentPopulation(){
//DEBUG_PRT("Creation of %lu/%lu parents (other could have been loaded from input file)",this->params->parentPopulationSize-this->params->actualParentPopulationSize,this->params->parentPopulationSize);
for( unsigned int i=0 ; i< this->params->parentPopulationSize ; i++){
this->population->addIndividualParentPopulation(new IndividualImpl());
this->population->addIndividualParentPopulation(new IndividualImpl(),i);
}
this->population->actualParentPopulationSize = this->params->parentPopulationSize;
......
This diff is collapsed.
......@@ -584,7 +584,7 @@ void EvolutionaryAlgorithmImpl::initializeParentPopulation(){
//DEBUG_PRT("Creation of %lu/%lu parents (other could have been loaded from input file)",this->params->parentPopulationSize-this->params->actualParentPopulationSize,this->params->parentPopulationSize);
for( unsigned int i=0 ; i< this->params->parentPopulationSize ; i++){
this->population->addIndividualParentPopulation(new IndividualImpl());
this->population->addIndividualParentPopulation(new IndividualImpl(),i);
}
this->population->actualParentPopulationSize = this->params->parentPopulationSize;
......
......@@ -316,8 +316,9 @@ CEvolutionaryAlgorithm* ParametersImpl::newEvolutionaryAlgorithm(){
void EvolutionaryAlgorithmImpl::initializeParentPopulation(){
for( unsigned int i=0 ; i< this->params->parentPopulationSize ; i++){
this->population->addIndividualParentPopulation(new IndividualImpl());
}
this->population->addIndividualParentPopulation(new IndividualImpl(),i);
}
this->population->actualParentPopulationSize = this->params->parentPopulationSize;
}
......
......@@ -342,8 +342,9 @@ CEvolutionaryAlgorithm* ParametersImpl::newEvolutionaryAlgorithm(){
// Fills the parent population with freshly constructed individuals.
// Each new IndividualImpl is stored at its own index through the two-argument
// addIndividualParentPopulation overload; the population's
// actualParentPopulationSize is then set once, after the loop, to
// params->parentPopulationSize.
void EvolutionaryAlgorithmImpl::initializeParentPopulation(){
  for( unsigned int i=0 ; i< this->params->parentPopulationSize ; i++){
    this->population->addIndividualParentPopulation(new IndividualImpl(),i);
  }
  // The terminating ';' was missing on this assignment.
  this->population->actualParentPopulationSize = this->params->parentPopulationSize;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment