Commit 25e1dc7c authored by maitre's avatar maitre
Browse files

Bugged cuda mo version

parent 4b16bc6c
......@@ -27,7 +27,7 @@ Centre de Math
#define STD_FLAVOR_SO 0
#define STD_FLAVOR_MO 1
#define CUDA_FLAVOR_SO 0
#define CUDA_FLAVOR_MO 0
#define CUDA_FLAVOR_MO 1
#define UNIX 1
#define WINDOWS 2
......
......@@ -867,6 +867,11 @@ exponent ([Ee][+-]?[0-9]+)
// create a symbolic link from Makefile to EASEA.mak
symlink(sFullFileName,sPathName);
}
else{
DEBUG_PRT("file name : %s",sFileName);
if( symlink(sFileName,"Makefile") ) perror("Symlink creation error ");
}
if (bVERBOSE){
printf("Creating %s...\n",sFileName);
......@@ -1554,6 +1559,24 @@ exponent ([Ee][+-]?[0-9]+)
<COPY_USER_FUNCTION>"GenomeClass" {if (TARGET==EO) fprintf(fpOutputFile, "GenotypeT");
else fprintf(fpOutputFile,"Genome.");
}
<COPY_USER_FUNCTION>"f1" {
/* While copying a user function: when inside a CUDA evaluator and the target
   is the multi-objective CUDA flavor, the matched text f1 is emitted as
   (f[0]); in every other case it is written out unchanged.
   NOTE(review): the pattern is the bare string f1, so whether it can also fire
   inside longer identifiers depends on the other rules of this start
   condition - confirm. */
if( bWithinCUDA_Evaluator && TARGET==CUDA && TARGET_FLAVOR==CUDA_FLAVOR_MO ){
fprintf(fpOutputFile,"(f[0])");
}
else
fprintf(fpOutputFile,"%s",yytext);
}
<COPY_USER_FUNCTION>"f2" {
/* While copying a user function: when inside a CUDA evaluator and the target
   is the multi-objective CUDA flavor, the matched text f2 is emitted as
   (f[1]); in every other case it is written out unchanged.
   NOTE(review): the pattern is the bare string f2, so whether it can also fire
   inside longer identifiers depends on the other rules of this start
   condition - confirm. */
if( bWithinCUDA_Evaluator && TARGET==CUDA && TARGET_FLAVOR==CUDA_FLAVOR_MO ){
fprintf(fpOutputFile,"(f[1])");
}
else
fprintf(fpOutputFile,"%s",yytext);
}
<COPY_USER_FUNCTION>"Genome" {
if (bWithinDisplayFunction) fprintf(fpOutputFile,"(*this)");
else if ((TARGET==EO)&&(bWithinInitialiser)) fprintf(fpOutputFile, "(*genome)");
......@@ -1661,7 +1684,7 @@ exponent ([Ee][+-]?[0-9]+)
<COPY_USER_FUNCTION>"\\end" {rewind(fpGenomeFile);
yyin = fpTemplateFile;
BEGIN TEMPLATE_ANALYSIS;
bWithinInitialiser=bWithinXover=bWithinMutator=bWithinEvaluator=0;
bWithinInitialiser=bWithinXover=bWithinMutator=bWithinEvaluator=bWithinCUDA_Evaluator=0;
if (bWithinDisplayFunction){
bWithinDisplayFunction=0; // display function
}
......@@ -1850,7 +1873,11 @@ int CEASEALexer::create(CEASEAParser* pParser, CSymbolTable* pSymTable)
} }
if (TARGET==CUDA){
strcat(sTemp,"CUDA.tpl");
if(TARGET_FLAVOR == CUDA_FLAVOR_SO )
strcat(sTemp,"CUDA.tpl");
else
strcat(sTemp,"CUDA_MO.tpl");
if (!(yyin = fpTemplateFile = fopen(sTemp, "r"))){
fprintf(stderr,"\n*** Could not open %s.\n",sTemp);
fprintf(stderr,"*** Please modify the EZ_PATH environment variable.\n");
......
......@@ -1326,7 +1326,15 @@ int main(int argc, char *argv[]){
if (!mystricmp(sTemp,"eo")) TARGET=EO;
else if (!mystricmp(sTemp,"galib")) TARGET=GALIB;
else if (!mystricmp(sTemp,"dream")) TARGET=DREAM;
else if (!mystricmp(sTemp,"cuda")) TARGET=CUDA;
else if (!mystricmp(sTemp,"cuda")){
TARGET=CUDA;
TARGET_FLAVOR = CUDA_FLAVOR_SO;
}
else if( !mystricmp(sTemp,"cuda_mo") ){
TARGET=CUDA;
TARGET_FLAVOR = CUDA_FLAVOR_MO;
}
else if (!mystricmp(sTemp,"std")) {
TARGET=STD;
TARGET_FLAVOR = STD_FLAVOR_SO;
......@@ -1335,6 +1343,7 @@ int main(int argc, char *argv[]){
TARGET=STD;
TARGET_FLAVOR = STD_FLAVOR_MO;
}
else if (!mystricmp(sTemp,"v")) bVERBOSE=true;
else if (!mystricmp(sTemp,"path")) {
if (argv[++nParamNb][0]=='"') {
......
......@@ -60,6 +60,9 @@ clean:
rm -f *.o $(EXEC) $(EXEC)_bin
cd alexyacc && make clean
install:$(EXEC)
sudo cp $< /usr/bin/easea
# realclean: clean
# rm -f EaseaParse.cpp EaseaParse.h EaseaLex.cpp EaseaLex.h
......
......@@ -82,7 +82,7 @@ struct gpuOptions initOpts;
\INSERT_INITIALISATION_FUNCTION
\INSERT_FINALIZATION_FUNCTION
\INSERT_GENERATION_FUNCTION
\INSERT_BOUND_CHECKING
......@@ -538,6 +538,7 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){
population->produceOffspringPopulation();
\INSERT_BOUND_CHECKING_FCT_CALL
TIME_ST(eval);
for( size_t i=0 ; i<this->population->actualOffspringPopulationSize ; i++ )
......
\TEMPLATE_START/**
This is program entry for multi-objective
CUDA template for EASEA
*/
\ANALYSE_PARAMETERS
using namespace std;
#include <iostream>
#include "EASEATools.hpp"
#include "EASEAIndividual.hpp"
#include <time.h>
RandomGenerator* globalRandomGenerator;
// Entry point of the generated program. It collects the run settings, builds
// the evolutionary algorithm with one generational stopping criterion, runs
// the evolutionary loop and finally frees the objects allocated here.
// Values written as template placeholders are substituted when the template
// is expanded.
int main(int argc, char** argv){
parseArguments("EASEA.prm",argc,argv);
size_t parentPopulationSize = setVariable("popSize",\POP_SIZE);
size_t offspringPopulationSize = setVariable("nbOffspring",\OFF_SIZE);
float pCrossover = \XOVER_PROB;
float pMutation = \MUT_PROB;
// Fixed value: unlike the two probabilities above it is not a placeholder.
float pMutationPerGene = 0.05;
time_t seed = setVariable("seed",time(0));
// The generator is stored in a global that other translation units declare
// extern.
globalRandomGenerator = new RandomGenerator(seed);
std::cout << "Seed is : " << seed << std::endl;
SelectionOperator* selectionOperator = new \SELECTOR;
SelectionOperator* replacementOperator = new \RED_FINAL;
float selectionPressure = \SELECT_PRM;
float replacementPressure = \RED_FINAL_PRM;
string outputfile = setVariable("outputfile","");
string inputfile = setVariable("inputfile","");
// User initialisation hook, called before the algorithm object is built.
EASEAInit(argc,argv);
EvolutionaryAlgorithm ea(parentPopulationSize,offspringPopulationSize,selectionPressure,replacementPressure,
selectionOperator,replacementOperator,pCrossover, pMutation, pMutationPerGene,outputfile,inputfile);
StoppingCriterion* sc = new GenerationalCriterion(&ea,setVariable("nbGen",\NB_GEN));
ea.addStoppingCriterion(sc);
Population* pop = ea.getPopulation();
ea.runEvolutionaryLoop();
// User finalisation hook, called with the population after the loop.
EASEAFinal(pop);
// NOTE(review): pop was obtained from ea.getPopulation() and ea is still
// alive here; confirm that ea does not delete the same population again when
// it goes out of scope.
delete pop;
delete sc;
delete selectionOperator;
delete replacementOperator;
delete globalRandomGenerator;
return 0;
}
\START_CUDA_GENOME_CU_TPL
#include "EASEAIndividual.hpp"
#include "EASEAUserClasses.hpp"
#include <string.h>
#include <fstream>
#include <sys/time.h>
#include "EASEATools.hpp"
#define CUDA_TPL
extern RandomGenerator* globalRandomGenerator;
\INSERT_USER_DECLARATIONS
struct gpuOptions initOpts;
\ANALYSE_USER_CLASSES
\INSERT_USER_FUNCTIONS
\INSERT_INITIALISATION_FUNCTION
\INSERT_FINALIZATION_FUNCTION
\INSERT_GENERATION_FUNCTION
\INSERT_BOUND_CHECKING
// Finalisation hook: main() calls it with the population once the
// evolutionary loop has returned. The body is filled in by the template
// placeholder below.
void EASEAFinal(Population* pop){
\INSERT_FINALIZATION_FCT_CALL
}
// Initialisation hook: main() calls it with the command line arguments before
// the evolutionary algorithm object is constructed. The body is filled in by
// the template placeholder below.
void EASEAInit(int argc, char** argv){
\INSERT_INIT_FCT_CALL
}
using namespace std;
RandomGenerator* Individual::rg;
// Default constructor. The genome set-up comes from the two template
// placeholders; the individual then starts with valid == false, i.e. its
// fitness has not been computed yet.
Individual::Individual(){
\GENOME_CTOR
\INSERT_EO_INITIALISER
valid = false;
}
// Destructor. Its whole body is the genome clean-up code inserted by the
// template placeholder.
Individual::~Individual(){
\GENOME_DTOR
}
// Host-side evaluation. Returns the stored fitness when the individual is
// already marked valid; otherwise marks it valid and runs the user evaluator
// inserted by the template placeholder.
// NOTE(review): no value is returned after the placeholder, so the inserted
// evaluator presumably ends with its own return statement - confirm.
float Individual::evaluate(){
if(valid)
return fitness;
else{
// The flag is set before the evaluator code runs.
valid = true;
\INSERT_EVALUATOR
}
}
/**
   Gives access to the individual number id stored in a CUDA buffer as if it
   were a standard Individual. Each slot of the buffer is one genome followed
   by room for one pointer, so the address is buffer + id * slot size.

   @TODO This should be a macro; for now it is a function for debugging
   purposes.
*/
__device__ __host__ inline Individual* INDIVIDUAL_ACCESS(void* buffer,size_t id){
  const size_t slotSize = \GENOME_SIZE+sizeof(void*);
  char* const slot = ((char*)buffer)+slotSize*id;
  return (Individual*)slot;
}
// Device-side evaluation of individual number id of devBuffer. The body is
// the user evaluator inserted by the template placeholder. f is a
// two-element array; the evaluation kernel passes d_fitnesses+2*id for it.
// NOTE(review): presumably the inserted code stores the two objective values
// in f[0] and f[1] - confirm against the generated evaluator.
__device__ float cudaEvaluate(void* devBuffer, size_t id, struct gpuOptions initOpts, float f[2]){
\INSERT_CUDA_EVALUATOR
}
// Copies the raw bytes of this individual into slot number id of buffer.
// A slot is one genome plus the size of one pointer, and the copy starts at
// the address of the object itself.
inline void Individual::copyToCudaBuffer(void* buffer, size_t id){
  const size_t slotSize = \GENOME_SIZE+sizeof(Individual*);
  char* const destination = ((char*)buffer)+slotSize*id;
  memcpy(destination,(char*)this,slotSize);
}
// Copy constructor. The genome fields are copied by the code inserted through
// the template placeholder, then the validity flag and the scalar fitness
// are copied from the source individual.
// NOTE(review): the objective members f1 and f2 that the CUDA evaluation
// writes are not copied in the generic part below - confirm whether the
// inserted code takes care of them.
Individual::Individual(const Individual& genome){
// ********************
// Problem specific part
\COPY_CTOR
// ********************
// Generic part
this->valid = genome.valid;
this->fitness = genome.fitness;
}
// Crossover between this individual and ps[0]. Local copies parent1 (this),
// parent2 (ps[0]) and child1 (this) are created, then the user crossover
// code inserted by the template placeholder runs. The child is marked as
// not evaluated and a newly allocated copy of it is returned.
// NOTE(review): only ps[0] is read here; presumably the inserted code works
// on parent1, parent2 and child1 - confirm.
Individual* Individual::crossover(Individual** ps){
// ********************
// Generic part
Individual parent1(*this);
Individual parent2(*ps[0]);
Individual child1(*this);
//DEBUG_PRT("Xover");
/* cout << "p1 : " << parent1 << endl; */
/* cout << "p2 : " << parent2 << endl; */
// ********************
// Problem specific part
\INSERT_CROSSOVER
// The child has to be evaluated again after the crossover.
child1.valid = false;
/* cout << "child1 : " << child1 << endl; */
return new Individual(child1);
}
// Writes the genome to os. The body is the user display code inserted by the
// template placeholder; the stream insertion operator for Individual calls
// this method.
void Individual::printOn(std::ostream& os) const{
\INSERT_DISPLAY
}
// Stream insertion for an Individual: writes a header line, the genome (via
// printOn) and then either the fitness or a notice that the fitness has not
// been computed yet. Only the notice is followed by a line break.
std::ostream& operator << (std::ostream& O, const Individual& B)
{
  O << "\nIndividual : " << std::endl << "\t\t\t";
  B.printOn(O);

  if( !B.valid ){
    O << "fitness is not yet computed" << std::endl;
    return O;
  }

  O << "\t\t\tfitness : " << B.fitness;
  return O;
}
// Mutation. The individual is first marked as not evaluated, then the user
// mutator inserted by the template placeholder runs.
// NOTE(review): the function is declared to return size_t but nothing is
// returned after the placeholder, so the inserted mutator presumably returns
// a value itself - confirm.
size_t Individual::mutate( float pMutationPerGene ){
this->valid=false;
// ********************
// Problem specific part
\INSERT_MUTATOR
}
size_t Individual::sizeOfGenome=\GENOME_SIZE;
/* ****************************************
EvolutionaryAlgorithm class
****************************************/
/**
   @DEPRECATED This constructor will be removed. It only exists for testing:
   it is too constrained, because both the selection and the replacement
   operator are always a MaxTournament built on the global random generator.
*/
EvolutionaryAlgorithm::EvolutionaryAlgorithm( size_t parentPopulationSize,
					      size_t offspringPopulationSize,
					      float selectionPressure, float replacementPressure,
					      float pCrossover, float pMutation,
					      float pMutationPerGene){
  RandomGenerator* const generator = globalRandomGenerator;
  SelectionOperator* const selector = new MaxTournament(generator);
  SelectionOperator* const replacer = new MaxTournament(generator);

  // Class-wide set-up comes first, the population is built afterwards.
  Individual::initRandomGenerator(generator);
  Population::initPopulation(selector,replacer,selectionPressure,replacementPressure);

  population = new Population(parentPopulationSize,offspringPopulationSize,
			      pCrossover,pMutation,pMutationPerGene,generator);

  currentGeneration = 0;
  reduceParents = 0;
  reduceOffsprings = 0;
}
// Full constructor: the caller supplies the selection and replacement
// operators. The global random generator is handed to Individual and to the
// new Population. outputfile and inputfile are copied to the heap when they
// are not empty; otherwise the matching member is set to NULL.
EvolutionaryAlgorithm::EvolutionaryAlgorithm( size_t parentPopulationSize,
					      size_t offspringPopulationSize,
					      float selectionPressure, float replacementPressure,
					      SelectionOperator* selectionOperator, SelectionOperator* replacementOperator,
					      float pCrossover, float pMutation,
					      float pMutationPerGene, string& outputfile, string& inputfile){
  RandomGenerator* const generator = globalRandomGenerator;

  // Class-wide set-up comes first, the population is built afterwards.
  Individual::initRandomGenerator(generator);
  Population::initPopulation(selectionOperator,replacementOperator,selectionPressure,replacementPressure);

  population = new Population(parentPopulationSize,offspringPopulationSize,
			      pCrossover,pMutation,pMutationPerGene,generator);

  currentGeneration = 0;
  reduceParents = 0;
  reduceOffsprings = 0;

  // The parameters hide the members of the same name, hence the explicit this.
  this->outputfile = NULL;
  if( outputfile.length() )
    this->outputfile = new string(outputfile);

  this->inputfile = NULL;
  if( inputfile.length() )
    this->inputfile = new std::string(inputfile);
}
// Evaluation kernel: distributes the individuals across the threads, one
// individual per thread.
//   d_population : device buffer holding the individuals
//   popSize      : number of individuals stored in d_population
//   d_fitnesses  : device array receiving two floats per individual
//   initOpts     : options forwarded unchanged to cudaEvaluate
// Launch layout: only the x dimension of the grid and of the blocks is used.
// Any launch whose total number of threads is at least popSize is valid;
// surplus threads return immediately, in whichever block they are.
__global__ void
cudaEvaluatePopulation(void* d_population, size_t popSize, float* d_fitnesses, struct gpuOptions initOpts){

  // Index of the individual handled by this thread. The product is computed
  // in size_t so that it cannot wrap around in 32-bit arithmetic.
  const size_t id = ((size_t)blockDim.x*blockIdx.x)+threadIdx.x;

  // Bounds guard for every block, not only the last one: no thread may touch
  // memory past the end of the population, whatever the launch configuration.
  if( id >= popSize ) return;

  // Each individual owns two consecutive floats of d_fitnesses.
  cudaEvaluate(d_population,id,initOpts,d_fitnesses+2*id);
}
#define NB_MP 16
// Rounds E up to the next whole number ("partie entiere superieure"): E
// itself when it already is a whole number, the whole part plus one
// otherwise.
inline size_t
partieEntiereSup(float E){
  const int wholePart = floor(E);
  if( wholePart != E )
    return floor(E)+1;
  return E;
}
// Returns the smallest power of two that is not below n ("puissance de deux
// superieure"); the result is never smaller than 2.
inline int
puissanceDeuxSup(float n){
  int power = 2;
  for( ; power<n ; power+=power ){
    // keep doubling until n is reached
  }
  return power;
}
// Splits popSize individuals into blocks of threads.
//
//   popSize      : number of individuals to distribute
//   nbBlock      : out, number of full blocks
//   nbThreadPB   : out, number of threads per full block
//   nbThreadLB   : out, number of individuals left for one extra last block
//                  (0 when the full blocks cover the whole population)
//   nbMP         : number of multiprocessors to spread the blocks over
//   maxBlockSize : largest number of threads accepted in one block
//
// Returns true when a valid split was found. A valid split satisfies
//   nbBlock*nbThreadPB + nbThreadLB == popSize,
//   nbThreadPB <= maxBlockSize and nbThreadLB < nbThreadPB.
// The last condition matters because the caller launches the extra block with
// nbThreadPB threads as well, so a bigger remainder would leave individuals
// without a thread. Returns false (outputs set to 0) when popSize, nbMP or
// maxBlockSize is 0 or when no valid split exists.
bool
repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThreadLB,
	    size_t nbMP, size_t maxBlockSize){

  (*nbBlock) = 0;
  (*nbThreadPB) = 0;
  (*nbThreadLB) = 0;

  // size_t is printed through an explicit unsigned long, %d does not match it.
  DEBUG_PRT("repartition : %lu",(unsigned long)popSize);

  // Nothing sensible can be computed from these, and they would lead to
  // divisions by zero below.
  if( popSize==0 || nbMP==0 || maxBlockSize==0 )
    return false;

  if( ((float)popSize / (float)nbMP) <= maxBlockSize ){
    // The population spread over the multiprocessors fits in one block per
    // multiprocessor.
    (*nbThreadPB) = partieEntiereSup( (float)popSize/(float)nbMP);
  }
  else{
    // The population is too large for one block per multiprocessor: aim for
    // a power-of-two number of blocks per multiprocessor. The nbMP parameter
    // is used here as well, so that both branches agree on the device size.
    const size_t wantedBlocks =
      (size_t)puissanceDeuxSup( (float)popSize/((float)maxBlockSize*(float)nbMP)) * nbMP;
    (*nbThreadPB) = popSize/wantedBlocks;
  }

  if( (*nbThreadPB) == 0 ){
    (*nbThreadPB) = 0;
    return false;
  }

  // Derive the number of full blocks and the remainder from the block size.
  // The remainder is always computed and is always smaller than nbThreadPB,
  // so one extra block of nbThreadPB threads is enough to cover it.
  (*nbBlock) = popSize/(*nbThreadPB);
  (*nbThreadLB) = popSize%(*nbThreadPB);

  if( (*nbThreadPB) > maxBlockSize ){
    (*nbBlock) = 0;
    (*nbThreadPB) = 0;
    (*nbThreadLB) = 0;
    return false;
  }
  return true;
}
/**
   Allocates the device buffers needed to evaluate populationSize individuals
   and computes the launch configuration.

   @param populationSize        number of individuals the buffers must hold
   @param dimBlock              out, x is set to the number of threads per block
   @param dimGrid               out, x is set to the number of blocks
   @param allocatedDeviceBuffer out, device buffer for the individuals
   @param deviceFitness         out, device array of NB_OBJECTIVE floats per
                                individual

   The program is terminated with exit(-1) when one of the allocations fails
   or when no thread repartition can be computed; in both cases the reason is
   written to the standard error stream first.
*/
void EvolutionaryAlgorithm::cudaPreliminaryProcess(size_t populationSize, dim3* dimBlock, dim3* dimGrid, void** allocatedDeviceBuffer,
						   float** deviceFitness){

  // NOTE(review): hard-coded device description (number of multiprocessors
  // and largest block size handed to repartition) - presumably tuned for one
  // specific GPU, confirm before running on other hardware.
  const size_t nbMultiProcessors = 30;
  const size_t maxThreadsPerBlock = 240;

  size_t nbThreadPB, nbThreadLB, nbBlock;
  cudaError_t lastError;

  lastError = cudaMalloc(allocatedDeviceBuffer,populationSize*(\GENOME_SIZE+sizeof(Individual*)));
  DEBUG_PRT("Population buffer allocation : %s",cudaGetErrorString(lastError));
  if( lastError != cudaSuccess ){
    // Going on with an unallocated buffer would only fail later, and less
    // clearly, in the copies and in the kernel.
    cerr << "Population buffer allocation failed : " << cudaGetErrorString(lastError) << endl;
    exit( -1 );
  }

  lastError = cudaMalloc(((void**)deviceFitness),NB_OBJECTIVE*populationSize*sizeof(float));
  DEBUG_PRT("Fitness buffer allocation : %s",cudaGetErrorString(lastError));
  if( lastError != cudaSuccess ){
    cerr << "Fitness buffer allocation failed : " << cudaGetErrorString(lastError) << endl;
    exit( -1 );
  }

  if( !repartition(populationSize, &nbBlock, &nbThreadPB, &nbThreadLB,nbMultiProcessors, maxThreadsPerBlock)){
    cerr << "Unable to compute a thread repartition for " << populationSize << " individuals" << endl;
    exit( -1 );
  }

  DEBUG_PRT("repartition is \n\tnbBlock %lu \n\tnbThreadPB %lu \n\tnbThreadLD %lu",nbBlock,nbThreadPB,nbThreadLB);

  // One extra block takes the individuals left over by the full blocks.
  dimGrid->x = ( nbThreadLB!=0 ) ? (nbBlock+1) : nbBlock;
  dimBlock->x = nbThreadPB;

  // The first value printed is the number of blocks in the grid, the second
  // one the number of threads per block.
  cout << "Number of grid : " << dimGrid->x << endl;
  cout << "Number of block : " << dimBlock->x << endl;
}
// Evaluates the current offspring population on the GPU.
//   d_offspringPopulation : device buffer receiving the offspring
//   d_fitnesses           : device array of NB_OBJECTIVE floats per individual
//   dimBlock, dimGrid     : launch configuration of the evaluation kernel
// The host offspring buffer is copied to the device, the evaluation kernel is
// launched and the fitness values are copied back. The two objective values
// of each offspring are then stored in f1 and f2 and the offspring is marked
// valid. The status of every CUDA call is reported through DEBUG_PRT.
void EvolutionaryAlgorithm::cudaOffspringEvaluate(void* d_offspringPopulation, float* d_fitnesses, dim3 dimBlock, dim3 dimGrid){
  cudaError_t lastError;
  size_t actualPopulationSize = this->population->actualOffspringPopulationSize;

  // Host-side staging array for the fitness values; released at the end of
  // this function, which runs once per generation.
  float* fitnesses = new float[actualPopulationSize*NB_OBJECTIVE];

  lastError = cudaMemcpy(d_offspringPopulation,population->cudaOffspringBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize,
			 cudaMemcpyHostToDevice);
  DEBUG_PRT("Offspring population buffer copy : %s",cudaGetErrorString(lastError));

  cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(d_offspringPopulation,actualPopulationSize,d_fitnesses,initOpts);
  lastError = cudaGetLastError();
  DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError));

  // This blocking copy also waits for the kernel to finish.
  lastError = cudaMemcpy(fitnesses,d_fitnesses,actualPopulationSize*NB_OBJECTIVE*sizeof(float),cudaMemcpyDeviceToHost);
  DEBUG_PRT("Offspring's fitnesses gathering : %s",cudaGetErrorString(lastError));

  for( size_t i=0 ; i<actualPopulationSize ; i++ ){
    population->offsprings[i]->f1 = fitnesses[i*NB_OBJECTIVE];
    population->offsprings[i]->f2 = fitnesses[i*NB_OBJECTIVE+1];
    population->offsprings[i]->valid = true;
  }

  delete[] fitnesses;
}
/**
   Evaluates the parent population on the GPU. This evaluation occurs only
   once, which is why the device buffers are allocated and freed inside this
   function.

   The host parent buffer is copied to the device, the evaluation kernel is
   launched and waited for, and the fitness values are copied back. The two
   objective values of each parent are then stored in f1 and f2 and the
   parent is marked valid. The status of every CUDA call is reported through
   DEBUG_PRT.
*/
void EvolutionaryAlgorithm::cudaParentEvaluate(){
  void* allocatedDeviceBuffer;
  float* deviceFitness;
  cudaError_t lastError;
  dim3 dimBlock, dimGrid;
  size_t actualPopulationSize = this->population->actualParentPopulationSize;

  // Host-side staging array for the fitness values; released at the end of
  // this function.
  float* fitnesses = new float[actualPopulationSize*NB_OBJECTIVE];

  // Allocates both device buffers and computes the launch configuration.
  cudaPreliminaryProcess(actualPopulationSize,&dimBlock,&dimGrid,&allocatedDeviceBuffer,&deviceFitness);

  lastError = cudaMemcpy(allocatedDeviceBuffer,this->population->cudaParentBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize,
			 cudaMemcpyHostToDevice);
  DEBUG_PRT("Parent population buffer copy : %s",cudaGetErrorString(lastError));

  cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(allocatedDeviceBuffer,actualPopulationSize,deviceFitness,initOpts);
  lastError = cudaThreadSynchronize();
  DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError));

  lastError = cudaMemcpy(fitnesses,deviceFitness,actualPopulationSize*NB_OBJECTIVE*sizeof(float),cudaMemcpyDeviceToHost);
  DEBUG_PRT("Parent's fitnesses gathering : %s",cudaGetErrorString(lastError));

  cudaFree(deviceFitness);
  cudaFree(allocatedDeviceBuffer);

  for( size_t i=0 ; i<actualPopulationSize ; i++ ){
    population->parents[i]->f1 = fitnesses[i*NB_OBJECTIVE];
    population->parents[i]->f2 = fitnesses[i*NB_OBJECTIVE+1];
    population->parents[i]->valid = true;
  }

  delete[] fitnesses;
}
// Appends sc to the list of stopping criteria of this algorithm. Only the
// pointer is stored.
void EvolutionaryAlgorithm::addStoppingCriterion(StoppingCriterion* sc){
  stoppingCriteria.push_back(sc);
}
void EvolutionaryAlgorithm::runEvolutionaryLoop(){
std::vector<Individual*> tmpVect;
std::cout << "Parent's population initializing "<< std::endl;
this->population->initializeCudaParentPopulation();
cudaParentEvaluate();
population->evaluateMoPopulation();
std::cout << *population << std::endl;
DECLARE_TIME(eval);
struct timeval begin,accuEval;
gettimeofday(&begin,NULL);
accuEval.tv_sec = 0;
accuEval.tv_usec = 0;
void* d_offspringPopulation;
float* d_fitnesses;
dim3 dimBlock, dimGrid;
cudaPreliminaryProcess(this->population->offspringPopulationSize,&dimBlock,&dimGrid,&d_offspringPopulation,&d_fitnesses);
while( this->allCriteria() == false ){
population->produceOffspringPopulation();
\INSERT_BOUND_CHECKING_FCT_CALL
TIME_ST(eval);
for( size_t i=0 ; i<this->population->actualOffspringPopulationSize ; i++ )
this->population->offsprings[i]->copyToCudaBuffer(this->population->cudaOffspringBuffer,i);
cudaOffspringEvaluate(d_offspringPopulation,d_fitnesses,dimBlock,dimGrid);
TIME_END(eval);
population->evaluateMoPopulation();
COMPUTE_TIME(eval);
//SHOW_TIME(eval);
timeradd(&accuEval,&eval_res,&accuEval);
if(reduceParents)
population->reduceParentPopulation(reduceParents);