diff --git a/Easea.h b/Easea.h index 74591725473668c715daf2869d1742bd0d3ce330..08d66811b1faa452c8b59ca81a9e730d3d7ecf9e 100644 --- a/Easea.h +++ b/Easea.h @@ -29,7 +29,8 @@ Centre de Math #define YYTEXT_SIZE 10000 class CSymbol; -extern CSymbol *pCURRENT_CLASS, *pCURRENT_TYPE, *pGENOME; +extern CSymbol *pCURRENT_CLASS, *pCURRENT_TYPE, *pGENOME, *pCLASSES[128]; +extern int nClasses_nb; extern FILE *fpOutputFile, *fpTemplateFile, *fpGenomeFile, *fpExplodedGenomeFile; extern char sRAW_PROJECT_NAME[], sPROJECT_NAME[], sEO_DIR[], sEZ_PATH[], sEZ_FILE_NAME[]; diff --git a/EaseaLex.l b/EaseaLex.l index 5482ce3a34058a8edbcee4c3fa5763b5ce0dd795..a3001811320f8c4ebaee016c0f166bfd005590ce 100644 --- a/EaseaLex.l +++ b/EaseaLex.l @@ -14,6 +14,9 @@ Centre de Math #include #include "debug.h" + size_t genomeSize; + bool genomeSizeValidity=false; + // local functions char* selectorDetermination(int nMINIMISE, char* sSELECTOR){ @@ -161,8 +164,23 @@ exponent ([Ee][+-]?[0-9]+) CListItem *pSym; pGENOME->pSymbolList->reset(); while (pSym=pGENOME->pSymbolList->walkToNextItem()) - if ((pSym->Object->pType->ObjectType==oUserClass)&&(!pSym->Object->pType->bAlreadyPrinted)) + if ((pSym->Object->pType->ObjectType==oUserClass)&&(!pSym->Object->pType->bAlreadyPrinted)){ + DEBUG_PRT("%p",pSym->Object->pType); pSym->Object->pType->printClasses(fpOutputFile); + } + + + if( TARGET == CUDA ){ + DEBUG_PRT("User classes are :"); + for( int i = nClasses_nb-1 ; i>=0 ; i-- ){ + DEBUG_PRT(" %s, %p ,%d| ",pCLASSES[i]->sName,pCLASSES[i],pCLASSES[i]->bAlreadyPrinted); + if( !pCLASSES[i]->bAlreadyPrinted ){ + fprintf(fpOutputFile,"// User class not refereced by the Genome"); + pCLASSES[i]->printClasses(fpOutputFile); + } + } + DEBUG_PRT("\n"); + } } "\\GENOME_CTOR" { CListItem *pSym; @@ -180,24 +198,23 @@ exponent ([Ee][+-]?[0-9]+) } "\\GENOME_SIZE" { - - CListItem *pSym; - if (bVERBOSE) printf ("Inserting default genome size calculator.\n"); - pGENOME->pSymbolList->reset(); size_t size_of_genome=0; - while (pSym=pGENOME->pSymbolList->walkToNextItem()){ -/* if (pSym->Object->ObjectQualifier==1) continue; // 1=Static */ -/* if ((pSym->Object->ObjectType==oArray)&&(TARGET==DREAM)) */ -/* fprintf(fpOutputFile," %s = new %s[%d];\n",pSym->Object->sName,pSym->Object->pType->sName,pSym->Object->nSize/pSym->Object->pType->nSize); */ -/* if (pSym->Object->ObjectType==oPointer){ */ -/* if (TARGET==DREAM) fprintf(fpOutputFile," %s=null;\n",pSym->Object->sName); */ -/* else fprintf(fpOutputFile," %s=NULL;\n",pSym->Object->sName); */ -/* } */ - DEBUG_PRT("%s has size : %lu",pSym->Object->sName,pSym->Object->nSize); - size_of_genome+=pSym->Object->nSize; + if (bVERBOSE) printf ("Inserting default genome size.\n"); + if( !genomeSizeValidity ){ + if (bVERBOSE) printf ("\tComputing default genome size.\n"); + CListItem *pSym; + pGENOME->pSymbolList->reset(); + while (pSym=pGENOME->pSymbolList->walkToNextItem()){ + DEBUG_PRT("%s has size : %lu",pSym->Object->sName,pSym->Object->nSize); + size_of_genome+=pSym->Object->nSize; + } + DEBUG_PRT("Total genome size is %lu",size_of_genome); + genomeSizeValidity=true; + } + else{ + size_of_genome = genomeSize; } fprintf(fpOutputFile,"%d",size_of_genome); - DEBUG_PRT("Total genome size is %lu",size_of_genome); } "\\COPY_CUDA_BUFFER" { @@ -1088,6 +1105,13 @@ exponent ([Ee][+-]?[0-9]+) "MINIMIZE"/[ \t\n]*"==" {if (bDoubleQuotes) fprintf(fpOutputFile,"MINIMIZE"); else fprintf(fpOutputFile,"EZ_MINIMIZE");} // local genome name +"__device__" | +"__host__" { + if( TARGET==CUDA ){ + fprintf(fpOutputFile,"%s",yytext); + } + } + "currentGeneration"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The current generation number cannot be changed (not an l-value).\n",sEZ_FILE_NAME,yylineno); exit(1);} "NB_GEN"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The number of generations can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);} "POP_SIZE"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The size of the population can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);} @@ -1224,6 +1248,8 @@ exponent ([Ee][+-]?[0-9]+) //**************************************** "\\GenomeClass::display"[ \t\n]*":" { + /* DEBUG_PRT("Display function is at %d line in %s.ez",yylineno,sRAW_PROJECT_NAME); */ + /* fprintf(fpOutputFile,"\n#line %d \"%s.ez\"\n",yylineno,sRAW_PROJECT_NAME); */ bDisplayFunction=bWithinDisplayFunction=1; BEGIN COPY_USER_FUNCTION; } @@ -1595,7 +1621,9 @@ exponent ([Ee][+-]?[0-9]+) yyin = fpTemplateFile; BEGIN TEMPLATE_ANALYSIS; bWithinInitialiser=bWithinXover=bWithinMutator=bWithinEvaluator=0; - if (bWithinDisplayFunction) bWithinDisplayFunction=0; // display function + if (bWithinDisplayFunction){ + bWithinDisplayFunction=0; // display function + } else return END_OF_FUNCTION;} // Back to the template file .|\n {putc(yytext[0],fpOutputFile);} diff --git a/EaseaParse.y b/EaseaParse.y index 2efee4075f8cab37b5b0e80e1ce5b8150576dd23..ffb8b352f83a23b50c2f18b330b675ff022bfdae 100644 --- a/EaseaParse.y +++ b/EaseaParse.y @@ -16,7 +16,9 @@ Centre de Math CSymbol *pCURRENT_CLASS; CSymbol *pCURRENT_TYPE; CSymbol *pGENOME; +CSymbol* pCLASSES[128]; char sRAW_PROJECT_NAME[1000]; +int nClasses_nb = 0; char sPROJECT_NAME[1000]; char sLOWER_CASE_PROJECT_NAME[1000]; char sEZ_FILE_NAME[1000]; @@ -214,15 +216,15 @@ GenomeAnalysis ClassDeclarationsSection : CLASSES { - if (bVERBOSE) printf("Declaration of user classes :\n\n");} + if (bVERBOSE) printf("Declaration of user classes :\n\n");} ClassDeclarations | CLASSES { if (bVERBOSE) printf("No user class declaration found other than GenomeClass.\n");} ; ClassDeclarations - : ClassDeclaration - | ClassDeclarations ClassDeclaration +: ClassDeclaration +| ClassDeclarations ClassDeclaration ; ClassDeclaration @@ -230,9 +232,12 @@ ClassDeclaration pCURRENT_CLASS=SymbolTable.insert($1); pCURRENT_CLASS->pSymbolList=new CLList(); $1->ObjectType=oUserClass; + DEBUG_PRT("Yacc Symbol declaration %s %d",$1->sName,$1->nSize); + pCLASSES[nClasses_nb++] = $1; } '{' VariablesDeclarations '}' { if (bVERBOSE) printf("Class %s declared for %d bytes.\n\n",$1->sName,$1->nSize); + DEBUG_PRT("Yacc variable declaration %s %d",$1->sName,$1->nSize); } ; @@ -378,6 +383,7 @@ BaseConstructorParameter GenomeDeclarationSection : GENOME { + DEBUG_PRT("Yacc genome decl %s",$1.pSymbol->sName); if (bVERBOSE) printf ("\nGenome declaration analysis :\n\n"); pGENOME=new CSymbol("Genome"); pCURRENT_CLASS=SymbolTable.insert(pGENOME); @@ -386,7 +392,7 @@ GenomeDeclarationSection pGENOME->ObjectQualifier=0; pGENOME->sString=NULL; } - '{' VariablesDeclarations '}' {} +'{' VariablesDeclarations '}' {} ; //GenomeMethodsDeclaration diff --git a/EaseaSym.cpp b/EaseaSym.cpp index ce9eca3be60899e18d6d3a93c0d5b00daeb5d69d..9d4cdf225a358ba65211cdad5a53b1d6a3751771 100644 --- a/EaseaSym.cpp +++ b/EaseaSym.cpp @@ -92,6 +92,54 @@ void CSymbol::print(FILE *fp){ fprintf(fp," %s %s[%d];\n",pSym->Object->pType->sName,pSym->Object->sName,pSym->Object->nSize/pSym->Object->pType->nSize); } + + if( TARGET==CUDA ){ // here we we are generating function to copy objects from host memory to gpu's. + bool isFlatClass = true; + pSymbolList->reset(); + while (pSym=pSymbolList->walkToNextItem()){ + DEBUG_PRT("analyse flat %s",pSym->Object->pType->sName); + if( (pSym->Object->ObjectType == oPointer) ){ //|| (pSym->Object->pType->ObjectType == oObject) ){ + isFlatClass = false; + break; + } + } + + + DEBUG_PRT("Does %s flat class : %s",sName,(isFlatClass?"yes":"no")); + pSymbolList->reset(); + fprintf(fp," %s* cudaSendToGpu%s(){\n",sName,sName); + fprintf(fp," %s* ret=NULL;\n",sName); + if( isFlatClass ){ + fprintf(fp," cudaMalloc((void**)&ret,sizeof(%s));\n",sName); + fprintf(fp," cudaMemcpy(ret,this,sizeof(%s),cudaMemcpyHostToDevice);\n",sName); + fprintf(fp," return ret;\n"); + } + else{ + fprintf(fp," %s tmp;\n",sName); + fprintf(fp," memcpy(&tmp,this,sizeof(%s));\n",sName); + while (pSym=pSymbolList->walkToNextItem()){ + if( (pSym->Object->ObjectType == oPointer) ){ //|| (pSym->Object->pType->ObjectType == oObject) ){ + fprintf(fp," tmp.%s=this->%s->cudaSendToGpu%s();\n",pSym->Object->sName,pSym->Object->sName,pSym->Object->pType->sName); + } + } + fprintf(fp," cudaMalloc((void**)&ret,sizeof(%s));\n",sName); + fprintf(fp," cudaMemcpy(ret,&tmp,sizeof(%s),cudaMemcpyHostToDevice);\n",sName); + fprintf(fp," return ret;\n"); + } + fprintf(fp," }\n\n"); + + + fprintf(fp," void cudaGetFromGpu%s(%s* dev_ptr){\n",sName,sName); + fprintf(fp," %s* ret=NULL;\n",sName); + if( isFlatClass ){ + fprintf(fp," ret = (%s*)malloc(sizeof(%s));\n",sName,sName); + fprintf(fp," cudaMemcpy(ret,dev_ptr,sizeof(%s),cudaMemcpyDeviceToHost);\n",sName); + while (pSym=pSymbolList->walkToNextItem()) + fprintf(fp," this->%s=ret->%s;\n",pSym->Object->sName,pSym->Object->sName); + fprintf(fp," }\n\n"); + } + } + fprintf(fp," %s(){ // Constructor\n",sName); // constructor pSymbolList->reset(); // in which we initialise all pointers to NULL while (pSym=pSymbolList->walkToNextItem()) @@ -342,7 +390,7 @@ void CSymbol::printUserClasses(FILE *fp){ if (bAlreadyPrinted) return; bAlreadyPrinted=true; while (pSym=pSymbolList->walkToNextItem()){ - if (pSym->Object->pType->ObjectType==oUserClass) + if ((pSym->Object->pType->ObjectType==oUserClass)) pSym->Object->pType->printUC(fp); } } @@ -377,7 +425,7 @@ void CSymbol::printAllSymbols(FILE *fp, char *sCompleteName, EObjectType FatherT strcat(sNewCompleteName,"["); sprintf(s,"%d",pSym->Object->nSize/pSym->Object->pType->nSize); strcat(sNewCompleteName,s); - strcat(sNewCompleteName,"]"); + strcat(sNewCompleteName,"]"); } fprintf(fp,"%s\n",sNewCompleteName); strcpy(sNewCompleteName, sCompleteName); diff --git a/exemples/easea_stable_tests/weierstrass_original/Generic_T_ad.ez b/exemples/easea_stable_tests/weierstrass_original/Generic_T_ad.ez index d82d48f569e36b80e54306b21ee6e13af73d2682..7cce6996f00b2c452456ad1ea2d88c2372690028 100644 --- a/exemples/easea_stable_tests/weierstrass_original/Generic_T_ad.ez +++ b/exemples/easea_stable_tests/weierstrass_original/Generic_T_ad.ez @@ -31,6 +31,11 @@ float Rosenbrock(float *, int); float Schwefel(float *, int); float Weierstrass(float *, int); +EvalCounter* d_counter; +struct gpuOptions{ + EvalCounter* counter; +}; + \end \User functions: @@ -165,38 +170,35 @@ float gauss() } \end \Initialisation function: -/* if(argc>1 ){ */ -/* if((!(strcmp(argv[1],"Sphere")))) Fitness = Sphere; */ -/* if((!(strcmp(argv[1],"AckleyPath")))) Fitness = AckleyPath; */ -/* if((!(strcmp(argv[1],"Easom")))) Fitness = Easom; */ -/* if((!(strcmp(argv[1],"Griewangk")))) Fitness = Griewangk; */ -/* if((!(strcmp(argv[1],"Rastrigin")))) Fitness = Rastrigin; */ -/* if((!(strcmp(argv[1],"Rosenbrock")))) Fitness = Rosenbrock; */ -/* if((!(strcmp(argv[1],"Schwefel")))) Fitness = Schwefel; */ -/* if((!(strcmp(argv[1],"Weierstrass")))) Fitness = Weierstrass; */ - -/* if(argc>2) n = atoi(argv[2]); */ -/* } */ -/* else */ -/* Fitness = Weierstrass; */ - -/* n=MIN(n, SIZE); */ std::cout<<"************* n: "<a+=2; return Score; \end diff --git a/tpl/CUDA.tpl b/tpl/CUDA.tpl index d9db4691618e0f230e1aa5b6f441d9d3e2dfac18..38e033605296fb7569f509538e191002daf3bd57 100644 --- a/tpl/CUDA.tpl +++ b/tpl/CUDA.tpl @@ -73,6 +73,8 @@ int main(int argc, char** argv){ extern RandomGenerator* globalRandomGenerator; \INSERT_USER_DECLARATIONS +struct gpuOptions initOpts; + \ANALYSE_USER_CLASSES \INSERT_USER_FUNCTIONS @@ -81,6 +83,9 @@ extern RandomGenerator* globalRandomGenerator; \INSERT_FINALIZATION_FUNCTION \INSERT_GENERATION_FUNCTION + + + void EASEAFinal(Population* pop){ \INSERT_FINALIZATION_FCT_CALL } @@ -119,24 +124,25 @@ float Individual::evaluate(){ /** This function allows to acces to the Individual stored in cudaBuffer as a standard individual. + @TODO This should be a macro, at this time it is a function for debuging purpose */ __device__ __host__ inline Individual* INDIVIDUAL_ACCESS(void* buffer,size_t id){ return ((Individual*)(((char*)buffer)+(\GENOME_SIZE+sizeof(void*))*id)); } -__device__ float cudaEvaluate(void* devBuffer, size_t id){ +__device__ float cudaEvaluate(void* devBuffer, size_t id, struct gpuOptions initOpts){ \INSERT_CUDA_EVALUATOR } -void Individual::copyToCudaBuffer(void* buffer, size_t id){ +inline void Individual::copyToCudaBuffer(void* buffer, size_t id){ - DEBUG_PRT("%p\n",(char*)this+sizeof(Individual*)); - DEBUG_PRT("%p\n",&this->sigma); - DEBUG_PRT("%lu\n",id); +/* DEBUG_PRT("%p\n",(char*)this+sizeof(Individual*)); */ +/* DEBUG_PRT("%p\n",&this->sigma); */ +/* DEBUG_PRT("%lu\n",id); */ memcpy(((char*)buffer)+(\GENOME_SIZE+sizeof(Individual*))*id,((char*)this),\GENOME_SIZE+sizeof(Individual*)); @@ -276,16 +282,29 @@ EvolutionaryAlgorithm::EvolutionaryAlgorithm( size_t parentPopulationSize, // do the repartition of data accross threads __global__ void -cudaEvaluatePopulation(void* d_population, size_t popSize, float* d_fitnesses){ +cudaEvaluatePopulation(void* d_population, size_t popSize, float* d_fitnesses, struct gpuOptions initOpts){ size_t id = (blockDim.x*blockIdx.x)+threadIdx.x; // id of the individual computed by this thread // escaping for the last block if(blockIdx.x == (gridDim.x-1)) if( id >= popSize ) return; - void* indiv = ((char*)d_population)+id*(\GENOME_SIZE+sizeof(Individual*)); // compute the offset of the current individual + //void* indiv = ((char*)d_population)+id*(\GENOME_SIZE+sizeof(Individual*)); // compute the offset of the current individual - d_fitnesses[id] = cudaEvaluate(indiv,id); + d_fitnesses[id] = cudaEvaluate(d_population,id,initOpts); + + +/* if( blockIdx.x == 0){ */ +/* for( size_t i = 0 ; ix[j]); */ +/* printf("\n\t"); */ +/* for( size_t j=0 ; j<10 ; j++ ) */ +/* printf("%f | ",INDIVIDUAL_ACCESS(d_population,i)->sigma[j]); */ +/* printf("\n"); */ +/* } */ +/* } */ } @@ -315,6 +334,8 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea (*nbThreadLB) = 0; + DEBUG_PRT("repartition : %d",popSize); + if( ((float)popSize / (float)nbMP) <= maxBlockSize ){ //la population repartie sur les MP tient dans une bloc par MP (*nbThreadPB) = partieEntiereSup( (float)popSize/(float)nbMP); @@ -354,36 +375,119 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea } +/** + Allocate buffer for populationSize individuals and fitnesses + compute the repartition + */ +void EvolutionaryAlgorithm::cudaPreliminaryProcess(size_t populationSize, dim3* dimBlock, dim3* dimGrid, void** allocatedDeviceBuffer, + float** deviceFitness){ + + size_t nbThreadPB, nbThreadLB, nbBlock; + cudaError_t lastError; + + lastError = cudaMalloc(allocatedDeviceBuffer,populationSize*(\GENOME_SIZE+sizeof(Individual*))); + DEBUG_PRT("Population buffer allocation : %s",cudaGetErrorString(lastError)); + lastError = cudaMalloc(((void**)deviceFitness),populationSize*sizeof(float)); + DEBUG_PRT("Fitness buffer allocation : %s",cudaGetErrorString(lastError)); + + if( !repartition(populationSize, &nbBlock, &nbThreadPB, &nbThreadLB,16, 192)) + exit( -1 ); + + DEBUG_PRT("repartition is \n\tnbBlock %lu \n\tnbThreadPB %lu \n\tnbThreadLD %lu",nbBlock,nbThreadPB,nbThreadLB); + + if( nbThreadLB!=0 ) + dimGrid->x = (nbBlock+1); + else + dimGrid->x = (nbBlock); + + dimBlock->x = nbThreadPB; + + +#ifdef DEBUG + cout << "Number of grid : " << dimGrid->x << endl; + cout << "Number of block : " << dimBlock->x << endl; +#endif + +} + -void EvolutionaryAlgorithm::cudaEvaluate(void* buffer, size_t actualPopulationSize){ +void EvolutionaryAlgorithm::cudaOffspringEvaluate(void* d_offspringPopulation, float* d_fitnesses, dim3 dimBlock, dim3 dimGrid){ + cudaError_t lastError; + size_t actualPopulationSize = this->population->actualOffspringPopulationSize; float* fitnesses = new float[actualPopulationSize]; + + + lastError = cudaMemcpy(d_offspringPopulation,population->cudaOffspringBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize, + cudaMemcpyHostToDevice); + DEBUG_PRT("Parent population buffer copy : %s",cudaGetErrorString(lastError)); + + cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(d_offspringPopulation,actualPopulationSize,d_fitnesses,initOpts); + lastError = cudaGetLastError(); + DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError)); + + lastError = cudaMemcpy(fitnesses,d_fitnesses,actualPopulationSize*sizeof(float),cudaMemcpyDeviceToHost); + DEBUG_PRT("Offspring's fitnesses gathering : %s",cudaGetErrorString(lastError)); + + +#ifdef COMPARE_HOST_DEVICE + population->evaluateOffspringPopulation(); +#endif + + for( size_t i=0 ; ioffsprings[i]->getFitness()-fitnesses[i])/population->offsprings[i]->getFitness()); +#else + printf("%lu : %f\n",i,fitnesses[i]); + population->offsprings[i]->fitness = fitnesses[i]; + population->offsprings[i]->valid = true; +#endif + } + +} + +/** + Evaluate parent population on the GPU. This is special because this evaluation occures + only one time. Buffers are allocated and freed here. + */ +void EvolutionaryAlgorithm::cudaParentEvaluate(){ + float* fitnesses = new float[this->population->actualParentPopulationSize]; void* allocatedDeviceBuffer; float* deviceFitness; cudaError_t lastError; - lastError = cudaMalloc(&allocatedDeviceBuffer,actualPopulationSize*(\GENOME_SIZE+sizeof(Individual*))); - DEBUG_PRT("%s",cudaGetErrorString(lastError)); - lastError = cudaMalloc(((void**)&deviceFitness),actualPopulationSize*sizeof(float)); - DEBUG_PRT("%s",cudaGetErrorString(lastError)); - - size_t nbBlock,nbThreadLB,nbThreadPB; + dim3 dimBlock, dimGrid; + size_t actualPopulationSize = this->population->actualParentPopulationSize; + + cudaPreliminaryProcess(actualPopulationSize,&dimBlock,&dimGrid,&allocatedDeviceBuffer,&deviceFitness); //compute the repartition over MP and SP - repartition(actualPopulationSize, &nbBlock, &nbThreadPB, &nbThreadLB,16, 192); - dim3 dimBlock(nbThreadPB); - - - dim3 dimGrid; - if( nbThreadLB ) - dimGrid.x = (nbBlock+1); - else - dimGrid.x = (nbBlock); + lastError = cudaMemcpy(allocatedDeviceBuffer,this->population->cudaParentBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize, + cudaMemcpyHostToDevice); + DEBUG_PRT("Parent population buffer copy : %s",cudaGetErrorString(lastError)); - cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(allocatedDeviceBuffer,actualPopulationSize,deviceFitness); + cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(allocatedDeviceBuffer,actualPopulationSize,deviceFitness,initOpts); lastError = cudaGetLastError(); - DEBUG_PRT("%s",cudaGetErrorString(lastError)); - - + DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError)); + + lastError = cudaMemcpy(fitnesses,deviceFitness,actualPopulationSize*sizeof(float),cudaMemcpyDeviceToHost); + DEBUG_PRT("Parent's fitnesses gathering : %s",cudaGetErrorString(lastError)); + + cudaFree(deviceFitness); + cudaFree(allocatedDeviceBuffer); + +#ifdef COMPARE_HOST_DEVICE + population->evaluateParentPopulation(); +#endif + + for( size_t i=0 ; iparents[i]->getFitness()-fitnesses[i])/population->parents[i]->getFitness()); +#else + printf("%lu : %f\n",i,fitnesses[i]); + population->parents[i]->fitness = fitnesses[i]; + population->parents[i]->valid = true; +#endif + } } void EvolutionaryAlgorithm::addStoppingCriterion(StoppingCriterion* sc){ @@ -406,8 +510,8 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){ /* } */ std::cout << "Parent's population initializing "<< std::endl; - this->population->initializeCudaParentPopulation(); - cudaEvaluate(population->cudaParentBuffer,population->actualParentPopulationSize); + this->population->initializeCudaParentPopulation(); + cudaParentEvaluate(); std::cout << *population << std::endl; @@ -421,13 +525,22 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){ accuEval.tv_usec = 0; + void* d_offspringPopulation; + float* d_fitnesses; + dim3 dimBlock, dimGrid; + + cudaPreliminaryProcess(this->population->offspringPopulationSize,&dimBlock,&dimGrid,&d_offspringPopulation,&d_fitnesses); while( this->allCriteria() == false ){ population->produceOffspringPopulation(); + TIME_ST(eval); - population->evaluateOffspringPopulation(); + for( size_t i=0 ; ipopulation->actualOffspringPopulationSize ; i++ ) + this->population->offsprings[i]->copyToCudaBuffer(this->population->cudaOffspringBuffer,i); + + cudaOffspringEvaluate(d_offspringPopulation,d_fitnesses,dimBlock,dimGrid); TIME_END(eval); SHOW_TIME(eval); @@ -462,6 +575,8 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){ /* } */ + cudaFree(d_offspringPopulation); + cudaFree(d_fitnesses); } @@ -528,6 +643,7 @@ using namespace std; #define __INDIVIDUAL #include "EASEATools.hpp" #include +#include /* #include */ /* #include */ @@ -598,7 +714,11 @@ public: bool allCriteria(); Population* getPopulation(){ return population;} size_t getCurrentGeneration() { return currentGeneration;} - void cudaEvaluate(void* buffer, size_t actualPopulationSize); + void cudaParentEvaluate(); + void cudaOffspringEvaluate(void* d_offspringPopulation, float* fitnesses, dim3 dimBlock, dim3 dimGrid); + void cudaPreliminaryProcess(size_t populationSize, dim3* dimBlock, dim3* dimGrid, void** allocatedDeviceBuffer, + float** deviceFitness); + public: size_t currentGeneration; @@ -1109,12 +1229,12 @@ void Population::produceOffspringPopulation(){ selectionOperator->initialize(parents,selectionPressure,actualParentPopulationSize); for( size_t i=0 ; iselectNext(offspringPopulationSize); + size_t index = selectionOperator->selectNext(parentPopulationSize); p1 = parents[index]; if( rg->tossCoin(pCrossover) ){ for( size_t j=0 ; jselectNext(offspringPopulationSize); + index = selectionOperator->selectNext(parentPopulationSize); ps[j] = parents[index]; } child = p1->crossover(ps); @@ -1728,6 +1848,7 @@ LDFLAGS=-lboost_program_options -lboost_serialization \INSERT_MAKEFILE_OPTION#END OF USER MAKEFILE OPTIONS CPPFLAGS+=-DDEBUG +NVFLAGS+=-DCOMPARE_HOST_DEVICE --device-emulation EASEA_SRC= EASEATools.cpp EASEAIndividual.cpp EASEA_MAIN_HDR= EASEA.cpp