Commit 069c0d6a authored by maitre's avatar maitre

CUDA evaluation, copy to/from gpu

parent 1cde3ed4
......@@ -29,7 +29,8 @@ Centre de Math
#define YYTEXT_SIZE 10000
class CSymbol;
extern CSymbol *pCURRENT_CLASS, *pCURRENT_TYPE, *pGENOME;
extern CSymbol *pCURRENT_CLASS, *pCURRENT_TYPE, *pGENOME, *pCLASSES[128];
extern int nClasses_nb;
extern FILE *fpOutputFile, *fpTemplateFile, *fpGenomeFile, *fpExplodedGenomeFile;
extern char sRAW_PROJECT_NAME[], sPROJECT_NAME[], sEO_DIR[], sEZ_PATH[], sEZ_FILE_NAME[];
......
......@@ -14,6 +14,9 @@ Centre de Math
#include <unistd.h>
#include "debug.h"
size_t genomeSize;
bool genomeSizeValidity=false;
// local functions
char* selectorDetermination(int nMINIMISE, char* sSELECTOR){
......@@ -161,9 +164,24 @@ exponent ([Ee][+-]?[0-9]+)
CListItem<CSymbol*> *pSym;
pGENOME->pSymbolList->reset();
while (pSym=pGENOME->pSymbolList->walkToNextItem())
if ((pSym->Object->pType->ObjectType==oUserClass)&&(!pSym->Object->pType->bAlreadyPrinted))
if ((pSym->Object->pType->ObjectType==oUserClass)&&(!pSym->Object->pType->bAlreadyPrinted)){
DEBUG_PRT("%p",pSym->Object->pType);
pSym->Object->pType->printClasses(fpOutputFile);
}
if( TARGET == CUDA ){
  /* For the CUDA target, also print every declared user class that was not
     already printed while walking the genome's symbol list above.
     pCLASSES is walked from the last registered class down to the first. */
  DEBUG_PRT("User classes are :");
  for( int i = nClasses_nb-1 ; i>=0 ; i-- ){
    DEBUG_PRT(" %s, %p ,%d| ",pCLASSES[i]->sName,(void*)pCLASSES[i],pCLASSES[i]->bAlreadyPrinted);
    if( !pCLASSES[i]->bAlreadyPrinted ){
      /* The marker is a C++ line comment in the generated file: it has to be
         newline-terminated, otherwise whatever printClasses() emits first
         ends up on the comment line. */
      fprintf(fpOutputFile,"// User class not referenced by the Genome\n");
      pCLASSES[i]->printClasses(fpOutputFile);
    }
  }
  DEBUG_PRT("\n");
}
}
<TEMPLATE_ANALYSIS>"\\GENOME_CTOR" {
CListItem<CSymbol*> *pSym;
if (bVERBOSE) printf ("Inserting default genome constructor.\n");
......@@ -180,24 +198,23 @@ exponent ([Ee][+-]?[0-9]+)
}
<TEMPLATE_ANALYSIS>"\\GENOME_SIZE" {
  /* Replaces the genome-size marker of the template by the size of the genome.
     The size is the sum of nSize over every symbol of pGENOME's symbol list.
     It is computed on the first expansion only, then cached in genomeSize
     (genomeSizeValidity tells whether the cache holds a value), so that every
     expansion of the marker prints the same number exactly once. */
  if (bVERBOSE) printf ("Inserting default genome size.\n");
  if( !genomeSizeValidity ){
    if (bVERBOSE) printf ("\tComputing default genome size.\n");
    CListItem<CSymbol*> *pSym;
    size_t size_of_genome=0;
    pGENOME->pSymbolList->reset();
    while ((pSym=pGENOME->pSymbolList->walkToNextItem())){
      DEBUG_PRT("%s has size : %lu",pSym->Object->sName,(unsigned long)pSym->Object->nSize);
      size_of_genome+=pSym->Object->nSize;
    }
    DEBUG_PRT("Total genome size is %lu",(unsigned long)size_of_genome);
    genomeSize=size_of_genome;   /* fill the cache read by later expansions */
    genomeSizeValidity=true;
  }
  /* size_t is printed through unsigned long: "%d" does not match its width. */
  fprintf(fpOutputFile,"%lu",(unsigned long)genomeSize);
}
<TEMPLATE_ANALYSIS>"\\COPY_CUDA_BUFFER" {
......@@ -1088,6 +1105,13 @@ exponent ([Ee][+-]?[0-9]+)
<COPY>"MINIMIZE"/[ \t\n]*"==" {if (bDoubleQuotes) fprintf(fpOutputFile,"MINIMIZE");
else fprintf(fpOutputFile,"EZ_MINIMIZE");} // local genome name
<COPY>"__device__" |
<COPY>"__host__" {
  /* The matched qualifier is written to the output file only when the
     target is CUDA; for any other target nothing is written. */
  const bool bTargetIsCuda = (TARGET==CUDA);
  if( bTargetIsCuda ) fputs(yytext,fpOutputFile);
}
<COPY>"currentGeneration"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The current generation number cannot be changed (not an l-value).\n",sEZ_FILE_NAME,yylineno); exit(1);}
<COPY>"NB_GEN"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The number of generations can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);}
<COPY>"POP_SIZE"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The size of the population can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);}
......@@ -1224,6 +1248,8 @@ exponent ([Ee][+-]?[0-9]+)
//****************************************
<COPY_DISPLAY>"\\GenomeClass::display"[ \t\n]*":" {
/* DEBUG_PRT("Display function is at %d line in %s.ez",yylineno,sRAW_PROJECT_NAME); */
/* fprintf(fpOutputFile,"\n#line %d \"%s.ez\"\n",yylineno,sRAW_PROJECT_NAME); */
bDisplayFunction=bWithinDisplayFunction=1;
BEGIN COPY_USER_FUNCTION;
}
......@@ -1595,7 +1621,9 @@ exponent ([Ee][+-]?[0-9]+)
yyin = fpTemplateFile;
BEGIN TEMPLATE_ANALYSIS;
bWithinInitialiser=bWithinXover=bWithinMutator=bWithinEvaluator=0;
if (bWithinDisplayFunction) bWithinDisplayFunction=0; // display function
if (bWithinDisplayFunction){
bWithinDisplayFunction=0; // display function
}
else return END_OF_FUNCTION;} // Back to the template file
<COPY_USER_FUNCTION>.|\n {putc(yytext[0],fpOutputFile);}
......
......@@ -16,7 +16,9 @@ Centre de Math
CSymbol *pCURRENT_CLASS;
CSymbol *pCURRENT_TYPE;
CSymbol *pGENOME;
CSymbol* pCLASSES[128];
char sRAW_PROJECT_NAME[1000];
int nClasses_nb = 0;
char sPROJECT_NAME[1000];
char sLOWER_CASE_PROJECT_NAME[1000];
char sEZ_FILE_NAME[1000];
......@@ -221,8 +223,8 @@ ClassDeclarationsSection
;
ClassDeclarations
: ClassDeclaration
| ClassDeclarations ClassDeclaration
: ClassDeclaration
| ClassDeclarations ClassDeclaration
;
ClassDeclaration
......@@ -230,9 +232,12 @@ ClassDeclaration
pCURRENT_CLASS=SymbolTable.insert($1);
pCURRENT_CLASS->pSymbolList=new CLList<CSymbol *>();
$1->ObjectType=oUserClass;
DEBUG_PRT("Yacc Symbol declaration %s %d",$1->sName,$1->nSize);
pCLASSES[nClasses_nb++] = $1;
}
'{' VariablesDeclarations '}' {
if (bVERBOSE) printf("Class %s declared for %d bytes.\n\n",$1->sName,$1->nSize);
DEBUG_PRT("Yacc variable declaration %s %d",$1->sName,$1->nSize);
}
;
......@@ -378,6 +383,7 @@ BaseConstructorParameter
GenomeDeclarationSection
: GENOME {
DEBUG_PRT("Yacc genome decl %s",$1.pSymbol->sName);
if (bVERBOSE) printf ("\nGenome declaration analysis :\n\n");
pGENOME=new CSymbol("Genome");
pCURRENT_CLASS=SymbolTable.insert(pGENOME);
......@@ -386,7 +392,7 @@ GenomeDeclarationSection
pGENOME->ObjectQualifier=0;
pGENOME->sString=NULL;
}
'{' VariablesDeclarations '}' {}
'{' VariablesDeclarations '}' {}
;
//GenomeMethodsDeclaration
......
......@@ -92,6 +92,54 @@ void CSymbol::print(FILE *fp){
fprintf(fp," %s %s[%d];\n",pSym->Object->pType->sName,pSym->Object->sName,pSym->Object->nSize/pSym->Object->pType->nSize);
}
if( TARGET==CUDA ){
  // Here we are generating, inside the printed class, the member functions that
  // copy an object from host memory to the gpu's memory and back:
  //   <Class>* cudaSendToGpu<Class>()            host -> device
  //   void     cudaGetFromGpu<Class>(<Class>*)   device -> host
  //
  // A class is "flat" when none of its members has ObjectType oPointer; a flat
  // object is transferred with a single cudaMemcpy.
  bool isFlatClass = true;
  pSymbolList->reset();
  while ((pSym=pSymbolList->walkToNextItem())){
    DEBUG_PRT("analyse flat %s",pSym->Object->pType->sName);
    if( pSym->Object->ObjectType == oPointer ){ //|| (pSym->Object->pType->ObjectType == oObject) ){
      isFlatClass = false;
      break;
    }
  }
  DEBUG_PRT("Does %s flat class : %s",sName,(isFlatClass?"yes":"no"));

  // ---- host -> device --------------------------------------------------
  pSymbolList->reset();
  fprintf(fp," %s* cudaSendToGpu%s(){\n",sName,sName);
  fprintf(fp," %s* ret=NULL;\n",sName);
  if( isFlatClass ){
    fprintf(fp," cudaMalloc((void**)&ret,sizeof(%s));\n",sName);
    fprintf(fp," cudaMemcpy(ret,this,sizeof(%s),cudaMemcpyHostToDevice);\n",sName);
    fprintf(fp," return ret;\n");
  }
  else{
    // Non-flat class: work on a host-side copy whose pointer members are
    // replaced by the device addresses returned by the pointees' own
    // cudaSendToGpu function, then upload that patched copy.
    fprintf(fp," %s tmp;\n",sName);
    fprintf(fp," memcpy(&tmp,this,sizeof(%s));\n",sName);
    while ((pSym=pSymbolList->walkToNextItem())){
      if( pSym->Object->ObjectType == oPointer ){ //|| (pSym->Object->pType->ObjectType == oObject) ){
        fprintf(fp," tmp.%s=this->%s->cudaSendToGpu%s();\n",pSym->Object->sName,pSym->Object->sName,pSym->Object->pType->sName);
      }
    }
    fprintf(fp," cudaMalloc((void**)&ret,sizeof(%s));\n",sName);
    fprintf(fp," cudaMemcpy(ret,&tmp,sizeof(%s),cudaMemcpyHostToDevice);\n",sName);
    fprintf(fp," return ret;\n");
  }
  fprintf(fp," }\n\n");

  // ---- device -> host --------------------------------------------------
  fprintf(fp," void cudaGetFromGpu%s(%s* dev_ptr){\n",sName,sName);
  fprintf(fp," %s* ret=NULL;\n",sName);
  if( isFlatClass ){
    fprintf(fp," ret = (%s*)malloc(sizeof(%s));\n",sName,sName);
    fprintf(fp," cudaMemcpy(ret,dev_ptr,sizeof(%s),cudaMemcpyDeviceToHost);\n",sName);
    pSymbolList->reset();   // make sure every member is walked, whatever happened above
    while ((pSym=pSymbolList->walkToNextItem()))
      fprintf(fp," this->%s=ret->%s;\n",pSym->Object->sName,pSym->Object->sName);
    // the staging buffer is only needed for the member-wise copy above
    fprintf(fp," free(ret);\n");
  }
  else{
    // No copy-back code is generated for classes holding pointers; the
    // generated function is left empty.
    fprintf(fp," // copy back from the gpu is not generated for classes holding pointers\n");
  }
  // The generated function has to be closed for flat and non-flat classes
  // alike, otherwise the generated class does not compile.
  fprintf(fp," }\n\n");
}
fprintf(fp," %s(){ // Constructor\n",sName); // constructor
pSymbolList->reset(); // in which we initialise all pointers to NULL
while (pSym=pSymbolList->walkToNextItem())
......@@ -342,7 +390,7 @@ void CSymbol::printUserClasses(FILE *fp){
if (bAlreadyPrinted) return;
bAlreadyPrinted=true;
while (pSym=pSymbolList->walkToNextItem()){
if (pSym->Object->pType->ObjectType==oUserClass)
if ((pSym->Object->pType->ObjectType==oUserClass))
pSym->Object->pType->printUC(fp);
}
}
......
......@@ -31,6 +31,11 @@ float Rosenbrock(float *, int);
float Schwefel(float *, int);
float Weierstrass(float *, int);
EvalCounter* d_counter;
struct gpuOptions{
EvalCounter* counter;
};
\end
\User functions:
......@@ -165,38 +170,35 @@ float gauss()
}
\end
\Initialisation function:
/* if(argc>1 ){ */
/* if((!(strcmp(argv[1],"Sphere")))) Fitness = Sphere; */
/* if((!(strcmp(argv[1],"AckleyPath")))) Fitness = AckleyPath; */
/* if((!(strcmp(argv[1],"Easom")))) Fitness = Easom; */
/* if((!(strcmp(argv[1],"Griewangk")))) Fitness = Griewangk; */
/* if((!(strcmp(argv[1],"Rastrigin")))) Fitness = Rastrigin; */
/* if((!(strcmp(argv[1],"Rosenbrock")))) Fitness = Rosenbrock; */
/* if((!(strcmp(argv[1],"Schwefel")))) Fitness = Schwefel; */
/* if((!(strcmp(argv[1],"Weierstrass")))) Fitness = Weierstrass; */
/* if(argc>2) n = atoi(argv[2]); */
/* } */
/* else */
/* Fitness = Weierstrass; */
/* n=MIN(n, SIZE); */
std::cout<<"************* n: "<<n<<std::endl;
// pour l'impression dans le fichier de resultats
/* float MinTheo = 0.; */
// printf("%s_T_ad n= %s MinTheo= %f ",argv[1],argv[2],MinTheo);
EvalCounter counter;
counter.a = 0;
d_counter = counter.cudaSendToGpuEvalCounter();
initOpts.counter=d_counter;
\end
\Finalization function:
cout << "finalization function called" << endl;
EvalCounter counter;
counter.cudaGetFromGpuEvalCounter(d_counter);
cout << counter << endl;
\end
\User classes :
EvalCounter {
int a;
}
TestClass1 {
int a;
float b;
}
GenomeClass {
float x[SIZE];
float sigma[SIZE]; // auto-adaptative mutation parameter
}
}
\end
\GenomeClass::initialiser : // "initializer" is also accepted
......@@ -237,6 +239,7 @@ return NbMut;
float Point[SIZE];
for (int i=0; i<N_LIM; i++) Point[i] = Genome.x[i];
Score= Weierstrass(Point, N_LIM);
initOpts.counter->a+=2;
return Score;
\end
......
......@@ -73,6 +73,8 @@ int main(int argc, char** argv){
extern RandomGenerator* globalRandomGenerator;
\INSERT_USER_DECLARATIONS
struct gpuOptions initOpts;
\ANALYSE_USER_CLASSES
\INSERT_USER_FUNCTIONS
......@@ -81,6 +83,9 @@ extern RandomGenerator* globalRandomGenerator;
\INSERT_FINALIZATION_FUNCTION
\INSERT_GENERATION_FUNCTION
void EASEAFinal(Population* pop){
\INSERT_FINALIZATION_FCT_CALL
}
......@@ -119,24 +124,25 @@ float Individual::evaluate(){
/**
   Gives access to an individual stored in a cuda buffer as if it were a
   standard Individual.

   The buffer is addressed as an array of fixed-size slots, each one being
   (genome size + size of a pointer) bytes wide; the returned pointer is the
   address of slot number id.

   @param buffer start of the population buffer
   @param id     index of the wanted individual in the buffer
   @return the address of the id-th slot, typed as an Individual*

   @TODO This should be a macro; for now it is a function, for debugging purposes.
*/
__device__ __host__ inline Individual* INDIVIDUAL_ACCESS(void* buffer,size_t id){
  char* const firstSlot = (char*)buffer;
  const size_t slotSize = \GENOME_SIZE+sizeof(void*);
  return (Individual*)(firstSlot+slotSize*id);
}
__device__ float cudaEvaluate(void* devBuffer, size_t id){
__device__ float cudaEvaluate(void* devBuffer, size_t id, struct gpuOptions initOpts){
\INSERT_CUDA_EVALUATOR
}
void Individual::copyToCudaBuffer(void* buffer, size_t id){
inline void Individual::copyToCudaBuffer(void* buffer, size_t id){
DEBUG_PRT("%p\n",(char*)this+sizeof(Individual*));
DEBUG_PRT("%p\n",&this->sigma);
DEBUG_PRT("%lu\n",id);
/* DEBUG_PRT("%p\n",(char*)this+sizeof(Individual*)); */
/* DEBUG_PRT("%p\n",&this->sigma); */
/* DEBUG_PRT("%lu\n",id); */
memcpy(((char*)buffer)+(\GENOME_SIZE+sizeof(Individual*))*id,((char*)this),\GENOME_SIZE+sizeof(Individual*));
......@@ -276,16 +282,29 @@ EvolutionaryAlgorithm::EvolutionaryAlgorithm( size_t parentPopulationSize,
/**
   Kernel distributing the evaluation of a population across threads:
   the thread whose global index is id evaluates individual number id.

   Launch layout: one-dimensional grid of one-dimensional blocks, with at least
   popSize threads in total. Threads whose index is not below popSize return
   immediately, in whatever block they are: the grid may be sized for more
   individuals than popSize.

   @param d_population population buffer, handed unchanged to cudaEvaluate
   @param popSize      number of individuals to evaluate
   @param d_fitnesses  receives the fitness of individual id at index id
   @param initOpts     user options, forwarded by value to cudaEvaluate
*/
__global__ void
cudaEvaluatePopulation(void* d_population, size_t popSize, float* d_fitnesses, struct gpuOptions initOpts){

  // index of the individual computed by this thread (widened before the product)
  size_t id = ((size_t)blockDim.x*blockIdx.x)+threadIdx.x;

  // escaping for every thread beyond the population, not only in the last block
  if( id >= popSize ) return;

  d_fitnesses[id] = cudaEvaluate(d_population,id,initOpts);

  /* if( blockIdx.x == 0){ */
  /* for( size_t i = 0 ; i<popSize ; i++ ){ */
  /* printf("%d : \n\t",i); */
  /* for( size_t j=0 ; j<10 ; j++ ) */
  /* printf("%f | ",INDIVIDUAL_ACCESS(d_population,i)->x[j]); */
  /* printf("\n\t"); */
  /* for( size_t j=0 ; j<10 ; j++ ) */
  /* printf("%f | ",INDIVIDUAL_ACCESS(d_population,i)->sigma[j]); */
  /* printf("\n"); */
  /* } */
  /* } */
}
......@@ -315,6 +334,8 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea
(*nbThreadLB) = 0;
DEBUG_PRT("repartition : %d",popSize);
if( ((float)popSize / (float)nbMP) <= maxBlockSize ){
// the population, once spread over the MPs, fits in one block per MP
(*nbThreadPB) = partieEntiereSup( (float)popSize/(float)nbMP);
......@@ -354,36 +375,119 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea
}
/**
   Allocates the device buffers needed to evaluate populationSize individuals
   (one population buffer and one array of populationSize floats for the
   fitnesses) and computes the launch configuration with repartition().

   The process is terminated with exit(-1) when an allocation fails or when
   repartition() reports a failure.

   @param populationSize        number of individuals the buffers must hold
   @param dimBlock              out: x is set to the number of threads per block
   @param dimGrid               out: x is set to the number of blocks (one more
                                block is added when repartition() reports a
                                non-zero thread count for the last block)
   @param allocatedDeviceBuffer out: device population buffer
   @param deviceFitness         out: device fitness array
*/
void EvolutionaryAlgorithm::cudaPreliminaryProcess(size_t populationSize, dim3* dimBlock, dim3* dimGrid, void** allocatedDeviceBuffer,
                                                   float** deviceFitness){

  size_t nbThreadPB, nbThreadLB, nbBlock;
  cudaError_t lastError;

  lastError = cudaMalloc(allocatedDeviceBuffer,populationSize*(\GENOME_SIZE+sizeof(Individual*)));
  DEBUG_PRT("Population buffer allocation : %s",cudaGetErrorString(lastError));
  if( lastError != cudaSuccess ){
    // going on with an unallocated buffer would only fail later, and less clearly
    fprintf(stderr,"Population buffer allocation : %s\n",cudaGetErrorString(lastError));
    exit( -1 );
  }

  lastError = cudaMalloc(((void**)deviceFitness),populationSize*sizeof(float));
  DEBUG_PRT("Fitness buffer allocation : %s",cudaGetErrorString(lastError));
  if( lastError != cudaSuccess ){
    fprintf(stderr,"Fitness buffer allocation : %s\n",cudaGetErrorString(lastError));
    exit( -1 );
  }

  // NOTE(review): 16 and 192 look like the multiprocessor count and the maximum
  // block size expected by repartition() - confirm against its signature.
  if( !repartition(populationSize, &nbBlock, &nbThreadPB, &nbThreadLB,16, 192))
    exit( -1 );

  DEBUG_PRT("repartition is \n\tnbBlock %lu \n\tnbThreadPB %lu \n\tnbThreadLD %lu",nbBlock,nbThreadPB,nbThreadLB);

  if( nbThreadLB!=0 )
    dimGrid->x = (nbBlock+1);
  else
    dimGrid->x = (nbBlock);

  dimBlock->x = nbThreadPB;

#ifdef DEBUG
  cout << "Number of grid : " << dimGrid->x << endl;
  cout << "Number of block : " << dimBlock->x << endl;
#endif
}
/**
   Evaluates the current offspring population on the GPU.

   The host-side offspring buffer (population->cudaOffspringBuffer) is copied
   into d_offspringPopulation, the evaluation kernel is launched, and the
   fitnesses are copied back to the host. Without COMPARE_HOST_DEVICE they are
   stored into the offsprings, which are marked valid; with COMPARE_HOST_DEVICE
   the offsprings are evaluated on the host instead and only the relative
   difference with the GPU value is printed.

   @param d_offspringPopulation device buffer receiving the offspring population
   @param d_fitnesses           device array receiving one fitness per offspring
   @param dimBlock              block dimensions used for the kernel launch
   @param dimGrid               grid dimensions used for the kernel launch
*/
void EvolutionaryAlgorithm::cudaOffspringEvaluate(void* d_offspringPopulation, float* d_fitnesses, dim3 dimBlock, dim3 dimGrid){
  cudaError_t lastError;
  size_t actualPopulationSize = this->population->actualOffspringPopulationSize;
  float* fitnesses = new float[actualPopulationSize];

  lastError = cudaMemcpy(d_offspringPopulation,population->cudaOffspringBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize,
                         cudaMemcpyHostToDevice);
  DEBUG_PRT("Offspring population buffer copy : %s",cudaGetErrorString(lastError));

  cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(d_offspringPopulation,actualPopulationSize,d_fitnesses,initOpts);
  lastError = cudaGetLastError();
  DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError));

  lastError = cudaMemcpy(fitnesses,d_fitnesses,actualPopulationSize*sizeof(float),cudaMemcpyDeviceToHost);
  DEBUG_PRT("Offspring's fitnesses gathering : %s",cudaGetErrorString(lastError));

#ifdef COMPARE_HOST_DEVICE
  population->evaluateOffspringPopulation();
#endif

  for( size_t i=0 ; i<actualPopulationSize ; i++ ){
#ifdef COMPARE_HOST_DEVICE
    printf("Difference for individual %lu is : %f\n",i,(population->offsprings[i]->getFitness()-fitnesses[i])/population->offsprings[i]->getFitness());
#else
    printf("%lu : %f\n",i,fitnesses[i]);
    population->offsprings[i]->fitness = fitnesses[i];
    population->offsprings[i]->valid = true;
#endif
  }

  // this function runs once per generation: the staging array must not pile up
  delete[] fitnesses;
}
/**
   Evaluates the parent population on the GPU. This is special because this
   evaluation occurs only once: the device buffers are allocated (through
   cudaPreliminaryProcess) and freed here.

   Without COMPARE_HOST_DEVICE the fitnesses computed on the GPU are stored
   into the parents, which are marked valid; with COMPARE_HOST_DEVICE the
   parents are evaluated on the host instead and only the relative difference
   with the GPU value is printed.
*/
void EvolutionaryAlgorithm::cudaParentEvaluate(){
  size_t actualPopulationSize = this->population->actualParentPopulationSize;
  float* fitnesses = new float[actualPopulationSize];
  void* allocatedDeviceBuffer = NULL;
  float* deviceFitness = NULL;
  cudaError_t lastError;
  dim3 dimBlock, dimGrid;

  // allocates both device buffers and computes the launch configuration
  cudaPreliminaryProcess(actualPopulationSize,&dimBlock,&dimGrid,&allocatedDeviceBuffer,&deviceFitness);

  lastError = cudaMemcpy(allocatedDeviceBuffer,this->population->cudaParentBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize,
                         cudaMemcpyHostToDevice);
  DEBUG_PRT("Parent population buffer copy : %s",cudaGetErrorString(lastError));

  cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(allocatedDeviceBuffer,actualPopulationSize,deviceFitness,initOpts);
  lastError = cudaGetLastError();
  DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError));

  lastError = cudaMemcpy(fitnesses,deviceFitness,actualPopulationSize*sizeof(float),cudaMemcpyDeviceToHost);
  DEBUG_PRT("Parent's fitnesses gathering : %s",cudaGetErrorString(lastError));

  cudaFree(deviceFitness);
  cudaFree(allocatedDeviceBuffer);

#ifdef COMPARE_HOST_DEVICE
  population->evaluateParentPopulation();
#endif

  for( size_t i=0 ; i<actualPopulationSize ; i++ ){
#ifdef COMPARE_HOST_DEVICE
    printf("Difference for individual %lu is : %f\n",i,(population->parents[i]->getFitness()-fitnesses[i])/population->parents[i]->getFitness());
#else
    printf("%lu : %f\n",i,fitnesses[i]);
    population->parents[i]->fitness = fitnesses[i];
    population->parents[i]->valid = true;
#endif
  }

  // host staging array, no longer needed once the fitnesses are stored
  delete[] fitnesses;
}
void EvolutionaryAlgorithm::addStoppingCriterion(StoppingCriterion* sc){
......@@ -407,7 +511,7 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){
std::cout << "Parent's population initializing "<< std::endl;
this->population->initializeCudaParentPopulation();
cudaEvaluate(population->cudaParentBuffer,population->actualParentPopulationSize);
cudaParentEvaluate();
std::cout << *population << std::endl;
......@@ -421,13 +525,22 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){
accuEval.tv_usec = 0;
void* d_offspringPopulation;
float* d_fitnesses;
dim3 dimBlock, dimGrid;
cudaPreliminaryProcess(this->population->offspringPopulationSize,&dimBlock,&dimGrid,&d_offspringPopulation,&d_fitnesses);
while( this->allCriteria() == false ){
population->produceOffspringPopulation();
TIME_ST(eval);
population->evaluateOffspringPopulation();
for( size_t i=0 ; i<this->population->actualOffspringPopulationSize ; i++ )
this->population->offsprings[i]->copyToCudaBuffer(this->population->cudaOffspringBuffer,i);
cudaOffspringEvaluate(d_offspringPopulation,d_fitnesses,dimBlock,dimGrid);
TIME_END(eval);
SHOW_TIME(eval);
......@@ -462,6 +575,8 @@ void EvolutionaryAlgorithm::runEvolutionaryLoop(){
/* } */
cudaFree(d_offspringPopulation);
cudaFree(d_fitnesses);
}
......@@ -528,6 +643,7 @@ using namespace std;
#define __INDIVIDUAL
#include "EASEATools.hpp"
#include <iostream>
#include <vector_types.h>
/* #include <boost/archive/text_oarchive.hpp> */
/* #include <boost/archive/text_iarchive.hpp> */
......@@ -598,7 +714,11 @@ public:
bool allCriteria();
Population* getPopulation(){ return population;}
size_t getCurrentGeneration() { return currentGeneration;}
void cudaEvaluate(void* buffer, size_t actualPopulationSize);
void cudaParentEvaluate();
void cudaOffspringEvaluate(void* d_offspringPopulation, float* fitnesses, dim3 dimBlock, dim3 dimGrid);
void cudaPreliminaryProcess(size_t populationSize, dim3* dimBlock, dim3* dimGrid, void** allocatedDeviceBuffer,
float** deviceFitness);
public:
size_t currentGeneration;
......@@ -1109,12 +1229,12 @@ void Population::produceOffspringPopulation(){
selectionOperator->initialize(parents,selectionPressure,actualParentPopulationSize);
for( size_t i=0 ; i<offspringPopulationSize ; i++ ){
size_t index = selectionOperator->selectNext(offspringPopulationSize);
size_t index = selectionOperator->selectNext(parentPopulationSize);
p1 = parents[index];
if( rg->tossCoin(pCrossover) ){
for( size_t j=0 ; j<crossoverArrity-1 ; j++ ){
index = selectionOperator->selectNext(offspringPopulationSize);
index = selectionOperator->selectNext(parentPopulationSize);
ps[j] = parents[index];
}
child = p1->crossover(ps);
......@@ -1728,6 +1848,7 @@ LDFLAGS=-lboost_program_options -lboost_serialization
\INSERT_MAKEFILE_OPTION#END OF USER MAKEFILE OPTIONS
CPPFLAGS+=-DDEBUG
NVFLAGS+=-DCOMPARE_HOST_DEVICE --device-emulation
EASEA_SRC= EASEATools.cpp EASEAIndividual.cpp
EASEA_MAIN_HDR= EASEA.cpp
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment