Commit 069c0d6a authored by maitre's avatar maitre
Browse files

CUDA evaluation, copy to/from gpu

parent 1cde3ed4
...@@ -29,7 +29,8 @@ Centre de Math ...@@ -29,7 +29,8 @@ Centre de Math
#define YYTEXT_SIZE 10000 #define YYTEXT_SIZE 10000
class CSymbol; class CSymbol;
extern CSymbol *pCURRENT_CLASS, *pCURRENT_TYPE, *pGENOME; extern CSymbol *pCURRENT_CLASS, *pCURRENT_TYPE, *pGENOME, *pCLASSES[128];
extern int nClasses_nb;
extern FILE *fpOutputFile, *fpTemplateFile, *fpGenomeFile, *fpExplodedGenomeFile; extern FILE *fpOutputFile, *fpTemplateFile, *fpGenomeFile, *fpExplodedGenomeFile;
extern char sRAW_PROJECT_NAME[], sPROJECT_NAME[], sEO_DIR[], sEZ_PATH[], sEZ_FILE_NAME[]; extern char sRAW_PROJECT_NAME[], sPROJECT_NAME[], sEO_DIR[], sEZ_PATH[], sEZ_FILE_NAME[];
......
...@@ -14,6 +14,9 @@ Centre de Math ...@@ -14,6 +14,9 @@ Centre de Math
#include <unistd.h> #include <unistd.h>
#include "debug.h" #include "debug.h"
size_t genomeSize;
bool genomeSizeValidity=false;
// local functions // local functions
char* selectorDetermination(int nMINIMISE, char* sSELECTOR){ char* selectorDetermination(int nMINIMISE, char* sSELECTOR){
...@@ -161,8 +164,23 @@ exponent ([Ee][+-]?[0-9]+) ...@@ -161,8 +164,23 @@ exponent ([Ee][+-]?[0-9]+)
CListItem<CSymbol*> *pSym; CListItem<CSymbol*> *pSym;
pGENOME->pSymbolList->reset(); pGENOME->pSymbolList->reset();
while (pSym=pGENOME->pSymbolList->walkToNextItem()) while (pSym=pGENOME->pSymbolList->walkToNextItem())
if ((pSym->Object->pType->ObjectType==oUserClass)&&(!pSym->Object->pType->bAlreadyPrinted)) if ((pSym->Object->pType->ObjectType==oUserClass)&&(!pSym->Object->pType->bAlreadyPrinted)){
DEBUG_PRT("%p",pSym->Object->pType);
pSym->Object->pType->printClasses(fpOutputFile); pSym->Object->pType->printClasses(fpOutputFile);
}
if( TARGET == CUDA ){
  // CUDA target only: walk every class recorded in pCLASSES (last declared
  // first) and print the ones the genome walk above did not already print.
  DEBUG_PRT("User classes are :");
  for( int i = nClasses_nb-1 ; i>=0 ; i-- ){
    DEBUG_PRT(" %s, %p ,%d| ",pCLASSES[i]->sName,pCLASSES[i],pCLASSES[i]->bAlreadyPrinted);
    if( !pCLASSES[i]->bAlreadyPrinted ){
      // The marker is a C++ line comment in the generated file: it must end
      // with a newline, otherwise whatever printClasses() writes first would
      // land on the same line and be commented out.
      fprintf(fpOutputFile,"// User class not referenced by the Genome\n");
      pCLASSES[i]->printClasses(fpOutputFile);
    }
  }
  DEBUG_PRT("\n");
}
} }
<TEMPLATE_ANALYSIS>"\\GENOME_CTOR" { <TEMPLATE_ANALYSIS>"\\GENOME_CTOR" {
CListItem<CSymbol*> *pSym; CListItem<CSymbol*> *pSym;
...@@ -180,24 +198,23 @@ exponent ([Ee][+-]?[0-9]+) ...@@ -180,24 +198,23 @@ exponent ([Ee][+-]?[0-9]+)
} }
<TEMPLATE_ANALYSIS>"\\GENOME_SIZE" {
  /* Replaces the \GENOME_SIZE template keyword by the genome size, i.e. the
     sum of the nSize fields of all symbols in pGENOME->pSymbolList.
     The sum is computed on the first expansion only and cached in the
     file-level genomeSize / genomeSizeValidity pair for later expansions. */
  size_t size_of_genome=0;
  if (bVERBOSE) printf ("Inserting default genome size.\n");
  if( !genomeSizeValidity ){
    if (bVERBOSE) printf ("\tComputing default genome size.\n");
    CListItem<CSymbol*> *pSym;
    pGENOME->pSymbolList->reset();
    while ((pSym=pGENOME->pSymbolList->walkToNextItem())){
      DEBUG_PRT("%s has size : %lu",pSym->Object->sName,(unsigned long)pSym->Object->nSize);
      size_of_genome+=pSym->Object->nSize;
    }
    DEBUG_PRT("Total genome size is %lu",(unsigned long)size_of_genome);
    /* Store the result before flagging the cache as valid: without this
       assignment every later expansion would print the initial value of
       genomeSize instead of the computed size. */
    genomeSize=size_of_genome;
    genomeSizeValidity=true;
  }
  else{
    size_of_genome = genomeSize;
  }
  /* size_t must not be printed with %d; cast and use %lu. */
  fprintf(fpOutputFile,"%lu",(unsigned long)size_of_genome);
}
<TEMPLATE_ANALYSIS>"\\COPY_CUDA_BUFFER" { <TEMPLATE_ANALYSIS>"\\COPY_CUDA_BUFFER" {
...@@ -1088,6 +1105,13 @@ exponent ([Ee][+-]?[0-9]+) ...@@ -1088,6 +1105,13 @@ exponent ([Ee][+-]?[0-9]+)
<COPY>"MINIMIZE"/[ \t\n]*"==" {if (bDoubleQuotes) fprintf(fpOutputFile,"MINIMIZE"); <COPY>"MINIMIZE"/[ \t\n]*"==" {if (bDoubleQuotes) fprintf(fpOutputFile,"MINIMIZE");
else fprintf(fpOutputFile,"EZ_MINIMIZE");} // local genome name else fprintf(fpOutputFile,"EZ_MINIMIZE");} // local genome name
<COPY>"__device__" |
<COPY>"__host__" {
  /* CUDA function qualifiers met while copying user code: echoed verbatim
     when the target is CUDA; for any other target this rule writes nothing. */
  if( TARGET==CUDA )
    fputs(yytext,fpOutputFile);
}
<COPY>"currentGeneration"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The current generation number cannot be changed (not an l-value).\n",sEZ_FILE_NAME,yylineno); exit(1);} <COPY>"currentGeneration"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The current generation number cannot be changed (not an l-value).\n",sEZ_FILE_NAME,yylineno); exit(1);}
<COPY>"NB_GEN"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The number of generations can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);} <COPY>"NB_GEN"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The number of generations can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);}
<COPY>"POP_SIZE"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The size of the population can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);} <COPY>"POP_SIZE"[ \t\n]*"=" {fprintf(stderr,"\n%s - Error line %d: The size of the population can only be changed within the generation function.\n",sEZ_FILE_NAME,yylineno); exit (1);}
...@@ -1224,6 +1248,8 @@ exponent ([Ee][+-]?[0-9]+) ...@@ -1224,6 +1248,8 @@ exponent ([Ee][+-]?[0-9]+)
//**************************************** //****************************************
<COPY_DISPLAY>"\\GenomeClass::display"[ \t\n]*":" { <COPY_DISPLAY>"\\GenomeClass::display"[ \t\n]*":" {
/* DEBUG_PRT("Display function is at %d line in %s.ez",yylineno,sRAW_PROJECT_NAME); */
/* fprintf(fpOutputFile,"\n#line %d \"%s.ez\"\n",yylineno,sRAW_PROJECT_NAME); */
bDisplayFunction=bWithinDisplayFunction=1; bDisplayFunction=bWithinDisplayFunction=1;
BEGIN COPY_USER_FUNCTION; BEGIN COPY_USER_FUNCTION;
} }
...@@ -1595,7 +1621,9 @@ exponent ([Ee][+-]?[0-9]+) ...@@ -1595,7 +1621,9 @@ exponent ([Ee][+-]?[0-9]+)
yyin = fpTemplateFile; yyin = fpTemplateFile;
BEGIN TEMPLATE_ANALYSIS; BEGIN TEMPLATE_ANALYSIS;
bWithinInitialiser=bWithinXover=bWithinMutator=bWithinEvaluator=0; bWithinInitialiser=bWithinXover=bWithinMutator=bWithinEvaluator=0;
if (bWithinDisplayFunction) bWithinDisplayFunction=0; // display function if (bWithinDisplayFunction){
bWithinDisplayFunction=0; // display function
}
else return END_OF_FUNCTION;} // Back to the template file else return END_OF_FUNCTION;} // Back to the template file
<COPY_USER_FUNCTION>.|\n {putc(yytext[0],fpOutputFile);} <COPY_USER_FUNCTION>.|\n {putc(yytext[0],fpOutputFile);}
......
...@@ -16,7 +16,9 @@ Centre de Math ...@@ -16,7 +16,9 @@ Centre de Math
CSymbol *pCURRENT_CLASS; CSymbol *pCURRENT_CLASS;
CSymbol *pCURRENT_TYPE; CSymbol *pCURRENT_TYPE;
CSymbol *pGENOME; CSymbol *pGENOME;
CSymbol* pCLASSES[128];
char sRAW_PROJECT_NAME[1000]; char sRAW_PROJECT_NAME[1000];
int nClasses_nb = 0;
char sPROJECT_NAME[1000]; char sPROJECT_NAME[1000];
char sLOWER_CASE_PROJECT_NAME[1000]; char sLOWER_CASE_PROJECT_NAME[1000];
char sEZ_FILE_NAME[1000]; char sEZ_FILE_NAME[1000];
...@@ -214,15 +216,15 @@ GenomeAnalysis ...@@ -214,15 +216,15 @@ GenomeAnalysis
ClassDeclarationsSection ClassDeclarationsSection
: CLASSES { : CLASSES {
if (bVERBOSE) printf("Declaration of user classes :\n\n");} if (bVERBOSE) printf("Declaration of user classes :\n\n");}
ClassDeclarations ClassDeclarations
| CLASSES { | CLASSES {
if (bVERBOSE) printf("No user class declaration found other than GenomeClass.\n");} if (bVERBOSE) printf("No user class declaration found other than GenomeClass.\n");}
; ;
ClassDeclarations ClassDeclarations
: ClassDeclaration : ClassDeclaration
| ClassDeclarations ClassDeclaration | ClassDeclarations ClassDeclaration
; ;
ClassDeclaration ClassDeclaration
...@@ -230,9 +232,12 @@ ClassDeclaration ...@@ -230,9 +232,12 @@ ClassDeclaration
pCURRENT_CLASS=SymbolTable.insert($1); pCURRENT_CLASS=SymbolTable.insert($1);
pCURRENT_CLASS->pSymbolList=new CLList<CSymbol *>(); pCURRENT_CLASS->pSymbolList=new CLList<CSymbol *>();
$1->ObjectType=oUserClass; $1->ObjectType=oUserClass;
DEBUG_PRT("Yacc Symbol declaration %s %d",$1->sName,$1->nSize);
pCLASSES[nClasses_nb++] = $1;
} }
'{' VariablesDeclarations '}' { '{' VariablesDeclarations '}' {
if (bVERBOSE) printf("Class %s declared for %d bytes.\n\n",$1->sName,$1->nSize); if (bVERBOSE) printf("Class %s declared for %d bytes.\n\n",$1->sName,$1->nSize);
DEBUG_PRT("Yacc variable declaration %s %d",$1->sName,$1->nSize);
} }
; ;
...@@ -378,6 +383,7 @@ BaseConstructorParameter ...@@ -378,6 +383,7 @@ BaseConstructorParameter
GenomeDeclarationSection GenomeDeclarationSection
: GENOME { : GENOME {
DEBUG_PRT("Yacc genome decl %s",$1.pSymbol->sName);
if (bVERBOSE) printf ("\nGenome declaration analysis :\n\n"); if (bVERBOSE) printf ("\nGenome declaration analysis :\n\n");
pGENOME=new CSymbol("Genome"); pGENOME=new CSymbol("Genome");
pCURRENT_CLASS=SymbolTable.insert(pGENOME); pCURRENT_CLASS=SymbolTable.insert(pGENOME);
...@@ -386,7 +392,7 @@ GenomeDeclarationSection ...@@ -386,7 +392,7 @@ GenomeDeclarationSection
pGENOME->ObjectQualifier=0; pGENOME->ObjectQualifier=0;
pGENOME->sString=NULL; pGENOME->sString=NULL;
} }
'{' VariablesDeclarations '}' {} '{' VariablesDeclarations '}' {}
; ;
//GenomeMethodsDeclaration //GenomeMethodsDeclaration
......
...@@ -92,6 +92,54 @@ void CSymbol::print(FILE *fp){ ...@@ -92,6 +92,54 @@ void CSymbol::print(FILE *fp){
fprintf(fp," %s %s[%d];\n",pSym->Object->pType->sName,pSym->Object->sName,pSym->Object->nSize/pSym->Object->pType->nSize); fprintf(fp," %s %s[%d];\n",pSym->Object->pType->sName,pSym->Object->sName,pSym->Object->nSize/pSym->Object->pType->nSize);
} }
if( TARGET==CUDA ){
  // Generate, inside the class being printed, two member functions that copy
  // an object between host memory and GPU memory:
  //   <Class>* cudaSendToGpu<Class>()            host -> device, returns the device pointer
  //   void     cudaGetFromGpu<Class>(<Class>*)   device -> host, into *this
  // A class is "flat" when none of its members is a pointer; a flat class is
  // transferred with a single cudaMemcpy, a non-flat one recurses into the
  // copy helpers of its pointer members.
  bool isFlatClass = true;
  pSymbolList->reset();
  while ((pSym=pSymbolList->walkToNextItem())){
    DEBUG_PRT("analyse flat %s",pSym->Object->pType->sName);
    if( pSym->Object->ObjectType == oPointer ){ //|| (pSym->Object->pType->ObjectType == oObject) ){
      isFlatClass = false;
      break;
    }
  }
  DEBUG_PRT("Does %s flat class : %s",sName,(isFlatClass?"yes":"no"));

  // ---- host -> device ----
  pSymbolList->reset();
  fprintf(fp," %s* cudaSendToGpu%s(){\n",sName,sName);
  fprintf(fp," %s* ret=NULL;\n",sName);
  if( isFlatClass ){
    fprintf(fp," cudaMalloc((void**)&ret,sizeof(%s));\n",sName);
    fprintf(fp," cudaMemcpy(ret,this,sizeof(%s),cudaMemcpyHostToDevice);\n",sName);
    fprintf(fp," return ret;\n");
  }
  else{
    // NOTE(review): the generated "tmp" is a real object, so its destructor
    // runs on the patched pointer members when the function returns --
    // confirm this is harmless for the generated destructors.
    fprintf(fp," %s tmp;\n",sName);
    fprintf(fp," memcpy(&tmp,this,sizeof(%s));\n",sName);
    while ((pSym=pSymbolList->walkToNextItem())){
      if( pSym->Object->ObjectType == oPointer ){ //|| (pSym->Object->pType->ObjectType == oObject) ){
        fprintf(fp," tmp.%s=this->%s->cudaSendToGpu%s();\n",pSym->Object->sName,pSym->Object->sName,pSym->Object->pType->sName);
      }
    }
    fprintf(fp," cudaMalloc((void**)&ret,sizeof(%s));\n",sName);
    fprintf(fp," cudaMemcpy(ret,&tmp,sizeof(%s),cudaMemcpyHostToDevice);\n",sName);
    fprintf(fp," return ret;\n");
  }
  fprintf(fp," }\n\n");

  // ---- device -> host ----
  // The function body and its closing brace are now emitted for every class
  // (previously a non-flat class produced an unterminated, empty function),
  // and the raw staging copy "ret" is released before returning.
  fprintf(fp," void cudaGetFromGpu%s(%s* dev_ptr){\n",sName,sName);
  fprintf(fp," %s* ret = (%s*)malloc(sizeof(%s));\n",sName,sName,sName);
  fprintf(fp," cudaMemcpy(ret,dev_ptr,sizeof(%s),cudaMemcpyDeviceToHost);\n",sName);
  pSymbolList->reset();
  while ((pSym=pSymbolList->walkToNextItem())){
    const char* sMember = pSym->Object->sName;
    if( pSym->Object->ObjectType == oPointer ){
      // In the staging copy a pointer member holds a device address:
      // mirror cudaSendToGpu and let the pointee fetch itself.
      fprintf(fp," if( this->%s && ret->%s ) this->%s->cudaGetFromGpu%s(ret->%s);\n",
              sMember,sMember,sMember,pSym->Object->pType->sName,sMember);
    }
    else if( pSym->Object->ObjectType == oArray ){
      // C++ arrays cannot be assigned; copy their bytes instead.
      fprintf(fp," memcpy(this->%s,ret->%s,sizeof(this->%s));\n",sMember,sMember,sMember);
    }
    else{
      fprintf(fp," this->%s=ret->%s;\n",sMember,sMember);
    }
  }
  fprintf(fp," free(ret);\n");
  fprintf(fp," }\n\n");
}
fprintf(fp," %s(){ // Constructor\n",sName); // constructor fprintf(fp," %s(){ // Constructor\n",sName); // constructor
pSymbolList->reset(); // in which we initialise all pointers to NULL pSymbolList->reset(); // in which we initialise all pointers to NULL
while (pSym=pSymbolList->walkToNextItem()) while (pSym=pSymbolList->walkToNextItem())
...@@ -342,7 +390,7 @@ void CSymbol::printUserClasses(FILE *fp){ ...@@ -342,7 +390,7 @@ void CSymbol::printUserClasses(FILE *fp){
if (bAlreadyPrinted) return; if (bAlreadyPrinted) return;
bAlreadyPrinted=true; bAlreadyPrinted=true;
while (pSym=pSymbolList->walkToNextItem()){ while (pSym=pSymbolList->walkToNextItem()){
if (pSym->Object->pType->ObjectType==oUserClass) if ((pSym->Object->pType->ObjectType==oUserClass))
pSym->Object->pType->printUC(fp); pSym->Object->pType->printUC(fp);
} }
} }
...@@ -377,7 +425,7 @@ void CSymbol::printAllSymbols(FILE *fp, char *sCompleteName, EObjectType FatherT ...@@ -377,7 +425,7 @@ void CSymbol::printAllSymbols(FILE *fp, char *sCompleteName, EObjectType FatherT
strcat(sNewCompleteName,"["); strcat(sNewCompleteName,"[");
sprintf(s,"%d",pSym->Object->nSize/pSym->Object->pType->nSize); sprintf(s,"%d",pSym->Object->nSize/pSym->Object->pType->nSize);
strcat(sNewCompleteName,s); strcat(sNewCompleteName,s);
strcat(sNewCompleteName,"]"); strcat(sNewCompleteName,"]");
} }
fprintf(fp,"%s\n",sNewCompleteName); fprintf(fp,"%s\n",sNewCompleteName);
strcpy(sNewCompleteName, sCompleteName); strcpy(sNewCompleteName, sCompleteName);
......
...@@ -31,6 +31,11 @@ float Rosenbrock(float *, int); ...@@ -31,6 +31,11 @@ float Rosenbrock(float *, int);
float Schwefel(float *, int); float Schwefel(float *, int);
float Weierstrass(float *, int); float Weierstrass(float *, int);
EvalCounter* d_counter;
struct gpuOptions{
EvalCounter* counter;
};
\end \end
\User functions: \User functions:
...@@ -165,38 +170,35 @@ float gauss() ...@@ -165,38 +170,35 @@ float gauss()
} }
\end \end
\Initialisation function: \Initialisation function:
/* if(argc>1 ){ */
/* if((!(strcmp(argv[1],"Sphere")))) Fitness = Sphere; */
/* if((!(strcmp(argv[1],"AckleyPath")))) Fitness = AckleyPath; */
/* if((!(strcmp(argv[1],"Easom")))) Fitness = Easom; */
/* if((!(strcmp(argv[1],"Griewangk")))) Fitness = Griewangk; */
/* if((!(strcmp(argv[1],"Rastrigin")))) Fitness = Rastrigin; */
/* if((!(strcmp(argv[1],"Rosenbrock")))) Fitness = Rosenbrock; */
/* if((!(strcmp(argv[1],"Schwefel")))) Fitness = Schwefel; */
/* if((!(strcmp(argv[1],"Weierstrass")))) Fitness = Weierstrass; */
/* if(argc>2) n = atoi(argv[2]); */
/* } */
/* else */
/* Fitness = Weierstrass; */
/* n=MIN(n, SIZE); */
std::cout<<"************* n: "<<n<<std::endl; std::cout<<"************* n: "<<n<<std::endl;
// pour l'impression dans le fichier de resultats EvalCounter counter;
/* float MinTheo = 0.; */ counter.a = 0;
// printf("%s_T_ad n= %s MinTheo= %f ",argv[1],argv[2],MinTheo); d_counter = counter.cudaSendToGpuEvalCounter();
initOpts.counter=d_counter;
\end \end
\Finalization function: \Finalization function:
cout << "finalization function called" << endl; cout << "finalization function called" << endl;
EvalCounter counter;
counter.cudaGetFromGpuEvalCounter(d_counter);
cout << counter << endl;
\end \end
\User classes : \User classes :
EvalCounter {
int a;
}
TestClass1 {
int a;
float b;
}
GenomeClass { GenomeClass {
float x[SIZE]; float x[SIZE];
float sigma[SIZE]; // auto-adaptative mutation parameter float sigma[SIZE]; // auto-adaptative mutation parameter
} }
\end \end
\GenomeClass::initialiser : // "initializer" is also accepted \GenomeClass::initialiser : // "initializer" is also accepted
...@@ -237,6 +239,7 @@ return NbMut; ...@@ -237,6 +239,7 @@ return NbMut;
float Point[SIZE]; float Point[SIZE];
for (int i=0; i<N_LIM; i++) Point[i] = Genome.x[i]; for (int i=0; i<N_LIM; i++) Point[i] = Genome.x[i];
Score= Weierstrass(Point, N_LIM); Score= Weierstrass(Point, N_LIM);
initOpts.counter->a+=2;
return Score; return Score;
\end \end
......
...@@ -73,6 +73,8 @@ int main(int argc, char** argv){ ...@@ -73,6 +73,8 @@ int main(int argc, char** argv){
extern RandomGenerator* globalRandomGenerator; extern RandomGenerator* globalRandomGenerator;
\INSERT_USER_DECLARATIONS \INSERT_USER_DECLARATIONS
struct gpuOptions initOpts;
\ANALYSE_USER_CLASSES \ANALYSE_USER_CLASSES
\INSERT_USER_FUNCTIONS \INSERT_USER_FUNCTIONS
...@@ -81,6 +83,9 @@ extern RandomGenerator* globalRandomGenerator; ...@@ -81,6 +83,9 @@ extern RandomGenerator* globalRandomGenerator;
\INSERT_FINALIZATION_FUNCTION \INSERT_FINALIZATION_FUNCTION
\INSERT_GENERATION_FUNCTION \INSERT_GENERATION_FUNCTION
void EASEAFinal(Population* pop){ void EASEAFinal(Population* pop){
\INSERT_FINALIZATION_FCT_CALL \INSERT_FINALIZATION_FCT_CALL
} }
...@@ -119,24 +124,25 @@ float Individual::evaluate(){ ...@@ -119,24 +124,25 @@ float Individual::evaluate(){
/** /**
This function allows to acces to the Individual stored in cudaBuffer as a standard This function allows to acces to the Individual stored in cudaBuffer as a standard
individual. individual.
@TODO This should be a macro, at this time it is a function for debuging purpose
*/ */
__device__ __host__ inline Individual* INDIVIDUAL_ACCESS(void* buffer,size_t id){ __device__ __host__ inline Individual* INDIVIDUAL_ACCESS(void* buffer,size_t id){
return ((Individual*)(((char*)buffer)+(\GENOME_SIZE+sizeof(void*))*id)); return ((Individual*)(((char*)buffer)+(\GENOME_SIZE+sizeof(void*))*id));
} }
__device__ float cudaEvaluate(void* devBuffer, size_t id){ __device__ float cudaEvaluate(void* devBuffer, size_t id, struct gpuOptions initOpts){
\INSERT_CUDA_EVALUATOR \INSERT_CUDA_EVALUATOR
} }
void Individual::copyToCudaBuffer(void* buffer, size_t id){ inline void Individual::copyToCudaBuffer(void* buffer, size_t id){
DEBUG_PRT("%p\n",(char*)this+sizeof(Individual*)); /* DEBUG_PRT("%p\n",(char*)this+sizeof(Individual*)); */
DEBUG_PRT("%p\n",&this->sigma); /* DEBUG_PRT("%p\n",&this->sigma); */
DEBUG_PRT("%lu\n",id); /* DEBUG_PRT("%lu\n",id); */
memcpy(((char*)buffer)+(\GENOME_SIZE+sizeof(Individual*))*id,((char*)this),\GENOME_SIZE+sizeof(Individual*)); memcpy(((char*)buffer)+(\GENOME_SIZE+sizeof(Individual*))*id,((char*)this),\GENOME_SIZE+sizeof(Individual*));
...@@ -276,16 +282,29 @@ EvolutionaryAlgorithm::EvolutionaryAlgorithm( size_t parentPopulationSize, ...@@ -276,16 +282,29 @@ EvolutionaryAlgorithm::EvolutionaryAlgorithm( size_t parentPopulationSize,
// do the repartition of data accross threads // do the repartition of data accross threads
/**
   Kernel: one thread evaluates one individual.
   The individual handled by a thread is blockDim.x*blockIdx.x+threadIdx.x
   (1D grid of 1D blocks); its fitness is written to d_fitnesses at the same
   index. d_population is passed unchanged to cudaEvaluate, which locates the
   individual from the buffer base and the id.
*/
__global__ void
cudaEvaluatePopulation(void* d_population, size_t popSize, float* d_fitnesses, struct gpuOptions initOpts){

  size_t id = (blockDim.x*blockIdx.x)+threadIdx.x; // id of the individual computed by this thread

  // Threads past the end of the population have nothing to do. The check is
  // applied in every block, not only the last one, so that a launch
  // configuration with spare threads can never write outside d_fitnesses.
  if( id >= popSize ) return;

  d_fitnesses[id] = cudaEvaluate(d_population,id,initOpts);
}
...@@ -315,6 +334,8 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea ...@@ -315,6 +334,8 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea
(*nbThreadLB) = 0; (*nbThreadLB) = 0;
DEBUG_PRT("repartition : %d",popSize);
if( ((float)popSize / (float)nbMP) <= maxBlockSize ){ if( ((float)popSize / (float)nbMP) <= maxBlockSize ){
//la population repartie sur les MP tient dans une bloc par MP //la population repartie sur les MP tient dans une bloc par MP
(*nbThreadPB) = partieEntiereSup( (float)popSize/(float)nbMP); (*nbThreadPB) = partieEntiereSup( (float)popSize/(float)nbMP);
...@@ -354,36 +375,119 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea ...@@ -354,36 +375,119 @@ repartition(size_t popSize, size_t* nbBlock, size_t* nbThreadPB, size_t* nbThrea
} }
/**
   Allocate the device buffers needed to evaluate populationSize individuals
   and compute the launch configuration.

   @param populationSize        number of individuals to evaluate
   @param dimBlock              out: x is set to the number of threads per block
   @param dimGrid               out: x is set to the number of blocks
   @param allocatedDeviceBuffer out: device buffer of
                                populationSize*(\GENOME_SIZE+sizeof(Individual*)) bytes
   @param deviceFitness         out: device array of populationSize floats

   The process exits with -1 if a device allocation fails or if repartition()
   cannot find a configuration.
*/
void EvolutionaryAlgorithm::cudaPreliminaryProcess(size_t populationSize, dim3* dimBlock, dim3* dimGrid, void** allocatedDeviceBuffer,
						   float** deviceFitness){

  size_t nbThreadPB, nbThreadLB, nbBlock;
  cudaError_t lastError;

  lastError = cudaMalloc(allocatedDeviceBuffer,populationSize*(\GENOME_SIZE+sizeof(Individual*)));
  DEBUG_PRT("Population buffer allocation : %s",cudaGetErrorString(lastError));
  if( lastError != cudaSuccess ){
    // Going on with an unallocated buffer would only fail later, in the
    // copies or inside the kernel, with a far less readable error.
    fprintf(stderr,"Population buffer allocation failed : %s\n",cudaGetErrorString(lastError));
    exit( -1 );
  }

  lastError = cudaMalloc(((void**)deviceFitness),populationSize*sizeof(float));
  DEBUG_PRT("Fitness buffer allocation : %s",cudaGetErrorString(lastError));
  if( lastError != cudaSuccess ){
    fprintf(stderr,"Fitness buffer allocation failed : %s\n",cudaGetErrorString(lastError));
    cudaFree(*allocatedDeviceBuffer);
    exit( -1 );
  }

  if( !repartition(populationSize, &nbBlock, &nbThreadPB, &nbThreadLB,16, 192))
    exit( -1 );

  DEBUG_PRT("repartition is \n\tnbBlock %lu \n\tnbThreadPB %lu \n\tnbThreadLD %lu",nbBlock,nbThreadPB,nbThreadLB);

  // A non-zero nbThreadLB means one extra block is launched on top of nbBlock.
  if( nbThreadLB!=0 )
    dimGrid->x = (nbBlock+1);
  else
    dimGrid->x = (nbBlock);

  dimBlock->x = nbThreadPB;

#ifdef DEBUG
  cout << "Number of grid : " << dimGrid->x << endl;
  cout << "Number of block : " << dimBlock->x << endl;
#endif
}
/**
   Evaluate the offspring population on the GPU.

   Copies population->cudaOffspringBuffer to d_offspringPopulation, launches
   cudaEvaluatePopulation with the given configuration, copies the fitnesses
   back from d_fitnesses and stores them in the offspring (or, when
   COMPARE_HOST_DEVICE is defined, prints the relative difference with the
   host-side evaluation instead).

   @param d_offspringPopulation device buffer receiving the offspring
   @param d_fitnesses           device array receiving one float per offspring
   @param dimBlock, dimGrid     kernel launch configuration
*/
void EvolutionaryAlgorithm::cudaOffspringEvaluate(void* d_offspringPopulation, float* d_fitnesses, dim3 dimBlock, dim3 dimGrid){
  cudaError_t lastError;
  size_t actualPopulationSize = this->population->actualOffspringPopulationSize;
  float* fitnesses = new float[actualPopulationSize];

  lastError = cudaMemcpy(d_offspringPopulation,population->cudaOffspringBuffer,(\GENOME_SIZE+sizeof(Individual*))*actualPopulationSize,
			 cudaMemcpyHostToDevice);
  DEBUG_PRT("Parent population buffer copy : %s",cudaGetErrorString(lastError));

  cudaEvaluatePopulation<<< dimGrid, dimBlock>>>(d_offspringPopulation,actualPopulationSize,d_fitnesses,initOpts);
  lastError = cudaGetLastError();
  DEBUG_PRT("Kernel execution : %s",cudaGetErrorString(lastError));

  lastError = cudaMemcpy(fitnesses,d_fitnesses,actualPopulationSize*sizeof(float),cudaMemcpyDeviceToHost);
  DEBUG_PRT("Offspring's fitnesses gathering : %s",cudaGetErrorString(lastError));

#ifdef COMPARE_HOST_DEVICE
  population->evaluateOffspringPopulation();
#endif

  for( size_t i=0 ; i<actualPopulationSize ; i++ ){
#ifdef COMPARE_HOST_DEVICE
    printf("Difference for individual %lu is : %f\n",i,(population->offsprings[i]->getFitness()-fitnesses[i])/population->offsprings[i]->getFitness());
#else
    printf("%lu : %f\n",i,fitnesses[i]);
    population->offsprings[i]->fitness = fitnesses[i];
    population->offsprings[i]->valid = true;
#endif
  }

  // The host-side staging array is only needed inside this call; release it
  // so that it does not leak on every evaluation.
  delete[] fitnesses;
}
/**
Evaluate parent population on the GPU. This is special because this evaluation occures
only one time. Buffers are allocated and freed here.
*/
void EvolutionaryAlgorithm::cudaParentEvaluate(){
float* fitnesses = new float[this->population->actualParentPopulationSize];
void* allocatedDeviceBuffer; void* allocatedDeviceBuffer;
float* deviceFitness; float* deviceFitness;
cudaError_t lastError; cudaError_t lastError;
lastError = cudaMalloc(&allocatedDeviceBuffer,actualPopulationSize*(\GENOME_SIZE+sizeof(Individual*))); dim3 dimBlock, dimGrid;
DEBUG_PRT("%s",cudaGetErrorString(lastError)); size_t actualPopulationSize = this->population->actualParentPopulationSize;