Coupure prévue mardi 3 août au matin pour maintenance du serveur. Nous faisons au mieux pour que celle-ci soit la plus brève possible.

Commit 7979266d authored by Ogier Maitre
Browse files

GP opname parenthesis

parent bef9aa94
This diff is collapsed.
File mode changed from 100755 to 100644
......@@ -314,8 +314,7 @@ exponent ([Ee][+-]?[0-9]+)
fprintf(fpOutputFile,"#define MAX_PROGS_SIZE %d\n",iPRG_BUF_SIZE);
fprintf(fpOutputFile,"#define NB_GPU %d\n",iNB_GPU);
//fprintf(fpOutputFile,"#define NO_FITNESS_CASES %d\n",iNO_FITNESS_CASES);
/*fprintf(fpOutputFile,"#define NO_FITNESS_CASES %d\n",iNO_FITNESS_CASES);*/
}
/*
......@@ -428,7 +427,7 @@ exponent ([Ee][+-]?[0-9]+)
<GP_RULE_ANALYSIS>"//".*"\n" {}
<GP_RULE_ANALYSIS>"\""([a-zA-Z0-9]|\*|\+|\-|\/|\%)*"\"" {
<GP_RULE_ANALYSIS>"\""([a-zA-Z0-9]|\(|\)|\*|\+|\-|\/|\%)*"\"" {
if( iGP_OPCODE_FIELD != 1 ){
fprintf(stderr,"Error, op code real name must be given at the second place\n");
exit(-1);
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -129,7 +129,7 @@ OP_X, "x", 0, {RESULT=INPUT[0];};
OP_ERC, "ERC", 0, {RESULT=ERC;};
OP_ADD, "+", 2, {RESULT=OP1+OP2;};
OP_SUB, "-", 2, {RESULT=OP1-OP2;};
OP_MUL, "*", 2, {RESULT=OP1*OP2;};
OP_MUL, "*()", 2, {RESULT=OP1*OP2;};
OP_DIV, "/", 2, {
if( !OP2 ) RESULT = 1;
else RESULT = OP1/OP2;
......
......@@ -135,11 +135,6 @@ GPNode* selectNode( GPNode* root, int* childId, int* depth){
/**
Recursive construction method for trees.
Koza construction methods. Function set has to be ordered,
......@@ -189,7 +184,6 @@ GPNode* construction_method( const int constLen, const int totalLen , const int
return node;
}
GPNode* RAMPED_H_H(unsigned INIT_TREE_DEPTH_MIN, unsigned INIT_TREE_DEPTH_MAX, unsigned actualParentPopulationSize, unsigned parentPopulationSize,
float GROW_FULL_RATIO, unsigned VAR_LEN, unsigned OPCODE_SIZE, const unsigned* opArity, const int OP_ERC){
/**
......@@ -208,10 +202,6 @@ GPNode* RAMPED_H_H(unsigned INIT_TREE_DEPTH_MIN, unsigned INIT_TREE_DEPTH_MAX, u
return construction_method( VAR_LEN+1, OPCODE_SIZE , 1, currentDepth ,full, opArity, OP_ERC);
}
void toString_r(std::ostringstream* oss, GPNode* root, const unsigned* opArity , const char** opCodeName, int OP_ERC) {
(*oss) << '(';
......
......@@ -97,7 +97,7 @@ int fstGpu = 0;
int lstGpu = 0;
struct my_struct_gpu* gpu_infos;
struct gpuEvaluationData* globalGpuData;
float* fitnessTemp;
bool freeGPU = false;
bool first_generation = true;
......@@ -113,7 +113,7 @@ PopulationImpl* Pop = NULL;
\INSERT_USER_FUNCTIONS
void cudaPreliminaryProcess(struct gpuEvaluationData* localGpuData, int populationSize){
void dispatchPopulation(int populationSize){
int noTotalMP = 0; // number of MP will be used to distribute the population
int count = 0;
......@@ -126,26 +126,30 @@ void cudaPreliminaryProcess(struct gpuEvaluationData* localGpuData, int populati
exit(-1);
}
localGpuData->num_MP = deviceProp.multiProcessorCount; //Two block on each MP
localGpuData->num_Warp = deviceProp.warpSize;
noTotalMP += localGpuData->num_MP;
localGpuData->gpuProp = deviceProp;
globalGpuData[index].num_MP = deviceProp.multiProcessorCount;
globalGpuData[index].num_Warp = deviceProp.warpSize;
noTotalMP += globalGpuData[index].num_MP;
globalGpuData[index].gpuProp = deviceProp;
}
for( int index = 0; index < num_gpus; index++){
localGpuData->indiv_start = count;
globalGpuData[index].indiv_start = count;
if(index != (num_gpus - 1))
localGpuData->sh_pop_size = ceil((float)populationSize * (((float)localGpuData->num_MP) / (float)noTotalMP) );
if(index != (num_gpus - 1)) {
globalGpuData[index].sh_pop_size = ceil((float)populationSize * (((float)globalGpuData[index].num_MP) / (float)noTotalMP) );
}
//On the last card we are going to place the remaining individuals.
else
localGpuData->sh_pop_size = populationSize - count;
globalGpuData[index].sh_pop_size = populationSize - count;
count += localGpuData->sh_pop_size;
count += globalGpuData[index].sh_pop_size;
}
}
void cudaPreliminaryProcess(struct gpuEvaluationData* localGpuData, int populationSize){
// here we will compute how to spread the population to evaluate on GPGPU cores
......@@ -203,8 +207,8 @@ __global__ void cudaEvaluatePopulation(void* d_population, unsigned popSize, flo
void* gpuThreadMain(void* arg){
cudaError_t lastError;
struct gpuEvaluationData* localArg = (struct gpuEvaluationData*)arg;
std::cout << " gpuId : " << localGpuData->gpuId << std::endl;
struct gpuEvaluationData* localGpuData = (struct gpuEvaluationData*)arg;
//std::cout << " gpuId : " << localGpuData->gpuId << std::endl;
lastError = cudaSetDevice(localGpuData->gpuId);
......@@ -241,7 +245,11 @@ void* gpuThreadMain(void* arg){
if(nbr_cudaPreliminaryProcess > 0) {
cudaPreliminaryProcess(localGpuData);
if( nbr_cudaPreliminaryProcess==2 )
cudaPreliminaryProcess(localGpuData,EA->population->parentPopulationSize);
else {
cudaPreliminaryProcess(localGpuData,EA->population->offspringPopulationSize);
}
nbr_cudaPreliminaryProcess--;
if( localGpuData->dimBlock*localGpuData->dimGrid!=localGpuData->sh_pop_size ){
......@@ -253,15 +261,16 @@ void* gpuThreadMain(void* arg){
}
// transfer data to GPU memory
lastError = cudaMemcpy(localGpuInfo->d_population,(IndividualImpl*)(Pop->cudaBuffer)+localGpuInfo->indiv_start,
lastError = cudaMemcpy(localGpuData->d_population,(IndividualImpl*)(Pop->cudaBuffer)+localGpuData->indiv_start,
(sizeof(IndividualImpl)*localGpuData->sh_pop_size),cudaMemcpyHostToDevice);
CUDA_SAFE_CALL(lastError);
std::cout << localGpuData->dimGrid << ";"<< localGpuData->dimBlock << std::endl;
//std::cout << localGpuData->sh_pop_size << ";" << localGpuData->dimGrid << ";"<< localGpuData->dimBlock << std::endl;
// the real GPU computation (kernel launch)
cudaEvaluatePopulation<<< localGpuData->dimGrid, localGpuData->dimBlock>>>(localArg->d_population, localGpuData->sh_pop_size, localData->d_fitness);
cudaEvaluatePopulation<<< localGpuData->dimGrid, localGpuData->dimBlock>>>(localGpuData->d_population, localGpuData->sh_pop_size, localGpuData->d_fitness);
lastError = cudaGetLastError();
CUDA_SAFE_CALL(lastError);
......@@ -270,44 +279,44 @@ void* gpuThreadMain(void* arg){
// be sure the GPU has finished computing evaluations, and get results to CPU
lastError = cudaThreadSynchronize();
if( lastError!=cudaSuccess ){ std::cerr << "Error during synchronize" << std::endl; }
lastError = cudaMemcpy(fitnessTemp + localGpuInfo->indiv_start, localArg->d_fitness, localGpuInfo->sh_pop_size*sizeof(float), cudaMemcpyDeviceToHost);
lastError = cudaMemcpy(fitnessTemp + localGpuData->indiv_start, localGpuData->d_fitness, localGpuData->sh_pop_size*sizeof(float), cudaMemcpyDeviceToHost);
// this thread has finished its phase, so lets tell it to the main thread
sem_post(&localArg->sem_out);
sem_post(&localGpuData->sem_out);
}
sem_post(&localArg->sem_out);
sem_post(&localGpuData->sem_out);
fflush(stdout);
return NULL;
}
/* Wake every GPU worker thread for one evaluation round, then block
   until all of them have reported completion.  Each worker waits on its
   own sem_in; posting it releases one pass, and the worker answers on
   sem_out.  (Post-commit version: workers are addressed through
   globalGpuData, the stale gpuArgs lines are removed.) */
void wake_up_gpu_thread(){
	// release every worker for one evaluation round
	for( int i=0 ; i<num_gpus ; i++ ){
		sem_post(&(globalGpuData[i].sem_in));
	}
	// barrier: wait for each worker to signal that its round is done
	for( int i=0 ; i<num_gpus ; i++ ){
		sem_wait(&globalGpuData[i].sem_out);
	}
}
/* Create one POSIX worker thread per GPU.  Allocates the global per-GPU
   evaluation-data array, zeroes the device-buffer pointers (allocated
   later by the workers), assigns consecutive device ids starting at
   fstGpu, initialises the in/out hand-off semaphores, and starts
   gpuThreadMain on each entry.
   NOTE(review): the pthread_t array `t` is leaked and the threads are
   never joined here — presumably they live for the whole run; confirm. */
void InitialiseGPUs(){
	//MultiGPU part on one CPU
	globalGpuData = (struct gpuEvaluationData*)malloc(sizeof(struct gpuEvaluationData)*num_gpus);
	pthread_t* t = (pthread_t*)malloc(sizeof(pthread_t)*num_gpus);
	int gpuId = fstGpu;  // first CUDA device id to use
	//here we want to create one thread per GPU
	for( int i=0 ; i<num_gpus ; i++ ){
		globalGpuData[i].d_fitness = NULL;     // device buffers set up later by the worker
		globalGpuData[i].d_population = NULL;
		globalGpuData[i].gpuId = gpuId++;
		globalGpuData[i].threadId = i;
		sem_init(&globalGpuData[i].sem_in,0,0);   // main -> worker: "start a round"
		sem_init(&globalGpuData[i].sem_out,0,0);  // worker -> main: "round finished"
		if( pthread_create(t+i,NULL,gpuThreadMain,globalGpuData+i) ){ perror("pthread_create : "); }
	}
}
......@@ -338,7 +347,7 @@ void EASEAInit(int argc, char** argv){
}
}
gpu_infos = (struct my_struct_gpu*)malloc(sizeof(struct my_struct_gpu)*num_gpus);
//globalGpuData = (struct gpuEvaluationData*)malloc(sizeof(struct gpuEvaluationData)*num_gpus);
InitialiseGPUs();
\INSERT_INIT_FCT_CALL
}
......@@ -346,9 +355,8 @@ void EASEAInit(int argc, char** argv){
/* Final cleanup at end of the evolutionary run.  Sets the freeGPU flag
   and wakes the workers one last time — presumably gpuThreadMain checks
   the flag and exits (the check is outside this view; confirm) — then
   releases the per-GPU bookkeeping arrays.  (Post-commit version: the
   stale free(gpuArgs) line is removed.) */
void EASEAFinal(CPopulation* pop){
	freeGPU=true;          // mark this wake-up as the shutdown signal
	wake_up_gpu_thread();  // release workers so they can observe freeGPU
	free(globalGpuData);
	free(gpu_infos);
	\INSERT_FINALIZATION_FCT_CALL;
}
......@@ -481,6 +489,13 @@ void PopulationImpl::evaluateParentPopulation(){
unsigned actualPopulationSize = this->actualParentPopulationSize;
fitnessTemp = new float[actualPopulationSize];
int index;
static bool dispatchedParents = false;
if( dispatchedParents==false ){
dispatchPopulation(EA->population->parentPopulationSize);
dispatchedParents=true;
}
wake_up_gpu_thread();
......@@ -498,6 +513,12 @@ void PopulationImpl::evaluateOffspringPopulation(){
unsigned actualPopulationSize = this->actualOffspringPopulationSize;
fitnessTemp = new float[actualPopulationSize];
int index;
static bool dispatchedOffspring = false;
if( dispatchedOffspring==false ){
dispatchPopulation(EA->population->offspringPopulationSize);
dispatchedOffspring=true;
}
for( index=(actualPopulationSize-1); index>=0; index--)
((IndividualImpl*)this->offsprings[index])->copyToCudaBuffer(this->cudaBuffer,index);
......
This diff is collapsed.
......@@ -72,7 +72,7 @@ bool INSTEAD_EVAL_STEP = false;
CRandomGenerator* globalRandomGenerator;
extern CEvolutionaryAlgorithm* EA;
#define STD_TPL
#define GP_TPL
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment