Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
E
easea
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Arnaud Kress
easea
Commits
1c9c8042
Commit
1c9c8042
authored
Jan 25, 2010
by
Ogier Maitre
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
drone version for TGP_regression
parent
9189089b
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
2630 additions
and
255 deletions
+2630
-255
EaseaLex.l
EaseaLex.l
+52
-17
dev/tgp_regression/tgp_regression.ez
dev/tgp_regression/tgp_regression.ez
+616
-123
dev/tgp_regression/tgp_regressionEval.cu
dev/tgp_regression/tgp_regressionEval.cu
+350
-52
dev/tgp_regression_drone/tgp_regression.ez
dev/tgp_regression_drone/tgp_regression.ez
+1090
-0
dev/tgp_regression_drone/tgp_regressionEval.cu
dev/tgp_regression_drone/tgp_regressionEval.cu
+454
-0
libeasea/CEvolutionaryAlgorithm.cpp
libeasea/CEvolutionaryAlgorithm.cpp
+59
-61
libeasea/COptionParser.cpp
libeasea/COptionParser.cpp
+2
-2
libeasea/CRandomGenerator.cpp
libeasea/CRandomGenerator.cpp
+1
-0
libeasea/include/CEvolutionaryAlgorithm.h
libeasea/include/CEvolutionaryAlgorithm.h
+3
-0
libeasea/include/CPopulation.h
libeasea/include/CPopulation.h
+2
-0
tpl/CUDA_GP.tpl
tpl/CUDA_GP.tpl
+1
-0
No files found.
EaseaLex.l
100755 → 100644
View file @
1c9c8042
...
...
@@ -26,7 +26,7 @@ Centre de Math
bool genomeSizeValidity=false;
int lineCounter = 0;
// local functions
// local functions
char* selectorDetermination(int nMINIMISE, char* sSELECTOR){
char selectorName[50];
...
...
@@ -75,7 +75,7 @@ Centre de Math
%start COPY_EO_INITIALISER
%start COPY COPY_INITIALISER COPY_CROSSOVER COPY_MUTATOR COPY_EVALUATOR COPY_FINALIZATION_FUNCTION
%start COPY_DISPLAY COPY_USER_FUNCTION COPY_USER_GENERATION PARAMETERS_ANALYSIS GET_PARAMETERS
%start COPY_USER_FUNCTIONS COPY_GENERATION_FUNCTION_BEFORE_REPLACEMENT GET_METHODS COPY_MAKEFILE_OPTION COPY_BOUND_CHECKING_FUNCTION COPY_BEG_GENERATION_FUNCTION COPY_END_GENERATION_FUNCTION
%start COPY_USER_FUNCTIONS COPY_GENERATION_FUNCTION_BEFORE_REPLACEMENT GET_METHODS COPY_MAKEFILE_OPTION COPY_BOUND_CHECKING_FUNCTION COPY_BEG_GENERATION_FUNCTION COPY_END_GENERATION_FUNCTION
COPY_INSTEAD_EVAL
// lexical analyser name and class definition
%name CEASEALexer {
...
...
@@ -187,6 +187,15 @@ exponent ([Ee][+-]?[0-9]+)
}
<TEMPLATE_ANALYSIS>"\\INSTEAD_EVAL_FUNCTION" {
//DEBUG_PRT_PRT("insert beg");
yyreset();
yyin = fpGenomeFile;
if (bVERBOSE) printf ("Evaluation population in a single function!!.\n");
BEGIN COPY_INSTEAD_EVAL;
}
<TEMPLATE_ANALYSIS>"\\INSERT_END_GENERATION_FUNCTION" {
//DEBUG_PRT_PRT("insert end");
if (bVERBOSE) printf ("Inserting at the end of each generation function.\n");
...
...
@@ -497,7 +506,7 @@ exponent ([Ee][+-]?[0-9]+)
if (bVERBOSE) printf ("Inserting user functions.\n");
yyreset();
yyin = fpGenomeFile;
lineCounter=
1
; // switch to .ez file and analyser
lineCounter=
2
; // switch to .ez file and analyser
BEGIN COPY_USER_FUNCTIONS;
}
<TEMPLATE_ANALYSIS>"\\INSERT_EO_INITIALISER" {
...
...
@@ -531,11 +540,13 @@ exponent ([Ee][+-]?[0-9]+)
<TEMPLATE_ANALYSIS>"\\INSERT_MUTATOR" {
yyreset();
yyin = fpGenomeFile; // switch to .ez file and analyser
lineCounter=1;
BEGIN COPY_MUTATOR;
}
<TEMPLATE_ANALYSIS>"\\INSERT_EVALUATOR" {
yyreset();
yyin = fpGenomeFile; // switch to .ez file and analyser
lineCounter=1;
BEGIN COPY_EVALUATOR;
}
...
...
@@ -543,6 +554,7 @@ exponent ([Ee][+-]?[0-9]+)
yyreset();
yyin = fpGenomeFile; // switch to .ez file and analyser
bWithinCUDA_Evaluator = 1;
lineCounter=1;
BEGIN COPY_EVALUATOR;
}
...
...
@@ -721,10 +733,7 @@ if(OPERATING_SYSTEM=WINDOWS)
if( TARGET==CUDA )
strcat(sFileName,"Individual.cu");
else if( TARGET==STD )
if( TARGET_FLAVOR==CUDA_FLAVOR_GP)
strcat(sFileName,"Individual.cu");
else
strcat(sFileName,"Individual.cpp");
strcat(sFileName,"Individual.cpp");
fpOutputFile=fopen(sFileName,"w");
if (bVERBOSE) printf("Creating %s...\n",sFileName);
}
...
...
@@ -963,6 +972,29 @@ if(OPERATING_SYSTEM=WINDOWS)
}
}
<COPY_INSTEAD_EVAL>"\\Instead"[ \t\n]+"evaluation"[ \t\n]+"function:" {
//DEBUG_PRT_PRT("at each beg");
if( (TARGET==CUDA || TARGET==STD)){
fprintf (fpOutputFile,"{\n");
bFunction=1;
BEGIN COPY_USER_GENERATION;
}
}
<COPY_INSTEAD_EVAL>.|\n {}
<COPY_INSTEAD_EVAL><<EOF>> {
bBeginGenerationFunction=0; // No Generation function was found in the .ez file
if (bVERBOSE) printf("*** No Instead evaluation step function was found. ***\n");
fprintf(fpOutputFile,"\n// No Instead evaluation step function.\n");
rewind(fpGenomeFile);
yyin = fpTemplateFile;
BEGIN TEMPLATE_ANALYSIS;
bNotFinishedYet=1;
}
<COPY_BEG_GENERATION_FUNCTION><<EOF>> {
bBeginGenerationFunction=0; // No Generation function was found in the .ez file
if (bVERBOSE) printf("*** No beginning generation function was found. ***\n");
...
...
@@ -1304,16 +1336,23 @@ if(OPERATING_SYSTEM=WINDOWS)
<COPY_CROSSOVER>\n {lineCounter++;}
<COPY_MUTATOR>"\\GenomeClass::mutator"[ \t\n]*":" {
bWithinMutator=1;
if( bLINE_NUM_EZ_FILE )
fprintf(fpOutputFile,"#line %d \"%s.ez\"\n",lineCounter, sRAW_PROJECT_NAME);
BEGIN COPY_USER_FUNCTION;
return USER_MUTATOR;
}
<COPY_MUTATOR>.|\n {}
<COPY_MUTATOR>. {}
<COPY_MUTATOR>\n {lineCounter++;}
<COPY_EVALUATOR>"\\GenomeClass::evaluator"[ \t\n]*":" {
BEGIN COPY_USER_FUNCTION;
bWithinEvaluator=1;
if( bLINE_NUM_EZ_FILE )
fprintf(fpOutputFile,"#line %d \"%s.ez\"\n",lineCounter, sRAW_PROJECT_NAME);
return USER_EVALUATOR;
}
<COPY_EVALUATOR>.|\n {}
<COPY_EVALUATOR>. {}
<COPY_EVALUATOR>\n {lineCounter++;}
//****************************************
// Basic copy to .cpp file with major changes
...
...
@@ -1568,11 +1607,9 @@ if(OPERATING_SYSTEM=WINDOWS)
fprintf(fpOutputFile,"globalRandomGenerator->tossCoin");}
<COPY_USER_FUNCTION>"random" {
fprintf(fpOutputFile,"globalRandomGenerator->random");}
<COPY_USER_FUNCTION>"child1" {fprintf(fpOutputFile,"child");
<COPY_USER_FUNCTION>"child1" {fprintf(fpOutputFile,"child
1
");
}
<COPY_USER_FUNCTION>"child2" {fprintf(fpOutputFile,"child");
}
<COPY_USER_FUNCTION>"child" {fprintf(fpOutputFile,"child");
<COPY_USER_FUNCTION>"child2" {fprintf(fpOutputFile,"child2");
}
<COPY_USER_FUNCTION>"parent1" {fprintf(fpOutputFile,"parent1");
}
...
...
@@ -1643,7 +1680,7 @@ if(OPERATING_SYSTEM=WINDOWS)
<GET_PARAMETERS>"Print"[ \t\n]+"stats"[ \t\n]*":"[\t\n]* {if (bVERBOSE) printf("\tPrint Stats...\n");return PRINT_STATS;}
<GET_PARAMETERS>"Plot"[ \t\n]+"stats"[ \t\n]*":"[\t\n]* {if (bVERBOSE) printf("\tPlot Stats with gnuplot...\n");return PLOT_STATS;}
<GET_PARAMETERS>"Generate"[ \t\n]+"c
sv
"[ \t\n]+"stats"[ \t\n]+"file"[ \t\n]*":"[ \t\n]* {if (bVERBOSE) printf("\tPrint Stats to cvs File...\n");return GENERATE_CVS_FILE;}
<GET_PARAMETERS>"Generate"[ \t\n]+"c
vs
"[ \t\n]+"stats"[ \t\n]+"file"[ \t\n]*":"[ \t\n]* {if (bVERBOSE) printf("\tPrint Stats to cvs File...\n");return GENERATE_CVS_FILE;}
<GET_PARAMETERS>"Generate"[ \t\n]+"gnuplot"[ \t\n]+"script"[ \t\n]*":"[ \t\n]* {if (bVERBOSE) printf("\tGenerate Gnuplot Script...\n");return GENERATE_GNUPLOT_SCRIPT;}
<GET_PARAMETERS>"Generate"[ \t\n]+"R"[ \t\n]+"script"[ \t\n]*":"[ \t\n]* {if (bVERBOSE) printf("\tGenerate R Script...\n");return GENERATE_R_SCRIPT;}
...
...
@@ -1789,7 +1826,7 @@ int CEASEALexer::create(CEASEAParser* pParser, CSymbolTable* pSymTable)
strcat(sTemp,"CUDA.tpl");
else if(TARGET_FLAVOR == CUDA_FLAVOR_CMAES )
strcat(sTemp,"CMAES_CUDA.tpl");
else
else
strcat(sTemp,"CUDA_MO.tpl");
if (!(yyin = fpTemplateFile = fopen(sTemp, "r"))){
...
...
@@ -1803,8 +1840,6 @@ int CEASEALexer::create(CEASEAParser* pParser, CSymbolTable* pSymTable)
strcat(sTemp,"STD.tpl");
else if (TARGET_FLAVOR == STD_FLAVOR_CMAES)
strcat(sTemp,"CMAES.tpl");
else if( TARGET_FLAVOR == CUDA_FLAVOR_GP )
strcat(sTemp,"CUDA_GP.tpl");
else
strcat(sTemp,"STD_MO.tpl");
if (!(yyin = fpTemplateFile = fopen(sTemp, "r"))){
...
...
dev/tgp_regression/tgp_regression.ez
View file @
1c9c8042
This diff is collapsed.
Click to expand it.
dev/tgp_regression/tgp_regressionEval.cu
View file @
1c9c8042
...
...
@@ -2,21 +2,208 @@
// Capacity, in floats, of the per-thread evaluation stack used by the postfix interpreter.
#define MAX_STACK 50
// log2(128). NOTE(review): presumably pairs with NUMTHREAD2, which is defined outside this view.
#define LOGNUMTHREAD2 7
// Number of training cases handled per pass by the 32-thread kernel (one case per thread).
#define NUMTHREAD 32
// log2(NUMTHREAD); used as a shift to count the passes over the training set.
#define LOGNUMTHREAD 5
// An absolute error at or below this threshold counts as a "hit".
#define HIT_LEVEL 0.01f
// Absolute errors below this value are clamped to exactly zero.
#define PROBABLY_ZERO 1.11E-15f
// Upper clamp for the absolute error; also replaces NaN results.
#define BIG_NUMBER 1.0E15f
/**
 * Per-GPU worker state: one instance per host thread driving a device.
 */
struct gpuArg {
  int threadId;          // worker id, also used as the CUDA device ordinal

  sem_t sem_in;          // posted by the host to start an evaluation round
  sem_t sem_out;         // posted by the worker once its round is finished

  // Device-side buffers (suffix _k).
  float* progs_k;        // postfix programs
  float* results_k;      // per-individual summed error
  int*   indexes_k;      // per-individual start offset into the programs
  int*   hits_k;         // per-individual hit count
  float* inputs_k;       // training inputs
  float* outputs_k;      // training outputs

  // Bounds of the chunk assigned to this worker.
  int index_st, index_end;
  int indiv_st, indiv_end;
};

// Array of worker descriptors, indexed by GPU id.
struct gpuArg* gpuArgs;

// Set to true to make the workers leave their loop and release device memory.
bool freeGPU = false;

// Shared with the workers: individuals per GPU, and total program buffer length.
int sh_pop_size = 0;
int sh_length = 0;
/**
 * Evaluates postfix-encoded programs on the training set, several individuals
 * per block.
 *
 * Launch layout (as used by the caller in this file):
 *   - blockDim.x must equal NUMTHREAD: each x-lane handles one training case
 *     per pass, and the final reduction sums exactly NUMTHREAD partials.
 *   - blockDim.y == indivPerBlock: each y-row evaluates one individual.
 *   - dynamic shared memory: 2 * indivPerBlock * NUMTHREAD floats
 *     (first half: per-lane error sums, second half: per-lane hit counts).
 *
 * @param k_progs          device buffer holding the postfix programs of this GPU's chunk
 * @param maxprogssize     length of k_progs (unused by the kernel)
 * @param popsize          number of individuals in this GPU's chunk
 * @param k_inputs         training inputs, VAR_LEN floats per training case
 * @param k_outputs        expected output, one float per training case
 * @param trainingSetSize  number of training cases
 * @param k_results        out: summed (clamped) absolute error per individual
 * @param k_hits           out: number of cases with error <= HIT_LEVEL per individual
 * @param indivPerBlock    number of individuals evaluated by one block
 * @param indexes          start offset of each program; -1 marks an individual to skip
 * @param start_index      offset subtracted from indexes[] to address k_progs
 * @param gpu_id           id of the GPU running the kernel (unused by the kernel)
 */
__global__ static void
fastEvaluatePostFixIndividuals_32_mgpu(const float* k_progs,
                                       const int maxprogssize,
                                       const int popsize,
                                       const float* k_inputs,
                                       const float* k_outputs,
                                       const int trainingSetSize,
                                       float* k_results,
                                       int* k_hits,
                                       const int indivPerBlock,
                                       const int* indexes,
                                       const int start_index,
                                       const int gpu_id)
{
  extern __shared__ float scratch[];
  // Each y-row owns NUMTHREAD consecutive floats in each half of the scratch area.
  float* tmpresult = scratch + (threadIdx.y * NUMTHREAD);
  float* tmphits   = scratch + (indivPerBlock * NUMTHREAD) + (threadIdx.y * NUMTHREAD);

  const int tid = threadIdx.x;  // 0 to NUMTHREAD-1

  // Individual processed by this y-row.
  const int index = blockIdx.x
                  + threadIdx.y * (popsize / indivPerBlock)
                  + blockIdx.y * gridDim.x;

  // Rows that have nothing to do (out of range, or flagged -1 as already
  // evaluated) stay in the kernel instead of returning early, so that every
  // thread of the block reaches the barrier below.
  const bool active = (index < popsize) && (indexes[index] != -1);

  float sum = 0.0f;  // raw error summed over this lane's training cases
  int hits = 0;      // hits number

  if (active) {
    // Offset of the first codop of this individual inside k_progs.
    const int progStart = indexes[index] - start_index;

    // Number of passes of NUMTHREAD cases; the last pass may be partial when
    // trainingSetSize is not a multiple of NUMTHREAD.
    const int nbPasses = ((trainingSetSize - 1) >> LOGNUMTHREAD) + 1;

    float stack[MAX_STACK];

    for (int i = 0; i < nbPasses; i++) {
      const int fitnessCase = i * NUMTHREAD + tid;

      // Is this lane on a real training case?
      if (fitnessCase >= trainingSetSize)
        continue;

      float currentW = 0.0f;
      // NOTE(review): X/Y/Z are loaded but the interpreter below only has a
      // case for OP_W; confirm whether OP_X/OP_Y/OP_Z cases are missing.
      float currentX = 0.0f, currentY = 0.0f, currentZ = 0.0f;
      // All variables of a case use the same NUMTHREAD-based stride as the
      // outputs (the original mixed in the 128-thread stride for X/Y/Z).
#if OP_W>=0
      currentW = k_inputs[fitnessCase * VAR_LEN + 0];
#endif
#if OP_X>=0
      currentX = k_inputs[fitnessCase * VAR_LEN + 1];
#endif
#if OP_Y>=0
      currentY = k_inputs[fitnessCase * VAR_LEN + 2];
#endif
#if OP_Z>=0
      currentZ = k_inputs[fitnessCase * VAR_LEN + 3];
#endif
      (void)currentX; (void)currentY; (void)currentZ;

      const float currentOutput = k_outputs[fitnessCase];

      int pc = progStart;        // program counter inside k_progs
      int sp = 0;                // stack pointer
      int codop = k_progs[pc++]; // codops are stored as floats
      float op1, op2;

      while (codop != OP_RETURN) {
        switch (codop) {
        case OP_W:
          stack[sp++] = currentW;
          break;
        case OP_ERC:
          // The constant follows its codop in the program stream.
          stack[sp++] = k_progs[pc++];
          break;
        case OP_MUL:
          op1 = stack[--sp];
          op2 = stack[sp - 1];
          stack[sp - 1] = op1 * op2;
          break;
        case OP_ADD:
          op1 = stack[--sp];
          op2 = stack[sp - 1];
          stack[sp - 1] = op1 + op2;
          break;
        case OP_SUB:
          op1 = stack[--sp];
          op2 = stack[sp - 1];
          stack[sp - 1] = op2 - op1;
          break;
        case OP_DIV:
          op2 = stack[--sp];
          op1 = stack[sp - 1];
          // Protected division.
          if (op2 == 0.0f)
            stack[sp - 1] = DIV_ERR_VALUE;
          else
            stack[sp - 1] = op1 / op2;
          break;
        case OP_SIN:
          stack[sp - 1] = sinf(stack[sp - 1]);
          break;
        case OP_COS:
          stack[sp - 1] = cosf(stack[sp - 1]);
          break;
        }
        // get next codop
        codop = k_progs[pc++];
      } // codop interpret loop

      float result = fabsf(stack[0] - currentOutput);

      // The negated comparison also catches NaN.
      if (!(result < BIG_NUMBER))
        result = BIG_NUMBER;
      else if (result < PROBABLY_ZERO)
        result = 0.0f;

      if (result <= HIT_LEVEL)
        hits++;

      sum += result;  // sum raw error on all training cases
    } // LOOP ON TRAINING CASES

    // Publish this lane's partials for the row-wide reduction.
    tmpresult[tid] = sum;
    tmphits[tid] = hits;
  }

  // Make every lane's partials visible before lane 0 reads them.
  __syncthreads();

  if (active && tid == 0) {
    // Sequential reduction in lane order, as in the original.
    float totalError = tmpresult[0];
    float totalHits = tmphits[0];
    for (int i = 1; i < NUMTHREAD; i++) {
      totalError += tmpresult[i];
      totalHits += tmphits[i];
    }
    k_results[index] = totalError;
    k_hits[index] = totalHits;
  }
  // here results and hits have been stored in their respective array: we can leave
}
__global__
static
void
EvaluatePostFixIndividuals_128_mgpu
(
const
float
*
k_progs
,
const
int
maxprogssize
,
const
int
popsize
,
const
float
*
k_inputs
,
const
float
*
k_outputs
,
const
int
trainingSetSize
,
float
*
k_results
,
int
*
k_hits
,
int
*
k_indexes
,
int
start_index
,
int
gpu_id
)
{
__shared__
float
tmpresult
[
NUMTHREAD2
];
...
...
@@ -30,7 +217,7 @@ EvaluatePostFixIndividuals_128(const float * k_progs,
float
sum
=
0.0
;
int
hits
=
0
;
// hits number
float
current
X
,
currentOutput
;
float
current
W
,
currentX
,
currentY
,
currentZ
,
currentOutput
;
float
result
;
int
start_prog
;
int
codop
;
...
...
@@ -61,10 +248,23 @@ EvaluatePostFixIndividuals_128(const float * k_progs,
if
(
i
*
NUMTHREAD2
+
tid
>=
trainingSetSize
)
// no!
continue
;
currentX
=
k_inputs
[
i
*
NUMTHREAD2
+
tid
];
#if OP_W>=0
currentW
=
k_inputs
[(
i
*
NUMTHREAD2
*
VAR_LEN
)
+
tid
*
VAR_LEN
+
0
];
#endif
#if OP_X>=0
currentX
=
k_inputs
[(
i
*
NUMTHREAD2
*
VAR_LEN
)
+
tid
*
VAR_LEN
+
1
];
#endif
#if OP_Y>=0
currentY
=
k_inputs
[(
i
*
NUMTHREAD2
*
VAR_LEN
)
+
tid
*
VAR_LEN
+
2
];
#endif
#if OP_Z>=0
currentZ
=
k_inputs
[(
i
*
NUMTHREAD2
*
VAR_LEN
)
+
tid
*
VAR_LEN
+
3
];
#endif
currentOutput
=
k_outputs
[
i
*
NUMTHREAD2
+
tid
];
start_prog
=
k_indexes
[
index
];
// index of first codop
start_prog
=
k_indexes
[
index
]
-
start_index
;
// index of first codop
codop
=
k_progs
[
start_prog
++
];
sp
=
0
;
// stack and stack pointer
...
...
@@ -72,50 +272,23 @@ EvaluatePostFixIndividuals_128(const float * k_progs,
while
(
codop
!=
OP_RETURN
){
switch
(
codop
)
{
case
OP_W
:
stack
[
sp
++
]
=
currentX
;
break
;
case
OP_ERC
:
tmp
=
k_progs
[
start_prog
++
];
stack
[
sp
++
]
=
tmp
;
break
;
case
OP_W
:
stack
[
sp
++
]
=
currentW
;
break
;
case
OP_ERC
:
stack
[
sp
++
]
=
k_progs
[
start_prog
++
];
break
;
case
OP_MUL
:
sp
--
;
op1
=
stack
[
sp
];
sp
--
;
op2
=
stack
[
sp
];
stack
[
sp
]
=
__fmul_rz
(
op1
,
op2
);
stack
[
sp
]
=
op1
*
op2
;
sp
++
;
break
;
op1
=
stack
[
--
sp
];
op2
=
stack
[
sp
-
1
];
stack
[
sp
-
1
]
=
op1
*
op2
;
break
;
case
OP_ADD
:
sp
--
;
op1
=
stack
[
sp
];
sp
--
;
op2
=
stack
[
sp
];
stack
[
sp
]
=
__fadd_rz
(
op1
,
op2
);
stack
[
sp
]
=
op1
+
op2
;
sp
++
;
break
;
op1
=
stack
[
--
sp
];
op2
=
stack
[
sp
-
1
];
stack
[
sp
-
1
]
=
op1
+
op2
;
break
;
case
OP_SUB
:
sp
--
;
op1
=
stack
[
sp
];
sp
--
;
op2
=
stack
[
sp
];
stack
[
sp
]
=
op2
-
op1
;
sp
++
;
break
;
op1
=
stack
[
--
sp
];
op2
=
stack
[
sp
-
1
];
stack
[
sp
-
1
]
=
op2
-
op1
;
break
;
case
OP_DIV
:
sp
--
;
op2
=
stack
[
sp
];
sp
--
;
op1
=
stack
[
sp
];
if
(
op2
==
0.0
)
stack
[
sp
]
=
1.0
;
else
stack
[
sp
]
=
op1
/
op2
;
sp
++
;
break
;
op2
=
stack
[
--
sp
];
op1
=
stack
[
sp
-
1
];
if
(
op2
==
0.0
)
stack
[
sp
-
1
]
=
DIV_ERR_VALUE
;
else
stack
[
sp
-
1
]
=
op1
/
op2
;
break
;
case
OP_SIN
:
stack
[
sp
-
1
]
=
sinf
(
stack
[
sp
-
1
]);
break
;
case
OP_COS
:
stack
[
sp
-
1
]
=
cosf
(
stack
[
sp
-
1
]);
break
;
}
// get next codop
codop
=
k_progs
[
start_prog
++
];
...
...
@@ -147,6 +320,131 @@ EvaluatePostFixIndividuals_128(const float * k_progs,
}
k_results
[
index
]
=
tmpresult
[
0
];
k_hits
[
index
]
=
tmphits
[
0
];
//printf("g %d %d %f\n",gpu_id,bid,k_results[index]);
//fflush(stdout);
}
// here results and hits have been stored in their respective array: we can leave
}
/**
 * Starts one evaluation round on every GPU worker and blocks until all of
 * them have reported completion.
 *
 * @param nbGpu number of GPU worker threads, i.e. entries of gpuArgs to signal
 */
void wake_up_gpu_thread(int nbGpu){
  // Release every worker first so they run concurrently...
  // (the argument is now honoured; the original looped on an outer `nbGPU`)
  for( int i=0 ; i<nbGpu ; i++ ){
    DEBUG_PRT("wake up th %d",i);
    sem_post(&(gpuArgs[i].sem_in));
  }

  // ...then wait for each of them to finish.
  for( int i=0 ; i<nbGpu ; i++ ){
    sem_wait(&gpuArgs[i].sem_out);
  }
}
/**
 * Publishes the size of the work to the GPU workers and runs one evaluation
 * round.
 *
 * The population is split evenly: each worker receives popSize / nbGpu
 * individuals. Only the two-GPU configuration is accepted (asserted).
 *
 * @param progs      flattened programs (not read here)
 * @param indexes    start offset of each individual's program (only read when INSTRUMENTED)
 * @param length     total length of the flattened programs
 * @param population individuals being evaluated (not read here)
 * @param popSize    number of individuals
 * @param nbGpu      number of GPU workers
 */
void notify_gpus(float* progs, int* indexes, int length, CIndividual** population, int popSize, int nbGpu){
  const int chunkSize = popSize / nbGpu;

  assert(nbGpu==2);

#ifdef INSTRUMENTED
  // Program-buffer length handled by each GPU.
  const int splitOffset = indexes[chunkSize];
  currentStats.gpu0Blen = splitOffset;
  currentStats.gpu1Blen = length - splitOffset;
#endif

  // Values read by the workers once they are woken up.
  sh_length = length;
  sh_pop_size = chunkSize;

  wake_up_gpu_thread(nbGpu);
}
/**
 * Allocates every device buffer of one GPU worker and uploads the training set.
 *
 * The input and output arrays are copied to the device. The index, program,
 * result and hit buffers are only allocated here.
 * Must be called with the target device already selected on the calling thread.
 *
 * @param input_f        host training inputs
 * @param length_input   number of floats in input_f
 * @param output_f       host training outputs
 * @param length_output  number of floats in output_f
 * @param gpu_id         entry of gpuArgs whose buffers are set up
 */
void initialDataToMGPU(float* input_f, int length_input, float* output_f, int length_output, int gpu_id){
  struct gpuArg* dev = &gpuArgs[gpu_id];

  // allocate and copy input/output arrays
  CUDA_SAFE_CALL(cudaMalloc((void**)&(dev->inputs_k), sizeof(*dev->inputs_k)*length_input));
  CUDA_SAFE_CALL(cudaMalloc((void**)&(dev->outputs_k), sizeof(*dev->outputs_k)*length_output));
  CUDA_SAFE_CALL(cudaMemcpy(dev->inputs_k, input_f, sizeof(*dev->inputs_k)*length_input, cudaMemcpyHostToDevice));
  CUDA_SAFE_CALL(cudaMemcpy(dev->outputs_k, output_f, sizeof(*dev->outputs_k)*length_output, cudaMemcpyHostToDevice));

  // Buffers are sized for the larger of the two populations so they can be
  // reused for parents and offspring alike.
  int maxPopSize = MAX(EA->population->parentPopulationSize,EA->population->offspringPopulationSize);

  // allocate indexes and programs arrays
  // (element sizes are taken from the member being allocated)
  CUDA_SAFE_CALL(cudaMalloc((void**)&(dev->indexes_k), sizeof(*dev->indexes_k)*maxPopSize));
  CUDA_SAFE_CALL(cudaMalloc((void**)&(dev->progs_k), sizeof(*dev->progs_k)*MAX_PROGS_SIZE));

  // allocate hits and results arrays
  CUDA_SAFE_CALL(cudaMalloc((void**)&(dev->results_k), sizeof(*dev->results_k)*maxPopSize));
  CUDA_SAFE_CALL(cudaMalloc((void**)&(dev->hits_k), sizeof(*dev->hits_k)*maxPopSize));
}
/**
 * Entry point of a GPU worker thread.
 *
 * Binds the thread to the device whose ordinal is the worker's threadId,
 * allocates its device buffers, then serves evaluation rounds:
 * wait on sem_in, upload the assigned chunk, run the kernel, download results
 * and hits, post sem_out. When freeGPU is set the worker leaves the loop,
 * releases its device memory and posts sem_out one last time.
 *
 * @param arg pointer to the worker's struct gpuArg
 * @return always NULL
 */
void* gpuThreadMain(void* arg){
  struct gpuArg* localArg = (struct gpuArg*)arg;
  DEBUG_PRT("gpu th %d",localArg->threadId);
  CUDA_SAFE_CALL(cudaSetDevice(localArg->threadId));

  // Alloc memory for this thread
  initialDataToMGPU(inputs_f, fitnessCasesSetLength*VAR_LEN, outputs_f, fitnessCasesSetLength*NB_TREES, localArg->threadId);

  DEBUG_PRT("allocation ok for th %d",localArg->threadId);

  // Wait for population to evaluate.
  while(1){
    sem_wait(&localArg->sem_in);

    // Check for shutdown before touching the host arrays: on the final
    // wake-up there is no population chunk to read.
    if( freeGPU )
      break;

    // Individuals [indiv_st, indiv_end) are assigned to this worker.
    int indiv_st = localArg->threadId*sh_pop_size;
    int indiv_end = indiv_st+sh_pop_size;

    // Matching range [index_st, index_end) in the flattened program buffer;
    // the last worker runs to the end of the buffer.
    int index_st = indexes[indiv_st];
    int index_end = 0;
    if( localArg->threadId != nbGPU-1 )
      index_end = indexes[indiv_end];
    else
      index_end = sh_length;

    //here we copy assigned population chunk to the related GPU
    CUDA_SAFE_CALL(cudaMemcpy( localArg->indexes_k, indexes+indiv_st, (indiv_end-indiv_st)*sizeof(*localArg->indexes_k), cudaMemcpyHostToDevice ));
    CUDA_SAFE_CALL(cudaMemcpy( localArg->progs_k, progs+index_st, (index_end-index_st)*sizeof(*localArg->progs_k), cudaMemcpyHostToDevice ));

#if 1
    EvaluatePostFixIndividuals_128_mgpu<<<sh_pop_size,128>>>( localArg->progs_k, index_end-index_st, sh_pop_size, localArg->inputs_k, localArg->outputs_k, NB_FITNESS_CASES, localArg->results_k, localArg->hits_k, localArg->indexes_k, index_st, localArg->threadId );
#else
    int indivPerBlock = 4;
    dim3 numthreads;
    numthreads.x = 32;
    numthreads.y = indivPerBlock;
    fastEvaluatePostFixIndividuals_32_mgpu<<<sh_pop_size/indivPerBlock,numthreads,NUMTHREAD*sizeof(float)*2*indivPerBlock>>>( localArg->progs_k, index_end-index_st, sh_pop_size, localArg->inputs_k, localArg->outputs_k, NB_FITNESS_CASES, localArg->results_k, localArg->hits_k, indivPerBlock, localArg->indexes_k, index_st, localArg->threadId );
#endif
    // A kernel launch reports configuration errors only through the runtime's
    // last-error state.
    CUDA_SAFE_CALL(cudaGetLastError());

    // The blocking copies below also wait for the kernel to finish.
    CUDA_SAFE_CALL(cudaMemcpy( results+(localArg->threadId*sh_pop_size), localArg->results_k, (indiv_end-indiv_st)*sizeof(*localArg->results_k), cudaMemcpyDeviceToHost ));
    CUDA_SAFE_CALL(cudaMemcpy( hits+(localArg->threadId*sh_pop_size), localArg->hits_k, (indiv_end-indiv_st)*sizeof(*localArg->hits_k), cudaMemcpyDeviceToHost ));

    sem_post(&localArg->sem_out);
  }

  DEBUG_PRT("gpu : %d",localArg->threadId);
  DEBUG_PRT("addr k_prog : %p",localArg->progs_k);

  // Release every buffer obtained in initialDataToMGPU (indexes_k included).
  CUDA_SAFE_CALL(cudaFree(localArg->progs_k));
  CUDA_SAFE_CALL(cudaFree(localArg->results_k));
  CUDA_SAFE_CALL(cudaFree(localArg->hits_k));
  CUDA_SAFE_CALL(cudaFree(localArg->indexes_k));
  CUDA_SAFE_CALL(cudaFree(localArg->inputs_k));
  CUDA_SAFE_CALL(cudaFree(localArg->outputs_k));

  // Tell the host this worker is done.
  sem_post(&localArg->sem_out);

  cout << "gpu " << localArg->threadId << " has been freed" << endl;
  fflush(stdout);
  return NULL;
}
dev/tgp_regression_drone/tgp_regression.ez