From b08f027d9cbfec5405f690a4d003ea6e4a0e696e Mon Sep 17 00:00:00 2001 From: Ogier Maitre Date: Thu, 16 Sep 2010 18:35:05 +0200 Subject: [PATCH] Bugs correction, c4.5 multiclass fixed. --- makefile => Makefile | 0 dev/c4.5_abst/card.ez | 478 +++++++++++++++++++++++++++++++++++ dev/c4.5_geo/card.ez | 371 +++++++++++++++++++++++++++ dev/c4.5_multi-class/card.ez | 371 +++++++++++++++++++++++++++ dev/c4.5_toy/card.ez | 359 ++++++++++++++++++++++++++ dev/c4.5_toy/out/concat.sh | 36 +++ dev/c4.5_toy/run.sh | 10 + 7 files changed, 1625 insertions(+) rename makefile => Makefile (100%) create mode 100755 dev/c4.5_abst/card.ez create mode 100644 dev/c4.5_geo/card.ez create mode 100755 dev/c4.5_multi-class/card.ez create mode 100755 dev/c4.5_toy/card.ez create mode 100644 dev/c4.5_toy/out/concat.sh create mode 100644 dev/c4.5_toy/run.sh diff --git a/makefile b/Makefile similarity index 100% rename from makefile rename to Makefile diff --git a/dev/c4.5_abst/card.ez b/dev/c4.5_abst/card.ez new file mode 100755 index 0000000..397ff26 --- /dev/null +++ b/dev/c4.5_abst/card.ez @@ -0,0 +1,478 @@ +/*_________________________________________________________ + +Test functions +log normal adaptive mutation +Selection operator: Tournament +__________________________________________________________*/ + + +\User functions: + +void generate_k_fold(){ + //printf("generating k packets\n"); + + unsigned packet_class_repartition[K][NO_CLASSES]; + unsigned min_filed_packet[NO_CLASSES]; + for( unsigned i=0 ; ino_instances ; i++ ){ + unsigned current_class =t1->instances[i][t1->hdr->whichis_class]; + unsigned min_filed = min_filed_packet[current_class]; + + packets[min_filed][packets_size[min_filed]++] = t1->instances[i]; + packet_class_repartition[min_filed][current_class]++; + + + //packet_class_repartition[min_filed][current_class]++; + + for( unsigned j=0 ; jhdr = t1->hdr; + k_tables[i]->no_instances = packets_size[i]; + k_tables[i]->links = t1->links; + k_tables[i]->class_repartition = t1->class_repartition; + k_tables[i]->instances = packets[i]; + } +} + + +\end + +\At the beginning of each generation function: +{ + generate_k_fold(); +} +\end + +\At the end of each generation function: +\end + +\At each generation before reduce function: +\end + +\GenomeClass::display: +\end + + +\User declarations : +#include +#include +#include +#include +#include + +#define X_MIN -1. +#define X_MAX 1. +#define ITER 120 +#define Abs(x) ((x) < 0 ? -(x) : (x)) +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define MIN(x,y) ((x)<(y)?(x):(y)) +#define SIGMA 1. /* mutation parameter */ +#define PI 3.141592654 +#define K 5 +#define NO_CLASSES 2 + +//#define GENE_SIZE 11 + +#define G_SIZE 352 + + +float pMutPerGene=0.1; +float pMutDesCard = 0.05; +float pMutDesThre = 0.5; + +struct base* t1 = NULL; +struct base* t2 = NULL; + +float* uniq_instances[2]; +unsigned uniq_cnt[2]; + +struct base* k_tables[K]; +float** packets[K]; +unsigned packets_size[K]; + +\end + +\User classes : +GenomeClass { + float x[G_SIZE]; +} +\end + + +\Before everything else function: +{ + srand(globalRandomGenerator->get_seed()); + INSTEAD_EVAL_STEP = true; + + cout << "Seed : " << globalRandomGenerator->get_seed() << endl; + +#if 0 + t1 = ba_postgres_load_train(); + t2 = ba_postgres_load_car(); +#else + t1 = ba_postgres_load_ta(); + t2 = ba_postgres_load_tb(); + + t1->hdr->no_attributes = 3; +#endif + ba_set_links(t1,t2); + //t1->no_instances = 20; + + ba_mix_instances(t1); + + printf("+ %d - %d\n",t1->class_repartition[1],t1->class_repartition[0]); + + uniq_instances[0] = ba_compute_uniq_values(t2, 1, uniq_cnt+0); + uniq_instances[1] = ba_compute_uniq_values(t2, 2, uniq_cnt+1); + + printf("%d %d\n",uniq_cnt[0],uniq_cnt[1]); + + // allocating K packets + for( unsigned i=0 ; ino_instances/K)); + packets[i] = new float*[(unsigned)ceilf(t1->no_instances/K)]; + } + generate_k_fold(); + + + IndividualImpl* i = new IndividualImpl(); + i->x[0] = 4000; + i->x[1] = 4000; + i->x[2] = 5000; + + for( unsigned j=3 ; jx[j] = INFINITY; + i->x[GENE_SIZE-1] = 0; + + float f = i->evaluate(); + printf("%f\n",f); + + cTreeNode* t = generate_tree(i->x,t1,t2); + struct base* tc = table_from_genome(i->x,t1,t2); + + show_tree(tc,t,0); + + //exit(-1); + +} +\end + +\After everything else function: +{ + + EA->population->sortParentPopulation(); + IndividualImpl* best = (IndividualImpl*)EA->population->parents[0]; + + printf("best fitness %f\n",best->evaluate()); + for( unsigned i=0 ; ix[i+j]); + } + printf(" >= %f\n",best->x[i+GENE_SIZE-1]); + } + printf("\n"); + + + + cTreeNode* root = generate_tree(best->x,t1,t2); + struct base* tmp_table = table_from_genome(best->x,t1,t2); + show_tree(tmp_table,root); + + printf("depth of resulting tree %d\n",root->tree_depth()); + +#if 1 + unsigned error = 0; + for( unsigned i=0 ; ino_instances ; i++ ){ + unsigned predicted_class = root->classify_instance(tmp_table->instances[i]); + unsigned real_class = (unsigned)tmp_table->instances[i][t1->hdr->whichis_class]; + if( predicted_class != real_class ) + error++; + } + printf(" error on the whole set : %d\n",error); +#endif + + + delete root; + ba_delete( tmp_table ); +} +\end + +\GenomeClass::initialiser : +{ + for( unsigned i=0; ihdr->attributes[1]->threshold[random(0,t2->hdr->attributes[1]->no_threshold)]; + //Genome.x[i+1] = t2->hdr->attributes[2]->threshold[random(0,t2->hdr->attributes[2]->no_threshold)]; + + //Genome.x[i] = t2->instances[random(0,t2->no_instances-1)][1]; + // Genome.x[i+1] = t2->instances[random(0,t2->no_instances-1)][2]; + + /* if( tossCoin(pMutDesCard)) + Genome.x[i+2] = 0; + else + Genome.x[i+2] = random(1,11); + */ + } +} +\end + +\GenomeClass::crossover : +{ + for (int i=0; i10 ? 10 :value); // if value grether than 10 then value is 10 + + Genome.x[i+GENE_SIZE-1] = roundf(value); + NbMut++; + + } + } + + for( unsigned j=0 ; jinstances[random(0,t2->no_instances)][j+1]; + //Genome.x[i+j] = t2->hdr->attributes[j+1]->threshold[random(0,t2->hdr->attributes[j+1]->no_threshold)]; + + //value = (value<0 ? 0 : value); //if value less than 0, then value is 0 + //value = (value>101 ? 101 :value); // if value grether than 10 then value is 10 + //Genome.x[i+j] = value; + + //Genome.x[i+j] = ba_nearest_greather_table_value(t2,j+1,value); + } + } // for each threshold + + }// for each gene + return NbMut; +} +\end + +// The population evaluation. +\Instead evaluation function: +{ + ba_mix_instances(t1); + +#pragma omp parallel for + for( unsigned i=0 ; ievaluate(); + } +} +\end + +\GenomeClass::evaluator : // Returns the score +{ + + struct base* k_tmp_tables[K]; + + unsigned error = 0; + unsigned tree_size = 0; + + // generate tmp tables from genome, for every packets + float fitness_value = 0; + for( unsigned i=0 ; iinstances = + (float**)malloc(sizeof(*tmp_learning_table->instances)*t1->no_instances-k_tables[i]->no_instances); + + tmp_learning_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + tmp_test_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + + + // create a learning set with k-1 packets + unsigned copied_instances = 0; + for( unsigned j=0 ; jinstances)+copied_instances, + k_tmp_tables[j]->instances, + sizeof(*tmp_learning_table->instances)*(k_tmp_tables[j]->no_instances)); + copied_instances += k_tmp_tables[j]->no_instances; + } + tmp_learning_table->no_instances = copied_instances; + + + // create the test set with 1 packet + tmp_test_table->instances = (float**)malloc(sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + memcpy(tmp_test_table->instances,k_tmp_tables[i]->instances, + sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + tmp_test_table->no_instances = k_tmp_tables[i]->no_instances; + + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_test_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_test_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_test_table,t1->hdr->no_attributes); + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_learning_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_learning_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_learning_table,t1->hdr->no_attributes); + + + cTreeNode* t = genereate_decision_tree(tmp_learning_table); + + //show_tree(k_tmp_tables[0],t,0); + + //DBG_print_instances(tmp_test_table->instances,tmp_test_table->no_instances,tmp_test_table->hdr->no_attributes); + for( unsigned j=0 ; jno_instances ; j++ ){ + unsigned predicted_class = t->classify_instance(tmp_test_table->instances[j]); + unsigned real_class = (unsigned)tmp_test_table->instances[j][t1->hdr->whichis_class]; + + //printf("%3.0f : %d, %d\n",tmp_test_table->instances[j][1],predicted_class,real_class); + // here compute classification error, or any quality measurment + if( predicted_class!=real_class ){ + error++; + } + } + + //printf("err : %f %d\n",fitness_value,t->tree_depth()); + tree_size += t->tree_depth(); + + delete t; + + + // free current sets + // first un-assignate instances + for( unsigned i=0 ; ino_instances ; i++ ){tmp_learning_table->instances[i] = NULL;} + for( unsigned i=0 ; ino_instances ; i++ ){tmp_test_table->instances[i] = NULL;} + tmp_learning_table->no_instances = 0; + tmp_test_table->no_instances = 0; + // then delete tmp tables + ba_partial_copy_delete(tmp_learning_table); + ba_partial_copy_delete(tmp_test_table); + + } + + fitness_value = (((float)error) / t1->no_instances)*100 + ((float)tree_size)/K; + + for( unsigned i=0 ; i +#include +#include +#include +#include + + +#define K 5 +#define GENE_SIZE 4 +#define GENOME_SIZE 32 + + +float pMutPerGene=0.1; +float pMutDesCard = 0.05; +float pMutDesThre = 0.5; + +struct base* t1 = NULL; +struct base* t2 = NULL; + +float* uniq_instances[2]; +unsigned uniq_cnt[2]; + +struct base* k_tables[K]; +unsigned packets_size[K]; + +\end + +\User classes : +GenomeClass { + float x[GENOME_SIZE]; +} +\end + + +\Before everything else function: +{ + srand(globalRandomGenerator->get_seed()); + INSTEAD_EVAL_STEP = true; + + cout << "Seed : " << globalRandomGenerator->get_seed() << endl; + srand(globalRandomGenerator->get_seed()); + + t1 = ba_postgres_load_ilot(1); + t2 = ba_postgres_load_batiment(1); + ba_set_links(t1,t2); + + //printf("+ %d - %d\n",t1->class_repartition[1],t1->class_repartition[0]); + for( unsigned i=0 ; ihdr->attributes[t1->hdr->whichis_class]->no_values; i++ ) + printf("%s : %d\n",t1->hdr->attributes[t1->hdr->whichis_class]->values[i],t1->class_repartition[i]); + + uniq_instances[0] = ba_compute_uniq_values(t2, 1, uniq_cnt+0); + uniq_instances[1] = ba_compute_uniq_values(t2, 2, uniq_cnt+1); + + printf("%d %d\n",uniq_cnt[0],uniq_cnt[1]); + + // allocating K packets + generate_k_fold(K,packets_size,t1,k_tables); + + +#if 1 + // try the perfect solution + IndividualImpl* i = new IndividualImpl(); + i->x[0] = 30; + i->x[1] = 39; + i->x[GENE_SIZE-1] = 0; + + for( unsigned j=3 ; jx[j] = INFINITY; + + float f = i->evaluate(); + printf("fitness : %f\n",f); + + cTreeNode* t = generate_tree(i->x,t1,t2,GENOME_SIZE,GENE_SIZE); + struct base* tc = table_from_genome(i->x,t1,t2,GENOME_SIZE,GENE_SIZE); + + show_tree(tc,t,0); +#endif + exit(-1); + +} +\end + +\After everything else function: +{ + + EA->population->sortParentPopulation(); + IndividualImpl* best = (IndividualImpl*)EA->population->parents[0]; + + printf("best fitness %f\n",best->evaluate()); + for( unsigned i=0 ; ix[i+j]); + } + printf(" >= %3.0f\n",best->x[i+GENE_SIZE-1]); + } + printf("\n"); + + + + cTreeNode* root = generate_tree(best->x,t1,t2,GENOME_SIZE,GENE_SIZE); + struct base* tmp_table = table_from_genome(best->x,t1,t2,GENOME_SIZE,GENE_SIZE); + show_tree(tmp_table,root); + + printf("depth of resulting tree %d\n",root->tree_depth()); + +#if 1 + unsigned error = 0; + for( unsigned i=0 ; ino_instances ; i++ ){ + unsigned predicted_class = root->classify_instance(tmp_table->instances[i]); + unsigned real_class = (unsigned)tmp_table->instances[i][t1->hdr->whichis_class]; + if( predicted_class != real_class ) + error++; + } + printf(" error on the whole set : %d\n",error); +#endif + + + delete root; + ba_delete( tmp_table ); +} +\end + +\GenomeClass::initialiser : +{ + for( unsigned i=0; i10 ? 10 :value); // if value grether than 10 then value is 10 + Genome.x[i+GENE_SIZE-1] = roundf(value); + NbMut++; + } + } + + for( unsigned j=0 ; jevaluate(); + } +} +\end + +\GenomeClass::evaluator : // Returns the score +{ + + struct base* k_tmp_tables[K]; + + unsigned error = 0; + unsigned tree_size = 0; + + // generate tmp tables from genome, for every packets + float fitness_value = 0; + for( unsigned i=0 ; iinstances = + (float**)malloc(sizeof(*tmp_learning_table->instances)*t1->no_instances-k_tables[i]->no_instances); + + tmp_learning_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + tmp_test_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + + + // create a learning set with k-1 packets + unsigned copied_instances = 0; + for( unsigned j=0 ; jinstances)+copied_instances, + k_tmp_tables[j]->instances, + sizeof(*tmp_learning_table->instances)*(k_tmp_tables[j]->no_instances)); + copied_instances += k_tmp_tables[j]->no_instances; + + } + tmp_learning_table->no_instances = copied_instances; + + + // create the test set with 1 packet + tmp_test_table->instances = (float**)malloc(sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + memcpy(tmp_test_table->instances,k_tmp_tables[i]->instances, + sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + tmp_test_table->no_instances = k_tmp_tables[i]->no_instances; + + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_test_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_test_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_test_table,t1->hdr->no_attributes); + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_learning_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_learning_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_learning_table,t1->hdr->no_attributes); + + + cTreeNode* t = genereate_decision_tree(tmp_learning_table); + + //show_tree(k_tmp_tables[0],t,0); + + //DBG_print_instances(tmp_test_table->instances,tmp_test_table->no_instances,tmp_test_table->hdr->no_attributes); + for( unsigned j=0 ; jno_instances ; j++ ){ + unsigned predicted_class = t->classify_instance(tmp_test_table->instances[j]); + unsigned real_class = (unsigned)tmp_test_table->instances[j][t1->hdr->whichis_class]; + + //printf("%3.0f : %d, %d\n",tmp_test_table->instances[j][1],predicted_class,real_class); + // here compute classification error, or any quality measurment + if( predicted_class!=real_class ){ + error++; + } + } + + //printf("err : %f %d\n",fitness_value,t->tree_depth()); + tree_size += t->tree_depth(); + + delete t; + + + // free current sets + // first un-assignate instances + for( unsigned i=0 ; ino_instances ; i++ ){tmp_learning_table->instances[i] = NULL;} + for( unsigned i=0 ; ino_instances ; i++ ){tmp_test_table->instances[i] = NULL;} + tmp_learning_table->no_instances = 0; + tmp_test_table->no_instances = 0; + // then delete tmp tables + ba_partial_copy_delete(tmp_learning_table); + ba_partial_copy_delete(tmp_test_table); + + } + + fitness_value = (((float)error) / t1->no_instances)*100;// + ((float)tree_size)/K; + + for( unsigned i=0 ; i +#include +#include +#include +#include + + +#define K 5 +#define GENE_SIZE 3 +#define GENOME_SIZE 30 + + +float pMutPerGene=0.1; +float pMutDesCard = 0.05; +float pMutDesThre = 0.5; + +struct base* t1 = NULL; +struct base* t2 = NULL; + +float* uniq_instances[2]; +unsigned uniq_cnt[2]; + +struct base* k_tables[K]; +unsigned packets_size[K]; + +\end + +\User classes : +GenomeClass { + float x[GENOME_SIZE]; +} +\end + + +\Before everything else function: +{ + srand(globalRandomGenerator->get_seed()); + INSTEAD_EVAL_STEP = true; + + cout << "Seed : " << globalRandomGenerator->get_seed() << endl; + srand(globalRandomGenerator->get_seed()); + + t1 = ba_postgres_load_train_multi(); + t2 = ba_postgres_load_car_multi(); + ba_set_links(t1,t2); + + //printf("+ %d - %d\n",t1->class_repartition[1],t1->class_repartition[0]); + for( unsigned i=0 ; ihdr->attributes[t1->hdr->whichis_class]->no_values; i++ ) + printf("%s : %d\n",t1->hdr->attributes[t1->hdr->whichis_class]->values[i],t1->class_repartition[i]); + + uniq_instances[0] = ba_compute_uniq_values(t2, 1, uniq_cnt+0); + uniq_instances[1] = ba_compute_uniq_values(t2, 2, uniq_cnt+1); + + printf("%d %d\n",uniq_cnt[0],uniq_cnt[1]); + + // allocating K packets + generate_k_fold(K,packets_size,t1,k_tables); + + +#if 1 + // try the perfect solution + IndividualImpl* i = new IndividualImpl(); + i->x[0] = 30; + i->x[1] = 40; + i->x[GENE_SIZE-1] = 1; + + for( unsigned j=3 ; jx[j] = INFINITY; + + float f = i->evaluate(); + printf("fitness : %f\n",f); + + struct base* tc = table_from_genome(i->x,t1,t2,GENOME_SIZE,GENE_SIZE); + cTreeNode* t = genereate_decision_tree(tc); + + + unsigned error = 0; + for( unsigned i=0 ; ino_instances ; i++ ){ + unsigned predicted_class = t->classify_instance(tc->instances[i]); + unsigned real_class = (unsigned)tc->instances[i][tc->hdr->whichis_class]; + if( predicted_class != real_class ){ + error++; + printf("p : %d\t",predicted_class); + DBG_print_instances(tc->instances+i,1,10); + } + } + printf(" error on the whole set : %d\n",error); + + + +#endif +} +\end + +\After everything else function: +{ + + EA->population->sortParentPopulation(); + IndividualImpl* best = (IndividualImpl*)EA->population->parents[0]; + + printf("best fitness %f\n",best->evaluate()); + for( unsigned i=0 ; ix[i+j]); + } + printf(" >= %3.0f\n",best->x[i+GENE_SIZE-1]); + } + printf("\n"); + + + + cTreeNode* root = generate_tree(best->x,t1,t2,GENOME_SIZE,GENE_SIZE); + struct base* tmp_table = table_from_genome(best->x,t1,t2,GENOME_SIZE,GENE_SIZE); + show_tree(tmp_table,root); + + printf("depth of resulting tree %d\n",root->tree_depth()); + +#if 1 + unsigned error = 0; + for( unsigned i=0 ; ino_instances ; i++ ){ + unsigned predicted_class = root->classify_instance(tmp_table->instances[i]); + unsigned real_class = (unsigned)tmp_table->instances[i][t1->hdr->whichis_class]; + if( predicted_class != real_class ) + error++; + } + printf(" error on the whole set : %d\n",error); +#endif + + + delete root; + ba_delete( tmp_table ); +} +\end + +\GenomeClass::initialiser : +{ + for( unsigned i=0; i10 ? 10 :value); // if value grether than 10 then value is 10 + Genome.x[i+GENE_SIZE-1] = roundf(value); + NbMut++; + } + } + + for( unsigned j=0 ; jevaluate(); + } +} +\end + +\GenomeClass::evaluator : // Returns the score +{ + + struct base* k_tmp_tables[K]; + + unsigned error = 0; + unsigned tree_size = 0; + + // generate tmp tables from genome, for every packets + float fitness_value = 0; + for( unsigned i=0 ; iinstances = + (float**)malloc(sizeof(*tmp_learning_table->instances)*t1->no_instances-k_tables[i]->no_instances); + + tmp_learning_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + tmp_test_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + + + // create a learning set with k-1 packets + unsigned copied_instances = 0; + for( unsigned j=0 ; jinstances)+copied_instances, + k_tmp_tables[j]->instances, + sizeof(*tmp_learning_table->instances)*(k_tmp_tables[j]->no_instances)); + copied_instances += k_tmp_tables[j]->no_instances; + } + tmp_learning_table->no_instances = copied_instances; + + + // create the test set with 1 packet + tmp_test_table->instances = (float**)malloc(sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + memcpy(tmp_test_table->instances,k_tmp_tables[i]->instances, + sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + tmp_test_table->no_instances = k_tmp_tables[i]->no_instances; + + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_test_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_test_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_test_table,t1->hdr->no_attributes); + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_learning_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_learning_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_learning_table,t1->hdr->no_attributes); + + + cTreeNode* t = genereate_decision_tree(tmp_learning_table); + + //show_tree(k_tmp_tables[0],t,0); + + //DBG_print_instances(tmp_test_table->instances,tmp_test_table->no_instances,tmp_test_table->hdr->no_attributes); + for( unsigned j=0 ; jno_instances ; j++ ){ + unsigned predicted_class = t->classify_instance(tmp_test_table->instances[j]); + unsigned real_class = (unsigned)tmp_test_table->instances[j][t1->hdr->whichis_class]; + + //printf("%3.0f : %d, %d\n",tmp_test_table->instances[j][1],predicted_class,real_class); + // here compute classification error, or any quality measurment + if( predicted_class!=real_class ){ + error++; + } + } + + //printf("err : %f %d\n",fitness_value,t->tree_depth()); + tree_size += t->tree_depth(); + + delete t; + + + // free current sets + // first un-assignate instances + for( unsigned i=0 ; ino_instances ; i++ ){tmp_learning_table->instances[i] = NULL;} + for( unsigned i=0 ; ino_instances ; i++ ){tmp_test_table->instances[i] = NULL;} + tmp_learning_table->no_instances = 0; + tmp_test_table->no_instances = 0; + // then delete tmp tables + ba_partial_copy_delete(tmp_learning_table); + ba_partial_copy_delete(tmp_test_table); + + } + + fitness_value = (((float)error) / t1->no_instances)*100 + ((float)tree_size)/K; + + for( unsigned i=0 ; i +#include +#include +#include +#include + + +#define K 5 +#define GENE_SIZE 3 +#define GENOME_SIZE 32 + + +float pMutPerGene=0.1; +float pMutDesCard = 0.05; +float pMutDesThre = 0.5; + +struct base* t1 = NULL; +struct base* t2 = NULL; + +float* uniq_instances[2]; +unsigned uniq_cnt[2]; + +struct base* k_tables[K]; +unsigned packets_size[K]; + +\end + +\User classes : +GenomeClass { + float x[GENOME_SIZE]; +} +\end + + +\Before everything else function: +{ + srand(globalRandomGenerator->get_seed()); + INSTEAD_EVAL_STEP = true; + + cout << "Seed : " << globalRandomGenerator->get_seed() << endl; + srand(globalRandomGenerator->get_seed()); + + t1 = ba_postgres_load_train(); + t2 = ba_postgres_load_car(); + ba_set_links(t1,t2); + + //printf("+ %d - %d\n",t1->class_repartition[1],t1->class_repartition[0]); + for( unsigned i=0 ; ihdr->attributes[t1->hdr->whichis_class]->no_values; i++ ) + printf("%s : %d\n",t1->hdr->attributes[t1->hdr->whichis_class]->values[i],t1->class_repartition[i]); + + uniq_instances[0] = ba_compute_uniq_values(t2, 1, uniq_cnt+0); + uniq_instances[1] = ba_compute_uniq_values(t2, 2, uniq_cnt+1); + + printf("%d %d\n",uniq_cnt[0],uniq_cnt[1]); + + // allocating K packets + generate_k_fold(K,packets_size,t1,k_tables); + + +#if 1 + // try the perfect solution + IndividualImpl* i = new IndividualImpl(); + i->x[0] = 30; + i->x[1] = 39; + i->x[GENE_SIZE-1] = 0; + + for( unsigned j=3 ; jx[j] = INFINITY; + + float f = i->evaluate(); + printf("fitness : %f\n",f); + + cTreeNode* t = generate_tree(i->x,t1,t2,GENOME_SIZE,GENE_SIZE); + struct base* tc = table_from_genome(i->x,t1,t2,GENOME_SIZE,GENE_SIZE); + + show_tree(tc,t,0); +#endif + //exit(-1); + +} +\end + +\After everything else function: +{ + + EA->population->sortParentPopulation(); + IndividualImpl* best = (IndividualImpl*)EA->population->parents[0]; + + printf("best fitness %f\n",best->evaluate()); + for( unsigned i=0 ; ix[i+j]); + } + printf(" >= %3.0f\n",best->x[i+GENE_SIZE-1]); + } + printf("\n"); + + + + cTreeNode* root = generate_tree(best->x,t1,t2,GENOME_SIZE,GENE_SIZE); + struct base* tmp_table = table_from_genome(best->x,t1,t2,GENOME_SIZE,GENE_SIZE); + show_tree(tmp_table,root); + + printf("depth of resulting tree %d\n",root->tree_depth()); + +#if 1 + unsigned error = 0; + for( unsigned i=0 ; ino_instances ; i++ ){ + unsigned predicted_class = root->classify_instance(tmp_table->instances[i]); + unsigned real_class = (unsigned)tmp_table->instances[i][t1->hdr->whichis_class]; + if( predicted_class != real_class ) + error++; + } + printf(" error on the whole set : %d\n",error); +#endif + + + delete root; + ba_delete( tmp_table ); +} +\end + +\GenomeClass::initialiser : +{ + for( unsigned i=0; i10 ? 10 :value); // if value grether than 10 then value is 10 + Genome.x[i+GENE_SIZE-1] = roundf(value); + NbMut++; + } + } + + for( unsigned j=0 ; jevaluate(); + } +} +\end + +\GenomeClass::evaluator : // Returns the score +{ + + struct base* k_tmp_tables[K]; + + unsigned error = 0; + unsigned tree_size = 0; + + // generate tmp tables from genome, for every packets + float fitness_value = 0; + for( unsigned i=0 ; iinstances = + (float**)malloc(sizeof(*tmp_learning_table->instances)*t1->no_instances-k_tables[i]->no_instances); + + tmp_learning_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + tmp_test_table->hdr = ba_partial_copy_hdr(k_tmp_tables[i]->hdr); + + + // create a learning set with k-1 packets + unsigned copied_instances = 0; + for( unsigned j=0 ; jinstances)+copied_instances, + k_tmp_tables[j]->instances, + sizeof(*tmp_learning_table->instances)*(k_tmp_tables[j]->no_instances)); + copied_instances += k_tmp_tables[j]->no_instances; + } + tmp_learning_table->no_instances = copied_instances; + + + // create the test set with 1 packet + tmp_test_table->instances = (float**)malloc(sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + memcpy(tmp_test_table->instances,k_tmp_tables[i]->instances, + sizeof(*tmp_test_table->instances)*k_tmp_tables[i]->no_instances); + tmp_test_table->no_instances = k_tmp_tables[i]->no_instances; + + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_test_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_test_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_test_table,t1->hdr->no_attributes); + + // set threshold, from t1 for standard attribute + for( unsigned j=0 ; jhdr->no_attributes ; j++ ){ + tmp_learning_table->hdr->attributes[j]->threshold = t1->hdr->attributes[j]->threshold; + tmp_learning_table->hdr->attributes[j]->no_threshold = t1->hdr->attributes[j]->no_threshold; + } + // compute threshold for attribute generated by genome + ba_compute_threshold_from(tmp_learning_table,t1->hdr->no_attributes); + + + cTreeNode* t = genereate_decision_tree(tmp_learning_table); + + //show_tree(k_tmp_tables[0],t,0); + + //DBG_print_instances(tmp_test_table->instances,tmp_test_table->no_instances,tmp_test_table->hdr->no_attributes); + for( unsigned j=0 ; jno_instances ; j++ ){ + unsigned predicted_class = t->classify_instance(tmp_test_table->instances[j]); + unsigned real_class = (unsigned)tmp_test_table->instances[j][t1->hdr->whichis_class]; + + //printf("%3.0f : %d, %d\n",tmp_test_table->instances[j][1],predicted_class,real_class); + // here compute classification error, or any quality measurment + if( predicted_class!=real_class ){ + error++; + } + } + + //printf("err : %f %d\n",fitness_value,t->tree_depth()); + tree_size += t->tree_depth(); + + delete t; + + + // free current sets + // first un-assignate instances + for( unsigned i=0 ; ino_instances ; i++ ){tmp_learning_table->instances[i] = NULL;} + for( unsigned i=0 ; ino_instances ; i++ ){tmp_test_table->instances[i] = NULL;} + tmp_learning_table->no_instances = 0; + tmp_test_table->no_instances = 0; + // then delete tmp tables + ba_partial_copy_delete(tmp_learning_table); + ba_partial_copy_delete(tmp_test_table); + + } + + fitness_value = (((float)error) / t1->no_instances)*100 + ((float)tree_size)/K; + + for( unsigned i=0 ; i concat.csv +k=4 +for(( j=$f_line ; j< $l_line ; j++)) +do + for i in `ls ./card*.csv` + do + echo -n `head $i -n $j | tail -n1 | cut -d"," -f $k`, >> concat.csv + done + echo "" >> concat.csv +done + +echo "Avg" >> concat.csv +k=5 +for(( j=$f_line ; j< $l_line ; j++)) +do + for i in `ls ./card*.csv` + do + echo -n `head $i -n $j | tail -n1 | cut -d"," -f $k`, >> concat.csv + done + echo "" >> concat.csv +done + +echo "StdDev" >> concat.csv +k=6 +for(( j=$f_line ; j< $l_line ; j++)) +do + for i in `ls ./card*.csv` + do + echo -n `head $i -n $j | tail -n1 | cut -d"," -f $k`, >> concat.csv + done + echo "" >> concat.csv +done diff --git a/dev/c4.5_toy/run.sh b/dev/c4.5_toy/run.sh new file mode 100644 index 0000000..eb023a8 --- /dev/null +++ b/dev/c4.5_toy/run.sh @@ -0,0 +1,10 @@ + + +for(( i=0 ; i<20 ; i++ )) +do + suf=`date +%s` + echo $suf + + ./card --seed=$suf --generateCSVFile=1 --nbGen=50 + mv card.csv out/card-$suf.csv +done \ No newline at end of file -- GitLab