72a39fab-5433-493f-ae22-12a264075356.json 9.04 KB
Newer Older
1 2 3 4 5 6 7
[
  {
    "model": "workflows.abstractwidget", 
    "fields": {
      "category": "1be7b5eb-c1b2-485a-8dbe-56abce63fc73", 
      "treeview_image": "", 
      "uid": "72a39fab-5433-493f-ae22-12a264075356", 
8
      "windows_queue": false, 
9 10 11 12
      "package": "big_data", 
      "interaction_view": "", 
      "has_progress_bar": false, 
      "image": "", 
hiphop's avatar
hiphop committed
13
      "description": "Distributed Weighted Forest\r\n\r\nWeighted forest is a novel ensemble algorithm. \r\n\r\nFit phase\r\nWeighted forest algorithm builds multiple decision trees with a bootstrap method on a subset of data. In each tree node, it estimates sqrt(num. of attributes)+1 randomly selected attributes (without replacement). It uses decision tree to predict out-of-bag samples. For each prediction of an out-of-bag sample, it measures margin (classifier confidence in prediction) and leaf identifier that outputs prediction. Algorithm uses similarity matrix, where it stores similarities for each out-of-bag sample that was predicted with the same leaf. We assume that samples are similar, if the same leaf predicts them multiple times in multiple decision trees. \r\nAfter algorithm builds all decision trees, it passes similarity matrix to k-medoids algorithm. Similarity matrix presents distances between test samples. We set parameter k as sqrt(num. of attributes)+1. k-medoids algorithm outputs medoids, which are test samples in the cluster centers of the dataset. Medoids are actual samples in a dataset, unlike centroids which are centers of clusters. Algorithm measures average margin for all samples that are in the cluster of certain medoid. It saves the average margin of a decision tree in its model. Algorithm uses these scores as weights of decision trees in predict phase.\r\nAlgorithm builds a forest on each subset of the data and it merges them in large ensemble. Each forest has its own medoids.\r\n\r\nPredict phase \r\nAlgorithm selects a forest (or more, if it finds equal similarities with medoids in multiple forests), that contains the medoid most similar to a test sample. Then it uses the same procedure as with small data. Algorithm calculates Gower similarity coefficient with a test sample and every medoid. Only decision trees with high margin on similar test samples output prediction and algorithm stores decision tree margin for each prediction. \r\nAlgorithm calculates final values for each prediction: (number of margins) * avg(margins) and it selects prediction with highest value.", 
14 15 16 17 18 19 20 21
      "static_image": "", 
      "action": "wrf_fit", 
      "visualization_view": "", 
      "streaming_visualization_view": "", 
      "post_interact_action": "", 
      "wsdl_method": "", 
      "wsdl": "", 
      "interactive": false, 
22
      "is_streaming": false, 
23
      "order": 1, 
hiphop's avatar
hiphop committed
24
      "name": "Distributed Weighted Forest"
25 26 27 28 29 30
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
31 32 33 34
      "name": "Class majority", 
      "short_name": "csm", 
      "default": "1", 
      "description": "Purity of a subset.", 
35 36
      "required": true, 
      "multi": false, 
37 38 39
      "parameter_type": "text", 
      "variable": "majority", 
      "parameter": true, 
40
      "order": 7, 
41
      "uid": "1b23ead1-b104-4d27-a6fd-b23de6efa28f"
42 43
    }
  }, 
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
      "name": "Randomly chosen medoids", 
      "short_name": "rcm", 
      "default": "3", 
      "description": "Number of randomly chosen medoids to calculate similarity.", 
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
      "variable": "num_medoids", 
      "parameter": true, 
      "order": 4, 
      "uid": "1bbcbc2c-a9d5-4427-a8ef-e4dd58c22f86"
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
      "name": "Separate most represented class", 
      "short_name": "smp", 
      "default": "true", 
      "description": "", 
      "required": true, 
      "multi": false, 
      "parameter_type": "checkbox", 
      "variable": "separate_max", 
      "parameter": true, 
      "order": 10, 
      "uid": "2ccff5c1-7e06-4887-863d-7acf76209e50"
    }
  }, 
78 79 80 81
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
82 83 84 85
      "name": "Random state", 
      "short_name": "rsd", 
      "default": "None", 
      "description": "Define a random state", 
86 87 88
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
89
      "variable": "seed", 
90
      "parameter": true, 
91
      "order": 11, 
92
      "uid": "31c68e34-3bff-41bb-bf77-925c6171a6f6"
93 94 95 96 97 98
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
99 100 101 102
      "name": "Dataset", 
      "short_name": "dst", 
      "default": "", 
      "description": "", 
103 104
      "required": true, 
      "multi": false, 
105 106 107 108 109
      "parameter_type": null, 
      "variable": "dataset", 
      "parameter": false, 
      "order": 1, 
      "uid": "37879268-0aa9-4458-afb2-71a521acb299"
110 111
    }
  }, 
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
      "name": "Min samples in leaf", 
      "short_name": "msl", 
      "default": "5", 
      "description": "The minimum number of samples in newly created leaves. A split is discarded if, after the split, one of the leaves would contain fewer than min_samples_leaf samples.", 
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
      "variable": "min_samples_leaf", 
      "parameter": true, 
      "order": 6, 
      "uid": "3a893a69-f22e-448b-9a92-222573c655ba"
    }
  }, 
129 130 131 132
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
133 134
      "name": "Max tree nodes", 
      "short_name": "mnt", 
135
      "default": "100", 
136
      "description": "Max. number of decision tree nodes", 
137 138 139
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
140
      "variable": "tree_nodes", 
141
      "parameter": true, 
hiphop's avatar
hiphop committed
142
      "order": 3, 
143
      "uid": "3d48b0d0-a304-45d5-9d18-3ca17e8fcf05"
144 145
    }
  }, 
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
      "name": "Discretization accuracy", 
      "short_name": "dac", 
      "default": "1", 
      "description": "Continuous attributes are converted to discrete intervals. For exact estimation use 0 (slowest) or increase the number to get an approximation (faster).", 
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
      "variable": "accuracy", 
      "parameter": true, 
      "order": 9, 
      "uid": "3ff0f040-3d11-413f-975a-1fde57bf289b"
    }
  }, 
163 164 165 166
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
167 168 169 170
      "name": "Trees per subset", 
      "short_name": "tps", 
      "default": "20", 
      "description": "Number of trees per subset of data", 
171 172 173
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
174
      "variable": "trees_per_subset", 
175
      "parameter": true, 
hiphop's avatar
hiphop committed
176
      "order": 2, 
177
      "uid": "8770ace6-4968-47cf-b7cc-4ad8bfff303c"
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
      "name": "Measure", 
      "short_name": "msr", 
      "default": "info_gain", 
      "description": "Select measure for estimation of attributes.", 
      "required": true, 
      "multi": false, 
      "parameter_type": "select", 
      "variable": "measure", 
      "parameter": true, 
193
      "order": 8, 
194 195 196 197 198 199 200
      "uid": "9a8f3c2c-265c-4b37-93c1-d58fee9dd7af"
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
201 202
      "name": "Min samples split", 
      "short_name": "lmi", 
203
      "default": "10", 
204
      "description": "Min. number of samples to split the node", 
205 206
      "required": true, 
      "multi": false, 
207
      "parameter_type": "text", 
208
      "variable": "min_samples_split", 
209
      "parameter": true, 
210
      "order": 5, 
211
      "uid": "ac032f38-f4a4-44ea-8c02-96506d4f8e86"
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
    }
  }, 
  {
    "model": "workflows.abstractoutput", 
    "fields": {
      "widget": "72a39fab-5433-493f-ae22-12a264075356", 
      "name": "Fit model", 
      "short_name": "fit", 
      "description": "Fit model URL", 
      "variable": "fitmodel_url", 
      "order": 1, 
      "uid": "a862dff5-a324-4d45-977c-abda633714c3"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
229 230 231 232
      "name": "Minimum description length", 
      "uid": "293fb2f7-de3f-4133-8e3a-22701245c55d", 
      "value": "mdl", 
      "abstract_input": "9a8f3c2c-265c-4b37-93c1-d58fee9dd7af"
233 234 235 236 237 238 239 240 241 242 243 244
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
      "name": "Information gain", 
      "uid": "364fbb94-f200-4acc-a801-d29339f0d4c5", 
      "value": "info_gain", 
      "abstract_input": "9a8f3c2c-265c-4b37-93c1-d58fee9dd7af"
    }
  }
]