4f2ce923-62e6-4be1-a394-72ac52988386.json 8.08 KB
Newer Older
1 2 3 4 5 6 7
[
  {
    "model": "workflows.abstractwidget", 
    "fields": {
      "category": "1be7b5eb-c1b2-485a-8dbe-56abce63fc73", 
      "treeview_image": "", 
      "uid": "4f2ce923-62e6-4be1-a394-72ac52988386", 
8
      "windows_queue": false, 
9 10 11 12
      "package": "big_data", 
      "interaction_view": "", 
      "has_progress_bar": false, 
      "image": "", 
hiphop's avatar
hiphop committed
13
      "description": "Distributed Random Forest\r\n\r\nFit phase\r\nRandom forest algorithm builds multiple decision trees with a bootstrap method on a subset of data. \r\nIn each tree node, it estimates sqrt(num. of attributes)+1 randomly selected attributes (without replacement).\r\nAll decision trees are merged in large ensemble.  \r\n\r\nPredict phase\r\nAlgorithm queries as many trees as needed for reliable prediction.\r\nFirstly, it randomly chooses without replacement 15 trees. If all trees vote for the same class, it outputs prediction. If there are multiple classes predicted, it chooses 15 trees again. Algorithm calculates difference in probability between most and second most probable prediction. If difference is greater than parameter diff, it outputs prediction. If a test sample is hard to predict (difference is never higher than diff), it queries whole ensemble to make a prediction.\r\n\r\nReference\r\nSimilar algorithm is proposed in: Justin D Basilico, M Arthur Munson, Tamara G Kolda, Kevin R Dixon, and W Philip Kegelmeyer. Comet: A recipe for learning and using large ensembles on massive data. ", 
14 15 16 17 18 19 20 21
      "static_image": "", 
      "action": "rf_fit", 
      "visualization_view": "", 
      "streaming_visualization_view": "", 
      "post_interact_action": "", 
      "wsdl_method": "", 
      "wsdl": "", 
      "interactive": false, 
22
      "is_streaming": false, 
23
      "order": 1, 
hiphop's avatar
hiphop committed
24
      "name": "Distributed Random Forest"
25 26 27 28 29 30
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
31 32 33 34
      "name": "Discretization accuracy", 
      "short_name": "dac", 
      "default": "1", 
      "description": "Continuous attributes are converted to discrete intervals. For exact estimation use 0 (slowest) or increase the number to get an approximation (faster).", 
35 36
      "required": true, 
      "multi": false, 
37 38
      "parameter_type": "text", 
      "variable": "accuracy", 
39
      "parameter": true, 
40
      "order": 8, 
41
      "uid": "00758cdf-2eb5-43c5-bedf-bd3b8b9c29d6"
42 43
    }
  }, 
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
      "name": "Separate most represented class", 
      "short_name": "smp", 
      "default": "true", 
      "description": "separate_max", 
      "required": true, 
      "multi": false, 
      "parameter_type": "checkbox", 
      "variable": "separate_max", 
      "parameter": true, 
      "order": 9, 
      "uid": "21444978-142f-4f3d-947c-20e0b41a2c9b"
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
      "name": "Min samples in leaf", 
      "short_name": "msl", 
      "default": "5", 
      "description": "The minimum number of samples in newly created leaves. A split is discarded if after the split, one of the leaves would contain less then min samples leaf samples", 
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
      "variable": "min_samples_leaf", 
      "parameter": true, 
      "order": 5, 
      "uid": "52591706-7f30-4def-a788-3e07d3f82876"
    }
  }, 
78 79 80 81
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
82 83
      "name": "Max tree nodes", 
      "short_name": "mnt", 
84
      "default": "100", 
85
      "description": "Max. number of decision tree nodes", 
86 87 88
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
89
      "variable": "tree_nodes", 
90
      "parameter": true, 
hiphop's avatar
hiphop committed
91
      "order": 3, 
92
      "uid": "5a915d8b-2c99-4661-aa20-325709b61b6b"
93 94 95 96 97 98
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
99 100 101 102
      "name": "Dataset", 
      "short_name": "dst", 
      "default": "", 
      "description": "", 
103 104
      "required": true, 
      "multi": false, 
105 106 107 108 109
      "parameter_type": null, 
      "variable": "dataset", 
      "parameter": false, 
      "order": 1, 
      "uid": "5b9a6e22-130c-48d5-9b78-06ac2bd2c32a"
110 111 112 113 114 115
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
116 117 118 119
      "name": "Measure", 
      "short_name": "msr", 
      "default": "info_gain", 
      "description": "Select measure for estimation of attributes.", 
120 121
      "required": true, 
      "multi": false, 
122 123
      "parameter_type": "select", 
      "variable": "measure", 
124
      "parameter": true, 
125
      "order": 7, 
126
      "uid": "68cbccf9-7469-4b55-b96e-4f7c6a3c9cde"
127 128 129 130 131 132
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
133 134 135 136
      "name": "Random state", 
      "short_name": "rsd", 
      "default": "None", 
      "description": "Define a random state", 
137 138 139
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
140
      "variable": "seed", 
141
      "parameter": true, 
142
      "order": 10, 
143
      "uid": "8e6e2d96-3457-4b23-ac93-ab90b083920f"
144 145 146 147 148 149
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
150 151 152 153
      "name": "Trees per subset", 
      "short_name": "tps", 
      "default": "20", 
      "description": "Number of trees per subset of data", 
154 155
      "required": true, 
      "multi": false, 
156 157
      "parameter_type": "text", 
      "variable": "trees_per_subset", 
158
      "parameter": true, 
hiphop's avatar
hiphop committed
159
      "order": 2, 
160
      "uid": "a0b28dbe-1cb8-4987-958e-e4d379c8d2ff"
161 162 163 164 165 166
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
167 168
      "name": "Min samples split", 
      "short_name": "lmi", 
169
      "default": "10", 
170
      "description": "Min. number of samples to split the node", 
171 172
      "required": true, 
      "multi": false, 
173
      "parameter_type": "text", 
174
      "variable": "min_samples_split", 
175
      "parameter": true, 
hiphop's avatar
hiphop committed
176
      "order": 4, 
177
      "uid": "a2f366a9-af74-4b3f-90ed-33c1fcad4c3a"
178 179 180 181 182 183
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
184 185 186 187
      "name": "Class majority", 
      "short_name": "csm", 
      "default": "1", 
      "description": "Purity of a subset.", 
188 189 190
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
191
      "variable": "majority", 
192
      "parameter": true, 
193
      "order": 6, 
194
      "uid": "fe7f5d5a-c2e2-4ae9-b138-18b1de7c4e93"
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
    }
  }, 
  {
    "model": "workflows.abstractoutput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
      "name": "Fit model", 
      "short_name": "fit", 
      "description": "Fit model URL", 
      "variable": "fitmodel_url", 
      "order": 1, 
      "uid": "0adadf0c-a93a-4e47-9df0-344d2cdadbf9"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
      "name": "Equal frequency discretization", 
      "uid": "02b0bf99-232c-4529-b1a7-701bba646450", 
      "value": "equal_freq", 
      "abstract_input": "00758cdf-2eb5-43c5-bedf-bd3b8b9c29d6"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
221 222 223
      "name": "Minimum description length", 
      "uid": "53059d4e-4375-488f-b2b2-0e9567f499d7", 
      "value": "mdl", 
224 225 226 227 228 229
      "abstract_input": "68cbccf9-7469-4b55-b96e-4f7c6a3c9cde"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
230 231 232
      "name": "Information gain", 
      "uid": "95331a01-dbfa-4f31-966a-759d65d3a556", 
      "value": "info_gain", 
233 234 235 236 237 238 239 240 241 242 243 244 245
      "abstract_input": "68cbccf9-7469-4b55-b96e-4f7c6a3c9cde"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
      "name": "Random discretization", 
      "uid": "ef11b2d3-301f-4440-be3c-24089b7f234f", 
      "value": "random", 
      "abstract_input": "00758cdf-2eb5-43c5-bedf-bd3b8b9c29d6"
    }
  }
]