4f2ce923-62e6-4be1-a394-72ac52988386.json 6.92 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
[
  {
    "model": "workflows.abstractwidget", 
    "fields": {
      "category": "1be7b5eb-c1b2-485a-8dbe-56abce63fc73", 
      "treeview_image": "", 
      "uid": "4f2ce923-62e6-4be1-a394-72ac52988386", 
      "is_streaming": false, 
      "package": "big_data", 
      "interaction_view": "", 
      "has_progress_bar": false, 
      "image": "", 
hiphop's avatar
hiphop committed
13
      "description": "Distributed Random Forest\r\n\r\nFit phase\r\nThe random forest algorithm builds multiple decision trees with a bootstrap method on a subset of data. \r\nIn each tree node, it estimates sqrt(num. of attributes)+1 randomly selected attributes (without replacement).\r\nAll decision trees are merged into a large ensemble.  \r\n\r\nPredict phase\r\nThe algorithm queries as many trees as needed for a reliable prediction.\r\nFirst, it randomly chooses 15 trees without replacement. If all trees vote for the same class, it outputs the prediction. If multiple classes are predicted, it chooses 15 trees again. The algorithm calculates the difference in probability between the most and second most probable predictions. If the difference is greater than the parameter diff, it outputs the prediction. If a test sample is hard to predict (the difference is never higher than diff), it queries the whole ensemble to make a prediction.\r\n\r\nReference\r\nA similar algorithm is proposed in: Justin D Basilico, M Arthur Munson, Tamara G Kolda, Kevin R Dixon, and W Philip Kegelmeyer. Comet: A recipe for learning and using large ensembles on massive data. ", 
14 15 16 17 18 19 20 21 22 23
      "static_image": "", 
      "action": "rf_fit", 
      "visualization_view": "", 
      "streaming_visualization_view": "", 
      "post_interact_action": "", 
      "wsdl_method": "", 
      "wsdl": "", 
      "interactive": false, 
      "windows_queue": false, 
      "order": 1, 
hiphop's avatar
hiphop committed
24
      "name": "Distributed Random Forest"
25 26 27 28 29 30
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
31 32 33 34
      "name": "Discretization", 
      "short_name": "spf", 
      "default": "equal_freq", 
      "description": "Select equal frequency discretization or random discretization for numeric attributes", 
35 36
      "required": true, 
      "multi": false, 
37 38 39
      "parameter_type": "select", 
      "variable": "split_fun", 
      "parameter": true, 
hiphop's avatar
hiphop committed
40
      "order": 7, 
41
      "uid": "00758cdf-2eb5-43c5-bedf-bd3b8b9c29d6"
42 43 44 45 46 47
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
48 49
      "name": "Max tree nodes", 
      "short_name": "mnt", 
50
      "default": "20", 
51
      "description": "Max. number of decision tree nodes", 
52 53 54
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
55
      "variable": "tree_nodes", 
56
      "parameter": true, 
hiphop's avatar
hiphop committed
57
      "order": 3, 
58
      "uid": "5a915d8b-2c99-4661-aa20-325709b61b6b"
59 60 61 62 63 64
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
65 66 67 68
      "name": "Dataset", 
      "short_name": "dst", 
      "default": "", 
      "description": "", 
69 70
      "required": true, 
      "multi": false, 
71 72 73 74 75
      "parameter_type": null, 
      "variable": "dataset", 
      "parameter": false, 
      "order": 1, 
      "uid": "5b9a6e22-130c-48d5-9b78-06ac2bd2c32a"
76 77 78 79 80 81
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
82 83 84 85
      "name": "Measure", 
      "short_name": "msr", 
      "default": "info_gain", 
      "description": "Select the measure used for estimating attributes.", 
86 87
      "required": true, 
      "multi": false, 
88 89
      "parameter_type": "select", 
      "variable": "measure", 
90
      "parameter": true, 
hiphop's avatar
hiphop committed
91
      "order": 6, 
92
      "uid": "68cbccf9-7469-4b55-b96e-4f7c6a3c9cde"
93 94 95 96 97 98
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
99 100 101 102
      "name": "Random state", 
      "short_name": "rsd", 
      "default": "None", 
      "description": "Define a random state", 
103 104 105
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
106
      "variable": "seed", 
107
      "parameter": true, 
hiphop's avatar
hiphop committed
108
      "order": 8, 
109
      "uid": "8e6e2d96-3457-4b23-ac93-ab90b083920f"
110 111 112 113 114 115
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
116 117 118 119
      "name": "Trees per subset", 
      "short_name": "tps", 
      "default": "20", 
      "description": "Number of trees per subset of data", 
120 121
      "required": true, 
      "multi": false, 
122 123
      "parameter_type": "text", 
      "variable": "trees_per_subset", 
124
      "parameter": true, 
hiphop's avatar
hiphop committed
125
      "order": 2, 
126
      "uid": "a0b28dbe-1cb8-4987-958e-e4d379c8d2ff"
127 128 129 130 131 132
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
133 134 135 136
      "name": "Min samples split", 
      "short_name": "lmi", 
      "default": "5", 
      "description": "Min. number of samples to split the node", 
137 138
      "required": true, 
      "multi": false, 
139 140
      "parameter_type": "text", 
      "variable": "leaf_min_inst", 
141
      "parameter": true, 
hiphop's avatar
hiphop committed
142
      "order": 4, 
143
      "uid": "a2f366a9-af74-4b3f-90ed-33c1fcad4c3a"
144 145 146 147 148 149
    }
  }, 
  {
    "model": "workflows.abstractinput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
150 151 152 153
      "name": "Class majority", 
      "short_name": "csm", 
      "default": "1", 
      "description": "Purity of a subset.", 
154 155 156
      "required": true, 
      "multi": false, 
      "parameter_type": "text", 
157
      "variable": "majority", 
158
      "parameter": true, 
hiphop's avatar
hiphop committed
159
      "order": 5, 
160
      "uid": "fe7f5d5a-c2e2-4ae9-b138-18b1de7c4e93"
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
    }
  }, 
  {
    "model": "workflows.abstractoutput", 
    "fields": {
      "widget": "4f2ce923-62e6-4be1-a394-72ac52988386", 
      "name": "Fit model", 
      "short_name": "fit", 
      "description": "Fit model URL", 
      "variable": "fitmodel_url", 
      "order": 1, 
      "uid": "0adadf0c-a93a-4e47-9df0-344d2cdadbf9"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
      "name": "Equal frequency discretization", 
      "uid": "02b0bf99-232c-4529-b1a7-701bba646450", 
      "value": "equal_freq", 
      "abstract_input": "00758cdf-2eb5-43c5-bedf-bd3b8b9c29d6"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
187 188 189
      "name": "Minimum description length", 
      "uid": "53059d4e-4375-488f-b2b2-0e9567f499d7", 
      "value": "mdl", 
190 191 192 193 194 195
      "abstract_input": "68cbccf9-7469-4b55-b96e-4f7c6a3c9cde"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
196 197 198
      "name": "Information gain", 
      "uid": "95331a01-dbfa-4f31-966a-759d65d3a556", 
      "value": "info_gain", 
199 200 201 202 203 204 205 206 207 208 209 210 211
      "abstract_input": "68cbccf9-7469-4b55-b96e-4f7c6a3c9cde"
    }
  }, 
  {
    "model": "workflows.abstractoption", 
    "fields": {
      "name": "Random discretization", 
      "uid": "ef11b2d3-301f-4440-be3c-24089b7f234f", 
      "value": "random", 
      "abstract_input": "00758cdf-2eb5-43c5-bedf-bd3b8b9c29d6"
    }
  }
]