Commit fdbb525e authored by Janez K's avatar Janez K

fixed fabfile, fixed old auto_import_packages, exported big_data and conceptnet

parent a32f1786
......@@ -36,7 +36,7 @@ def deploy():
run("python manage.py collectstatic --noinput")
puts(yellow("[Auto importing packages]"))
run("python manage.py auto_import_packages")
run("python manage.py import_all")
with cd('/srv/django-projects/supervisor'):
puts(yellow("[Restarting the run streams daemon"))
......
{
"model": "workflows.category",
"fields": {
"name": "Classification",
"parent": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"order": 1,
"uid": "0165a189-a6ea-4152-8b5c-f507d47bc0e6"
}
}
\ No newline at end of file
{
"model": "workflows.category",
"fields": {
"name": "Evaluation",
"parent": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"order": 6,
"uid": "08c90036-fd13-4ff3-ac20-01982262a782"
}
}
\ No newline at end of file
{
"model": "workflows.category",
"fields": {
"name": "Regression",
"parent": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"order": 5,
"uid": "0f139640-cca3-4b0c-91f9-dbd42f47e73c"
}
}
\ No newline at end of file
{
"model": "workflows.category",
"fields": {
"name": "Ensembles",
"parent": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"order": 3,
"uid": "1be7b5eb-c1b2-485a-8dbe-56abce63fc73"
}
}
\ No newline at end of file
{
"model": "workflows.category",
"fields": {
"name": "Clustering",
"parent": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"order": 4,
"uid": "a6ce3f5e-a0fa-4faa-8be4-5657fcd10b4e"
}
}
\ No newline at end of file
{
"model": "workflows.category",
"fields": {
"name": "Big data",
"parent": null,
"order": 1,
"uid": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7"
}
}
\ No newline at end of file
{
"model": "workflows.category",
"fields": {
"name": "Utilities",
"parent": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"order": 6,
"uid": "d97dbb5a-b0a6-42c4-b9a2-ae1cacef8b76"
}
}
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "0165a189-a6ea-4152-8b5c-f507d47bc0e6",
"treeview_image": "",
"uid": "16042741-6834-43ab-9587-b64a1f460238",
"is_streaming": false,
"package": "big_data",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "Logistic regression with MapReduce\r\n\r\nAlgorithm builds a model with continuous features and predicts binary target variable (1, 0). Learning is done by fitting theta parameters to the training data where the likelihood function is optimized by using Newton-Raphson to update theta parameters. The output of algorithm is consistent with implementation of logistic regression classifier in Orange.\r\n\r\nReference:\r\nMapReduce version of algorithm is proposed by Cheng-Tao Chu; Sang Kyun Kim, Yi-An Lin, YuanYuan Yu, Gary Bradski, Andrew Ng, and Kunle Olukotun. \"Map-Reduce for Machine Learning on Multicore\". NIPS 2006. ",
"static_image": "",
"action": "logreg_fit",
"visualization_view": "",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"windows_queue": false,
"order": 5,
"name": "Logistic regression"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "16042741-6834-43ab-9587-b64a1f460238",
"name": "Dataset",
"short_name": "dst",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "dataset",
"parameter": false,
"order": 1,
"uid": "29dd4a35-7639-493f-9e7b-a64f54f0d06d"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "16042741-6834-43ab-9587-b64a1f460238",
"name": "Convergence",
"short_name": "con",
"default": "1e-8",
"description": "The value defines the convergence of the logistic regression.",
"required": true,
"multi": false,
"parameter_type": "text",
"variable": "alpha",
"parameter": true,
"order": 2,
"uid": "104f5edc-6029-4ae8-8a83-29557fa901fd"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "16042741-6834-43ab-9587-b64a1f460238",
"name": "Max. number of iterations",
"short_name": "itr",
"default": "10",
"description": "Define a maximum number of iterations. If the cost function converges it will stop sooner.",
"required": true,
"multi": false,
"parameter_type": "text",
"variable": "itr",
"parameter": true,
"order": 3,
"uid": "73fbdb1a-8889-46d9-8eed-907156de7956"
}
},
{
"model": "workflows.abstractoutput",
"fields": {
"widget": "16042741-6834-43ab-9587-b64a1f460238",
"name": "Fit model",
"short_name": "fit",
"description": "Fit model URL",
"variable": "fitmodel_url",
"order": 1,
"uid": "08ee7516-92b7-4096-8199-3fd5d397f1e1"
}
}
]
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"treeview_image": "",
"uid": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"is_streaming": false,
"package": "big_data",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "",
"static_image": "",
"action": "file_url",
"visualization_view": "",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"windows_queue": false,
"order": 1,
"name": "Input Dataset"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Input URLs",
"short_name": "url",
"default": "",
"description": "Multiple URLs can be specified. A URL should be accessible via HTTP and not HTTPS. ",
"required": true,
"multi": false,
"parameter_type": "textarea",
"variable": "url",
"parameter": true,
"order": 1,
"uid": "e2883c3d-7b3a-46ec-8673-10da5f494ec9"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "URL range",
"short_name": "rng",
"default": "",
"description": "The URL range parameter is used with URLs that point to file chunks, named as xaaaa to xzzzz. This naming is provided by the unix split command. The first and last URL should be defined in the URLs text box. Intermediate URLs will be automatically generated.",
"required": false,
"multi": false,
"parameter_type": "checkbox",
"variable": "range",
"parameter": true,
"order": 2,
"uid": "992a24e0-0365-46ef-8dd2-37fa19563bd5"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Gzipped data",
"short_name": "zip",
"default": "",
"description": "Select if specified URLs point to data in gzipped format.",
"required": false,
"multi": false,
"parameter_type": "checkbox",
"variable": "data_type",
"parameter": true,
"order": 3,
"uid": "d62fc126-afda-4f0b-876d-5180cd409779"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Attribute selection",
"short_name": "ind",
"default": "",
"description": "Select attributes that will be processed. \r\n\r\nExample: 1 - 10 for indices in the range from 1 to 10 or 1,2 for indices 1 and 2.",
"required": true,
"multi": false,
"parameter_type": "text",
"variable": "X_indices",
"parameter": true,
"order": 4,
"uid": "e32184e0-f38a-466f-a991-82e084a1cd8b"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Attribute metadata",
"short_name": "mta",
"default": "numeric",
"description": "Select numeric if all attributes are numeric, or discrete if all attributes are discrete. \r\n",
"required": true,
"multi": false,
"parameter_type": "select",
"variable": "atr_meta",
"parameter": true,
"order": 5,
"uid": "705b4459-6f05-4fd3-a230-33e0ddd784b3"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Metadata URL",
"short_name": "cmt",
"default": "",
"description": "Define a URL of a file with attribute metadata.\r\n\r\nExample of a file with 3 attributes, where the first and second are continuous and the third is discrete:\r\natr1, atr2, atr3\r\nc,c,d ",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "custom",
"parameter": true,
"order": 6,
"uid": "491eab89-b3f1-4503-864b-7cad9a01dda7"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "ID index",
"short_name": "id",
"default": "",
"description": "Define identifier index in the data.",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "id_index",
"parameter": true,
"order": 7,
"uid": "f100a206-8fdd-4f8f-afa4-c7d3c3b6f0ca"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Class index",
"short_name": "tar",
"default": "",
"description": "Define the class index in the dataset. If it is not defined, the last attribute is used as the class.",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "y_index",
"parameter": true,
"order": 8,
"uid": "69a3061b-9dcf-4c18-9f69-b5bc082dd65e"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Delimiter",
"short_name": "dlt",
"default": ",",
"description": "Define delimiter to parse the data.",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "delimiter",
"parameter": true,
"order": 9,
"uid": "fd2eff8a-b2d0-4de6-9384-13ce5385e3fc"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Missing values",
"short_name": "mv",
"default": "",
"description": "Missing data values are skipped.\r\n\r\nExample: ?,",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "missing_vals",
"parameter": true,
"order": 10,
"uid": "f393261f-ba93-4ed5-bdfc-82f6027bb327"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Class mapping",
"short_name": "map",
"default": "",
"description": "The class mapping defines a mapping for a binary class. It is used with Logistic regression and Linear SVM.\r\n\r\nThe Logistic regression classifier uses 0 and 1 as class. If the dataset contains discrete classes (e.g. healthy, sick), a mapping should be defined, where healthy is mapped to 1 and sick to 0. The class mapping is used only with binary target labels.\r\nExample: healthy, sick",
"required": false,
"multi": false,
"parameter_type": "text",
"variable": "y_map",
"parameter": true,
"order": 11,
"uid": "04fb15c3-7d69-43e7-a009-0eb9a3c3a33e"
}
},
{
"model": "workflows.abstractoutput",
"fields": {
"widget": "189c6a1b-612a-4ca6-a7e3-c39349922781",
"name": "Dataset",
"short_name": "dst",
"description": "",
"variable": "dataset",
"order": 1,
"uid": "819161f8-dffa-4ad3-8952-11b9d14f0c40"
}
},
{
"model": "workflows.abstractoption",
"fields": {
"name": "discrete",
"uid": "c1d13a93-1572-415c-8988-12affdbff7fc",
"value": "discrete",
"abstract_input": "705b4459-6f05-4fd3-a230-33e0ddd784b3"
}
},
{
"model": "workflows.abstractoption",
"fields": {
"name": "metadata url",
"uid": "26ec18bf-771f-4aa4-957f-301df41cfa4e",
"value": "metadata url",
"abstract_input": "705b4459-6f05-4fd3-a230-33e0ddd784b3"
}
},
{
"model": "workflows.abstractoption",
"fields": {
"name": "numeric",
"uid": "6fb7a9a5-29ed-47c9-a260-301f34ffcf52",
"value": "numeric",
"abstract_input": "705b4459-6f05-4fd3-a230-33e0ddd784b3"
}
}
]
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "08c90036-fd13-4ff3-ac20-01982262a782",
"treeview_image": "",
"uid": "19afbba4-6c29-4594-bda6-d86e344cbf15",
"is_streaming": false,
"package": "big_data",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "",
"static_image": "",
"action": "bigdata_mse",
"visualization_view": "bigdata_mse",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"windows_queue": false,
"order": 1,
"name": "Mean squared error"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "19afbba4-6c29-4594-bda6-d86e344cbf15",
"name": "Results",
"short_name": "rst",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "predictions",
"parameter": false,
"order": 1,
"uid": "92e97624-fa25-4889-b7d6-fd9ce2823e5f"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "19afbba4-6c29-4594-bda6-d86e344cbf15",
"name": "Dataset",
"short_name": "dst",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "dataset",
"parameter": false,
"order": 2,
"uid": "ae2aaa2e-93d8-427c-9631-d053581ad888"
}
}
]
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "cf7b1d24-7c6d-4cd1-bcc9-16e43e1ab5a7",
"treeview_image": "",
"uid": "437156da-ad2c-41d0-b20b-a10e746e35c2",
"is_streaming": false,
"package": "big_data",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "",
"static_image": "",
"action": "results_to_file",
"visualization_view": "results_to_file",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"windows_queue": false,
"order": 2,
"name": "Results View"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "437156da-ad2c-41d0-b20b-a10e746e35c2",
"name": "Results",
"short_name": "rst",
"default": "",
"description": "Results URL",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "string",
"parameter": false,
"order": 1,
"uid": "795a676a-e380-4565-9170-be7e9b621377"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "437156da-ad2c-41d0-b20b-a10e746e35c2",
"name": "Additional parameters",
"short_name": "adp",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": "checkbox",
"variable": "add_params",
"parameter": true,
"order": 2,
"uid": "a194ac52-5e13-46c2-be48-a9b531da4d33"
}
}
]
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "0165a189-a6ea-4152-8b5c-f507d47bc0e6",
"treeview_image": "",
"uid": "477a2f37-5015-4a46-a243-08c8e84256b7",
"is_streaming": false,
"package": "big_data",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "Naive Bayes with MapReduce\r\n\r\nAlgorithm calculates multinomial distribution for discrete features and Gaussian distribution for numerical features. The output of algorithm is consistent with implementation of Naive Bayes classifier in Orange and scikit-learn.\r\n\r\nReference:\r\nMapReduce version of algorithm is proposed by Cheng-Tao Chu; Sang Kyun Kim, Yi-An Lin, YuanYuan Yu, Gary Bradski, Andrew Ng, and Kunle Olukotun. \"Map-Reduce for Machine Learning on Multicore\". NIPS 2006. ",
"static_image": "",
"action": "naivebayes_fit",
"visualization_view": "",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"windows_queue": false,
"order": 1,
"name": "Naive Bayes"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "477a2f37-5015-4a46-a243-08c8e84256b7",
"name": "Dataset",
"short_name": "dst",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "dataset",
"parameter": false,
"order": 1,
"uid": "06ec10c3-e773-4a86-a91d-873af823c9f3"
}
},
{
"model": "workflows.abstractoutput",
"fields": {
"widget": "477a2f37-5015-4a46-a243-08c8e84256b7",
"name": "Fit model",
"short_name": "fit",
"description": "Fit model URL",
"variable": "fitmodel_url",
"order": 1,
"uid": "ec9ad7fc-1788-46c4-bdb8-4d080b740b7f"
}
}
]
\ No newline at end of file
[
{
"model": "workflows.abstractwidget",
"fields": {
"category": "1be7b5eb-c1b2-485a-8dbe-56abce63fc73",
"treeview_image": "",
"uid": "4f2ce923-62e6-4be1-a394-72ac52988386",
"is_streaming": false,
"package": "big_data",
"interaction_view": "",
"has_progress_bar": false,
"image": "",
"description": "Random forest with MapReduce\r\n\r\nFit phase\r\nRandom forest algorithm builds multiple decision trees with a bootstrap method on a subset of data. \r\nIn each tree node, it estimates sqrt(num. of attributes)+1 randomly selected attributes (without replacement).\r\nAll decision trees are merged into a large ensemble. \r\n\r\nPredict phase\r\nAlgorithm queries as many trees as needed for reliable prediction.\r\nFirstly, it randomly chooses without replacement 15 trees. If all trees vote for the same class, it outputs prediction. If there are multiple classes predicted, it chooses 15 trees again. Algorithm calculates difference in probability between most and second most probable prediction. If difference is greater than parameter diff, it outputs prediction. If a test sample is hard to predict (difference is never higher than diff), it queries the whole ensemble to make a prediction.\r\n\r\nReference\r\nSimilar algorithm is proposed in: Justin D Basilico, M Arthur Munson, Tamara G Kolda, Kevin R Dixon, and W Philip Kegelmeyer. Comet: A recipe for learning and using large ensembles on massive data. \r\n",
"static_image": "",
"action": "rf_fit",
"visualization_view": "",
"streaming_visualization_view": "",
"post_interact_action": "",
"wsdl_method": "",
"wsdl": "",
"interactive": false,
"windows_queue": false,
"order": 1,
"name": "Random forest"
}
},
{
"model": "workflows.abstractinput",
"fields": {
"widget": "4f2ce923-62e6-4be1-a394-72ac52988386",
"name": "Dataset",
"short_name": "dst",
"default": "",
"description": "",
"required": true,
"multi": false,
"parameter_type": null,
"variable": "dataset",
"parameter": false,