diff --git a/mothra/build/pip-delete-this-directory.txt b/mothra/build/pip-delete-this-directory.txt new file mode 100644 index 0000000000000000000000000000000000000000..c8883ea99f50f5f6ea8d2a84f568ffa0c0c30675 --- /dev/null +++ b/mothra/build/pip-delete-this-directory.txt @@ -0,0 +1,5 @@ +This file is placed here by pip to indicate the source was put +here by pip. + +Once this package is successfully installed this source code will be +deleted (unless you remove this file). diff --git a/mothra/settings.py b/mothra/settings.py index 2571e16941af67ca54385b787117e11e94074a34..43cbd35c39b7c3ab79219f5956825981ba2b8f04 100755 --- a/mothra/settings.py +++ b/mothra/settings.py @@ -159,59 +159,51 @@ except NameError: from local_settings import * except ImportError: pass - -if USE_CONCURRENCY: - INSTALLED_APPS = ( - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.sites', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'django.contrib.admin', - 'south', - 'workflows', - 'workflows.latino', - 'workflows.decision_support', - 'workflows.bioinformatics', - 'workflows.subgroup_discovery', - 'workflows.nlp', - 'workflows.ilp', - 'workflows.mysql', - 'website', - 'signuplogin', - 'django_extensions', - 'django.contrib.humanize', - 'orderable_inlines', + +INSTALLED_APPS_DEFAULT = ( + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.sites', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'django.contrib.admin', + 'south', + 'website', + 'signuplogin', + 'django_extensions', + 'django.contrib.humanize', + 'orderable_inlines', + 'workflows', + ) + +INSTALLED_APPS_CONCUR = ( 'djcelery', - ) -else: - INSTALLED_APPS = ( - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.sites', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'django.contrib.admin', - 'south', - 'workflows', - 
'workflows.latino', - 'workflows.decision_support', - 'workflows.bioinformatics', - 'workflows.subgroup_discovery', - 'workflows.nlp', - 'workflows.ilp', - 'workflows.mysql', - 'workflows.base', - 'website', - 'signuplogin', - 'django_extensions', - 'django.contrib.humanize', - 'orderable_inlines', + ) if USE_CONCURRENCY else ( 'streams', - ) - + ) + +INSTALLED_APPS_WORKFLOWS_SUB = ( + 'workflows.base', + 'workflows.latino', + 'workflows.decision_support', + 'workflows.bioinformatics', + 'workflows.subgroup_discovery', + 'workflows.nlp', + 'workflows.nl_toolkit', + 'workflows.ilp', + 'workflows.weka', + 'workflows.cforange', + 'workflows.perfeval', + 'workflows.mysql' + #WORKFLOWS_SUBAPP_PLACEHOLDER + ) + +INSTALLED_APPS = \ + INSTALLED_APPS_DEFAULT +\ + INSTALLED_APPS_CONCUR +\ + INSTALLED_APPS_WORKFLOWS_SUB + TEMPLATE_CONTEXT_PROCESSORS = DEFAULT_SETTINGS.TEMPLATE_CONTEXT_PROCESSORS TEMPLATES_FOLDER = os.path.join(PROJECT_DIR, 'templates') @@ -226,3 +218,4 @@ LOGIN_URL = '/login/' LOGIN_REDIRECT_URL = '/' STATIC_DOC_ROOT = os.path.join(os.getcwd(), 'mothra/public/media') + diff --git a/requirements.txt b/requirements.txt index 86e88456aa85f18a611c2f5bfd63f07cdc5768c3..6c6162cf91424aa0f68a8fcf955e440f7d946f6f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ django-extensions==0.9 django-orderable-inlines==0.0.6 django-picklefield==0.2.1 httplib2==0.7.5 +pydot==1.0.28 wsgiref==0.1.2 diff --git a/workflows/base/db/package_data.json b/workflows/base/db/package_data.json index f889bdaa9b3b68fb954ee9eb4a87ef729f86d082..30a989cfb101d6a1c99446d1d94175d1b4257cb6 100644 --- a/workflows/base/db/package_data.json +++ b/workflows/base/db/package_data.json @@ -1,1258 +1,1314 @@ [ { - "pk": 1, - "model": "workflows.category", - "fields": { - "uid": "bbf1e628-8be6-4178-8d93-765de278c0e4", - "parent": null, - "workflow": null, - "user": null, - "order": 1, + "pk": 5, + "model": "workflows.category", + "fields": { + "uid": 
"bbf1e628-8be6-4178-8d93-765de278c0e4", + "parent": null, + "workflow": null, + "user": null, + "order": 1, "name": "Files" } - }, - { - "pk": 4, - "model": "workflows.abstractwidget", - "fields": { - "category": 1, - "treeview_image": "treeview/File.png", - "name": "File to string", - "is_streaming": false, - "uid": "ff26380c-dd36-47c4-92ec-4526e199abbc", - "interaction_view": "", - "image": "images/File.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "file_to_string", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, + }, + { + "pk": 21, + "model": "workflows.abstractwidget", + "fields": { + "category": 5, + "treeview_image": "treeview/File.png", + "name": "File to string", + "is_streaming": false, + "uid": "ff26380c-dd36-47c4-92ec-4526e199abbc", + "interaction_view": "", + "image": "images/File.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "file_to_string", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, "description": "" } - }, - { - "pk": 10, - "model": "workflows.abstractinput", - "fields": { - "widget": 4, - "name": "File", - "short_name": "fil", - "uid": "81742f96-f3c3-470a-af08-8afc54b6ff30", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "file", - "parameter": false, - "order": 0, + }, + { + "pk": 68, + "model": "workflows.abstractinput", + "fields": { + "widget": 21, + "name": "File", + "short_name": "fil", + "uid": "81742f96-f3c3-470a-af08-8afc54b6ff30", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "file", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 7, - "model": "workflows.abstractoutput", + "pk": 26, + "model": "workflows.abstractoutput", 
"fields": { - "widget": 4, - "name": "String", - "short_name": "str", - "variable": "string", - "uid": "4ca351ce-9b2f-4466-be15-b47f8cc37a84", - "order": 1, + "widget": 21, + "name": "String", + "short_name": "str", + "variable": "string", + "uid": "4ca351ce-9b2f-4466-be15-b47f8cc37a84", + "order": 1, "description": "" } - }, - { - "pk": 8, - "model": "workflows.abstractwidget", - "fields": { - "category": 1, - "treeview_image": "treeview/Upload-icon.png", - "name": "Load file", - "is_streaming": false, - "uid": "ff4e5d1a-c09b-4c1c-85e0-190c3b933418", - "interaction_view": "", - "image": "images/Upload-icon.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "load_file", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 2, + }, + { + "pk": 22, + "model": "workflows.abstractwidget", + "fields": { + "category": 5, + "treeview_image": "treeview/Upload-icon.png", + "name": "Load file", + "is_streaming": false, + "uid": "ff4e5d1a-c09b-4c1c-85e0-190c3b933418", + "interaction_view": "", + "image": "images/Upload-icon.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "load_file", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, "description": "" } - }, - { - "pk": 9, - "model": "workflows.abstractinput", - "fields": { - "widget": 8, - "name": "File", - "short_name": "fil", - "uid": "d2b094de-d94c-4a7c-acbe-c21e5042183f", - "default": "", - "required": false, - "multi": false, - "parameter_type": "file", - "variable": "file", - "parameter": true, - "order": 0, + }, + { + "pk": 69, + "model": "workflows.abstractinput", + "fields": { + "widget": 22, + "name": "File", + "short_name": "fil", + "uid": "d2b094de-d94c-4a7c-acbe-c21e5042183f", + "default": "", + "required": false, + "multi": false, + 
"parameter_type": "file", + "variable": "file", + "parameter": true, + "order": 0, "description": "" } - }, + }, { - "pk": 6, - "model": "workflows.abstractoutput", + "pk": 27, + "model": "workflows.abstractoutput", "fields": { - "widget": 8, - "name": "File", - "short_name": "fil", - "variable": "file", - "uid": "d9ede78a-a4f3-47c3-b2bd-04cda3f2ce6c", - "order": 1, + "widget": 22, + "name": "File", + "short_name": "fil", + "variable": "file", + "uid": "d9ede78a-a4f3-47c3-b2bd-04cda3f2ce6c", + "order": 1, "description": "" } - }, - { - "pk": 12, - "model": "workflows.abstractwidget", - "fields": { - "category": 1, - "treeview_image": "treeview/Upload-icon_1.png", - "name": "Load file to string", - "is_streaming": false, - "uid": "866b596e-0131-43f6-969d-97f43c66ef0d", - "interaction_view": "", - "image": "images/Upload-icon_1.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "load_to_string", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 3, + }, + { + "pk": 23, + "model": "workflows.abstractwidget", + "fields": { + "category": 5, + "treeview_image": "treeview/Upload-icon_1.png", + "name": "Load file to string", + "is_streaming": false, + "uid": "866b596e-0131-43f6-969d-97f43c66ef0d", + "interaction_view": "", + "image": "images/Upload-icon_1.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "load_to_string", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, "description": "" } - }, - { - "pk": 23, - "model": "workflows.abstractinput", - "fields": { - "widget": 12, - "name": "file", - "short_name": "fil", - "uid": "2de6ce93-0063-4770-a1d1-93fc2d98bccc", - "default": "", - "required": true, - "multi": false, - "parameter_type": "file", - "variable": "file", - "parameter": true, - "order": 0, + }, 
+ { + "pk": 70, + "model": "workflows.abstractinput", + "fields": { + "widget": 23, + "name": "file", + "short_name": "fil", + "uid": "2de6ce93-0063-4770-a1d1-93fc2d98bccc", + "default": "", + "required": true, + "multi": false, + "parameter_type": "file", + "variable": "file", + "parameter": true, + "order": 0, "description": "" } - }, + }, { - "pk": 15, - "model": "workflows.abstractoutput", + "pk": 28, + "model": "workflows.abstractoutput", "fields": { - "widget": 12, - "name": "string", - "short_name": "str", - "variable": "string", - "uid": "9051dbca-57cb-4d2b-98e1-14e5b2ea3375", - "order": 1, + "widget": 23, + "name": "string", + "short_name": "str", + "variable": "string", + "uid": "9051dbca-57cb-4d2b-98e1-14e5b2ea3375", + "order": 1, "description": "" } - }, - { - "pk": 15, - "model": "workflows.abstractwidget", - "fields": { - "category": 1, - "treeview_image": "treeview/1688041475.png", - "name": "String to file", - "is_streaming": false, - "uid": "442f5484-494d-4bd7-b1c2-74c1153dd1fe", - "interaction_view": "", - "image": "images/1688041475.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "string_to_file", - "action": "string_to_file", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 4, + }, + { + "pk": 24, + "model": "workflows.abstractwidget", + "fields": { + "category": 5, + "treeview_image": "treeview/1688041475.png", + "name": "String to file", + "is_streaming": false, + "uid": "442f5484-494d-4bd7-b1c2-74c1153dd1fe", + "interaction_view": "", + "image": "images/1688041475.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "string_to_file", + "action": "string_to_file", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 4, "description": "" } - }, - { - "pk": 15, - "model": "workflows.abstractinput", - "fields": { - 
"widget": 15, - "name": "String", - "short_name": "str", - "uid": "c658923a-a611-414b-ba7e-510aaa83ff19", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "string", - "parameter": false, - "order": 0, + }, + { + "pk": 71, + "model": "workflows.abstractinput", + "fields": { + "widget": 24, + "name": "String", + "short_name": "str", + "uid": "c658923a-a611-414b-ba7e-510aaa83ff19", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "string", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 2, - "model": "workflows.category", + "pk": 6, + "model": "workflows.category", "fields": { - "uid": "9b145e48-9cc0-4a24-9ac7-0df48e879361", - "parent": null, - "workflow": null, - "user": null, - "order": 1, + "uid": "9b145e48-9cc0-4a24-9ac7-0df48e879361", + "parent": null, + "workflow": null, + "user": null, + "order": 1, "name": "Integers" } - }, - { - "pk": 1, - "model": "workflows.abstractwidget", - "fields": { - "category": 2, - "treeview_image": "treeview/d914a910.png", - "name": "Add integers", - "is_streaming": false, - "uid": "e898b974-bafa-4b67-8918-b47b801e063e", - "interaction_view": "", - "image": "images/d914a910.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "add_integers", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, + }, + { + "pk": 25, + "model": "workflows.abstractwidget", + "fields": { + "category": 6, + "treeview_image": "treeview/d914a910.png", + "name": "Add integers", + "is_streaming": false, + "uid": "e898b974-bafa-4b67-8918-b47b801e063e", + "interaction_view": "", + "image": "images/d914a910.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "add_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, 
+ "has_progress_bar": false, + "order": 1, "description": "Adds two integers" } - }, - { - "pk": 2, - "model": "workflows.abstractinput", - "fields": { - "widget": 1, - "name": "Integer 1", - "short_name": "int", - "uid": "d46b4d0e-cc22-48f9-9947-4bbb754d8991", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "integer1", - "parameter": false, - "order": 0, + }, + { + "pk": 72, + "model": "workflows.abstractinput", + "fields": { + "widget": 25, + "name": "Integer 1", + "short_name": "int", + "uid": "d46b4d0e-cc22-48f9-9947-4bbb754d8991", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "integer1", + "parameter": false, + "order": 0, "description": "" } - }, - { - "pk": 3, - "model": "workflows.abstractinput", - "fields": { - "widget": 1, - "name": "Integer 2", - "short_name": "int", - "uid": "0bd80a21-e628-48c6-b3d8-58e8121e4711", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "integer2", - "parameter": false, - "order": 0, + }, + { + "pk": 73, + "model": "workflows.abstractinput", + "fields": { + "widget": 25, + "name": "Integer 2", + "short_name": "int", + "uid": "0bd80a21-e628-48c6-b3d8-58e8121e4711", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "integer2", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 2, - "model": "workflows.abstractoutput", + "pk": 29, + "model": "workflows.abstractoutput", "fields": { - "widget": 1, - "name": "Integer", - "short_name": "int", - "variable": "integer", - "uid": "da76c9e0-d374-4455-a708-6b5de8023f57", - "order": 1, + "widget": 25, + "name": "Integer", + "short_name": "int", + "variable": "integer", + "uid": "da76c9e0-d374-4455-a708-6b5de8023f57", + "order": 1, "description": "" } - }, - { - "pk": 5, - "model": "workflows.abstractwidget", - "fields": { - "category": 2, - "treeview_image": 
"treeview/d914a910_2.png", - "name": "Add multiple integers", - "is_streaming": false, - "uid": "e327e9ce-0a1d-4dcc-90ac-45ea4fd74707", - "interaction_view": "", - "image": "images/d914a910_2.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "add_multiple", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 2, + }, + { + "pk": 26, + "model": "workflows.abstractwidget", + "fields": { + "category": 6, + "treeview_image": "treeview/d914a910_2.png", + "name": "Add multiple integers", + "is_streaming": false, + "uid": "e327e9ce-0a1d-4dcc-90ac-45ea4fd74707", + "interaction_view": "", + "image": "images/d914a910_2.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "add_multiple", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, "description": "" } - }, - { - "pk": 6, - "model": "workflows.abstractinput", - "fields": { - "widget": 5, - "name": "Integer List", - "short_name": "int", - "uid": "aa8cdbe8-041b-455e-b03c-30d662f34c7b", - "default": "", - "required": false, - "multi": true, - "parameter_type": null, - "variable": "integer", - "parameter": false, - "order": 0, + }, + { + "pk": 74, + "model": "workflows.abstractinput", + "fields": { + "widget": 26, + "name": "Integer List", + "short_name": "int", + "uid": "aa8cdbe8-041b-455e-b03c-30d662f34c7b", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "integer", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 4, - "model": "workflows.abstractoutput", + "pk": 30, + "model": "workflows.abstractoutput", "fields": { - "widget": 5, - "name": "Sum", - "short_name": "sum", - "variable": "sum", - "uid": "512aa455-9fcc-462f-83bb-5203dec8ef7b", - "order": 1, + "widget": 26, + "name": "Sum", + 
"short_name": "sum", + "variable": "sum", + "uid": "512aa455-9fcc-462f-83bb-5203dec8ef7b", + "order": 1, "description": "" } - }, - { - "pk": 9, - "model": "workflows.abstractwidget", - "fields": { - "category": 2, - "treeview_image": "treeview/417px-Latex_integers.svg.png", - "name": "Create Integer", - "is_streaming": false, - "uid": "8b20d4ad-c420-4156-973d-48d6d15934ba", - "interaction_view": "", - "image": "images/417px-Latex_integers.svg.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "create_integer", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 3, + }, + { + "pk": 27, + "model": "workflows.abstractwidget", + "fields": { + "category": 6, + "treeview_image": "treeview/417px-Latex_integers.svg.png", + "name": "Create Integer", + "is_streaming": false, + "uid": "8b20d4ad-c420-4156-973d-48d6d15934ba", + "interaction_view": "", + "image": "images/417px-Latex_integers.svg.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "create_integer", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, "description": "Creates an integer object from a parameter." 
} - }, - { - "pk": 1, - "model": "workflows.abstractinput", - "fields": { - "widget": 9, - "name": "Type your integer", - "short_name": "int", - "uid": "1e52e44e-94cc-4666-8278-04da49d8f890", - "default": "", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "integer", - "parameter": true, - "order": 0, + }, + { + "pk": 75, + "model": "workflows.abstractinput", + "fields": { + "widget": 27, + "name": "Type your integer", + "short_name": "int", + "uid": "1e52e44e-94cc-4666-8278-04da49d8f890", + "default": "", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "integer", + "parameter": true, + "order": 0, "description": "" } - }, + }, { - "pk": 1, - "model": "workflows.abstractoutput", + "pk": 31, + "model": "workflows.abstractoutput", "fields": { - "widget": 9, - "name": "Integer", - "short_name": "int", - "variable": "integer", - "uid": "68ec6254-d17a-4867-b57c-6f95f7a72527", - "order": 1, + "widget": 27, + "name": "Integer", + "short_name": "int", + "variable": "integer", + "uid": "68ec6254-d17a-4867-b57c-6f95f7a72527", + "order": 1, "description": "The returned integer." 
} - }, - { - "pk": 14, - "model": "workflows.abstractwidget", - "fields": { - "category": 2, - "treeview_image": "", - "name": "Filter integers", - "is_streaming": false, - "uid": "cf0af098-719c-4dce-bf75-0b3018eafb03", - "interaction_view": "filter_integers", - "image": "", - "package": "base", - "static_image": "", - "post_interact_action": "filter_integers_post", - "user": null, - "visualization_view": "", - "action": "filter_integers", - "wsdl_method": "", - "wsdl": "", - "interactive": true, - "has_progress_bar": false, - "order": 4, + }, + { + "pk": 28, + "model": "workflows.abstractwidget", + "fields": { + "category": 6, + "treeview_image": "", + "name": "Filter integers", + "is_streaming": false, + "uid": "cf0af098-719c-4dce-bf75-0b3018eafb03", + "interaction_view": "filter_integers", + "image": "", + "package": "base", + "static_image": "", + "post_interact_action": "filter_integers_post", + "user": null, + "visualization_view": "", + "action": "filter_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 4, "description": "Filters some integers" } - }, - { - "pk": 8, - "model": "workflows.abstractinput", - "fields": { - "widget": 14, - "name": "Integer List", - "short_name": "int", - "uid": "7358ad8b-b3e7-4d19-98e1-749444b6686d", - "default": "", - "required": false, - "multi": true, - "parameter_type": null, - "variable": "integers", - "parameter": false, - "order": 0, + }, + { + "pk": 76, + "model": "workflows.abstractinput", + "fields": { + "widget": 28, + "name": "Integer List", + "short_name": "int", + "uid": "7358ad8b-b3e7-4d19-98e1-749444b6686d", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "integers", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 5, - "model": "workflows.abstractoutput", + "pk": 32, + "model": "workflows.abstractoutput", "fields": { - "widget": 14, - "name": "Integer list", - "short_name": "int", - 
"variable": "integers", - "uid": "ca868bfc-19db-49c1-babe-9ae5694478e4", - "order": 1, + "widget": 28, + "name": "Integer list", + "short_name": "int", + "variable": "integers", + "uid": "ca868bfc-19db-49c1-babe-9ae5694478e4", + "order": 1, "description": "" } - }, - { - "pk": 16, - "model": "workflows.abstractwidget", - "fields": { - "category": 2, - "treeview_image": "", - "name": "Multiply integers", - "is_streaming": false, - "uid": "c950af9c-be87-4761-a69e-e37f0c26cacb", - "interaction_view": "", - "image": "", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "multiply_integers", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 5, + }, + { + "pk": 29, + "model": "workflows.abstractwidget", + "fields": { + "category": 6, + "treeview_image": "", + "name": "Multiply integers", + "is_streaming": false, + "uid": "c950af9c-be87-4761-a69e-e37f0c26cacb", + "interaction_view": "", + "image": "", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "multiply_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 5, "description": "" } - }, - { - "pk": 22, - "model": "workflows.abstractinput", - "fields": { - "widget": 16, - "name": "Integers", - "short_name": "int", - "uid": "258f48ec-bbd1-4adc-85bb-97d43e73debe", - "default": "", - "required": false, - "multi": true, - "parameter_type": null, - "variable": "integers", - "parameter": false, - "order": 0, + }, + { + "pk": 77, + "model": "workflows.abstractinput", + "fields": { + "widget": 29, + "name": "Integers", + "short_name": "int", + "uid": "258f48ec-bbd1-4adc-85bb-97d43e73debe", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "integers", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 
12, - "model": "workflows.abstractoutput", + "pk": 33, + "model": "workflows.abstractoutput", "fields": { - "widget": 16, - "name": "Integer", - "short_name": "int", - "variable": "integer", - "uid": "66a00cb7-d9b9-4051-8d8a-2d46e707db19", - "order": 1, + "widget": 29, + "name": "Integer", + "short_name": "int", + "variable": "integer", + "uid": "66a00cb7-d9b9-4051-8d8a-2d46e707db19", + "order": 1, "description": "" } - }, - { - "pk": 19, - "model": "workflows.abstractwidget", - "fields": { - "category": 2, - "treeview_image": "treeview/d914a910_1.png", - "name": "Subtract integers", - "is_streaming": false, - "uid": "c6cf5c17-f6a6-4076-bf35-ecb06d72da86", - "interaction_view": "", - "image": "images/d914a910_1.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "subtract_integers", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 6, + }, + { + "pk": 30, + "model": "workflows.abstractwidget", + "fields": { + "category": 6, + "treeview_image": "treeview/d914a910_1.png", + "name": "Subtract integers", + "is_streaming": false, + "uid": "c6cf5c17-f6a6-4076-bf35-ecb06d72da86", + "interaction_view": "", + "image": "images/d914a910_1.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "subtract_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 6, "description": "Subtracts two integers" } - }, - { - "pk": 4, - "model": "workflows.abstractinput", - "fields": { - "widget": 19, - "name": "Integer 1", - "short_name": "int", - "uid": "77312adf-6969-407e-90bd-083c96d3983b", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "integer1", - "parameter": false, - "order": 0, + }, + { + "pk": 78, + "model": "workflows.abstractinput", + "fields": { + "widget": 30, + 
"name": "Integer 1", + "short_name": "int", + "uid": "77312adf-6969-407e-90bd-083c96d3983b", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "integer1", + "parameter": false, + "order": 0, "description": "" } - }, - { - "pk": 5, - "model": "workflows.abstractinput", - "fields": { - "widget": 19, - "name": "Integer 2", - "short_name": "int", - "uid": "10fdef7a-7c55-464d-bd7d-1e273759eda9", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "integer2", - "parameter": false, - "order": 0, + }, + { + "pk": 79, + "model": "workflows.abstractinput", + "fields": { + "widget": 30, + "name": "Integer 2", + "short_name": "int", + "uid": "10fdef7a-7c55-464d-bd7d-1e273759eda9", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "integer2", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 3, - "model": "workflows.abstractoutput", + "pk": 34, + "model": "workflows.abstractoutput", "fields": { - "widget": 19, - "name": "Integer", - "short_name": "int", - "variable": "integer", - "uid": "4488e496-5b41-4081-8715-ec82404d2d98", - "order": 1, + "widget": 30, + "name": "Integer", + "short_name": "int", + "variable": "integer", + "uid": "4488e496-5b41-4081-8715-ec82404d2d98", + "order": 1, "description": "" } - }, + }, { - "pk": 3, - "model": "workflows.category", + "pk": 7, + "model": "workflows.category", "fields": { - "uid": "01ad639d-acb8-45c0-bc5e-bc8739bb4c95", - "parent": null, - "workflow": null, - "user": null, - "order": 1, + "uid": "01ad639d-acb8-45c0-bc5e-bc8739bb4c95", + "parent": null, + "workflow": null, + "user": null, + "order": 1, "name": "Objects" } - }, - { - "pk": 3, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": "treeview/Task-List-icon.png", - "name": "Create List", - "is_streaming": false, - "uid": "ed60cacd-633c-4c7c-b963-a5fda548bed4", - 
"interaction_view": "", - "image": "images/Task-List-icon.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "create_list", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, + }, + { + "pk": 98, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "", + "name": "Concatenate lists", + "is_streaming": false, + "uid": "42847b88-dc6b-4ced-942a-ad0ab25f3517", + "interaction_view": "", + "image": "", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "base_concatenate_lists", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, "description": "" } - }, - { - "pk": 12, - "model": "workflows.abstractinput", - "fields": { - "widget": 3, - "name": "Element", - "short_name": "el", - "uid": "40d329ea-23e6-4217-9944-a43a9195bd54", - "default": "", - "required": false, - "multi": true, - "parameter_type": null, - "variable": "list", - "parameter": false, - "order": 0, + }, + { + "pk": 188, + "model": "workflows.abstractinput", + "fields": { + "widget": 98, + "name": "Lists", + "short_name": "lst", + "uid": "e4c412d3-b6b0-438e-8ec8-5362c6de5223", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "lists", + "parameter": false, + "order": 1, "description": "" } - }, + }, { - "pk": 8, - "model": "workflows.abstractoutput", + "pk": 110, + "model": "workflows.abstractoutput", "fields": { - "widget": 3, - "name": "List", - "short_name": "lst", - "variable": "list", - "uid": "97e650f1-8235-43ee-b23d-92a7c029c47f", - "order": 1, + "widget": 98, + "name": "List", + "short_name": "lst", + "variable": "list", + "uid": "75dd2246-5b92-4119-9e04-0daed307d099", + "order": 1, "description": "" } - }, - { - "pk": 6, - "model": "workflows.abstractwidget", - 
"fields": { - "category": 3, - "treeview_image": "treeview/Create-Range_1.png", - "name": "Create Range", - "is_streaming": false, - "uid": "98e1fec5-e771-4f75-8615-eac6577e64c1", - "interaction_view": "", - "image": "images/Create-Range_1.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "create_range", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 2, + }, + { + "pk": 31, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/Task-List-icon.png", + "name": "Create List", + "is_streaming": false, + "uid": "ed60cacd-633c-4c7c-b963-a5fda548bed4", + "interaction_view": "", + "image": "images/Task-List-icon.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "create_list", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, "description": "" } - }, - { - "pk": 7, - "model": "workflows.abstractinput", - "fields": { - "widget": 6, - "name": "Range Length (Number of Items in Range)", - "short_name": "rln", - "uid": "4f60c95a-3177-4a5d-b047-75d971ee7654", - "default": "10", - "required": false, - "multi": false, - "parameter_type": "text", - "variable": "n_range", - "parameter": true, - "order": 0, + }, + { + "pk": 80, + "model": "workflows.abstractinput", + "fields": { + "widget": 31, + "name": "Element", + "short_name": "el", + "uid": "40d329ea-23e6-4217-9944-a43a9195bd54", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "list", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 14, - "model": "workflows.abstractoutput", + "pk": 35, + "model": "workflows.abstractoutput", "fields": { - "widget": 6, - "name": "Range", - "short_name": "rng", - "variable": "rangeout", - "uid": 
"c5f77ca1-7680-4b73-852f-ebf142ed538a", - "order": 1, + "widget": 31, + "name": "List", + "short_name": "lst", + "variable": "list", + "uid": "97e650f1-8235-43ee-b23d-92a7c029c47f", + "order": 1, "description": "" } - }, - { - "pk": 10, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": "treeview/482063585452602669.png", - "name": "Delay", - "is_streaming": false, - "uid": "51a88c36-60c3-44c1-ba5a-b895c0ed0370", - "interaction_view": "", - "image": "images/482063585452602669.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "delay", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": true, - "order": 3, + }, + { + "pk": 32, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/Create-Range_1.png", + "name": "Create Range", + "is_streaming": false, + "uid": "98e1fec5-e771-4f75-8615-eac6577e64c1", + "interaction_view": "", + "image": "images/Create-Range_1.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "create_range", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, + "description": "" + } + }, + { + "pk": 81, + "model": "workflows.abstractinput", + "fields": { + "widget": 32, + "name": "Range Length (Number of Items in Range)", + "short_name": "rln", + "uid": "4f60c95a-3177-4a5d-b047-75d971ee7654", + "default": "10", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "n_range", + "parameter": true, + "order": 0, + "description": "" + } + }, + { + "pk": 36, + "model": "workflows.abstractoutput", + "fields": { + "widget": 32, + "name": "Range", + "short_name": "rng", + "variable": "rangeout", + "uid": "c5f77ca1-7680-4b73-852f-ebf142ed538a", + "order": 1, + "description": "" + } + }, + { + "pk": 33, + 
"model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/482063585452602669.png", + "name": "Delay", + "is_streaming": false, + "uid": "51a88c36-60c3-44c1-ba5a-b895c0ed0370", + "interaction_view": "", + "image": "images/482063585452602669.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "delay", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": true, + "order": 3, "description": "Delays for a certain amount of seconds" } - }, - { - "pk": 20, - "model": "workflows.abstractinput", - "fields": { - "widget": 10, - "name": "Data", - "short_name": "dat", - "uid": "c6ddfb2b-a04a-448e-a853-8b4e13d4ccdf", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "data", - "parameter": false, - "order": 0, + }, + { + "pk": 82, + "model": "workflows.abstractinput", + "fields": { + "widget": 33, + "name": "Data", + "short_name": "dat", + "uid": "c6ddfb2b-a04a-448e-a853-8b4e13d4ccdf", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 0, "description": "" } - }, - { - "pk": 21, - "model": "workflows.abstractinput", - "fields": { - "widget": 10, - "name": "Time", - "short_name": "tim", - "uid": "1a463ddc-289a-4250-ae41-9f7a3eb81881", - "default": "10", - "required": false, - "multi": false, - "parameter_type": "text", - "variable": "time", - "parameter": true, - "order": 0, + }, + { + "pk": 83, + "model": "workflows.abstractinput", + "fields": { + "widget": 33, + "name": "Time", + "short_name": "tim", + "uid": "1a463ddc-289a-4250-ae41-9f7a3eb81881", + "default": "10", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "time", + "parameter": true, + "order": 0, "description": "Time in seconds" } - }, + }, { - "pk": 11, - "model": "workflows.abstractoutput", + "pk": 37, + 
"model": "workflows.abstractoutput", "fields": { - "widget": 10, - "name": "Data", - "short_name": "dat", - "variable": "data", - "uid": "b5233bb9-11f4-49d4-8c12-cd5476d1dc13", - "order": 1, + "widget": 33, + "name": "Data", + "short_name": "dat", + "variable": "data", + "uid": "b5233bb9-11f4-49d4-8c12-cd5476d1dc13", + "order": 1, "description": "" } - }, - { - "pk": 13, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": "treeview/ensemble-small.png", - "name": "Ensemble", - "is_streaming": false, - "uid": "e2faf966-57a8-4bd9-af66-bb7921be4431", - "interaction_view": "", - "image": "images/ensemble-small.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "ensemble", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 4, + }, + { + "pk": 34, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/ensemble-small.png", + "name": "Ensemble", + "is_streaming": false, + "uid": "e2faf966-57a8-4bd9-af66-bb7921be4431", + "interaction_view": "", + "image": "images/ensemble-small.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "ensemble", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 4, "description": "" } - }, - { - "pk": 24, - "model": "workflows.abstractinput", - "fields": { - "widget": 13, - "name": "Data Indices", - "short_name": "dis", - "uid": "7ab35883-9bb7-477b-a367-6c6ac95edc75", - "default": "", - "required": true, - "multi": true, - "parameter_type": null, - "variable": "data_inds", - "parameter": false, - "order": 0, + }, + { + "pk": 84, + "model": "workflows.abstractinput", + "fields": { + "widget": 34, + "name": "Data Indices", + "short_name": "dis", + "uid": "7ab35883-9bb7-477b-a367-6c6ac95edc75", + "default": 
"", + "required": true, + "multi": true, + "parameter_type": null, + "variable": "data_inds", + "parameter": false, + "order": 0, "description": "" } - }, - { - "pk": 25, - "model": "workflows.abstractinput", - "fields": { - "widget": 13, - "name": "Ensemble Type", - "short_name": "ent", - "uid": "d07ef228-561f-49d0-8ea5-5e4638d14b76", - "default": "consensus", - "required": true, - "multi": false, - "parameter_type": "select", - "variable": "ens_type", - "parameter": true, - "order": 0, + }, + { + "pk": 85, + "model": "workflows.abstractinput", + "fields": { + "widget": 34, + "name": "Ensemble Type", + "short_name": "ent", + "uid": "d07ef228-561f-49d0-8ea5-5e4638d14b76", + "default": "consensus", + "required": true, + "multi": false, + "parameter_type": "select", + "variable": "ens_type", + "parameter": true, + "order": 0, "description": "" } - }, + }, { - "pk": 1, - "model": "workflows.abstractoption", + "pk": 1, + "model": "workflows.abstractoption", "fields": { - "uid": "7f71a3e6-cb99-4161-a62e-988424dc4598", - "abstract_input": 25, - "value": "consensus", + "uid": "7f71a3e6-cb99-4161-a62e-988424dc4598", + "abstract_input": 85, + "value": "consensus", "name": "Consensus" } - }, + }, { - "pk": 2, - "model": "workflows.abstractoption", + "pk": 2, + "model": "workflows.abstractoption", "fields": { - "uid": "08bd84cc-2333-411c-9645-d0ac1f85b1dc", - "abstract_input": 25, - "value": "majority", + "uid": "08bd84cc-2333-411c-9645-d0ac1f85b1dc", + "abstract_input": 85, + "value": "majority", "name": "Majority" } - }, - { - "pk": 26, - "model": "workflows.abstractinput", - "fields": { - "widget": 13, - "name": "Ensemble Name", - "short_name": "enn", - "uid": "1fa7cc01-afac-4c8c-86d5-829662939661", - "default": "Untitled Ensemble", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "ens_name", - "parameter": true, - "order": 0, + }, + { + "pk": 86, + "model": "workflows.abstractinput", + "fields": { + "widget": 34, + "name": "Ensemble Name", + 
"short_name": "enn", + "uid": "1fa7cc01-afac-4c8c-86d5-829662939661", + "default": "Untitled Ensemble", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "ens_name", + "parameter": true, + "order": 0, "description": "" } - }, + }, { - "pk": 13, - "model": "workflows.abstractoutput", + "pk": 38, + "model": "workflows.abstractoutput", "fields": { - "widget": 13, - "name": "Ensembled Indices", - "short_name": "eni", - "variable": "ens_out", - "uid": "63322ca1-401b-445e-bfef-c322121e018d", - "order": 1, + "widget": 34, + "name": "Ensembled Indices", + "short_name": "eni", + "variable": "ens_out", + "uid": "63322ca1-401b-445e-bfef-c322121e018d", + "order": 1, "description": "" } - }, - { - "pk": 17, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": "treeview/glass_1.png", - "name": "Object viewer", - "is_streaming": false, - "uid": "9c7ff1bd-6026-4ebd-b44f-ed889a8b0fad", - "interaction_view": "", - "image": "images/glass_3.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "object_viewer", - "action": "object_viewer", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 5, + }, + { + "pk": 35, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/glass_1.png", + "name": "Object viewer", + "is_streaming": false, + "uid": "9c7ff1bd-6026-4ebd-b44f-ed889a8b0fad", + "interaction_view": "", + "image": "images/glass_3.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "object_viewer", + "action": "object_viewer", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 5, "description": "Displays any input." 
} - }, - { - "pk": 11, - "model": "workflows.abstractinput", - "fields": { - "widget": 17, - "name": "Object", - "short_name": "obj", - "uid": "175cd7ae-46dd-471e-9aa2-29105cb077eb", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "object", - "parameter": false, - "order": 0, + }, + { + "pk": 87, + "model": "workflows.abstractinput", + "fields": { + "widget": 35, + "name": "Object", + "short_name": "obj", + "uid": "175cd7ae-46dd-471e-9aa2-29105cb077eb", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "object", + "parameter": false, + "order": 0, "description": "Any type of object." } - }, - { - "pk": 18, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": "treeview/pickle.png", - "name": "Pickle object", - "is_streaming": false, - "uid": "1f6bd626-331e-4ac3-b55f-cec8fcd8fd9e", - "interaction_view": "", - "image": "images/pickle.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "pickle_object", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 6, + }, + { + "pk": 36, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/pickle.png", + "name": "Pickle object", + "is_streaming": false, + "uid": "1f6bd626-331e-4ac3-b55f-cec8fcd8fd9e", + "interaction_view": "", + "image": "images/pickle.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "pickle_object", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 6, "description": "" } - }, - { - "pk": 13, - "model": "workflows.abstractinput", - "fields": { - "widget": 18, - "name": "object", - "short_name": "obj", - "uid": "3a319eaf-52fb-4cd0-b1b6-78d8a807b892", - "default": "", - "required": 
true, - "multi": false, - "parameter_type": null, - "variable": "object", - "parameter": false, - "order": 0, + }, + { + "pk": 88, + "model": "workflows.abstractinput", + "fields": { + "widget": 36, + "name": "object", + "short_name": "obj", + "uid": "3a319eaf-52fb-4cd0-b1b6-78d8a807b892", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "object", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 16, - "model": "workflows.abstractoutput", + "pk": 39, + "model": "workflows.abstractoutput", "fields": { - "widget": 18, - "name": "pickled object", - "short_name": "obj", - "variable": "pickled_object", - "uid": "1c47c99e-5ba3-4bcf-8f67-56ebb8c97374", - "order": 1, + "widget": 36, + "name": "pickled object", + "short_name": "obj", + "variable": "pickled_object", + "uid": "1c47c99e-5ba3-4bcf-8f67-56ebb8c97374", + "order": 1, "description": "" } - }, - { - "pk": 20, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": "treeview/pickle_1.png", - "name": "Unpickle object", - "is_streaming": false, - "uid": "58975866-d556-49ff-84bf-70e31af4638c", - "interaction_view": "", - "image": "images/pickle_1.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "unpickle_object", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 7, + }, + { + "pk": 37, + "model": "workflows.abstractwidget", + "fields": { + "category": 7, + "treeview_image": "treeview/pickle_1.png", + "name": "Unpickle object", + "is_streaming": false, + "uid": "58975866-d556-49ff-84bf-70e31af4638c", + "interaction_view": "", + "image": "images/pickle_1.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "unpickle_object", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": 
false, + "order": 7, "description": "" } - }, - { - "pk": 14, - "model": "workflows.abstractinput", - "fields": { - "widget": 20, - "name": "pickled object", - "short_name": "obj", - "uid": "1d6e265c-22e1-4761-a9a2-5615f00528b0", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "pickled_object", - "parameter": false, - "order": 0, + }, + { + "pk": 89, + "model": "workflows.abstractinput", + "fields": { + "widget": 37, + "name": "pickled object", + "short_name": "obj", + "uid": "1d6e265c-22e1-4761-a9a2-5615f00528b0", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "pickled_object", + "parameter": false, + "order": 0, "description": "" } - }, + }, { - "pk": 17, - "model": "workflows.abstractoutput", + "pk": 40, + "model": "workflows.abstractoutput", "fields": { - "widget": 20, - "name": "object", - "short_name": "obj", - "variable": "object", - "uid": "350a6971-8803-4add-8c0d-b6f957358d45", - "order": 1, + "widget": 37, + "name": "object", + "short_name": "obj", + "variable": "object", + "uid": "350a6971-8803-4add-8c0d-b6f957358d45", + "order": 1, "description": "" } - }, + }, { - "pk": 4, - "model": "workflows.category", + "pk": 8, + "model": "workflows.category", "fields": { - "uid": "11865b03-cb06-4d50-9174-69d920cbe4ec", - "parent": null, - "workflow": null, - "user": null, - "order": 1, + "uid": "11865b03-cb06-4d50-9174-69d920cbe4ec", + "parent": null, + "workflow": null, + "user": null, + "order": 1, "name": "Strings" } - }, - { - "pk": 2, - "model": "workflows.abstractwidget", - "fields": { - "category": 4, - "treeview_image": "treeview/concatenate_string.png", - "name": "Concatenate Strings", - "is_streaming": false, - "uid": "28c00f15-c6e1-45b7-9825-14fdc9819584", - "interaction_view": "", - "image": "images/concatenate_string.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": 
"concatenate_strings", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, + }, + { + "pk": 38, + "model": "workflows.abstractwidget", + "fields": { + "category": 8, + "treeview_image": "treeview/concatenate_string.png", + "name": "Concatenate Strings", + "is_streaming": false, + "uid": "28c00f15-c6e1-45b7-9825-14fdc9819584", + "interaction_view": "", + "image": "images/concatenate_string.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "concatenate_strings", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, "description": "" } - }, - { - "pk": 17, - "model": "workflows.abstractinput", - "fields": { - "widget": 2, - "name": "Strings", - "short_name": "str", - "uid": "909c8b13-c318-4fab-b9d5-a6c6b08a25b3", - "default": "", - "required": false, - "multi": true, - "parameter_type": null, - "variable": "strings", - "parameter": false, - "order": 0, + }, + { + "pk": 90, + "model": "workflows.abstractinput", + "fields": { + "widget": 38, + "name": "Strings", + "short_name": "str", + "uid": "909c8b13-c318-4fab-b9d5-a6c6b08a25b3", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "strings", + "parameter": false, + "order": 0, "description": "" } - }, - { - "pk": 18, - "model": "workflows.abstractinput", - "fields": { - "widget": 2, - "name": "Delimiter", - "short_name": "del", - "uid": "8862cfd6-6444-46b7-9b16-456f27b58cb4", - "default": "", - "required": false, - "multi": false, - "parameter_type": "text", - "variable": "delimiter", - "parameter": true, - "order": 0, + }, + { + "pk": 91, + "model": "workflows.abstractinput", + "fields": { + "widget": 38, + "name": "Delimiter", + "short_name": "del", + "uid": "8862cfd6-6444-46b7-9b16-456f27b58cb4", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": 
"delimiter", + "parameter": true, + "order": 0, "description": "The delimiter will be inserted in between the strings" } - }, + }, { - "pk": 10, - "model": "workflows.abstractoutput", + "pk": 41, + "model": "workflows.abstractoutput", "fields": { - "widget": 2, - "name": "String", - "short_name": "str", - "variable": "string", - "uid": "95c60fd3-1e53-4de7-84e0-8c0fa56d9e8a", - "order": 1, + "widget": 38, + "name": "String", + "short_name": "str", + "variable": "string", + "uid": "95c60fd3-1e53-4de7-84e0-8c0fa56d9e8a", + "order": 1, "description": "" } - }, - { - "pk": 7, - "model": "workflows.abstractwidget", - "fields": { - "category": 4, - "treeview_image": "treeview/string.png", - "name": "Create String", - "is_streaming": false, - "uid": "1b38bbab-7f89-4469-94cd-2f481f9c61f7", - "interaction_view": "", - "image": "images/string.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "create_string", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 2, + }, + { + "pk": 39, + "model": "workflows.abstractwidget", + "fields": { + "category": 8, + "treeview_image": "treeview/string.png", + "name": "Create String", + "is_streaming": false, + "uid": "1b38bbab-7f89-4469-94cd-2f481f9c61f7", + "interaction_view": "", + "image": "images/string.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "create_string", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, "description": "Creates a string" } - }, - { - "pk": 16, - "model": "workflows.abstractinput", - "fields": { - "widget": 7, - "name": "String", - "short_name": "str", - "uid": "0f6fa6df-b8d1-4e12-a9ac-427233bb090a", - "default": "", - "required": false, - "multi": false, - "parameter_type": "textarea", - "variable": "string", - "parameter": true, - "order": 
0, + }, + { + "pk": 92, + "model": "workflows.abstractinput", + "fields": { + "widget": 39, + "name": "String", + "short_name": "str", + "uid": "0f6fa6df-b8d1-4e12-a9ac-427233bb090a", + "default": "", + "required": false, + "multi": false, + "parameter_type": "textarea", + "variable": "string", + "parameter": true, + "order": 0, "description": "" } - }, + }, { - "pk": 9, - "model": "workflows.abstractoutput", + "pk": 42, + "model": "workflows.abstractoutput", "fields": { - "widget": 7, - "name": "String", - "short_name": "str", - "variable": "string", - "uid": "c5b4663c-778a-4573-a0e1-bf321831f75d", - "order": 1, + "widget": 39, + "name": "String", + "short_name": "str", + "variable": "string", + "uid": "c5b4663c-778a-4573-a0e1-bf321831f75d", + "order": 1, "description": "" } - }, - { - "pk": 11, - "model": "workflows.abstractwidget", - "fields": { - "category": 4, - "treeview_image": "treeview/display_string.png", - "name": "Display String", - "is_streaming": false, - "uid": "2c69b211-05fe-49a2-8de1-d38971e75b95", - "interaction_view": "", - "image": "images/display_string.png", - "package": "base", - "static_image": "", - "post_interact_action": "", - "user": null, - "visualization_view": "display_string", - "action": "display_string", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 3, + }, + { + "pk": 40, + "model": "workflows.abstractwidget", + "fields": { + "category": 8, + "treeview_image": "treeview/display_string.png", + "name": "Display String", + "is_streaming": false, + "uid": "2c69b211-05fe-49a2-8de1-d38971e75b95", + "interaction_view": "", + "image": "images/display_string.png", + "package": "base", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "display_string", + "action": "display_string", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, "description": "" } - }, - { - "pk": 19, - "model": 
"workflows.abstractinput", - "fields": { - "widget": 11, - "name": "String", - "short_name": "str", - "uid": "611e005a-e495-4572-9983-337e98ee5898", - "default": "", - "required": false, - "multi": false, - "parameter_type": null, - "variable": "string", - "parameter": false, - "order": 0, + }, + { + "pk": 93, + "model": "workflows.abstractinput", + "fields": { + "widget": 40, + "name": "String", + "short_name": "str", + "uid": "611e005a-e495-4572-9983-337e98ee5898", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "string", + "parameter": false, + "order": 0, "description": "Displays the string so that it is readable." } } diff --git a/workflows/base/library.py b/workflows/base/library.py new file mode 100644 index 0000000000000000000000000000000000000000..122d44d6c8af31e3e78adbde950e0d780a880ca2 --- /dev/null +++ b/workflows/base/library.py @@ -0,0 +1,8 @@ +def base_concatenate_lists(input_dict): + lists = input_dict['lists'] + new_list = [] + for every_list in lists: + new_list = new_list+every_list + output_dict = {} + output_dict['list']=new_list + return output_dict \ No newline at end of file diff --git a/workflows/bioinformatics/db/package_data.json b/workflows/bioinformatics/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..01bc70abae62b7b63ba4a14bed2e7634b81df7d5 --- /dev/null +++ b/workflows/bioinformatics/db/package_data.json @@ -0,0 +1 @@ +[\n] \ No newline at end of file diff --git a/workflows/bioinformatics/interaction_views.py b/workflows/bioinformatics/interaction_views.py index 4b000b69df35a3dc746f44d64cc35134dc630881..19d7bfc28374d8df5e7f89e678f424ff0d0a2eda 100644 --- a/workflows/bioinformatics/interaction_views.py +++ b/workflows/bioinformatics/interaction_views.py @@ -5,6 +5,7 @@ Bioinformatics interaction viewes. 
''' from django.shortcuts import render -def insilico_search(request, input_dict, output_dict, widget): - #TODOl - return render(request, 'interactions/insilico_search.html', {'widget':widget}) +def feature_selection(request, input_dict, output_dict, widget): + #TODO + return render(request, 'interactions/feature_selection.html', {'widget':widget}) + diff --git a/workflows/bioinformatics/library.py b/workflows/bioinformatics/library.py index 5e4c74d1f6bdf5f187613e446d4bf046a813fe67..dea163cdaeaa1b7c6c72441368cb2d07ea1d9295 100644 --- a/workflows/bioinformatics/library.py +++ b/workflows/bioinformatics/library.py @@ -3,9 +3,25 @@ Bioinformatics library. @author: Anze Vavpetic ''' -def insilico_search(input_dict): +# +# Visualization widgets: +# +def rank_plotter(input_dict): return input_dict +def segs_rule_browser(input_dict): + return input_dict + +# +# Interactions widgets: +# +def feature_selection_finished(postdata, input_dict, output_dict): + # TODO + return output_dict -def insilico_finished(input_dict): - #TODO - return input_dict \ No newline at end of file +# +# Regular widgets: +# +def segs(input_dict): + # TODO + output_dict = {} + return output_dict \ No newline at end of file diff --git a/workflows/bioinformatics/orngBioinformatics/__init__.py b/workflows/bioinformatics/orngBioinformatics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/bioinformatics/orngBioinformatics/obiExpression.py b/workflows/bioinformatics/orngBioinformatics/obiExpression.py new file mode 100644 index 0000000000000000000000000000000000000000..6d3c3163277328b6e90dd6fe3bd20df48ddfd6d0 --- /dev/null +++ b/workflows/bioinformatics/orngBioinformatics/obiExpression.py @@ -0,0 +1,865 @@ +from __future__ import absolute_import + +import numpy + +import orange, statc + +from . 
import stats + +def mean(l): + return float(sum(l))/len(l) + +class MA_pearsonCorrelation: + """ + Calling an object of this class computes Pearson correlation of all + attributes against class. + """ + def __call__(self, i, data): + dom2 = orange.Domain([data.domain.attributes[i]], data.domain.classVar) + data2 = orange.ExampleTable(dom2, data) + a,c = data2.toNumpy("A/C") + return numpy.corrcoef(c,a[:,0])[0,1] + +class MA_signalToNoise: + """ + Returns signal to noise measurement: difference of means of two classes + divided by the sum of standard deviations for both classes. + + Usege similar to MeasureAttribute*. + + Standard deviation used for now returns minmally 0.2*|mi|, where mi=0 is adjusted to mi=1 + (as in gsea implementation). + + Can work only on data with two classes. If there are multiple class, then + relevant class values can be specified on object initialization. + By default the relevant classes are first and second class value + from the domain. + """ + + def __init__(self, a=None, b=None): + """ + a and b are choosen class values. + """ + self.a = a + self.b = b + + def __call__(self, i, data): + cv = data.domain.classVar + #print data.domain + + if self.a == None: self.a = cv.values[0] + if self.b == None: self.b = cv.values[1] + + def stdev(l): + return statc.std(l) + + def mean(l): + return statc.mean(l) + + def stdevm(l): + m = mean(l) + std = stdev(l) + #return minmally 0.2*|mi|, where mi=0 is adjusted to mi=1 + return max(std, 0.2*abs(1.0 if m == 0 else m)) + + def avWCVal(value): + return [ex[i].value for ex in data if ex[-1].value == value and not ex[i].isSpecial() ] + + exa = avWCVal(self.a) + exb = avWCVal(self.b) + + try: + rval = (mean(exa)-mean(exb))/(stdevm(exa)+stdevm(exb)) + return rval + except: + #return some "middle" value - + #TODO rather throw exception? 
+ return 0 + +class MA_t_test(object): + def __init__(self, a=None, b=None, prob=False): + self.a = a + self.b = b + self.prob = prob + def __call__(self, i, data): + cv = data.domain.classVar + #print data.domain + + #for faster computation. to save dragging many attributes along + dom2 = orange.Domain([data.domain[i]], data.domain.classVar) + data = orange.ExampleTable(dom2, data) + i = 0 + + if self.a == None: self.a = cv.values[0] + if self.b == None: self.b = cv.values[1] + + def avWCVal(value): + return [ex[i].value for ex in data if ex[cv] == value and not ex[i].isSpecial() ] + + exa = avWCVal(self.a) + exb = avWCVal(self.b) + + try: + t, prob = stats.lttest_ind(exa, exb) + return prob if self.prob else t + except: + return 1.0 if self.prob else 0.0 + +class MA_fold_change(object): + def __init__(self, a=None, b=None): + self.a = a + self.b = b + def __call__(self, i, data): + cv = data.domain.classVar + #print data.domain + + #for faster computation. to save dragging many attributes along + dom2 = orange.Domain([data.domain[i]], data.domain.classVar) + data = orange.ExampleTable(dom2, data) + i = 0 + + if self.a == None: self.a = cv.values[0] + if self.b == None: self.b = cv.values[1] + + def avWCVal(value): + return [ex[i].value for ex in data if ex[cv] == value and not ex[i].isSpecial() ] + + exa = avWCVal(self.a) + exb = avWCVal(self.b) + + try: + return mean(exa)/mean(exb) + except: + return 1 + +class MA_anova(object): + def __init__(self, prob=False): + self.prob = prob + def __call__(self, i, data): + cv = data.domain.classVar + #print data.domain + + #for faster computation. 
to save dragging many attributes along + dom2 = orange.Domain([data.domain[i]], data.domain.classVar) + data = orange.ExampleTable(dom2, data) + i = 0 + + def avWCVal(value): + return [ex[i].value for ex in data if ex[cv] == value and not ex[i].isSpecial() ] + + data = [avWCVal(val) for val in cv.values] + + try: + f, prob = stats.lF_oneway(*tuple(data)) + return prob if self.prob else f + except: + return 1.0 if self.prob else 0.0 + +import numpy as np +import numpy.ma as ma + +class ExpressionSignificance_Test(object): + def __new__(cls, data, useAttributeLabels, **kwargs): + self = object.__new__(cls) + if kwargs: + self.__init__(data, useAttributeLabels) + return self.__call__(**kwargs) + else: + return self + + def __init__(self, data, useAttributeLabels=False): + self.data = data + self.useAttributeLabels = useAttributeLabels + self.attr_labels, self.data_classes = self._data_info(data) + self.attributes = [attr for attr in self.data.domain.attributes if attr.varType in [orange.VarTypes.Continuous, orange.VarTypes.Discrete]] + self.classes = np.array(self.attr_labels if useAttributeLabels else self.data_classes) + self.keys = range(len(data)) if useAttributeLabels else self.attributes + self.array, _, _ = data.toNumpyMA() + if self.useAttributeLabels: + self.array = ma.transpose(self.array) +# self.dim = 1 if useAttributeLabels else 0 + self.dim = 0 + + def _data_info(self, data): + return [set(attr.attributes.items()) for attr in data.domain.attributes], [ex.getclass() for ex in data] if data.domain.classVar else [None]*len(data) + + def test_indices(self, target, classes=None): + classes = self.classes if classes is None else classes + + def target_set(target): + if isinstance(target, tuple): + return set([target]) + else: + assert(isinstance(target, set)) + return target + + if self.useAttributeLabels: + if isinstance(target, list): + ind = [[i for i, cl in enumerate(self.classes) if target_set(t).intersection(cl)] for t in target] + else: + target = 
target_set(target) + + ind1 = [i for i, cl in enumerate(self.classes) if target.intersection(cl)] + ind2 = [i for i, cl in enumerate(self.classes) if not target.intersection(cl)] + ind = [ind1, ind2] + else: + if isinstance(target, list): + ind = [ma.nonzero(self.classes == t)[0] for t in target] + else: + if isinstance(target, (basestring, orange.Variable)): + target = set([target]) + else: + assert(isinstance(target, set)) + target = list(target) + ind1 = [i for i, cl in enumerate(self.classes) if cl in target] + ind2 = [i for i, cl in enumerate(self.classes) if cl not in target] + ind = [ind1, ind2] + + return ind + + def __call__(self, target): + raise NotImplementedError() + + def null_distribution(self, num, *args, **kwargs): + kwargs = dict(kwargs) + advance = lambda: None + if "advance" in kwargs: + advance = kwargs["advance"] + del kwargs["advance"] + results = [] + originalClasses = self.classes.copy() + for i in range(num): + np.random.shuffle(self.classes) + results.append(self.__call__(*args, **kwargs)) + advance() + self.classes = originalClasses + return results + +class ExpressionSignificance_TTest(ExpressionSignificance_Test): + def __call__(self, target): + ind1, ind2 = self.test_indices(target) + t, pval = attest_ind(self.array[ind1, :], self.array[ind2, :], dim=self.dim) + return zip(self.keys, zip(t, pval)) + +class ExpressionSignificance_FoldChange(ExpressionSignificance_Test): + def __call__(self, target): + ind1, ind2 = self.test_indices(target) + a1, a2 = self.array[ind1, :], self.array[ind2, :] + fold = ma.mean(a1, self.dim)/ma.mean(a2, self.dim) + return zip(self.keys, fold) + +class ExpressionSignificance_SignalToNoise(ExpressionSignificance_Test): + def __call__(self, target): + ind1, ind2 = self.test_indices(target) + a1, a2 = self.array[ind1, :], self.array[ind2, :] + stn = (ma.mean(a1, self.dim) - ma.mean(a2, self.dim)) / (ma.sqrt(ma.var(a1, self.dim)) + ma.sqrt(ma.var(a2, self.dim))) + return zip(self.keys, stn) + +class 
ExpressionSignificance_ANOVA(ExpressionSignificance_Test): + def __call__(self, target=None): + if target is not None: + indices = self.test_indices(target) + else: + indices = [] + f, prob = aF_oneway(*[self.array[ind, :] for ind in indices], **dict(dim=0)) + return zip(self.keys, zip(f, prob)) + +class ExpressionSignificance_ChiSquare(ExpressionSignificance_Test): + def __call__(self, target): + array = equi_n_discretization(self.array.copy(), intervals=5, dim=0) + ind1, ind2 = self.test_indices(target) + a1, a2 = array[ind1, :], array[ind2, :] + dist1, dist2 = [], [] + dist = ma.zeros((array.shape[1], 2, 5)) + for i in range(5): + dist1.append(ma.sum(ma.ones(a1.shape) * (a1 == i), 0)) + dist2.append(ma.sum(ma.ones(a2.shape) * (a2 == i), 0)) + dist[:, 0, i] = dist1[-1] + dist[:, 1, i] = dist2[-1] + return zip(self.keys, achisquare_indtest(np.array(dist), dim=1)) + +class ExpressionSignificance_Info(ExpressionSignificance_Test): + def __call__(self, target): + array = equi_n_discretization(self.array.copy(), intervals=5, dim=1) + + ind1, ind2 = self.test_indices(target) + a1, a2 = array[ind1, :], array[ind2, :] + dist1, dist2 = [], [] + dist = ma.zeros((array.shape[1], 2, 5)) + for i in range(5): + dist1.append(ma.sum(ma.ones(a1.shape) * (a1 == i), 0)) + dist2.append(ma.sum(ma.ones(a2.shape) * (a2 == i), 0)) + dist[:, 0, i] = dist1[-1] + dist[:, 1, i] = dist2[-1] + classinfo = entropy(np.array([len(ind1), len(ind2)])) + E = ma.sum(entropy(dist, dim=1) * ma.sum(dist, 1), 1) / ma.sum(ma.sum(dist, 1), 1) + return zip(self.keys, classinfo - E) + +class ExpressionSignificance_MannWhitneyu(ExpressionSignificance_Test): + def __call__(self, target): + ind1, ind2 = self.test_indices(target) + a, b = self.array[ind1, :], self.array[ind2, :] +# results = [amannwhitneyu(a[:, i],b[:, i]) for i in range(a.shape[1])] + results = [statc.mannwhitneyu(list(a[:, i]),list(b[:, i])) for i in range(a.shape[1])] + + return zip(self.keys, results) + +def attest_ind(a, b, dim=None): + 
""" Return the t-test statistics on arrays a and b over the dim axis. + Returns both the t statistic as well as the p-value + """ +# dim = a.ndim - 1 if dim is None else dim + x1, x2 = ma.mean(a, dim), ma.mean(b, dim) + v1, v2 = ma.var(a, dim), ma.var(b, dim) + n1, n2 = (a.shape[dim], b.shape[dim]) if dim is not None else (a.size, b.size) + df = float(n1+n2-2) + svar = ((n1-1)*v1+(n2-1)*v2) / df + t = (x1-x2)/ma.sqrt(svar*(1.0/n1 + 1.0/n2)) + if t.ndim == 0: + return (t, statc.betai(0.5*df,0.5,df/(df+t**2)) if t is not ma.masked and df/(df+t**2) <= 1.0 else ma.masked) + else: + prob = [statc.betai(0.5*df,0.5,df/(df+tsq)) if tsq is not ma.masked and df/(df+tsq) <= 1.0 else ma.masked for tsq in t*t] + return t, prob + +def aF_oneway(*args, **kwargs): + dim = kwargs.get("dim", None) + arrays = args + means = [ma.mean(a, dim) for a in arrays] + vars = [ma.var(a, dim) for a in arrays] + lens = [ma.sum(ma.array(ma.ones(a.shape), mask=ma.asarray(a).mask), dim) for a in arrays] + alldata = ma.concatenate(arrays, dim if dim is not None else 0) + bign = ma.sum(ma.array(ma.ones(alldata.shape), mask=alldata.mask), dim) + sstot = ma.sum(alldata ** 2, dim) - (ma.sum(alldata, dim) ** 2) / bign + ssbn = ma.sum([(ma.sum(a, dim) ** 2) / L for a, L in zip(arrays, lens)], dim) +# print ma.sum(alldata, dim) ** 2 / bign, ssbn + ssbn -= ma.sum(alldata, dim) ** 2 / bign + sswn = sstot - ssbn + dfbn = dfnum = float(len(args) - 1.0) + dfwn = bign - len(args) # + 1.0 + F = (ssbn / dfbn) / (sswn / dfwn) + if F.ndim == 0 and dfwn.ndim == 0: + return (F,statc.betai(0.5 * dfwn, 0.5 * dfnum, dfwn/float(dfwn+dfnum*F)) if F is not ma.masked and dfwn/float(dfwn+dfnum*F) <= 1.0 \ + and dfwn/float(dfwn+dfnum*F) >= 0.0 else ma.masked) + else: + prob = [statc.betai(0.5 * dfden, 0.5 * dfnum, dfden/float(dfden+dfnum*f)) if f is not ma.masked and dfden/float(dfden+dfnum*f) <= 1.0 \ + and dfden/float(dfden+dfnum*f) >= 0.0 else ma.masked for dfden, f in zip (dfwn, F)] + return F, prob + +def 
achisquare_indtest(observed, dim=None): + if observed.ndim == 2: + observed = ma.array([observed]) + if dim is not None: + dim += 1 + if dim is None: + dim = observed.ndim - 2 + rowtotal = ma.sum(observed, dim + 1) + coltotal = ma.sum(observed, dim) + total = ma.sum(rowtotal, dim) + ones = ma.array(ma.ones(observed.shape)) + expected = ones * rowtotal.reshape(rowtotal.shape[:dim] + (-1, 1)) + a = ones * coltotal[..., np.zeros(observed.shape[dim], dtype=int),:] + expected = expected * (a) / total.reshape((-1, 1, 1)) + chisq = ma.sum(ma.sum((observed - expected) ** 2 / expected, dim + 1), dim) + return chisq + +def equi_n_discretization(array, intervals=5, dim=1): + count = ma.sum(ma.array(ma.ones(array.shape, dtype=int), mask=array.mask), dim) + cut = ma.zeros(len(count), dtype=int) + sarray = ma.sort(array, dim) + r = count % intervals + pointsshape = list(array.shape) + pointsshape[dim] = 1 + points = [] + for i in range(intervals): + cutend = cut + count / intervals + numpy.ones(len(r)) * (r > i) + if dim == 1: + p = sarray[range(len(cutend)), numpy.array(cutend, dtype=int) -1] + else: + p = sarray[numpy.array(cutend, dtype=int) -1, range(len(cutend))] + points.append(p.reshape(pointsshape)) + cut = cutend + darray = ma.array(ma.zeros(array.shape) - 1, mask=array.mask) + darray[ma.nonzero(array <= points[0])] = 0 + for i in range(0, intervals): + darray[ma.nonzero((array > points[i]))] = i + 1 + return darray + +def entropy(array, dim=None): + if dim is None: + array = array.ravel() + dim = 0 + n = ma.sum(array, dim) + array = ma.log(array) * array + sum = ma.sum(array, dim) + return (ma.log(n) - sum / n) / ma.log(2.0) + +"""\ +MA - Plot +========= + +Functions for normalization of expression arrays and ploting +MA - Plots + +Example:: + ## Load data from GEO + >>> data = orange.ExampleTable("GDS1210.tab") + ## Split data by columns into normal and cancer subsets + >>> cancer, normal = data_split(data, [("disease state", "cancer"), ("disease state", "normal")]) + 
## Convert to numpy MaskedArrays + >>> cancer, normal = cancer.toNumpyMA("A")[0], normal.toNumpyMA("A")[0] + ## Merge by averaging + >>> cancer = merge_replicates(cancer) + >>> normal = merge_replicates(normal) + ## Plot MA-plot + >>> MA_plot(cancer, normal) + +""" + +from Orange.orng import orngMisc +from numpy import median +def lowess(x, y, f=2./3., iter=3, progressCallback=None): + """ Lowess taken from Bio.Statistics.lowess, modified to compute pairwise + distances inplace. + + lowess(x, y, f=2./3., iter=3) -> yest + + Lowess smoother: Robust locally weighted regression. + The lowess function fits a nonparametric regression curve to a scatterplot. + The arrays x and y contain an equal number of elements; each pair + (x[i], y[i]) defines a data point in the scatterplot. The function returns + the estimated (smooth) values of y. + + The smoothing span is given by f. A larger value for f will result in a + smoother curve. The number of robustifying iterations is given by iter. The + function will run faster with a smaller number of iterations. + + x and y should be numpy float arrays of equal length. The return value is + also a numpy float array of that length. + + e.g. + >>> import numpy + >>> x = numpy.array([4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, + ... 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16, + ... 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20, + ... 20, 22, 23, 24, 24, 24, 24, 25], numpy.float) + >>> y = numpy.array([2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, + ... 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40, + ... 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40, + ... 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56, + ... 
64, 66, 54, 70, 92, 93, 120, 85], numpy.float) + >>> result = lowess(x, y) + >>> len(result) + 50 + >>> print "[%0.2f, ..., %0.2f]" % (result[0], result[-1]) + [4.85, ..., 84.98] + """ + n = len(x) + r = min(int(numpy.ceil(f*n)), n - 1) + +# h = [numpy.sort(numpy.abs(x-x[i]))[r] for i in range(n)] +# h, xtmp = numpy.zeros_like(x), numpy.zeros_like(x) +# for i in range(n): +# xtmp = numpy.abs(x - x[i], xtmp) +# h[i] = numpy.sort(xtmp)[r] +# w = numpy.clip(numpy.abs(([x]-numpy.transpose([x]))/h),0.0,1.0) + dist = [x] - numpy.transpose([x]) + dist = numpy.abs(dist, dist) + dist.sort(axis=1) + h = dist[:, r] + del dist + + w = [x]-numpy.transpose([x]) + w /= h + w = numpy.abs(w, w) + w = numpy.clip(w, 0.0, 1.0, w) +# w = 1-w*w*w + w **= 3 + w *= -1 + w += 1 +# w = w*w*w + w **= 3 + yest = numpy.zeros(n) + delta = numpy.ones(n) + milestones = orngMisc.progressBarMilestones(iter*n) + for iteration in range(iter): + for i in xrange(n): + weights = delta * w[:,i] + weights_mul_x = weights * x + b1 = numpy.ma.dot(weights,y) + b2 = numpy.ma.dot(weights_mul_x,y) + A11 = sum(weights) + A12 = sum(weights_mul_x) + A21 = A12 + A22 = numpy.ma.dot(weights_mul_x,x) + determinant = A11*A22 - A12*A21 + beta1 = (A22*b1-A12*b2) / determinant + beta2 = (A11*b2-A21*b1) / determinant + yest[i] = beta1 + beta2*x[i] + if progressCallback and (iteration*n + i) in milestones: + progressCallback((100. * iteration*n + i) / (iter * n)) + residuals = y-yest + s = median(abs(residuals)) + delta[:] = numpy.clip(residuals/(6*s),-1,1) + delta[:] = 1-delta*delta + delta[:] = delta*delta + return yest + + + +def lowess2(x, y, xest, f=2./3., iter=3, progressCallback=None): + """Returns estimated values of y in data points xest (or None if estimation fails). + Lowess smoother: Robust locally weighted regression. + The lowess function fits a nonparametric regression curve to a scatterplot. 
+ The arrays x and y contain an equal number of elements; each pair + (x[i], y[i]) defines a data point in the scatterplot. The function returns + the estimated (smooth) values of y. + + The smoothing span is given by f. A larger value for f will result in a + smoother curve. The number of robustifying iterations is given by iter. The + function will run faster with a smaller number of iterations. + + Taken from Peter Juvan's numpyExtn.py, modified for numpy, computes pairwise + distances inplace + """ + x = numpy.asarray(x, 'f') + y = numpy.asarray(y, 'f') + xest = numpy.asarray(xest, 'f') + n = len(x) + nest = len(xest) + r = min(int(numpy.ceil(f*n)),n-1) # radius: num. of points to take into LR +# h = [numpy.sort(numpy.abs(x-x[i]))[r] for i in range(n)] # distance of the r-th point from x[i] + dist = [x] - numpy.transpose([x]) + dist = numpy.abs(dist, dist) + dist.sort(axis=1) + h = dist[:, r] + del dist # to free memory + w = [x] - numpy.transpose([x]) + w /= h + w = numpy.abs(w, w) + w = numpy.clip(w, 0.0, 1.0, w) +# w = numpy.clip(numpy.abs(([x]-numpy.transpose([x]))/h),0.0,1.0) + w **= 3 + w *= -1 + w += 1 +# w = 1 - w**3 #1-w*w*w + w **= 3 +# w = w**3 #w*w*w +# hest = [numpy.sort(numpy.abs(x-xest[i]))[r] for i in range(nest)] # r-th min. 
distance from xest[i] to x + dist = [x] - numpy.transpose([xest]) + dist = numpy.abs(dist, dist) + dist.sort(axis=1) + hest = dist[:, r] + del dist # to free memory +# west = numpy.clip(numpy.abs(([xest]-numpy.transpose([x]))/hest),0.0,1.0) # shape: (len(x), len(xest) + west = [xest]-numpy.transpose([x]) + west /= hest + west = numpy.abs(west, west) + west = numpy.clip(west, 0.0, 1.0, west) +# west = 1 - west**3 #1-west*west*west + west **= 3 + west *= -1 + west += 1 +# west = west**3 #west*west*west + west **= 3 + yest = numpy.zeros(n,'f') + yest2 = numpy.zeros(nest,'f') + delta = numpy.ones(n,'f') + iter_count = iter*(nest + n) if iter > 1 else nest + milestones = orngMisc.progressBarMilestones(iter_count) + curr_iter = 0 + for iteration in range(iter): + # fit xest + for i in range(nest): + weights = delta * west[:,i] + b = numpy.array([numpy.sum(weights*y), numpy.sum(weights*y*x)]) + A = numpy.array([[numpy.sum(weights), numpy.sum(weights*x)], [numpy.sum(weights*x), numpy.sum(weights*x*x)]]) + beta = numpy.linalg.solve(A, b) + yest2[i] = beta[0] + beta[1]*xest[i] + if progressCallback and curr_iter in milestones: + progressCallback(100. * curr_iter / iter_count) + curr_iter += 1 + + # fit x (to calculate residuals and delta) + if iter > 1: + for i in range(n): + weights = delta * w[:,i] + b = numpy.array([numpy.sum(weights*y), numpy.sum(weights*y*x)]) + A = numpy.array([[numpy.sum(weights), numpy.sum(weights*x)], [numpy.sum(weights*x), numpy.sum(weights*x*x)]]) + beta = numpy.linalg.solve(A,b) + yest[i] = beta[0] + beta[1]*x[i] + if progressCallback and curr_iter in milestones: + progressCallback(100. 
* curr_iter / iter_count) + curr_iter += 1 + residuals = y-yest + s = numpy.median(numpy.abs(residuals)) + delta = numpy.clip(residuals/(6*s), -1, 1) + delta = 1-delta*delta + delta = delta*delta + return yest2 + + +def attr_group_indices(data, label_groups): + """ Return a two or more lists of indices into `data.domain` based on `label_groups` + + Example:: + cancer_indices, no_cancer_indices = attr_group_indices(data, [("disease state", "cancer"), ("disease state", "normal")]) + """ + ret = [] + for key, val in label_groups: + ind = [i for i, attr in enumerate(data.domain.attributes) if attr.attributes.get(key, None) == val] + ret.append(ind) + return ret + + +def example_group_indices(data, attr, values): + """ Return lists of indices into `data` for each `values` item that matches + the example value at `attr` attribute + + Example:: + cls_ind1, cls_ind2 = example_group_indices(data, data.domain.classVar, ["Class 1", "Class 2"]) + """ + ret = [[] for _ in values] + values_id = dict([(str(value), i) for i, value in enumerate(values)]) + for i, ex in enumerate(data): + id = values_id.get(str(ex[attr]), None) + if id is not None: + ret[id].append(i) + return ret + + +def data_group_split(data, label_groups): + """ Split an `data` example table into two or more based on + contents of iterable `label_groups` containing (key, value) + pairs matching the labels of data attributes. 
+ + Example:: + cancer, no_cancer = data_group_split(data, [("disease state", "cancer"), ("disease state", "normal")]) + """ + ret = [] + group_indices = attr_group_indices(data, label_groups) + for indices in group_indices: + attrs = [data.domain[i] for i in indices] + domain = orange.Domain(attrs, data.domain.classVar) + domain.addmetas(data.domain.getmetas()) + ret.append(orange.ExampleTable(domain, data)) + return ret + + +def select_indices(data, key, value, axis=1): + """ Return indices into `data` (ExampleTable) along specified `axis` + where: + - if axis == 0 match data[i][key] == value + - if axis == 1 match data.domain[i].attributes[key] == value + + Example:: + cancer_ind = select_indices(data, key="disease state", value="cancer"), axis=1) + normal_ind = select_indices(data, key="disease state", value=["normal"], axis=1) # value can be a list to specify more then one value + + """ + values = value if isinstance(value, list) else [value] + if axis == 0: + groups = example_group_indices(data, key, values) + else: + groups = attr_group_indices(data, [(key, val) for val in values]) + + return sorted(reduce(set.union, groups, set())) + + +def select_data(data, key, value, axis=1): + """ Return `data` (ExampleTable) subset along specified `axis` where + where: + - if axis == 0 match data[i][key] == value + - if axis == 1 match data.domain[i].attributes[key] == value + .. 
note:: This preserves all meta attributes of the domain + Example:: + cancer = select_data(data, "disease state", "cancer", axis=1) + normal = select_data(data, "disease state", ["normal"], axis=1) # value can be a list to specify more then one value + + """ + indices = select_indices(data, key, value, axis) + if axis == 0: + examples = [data[i] for i in indices] + return orange.ExampleTable(data.domain, examples) + else: + attrs = [data.domain[i] for i in indices] + domain = orange.Domain(attrs, False) + domain.addmetas(data.domain.getmetas()) + return orange.ExampleTable(domain, data) + + +def split_data(data, groups, axis=1): + """ Split data (ExampleTable) along specified axis, where elements of + `groups` match `key` and `value` arguments of the `select_data` + function + + Example:: + cancer, normal = split_data(data, [("disease state", "cancer"), ("disease state", ["normal"])], axis=1) + """ + res = [] + for key, value in groups: + res.append(select_data(data, key, value, axis)) + return res + + +def geometric_mean(array): + """ Return a geometric mean computed on a 1d masked array + """ + array = numpy.ma.asanyarray(array) + return numpy.power(reduce(lambda a,b: a*b, array.filled(1.), 1.0), 1./len(array)) + + +def harmonic_mean(array): + """ Return a harmonic mean computed ona a 1d masked array + """ + array = numpy.ma.asanyarray(array) + return len(array) / numpy.ma.sum(1. 
/ array) + + +def merge_replicates(replicates, axis=0, merge_function=numpy.ma.average): + """ Merge `replicates` (numpy.array) along `axis` using `merge_function` + """ + return numpy.ma.apply_along_axis(merge_function, axis, replicates) + + +def ratio_intensity(G, R): + """ return the log2(R/G), log10(R*G) as a tuple + """ + log2Ratio = numpy.ma.log(R/G) / numpy.log(2) + log10Intensity = numpy.ma.log10(R*G) + return log2Ratio, log10Intensity + + +def MA_center_average(G, R): + """ return the G, R by centering the average log2 ratio + """ + center_est = numpy.ma.average(numpy.ma.log(R/G) / numpy.log(2)) + G = G * numpy.exp2(center_est) + return G, R.copy() + + +def MA_center_lowess(G, R, f=2./3., iter=1, progressCallback=None): + """ return the G, R by centering the average log2 ratio locally + depending on the intensity using lowess (locally weighted linear regression) + """ +# from Bio.Statistics.lowess import lowess + ratio, intensity = ratio_intensity(G, R) + valid = - (ratio.mask & intensity.mask) + valid_ind = numpy.ma.where(valid) + center_est = lowess(intensity[valid], ratio[valid], f=f, iter=iter, progressCallback=progressCallback) + Gc, R = G.copy(), R.copy() + Gc[valid] *= numpy.exp2(center_est) + Gc.mask, R.mask = -valid, -valid + return Gc, R + + +def MA_center_lowess_fast(G, R, f=2./3., iter=1, resolution=100, progressCallback=None): + """return the G, R by centering the average log2 ratio locally + depending on the intensity using lowess (locally weighted linear regression), + approximated only in a limited resolution. 
+ """ + + ratio, intensity = ratio_intensity(G, R) + valid = - (ratio.mask & intensity.mask) + resoluiton = min(resolution, len(intensity[valid])) + hist, edges = numpy.histogram(intensity[valid], len(intensity[valid])/resolution) + + progressCallback2 = (lambda val: progressCallback(val/2)) if progressCallback else None + centered = lowess2(intensity[valid], ratio[valid], edges, f, iter, progressCallback=progressCallback2) + + progressCallback2 = (lambda val: progressCallback(50 + val/2)) if progressCallback else None + centered = lowess2(edges, centered, intensity[valid], f, iter, progressCallback=progressCallback2) + + Gc, R = G.copy(), R.copy() + Gc[valid] *= numpy.exp2(centered) + Gc.mask, R.mask = -valid, -valid + return Gc, R + + +def MA_plot(G, R, format="b."): + """ Plot G, R on a MA-plot using matplotlib + """ + import matplotlib.pyplot as plt + ratio, intensity = ratio_intensity(G, R) + plt.plot(intensity, ratio, format) + plt.ylabel('M = log2(R/G') + plt.xlabel('A = log10(R*G)') + + +def normalize_expression_data(data, groups, axis=1, merge_function=numpy.ma.average, center_function=MA_center_lowess_fast): + """ A helper function that normalizes expression array example table, by centering the MA plot. 
+ + """ + if isinstance(data, orange.ExampleTable): + label_groups = [select_indices(data, key, value, axis) for key, value in groups] + array, _, _ = data.toNumpyMA() + + merged = [] + for indices in label_groups: + replicates = numpy.take(array, indices, axis=1) + merged.append(merge_replicates(replicates, axis=1, merge_function=merge_function)) + + ind1, ind2 = label_groups + G, R = merged + Gc, Rc = center_function(G, R) + + domain = orange.Domain(data.domain.attributes, data.domain.classVar) + domain.addmetas(data.domain.getmetas()) + data = orange.ExampleTable(domain, data) + + GFactors = Gc/G + + if axis == 0: + for i, gf in zip(ind1, GFactors): + for attr in range(len(data[i])): + if not data[i][attr].isSpecial(): + data[i][attr] = float(data[i][attr]) * gf + else: + for ex, gf in zip(data, GFactors): + for i in ind1: + if not ex[i].isSpecial(): + ex[i] = float(ex[i]) * gf + return data + + +def MA_zscore(G, R, window=1./5., padded=False, progressCallback=None): + """ Return the Z-score of log2 fold ratio estimated from local + distribution of log2 fold ratio values on the MA-plot + """ + ratio, intensity = ratio_intensity(G, R) + + z_scores = numpy.ma.zeros(G.shape) + sorted = list(numpy.ma.argsort(intensity)) + import math, random + r = int(math.ceil(len(sorted)*window)) # number of window elements + def local_indices(i, sorted): + """ local indices in sorted (mirror padded if out of bounds) + """ + start, end = i - r/2, i + r/2 + r%2 + pad_start , pad_end = [], [] + if start < 0: + pad_start = sorted[:abs(start)] + random.shuffle(pad_start) + start = 0 + if end > len(sorted): + pad_end = sorted[end - len(sorted):] + random.shuffle(pad_end) + end = len(sorted) + + if padded: + return pad_start + sorted[start: end] + pad_end + else: + return sorted[start:end] + + milestones = orngMisc.progressBarMilestones(len(sorted)) + for i in range(len(sorted)): + indices = local_indices(i, sorted) + localRatio = numpy.take(ratio, indices) + local_std = 
numpy.ma.std(localRatio) + ind = sorted[i] + z_scores[ind] = ratio[ind] / local_std + if progressCallback and i in milestones: + progressCallback(100. * i / len(sorted)) + + z_scores._mask = - numpy.isfinite(z_scores) + return z_scores + diff --git a/workflows/bioinformatics/orngBioinformatics/pstat.py b/workflows/bioinformatics/orngBioinformatics/pstat.py new file mode 100644 index 0000000000000000000000000000000000000000..c0e7fc2f97e848cb0717b921055688ce0233e8ec --- /dev/null +++ b/workflows/bioinformatics/orngBioinformatics/pstat.py @@ -0,0 +1,1067 @@ +# Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved. +# +# This software is distributable under the terms of the GNU +# General Public License (GPL) v2, the text of which can be found at +# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise +# using this module constitutes acceptance of the terms of this License. +# +# Disclaimer +# +# This software is provided "as-is". There are no expressed or implied +# warranties of any kind, including, but not limited to, the warranties +# of merchantability and fittness for a given application. In no event +# shall Gary Strangman be liable for any direct, indirect, incidental, +# special, exemplary or consequential damages (including, but not limited +# to, loss of use, data or profits, or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability or tort (including negligence or otherwise) arising in any way +# out of the use of this software, even if advised of the possibility of +# such damage. +# +# Comments and/or additions are welcome (send e-mail to: +# strang@nmr.mgh.harvard.edu). 
+# +""" +pstat.py module + +################################################# +####### Written by: Gary Strangman ########### +####### Last modified: Jun 29, 2001 ########### +################################################# + +This module provides some useful list and array manipulation routines +modeled after those found in the |Stat package by Gary Perlman, plus a +number of other useful list/file manipulation functions. The list-based +functions include: + + abut (source,*args) + simpleabut (source, addon) + colex (listoflists,cnums) + collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) + dm (listoflists,criterion) + flat (l) + linexand (listoflists,columnlist,valuelist) + linexor (listoflists,columnlist,valuelist) + linedelimited (inlist,delimiter) + lineincols (inlist,colsize) + lineincustcols (inlist,colsizes) + list2string (inlist) + makelol(inlist) + makestr(x) + printcc (lst,extra=2) + printincols (listoflists,colsize) + pl (listoflists) + printl(listoflists) + replace (lst,oldval,newval) + recode (inlist,listmap,cols='all') + remap (listoflists,criterion) + roundlist (inlist,num_digits_to_round_floats_to) + sortby(listoflists,sortcols) + unique (inlist) + duplicates(inlist) + writedelimited (listoflists, delimiter, file, writetype='w') + +Some of these functions have alternate versions which are defined only if +Numeric (NumPy) can be imported. These functions are generally named as +above, with an 'a' prefix. + + aabut (source, *args) + acolex (a,indices,axis=1) + acollapse (a,keepcols,collapsecols,sterr=0,ns=0) + adm (a,criterion) + alinexand (a,columnlist,valuelist) + alinexor (a,columnlist,valuelist) + areplace (a,oldval,newval) + arecode (a,listmap,col='all') + arowcompare (row1, row2) + arowsame (row1, row2) + asortrows(a,axis=0) + aunique(inarray) + aduplicates(inarray) + +Currently, the code is all but completely un-optimized. 
In many cases, the +array versions of functions amount simply to aliases to built-in array +functions/methods. Their inclusion here is for function name consistency. +""" + +## CHANGE LOG: +## ========== +## 01-11-15 ... changed list2string() to accept a delimiter +## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1 +## 01-05-31 ... added duplicates() and aduplicates() functions +## 00-12-28 ... license made GPL, docstring and import requirements +## 99-11-01 ... changed version to 0.3 +## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py) +## 03/27/99 ... added areplace function, made replace fcn recursive +## 12/31/98 ... added writefc function for ouput to fixed column sizes +## 12/07/98 ... fixed import problem (failed on collapse() fcn) +## added __version__ variable (now 0.2) +## 12/05/98 ... updated doc-strings +## added features to collapse() function +## added flat() function for lists +## fixed a broken asortrows() +## 11/16/98 ... fixed minor bug in aput for 1D arrays +## +## 11/08/98 ... fixed aput to output large arrays correctly + +from __future__ import absolute_import + +import string, copy +from types import * + +__version__ = 0.4 + +###=========================== LIST FUNCTIONS ========================== +### +### Here are the list functions, DEFINED FOR ALL SYSTEMS. +### Array functions (for NumPy-enabled computers) appear below. +### + +def abut (source,*args): + """ +Like the |Stat abut command. It concatenates two lists side-by-side +and returns the result. '2D' lists are also accomodated for either argument +(source or addon). CAUTION: If one list is shorter, it will be repeated +until it is as long as the longest list. If this behavior is not desired, +use pstat.simpleabut(). 
+ +Usage: abut(source, args) where args=any # of lists +Returns: a list of lists as long as the LONGEST list past, source on the + 'left', lists in attached consecutively on the 'right' +""" + + if type(source) not in [ListType,TupleType]: + source = [source] + for addon in args: + if type(addon) not in [ListType,TupleType]: + addon = [addon] + if len(addon) < len(source): # is source list longer? + if len(source) % len(addon) == 0: # are they integer multiples? + repeats = len(source)/len(addon) # repeat addon n times + origadd = copy.deepcopy(addon) + for i in range(repeats-1): + addon = addon + origadd + else: + repeats = len(source)/len(addon)+1 # repeat addon x times, + origadd = copy.deepcopy(addon) # x is NOT an integer + for i in range(repeats-1): + addon = addon + origadd + addon = addon[0:len(source)] + elif len(source) < len(addon): # is addon list longer? + if len(addon) % len(source) == 0: # are they integer multiples? + repeats = len(addon)/len(source) # repeat source n times + origsour = copy.deepcopy(source) + for i in range(repeats-1): + source = source + origsour + else: + repeats = len(addon)/len(source)+1 # repeat source x times, + origsour = copy.deepcopy(source) # x is NOT an integer + for i in range(repeats-1): + source = source + origsour + source = source[0:len(addon)] + + source = simpleabut(source,addon) + return source + + +def simpleabut (source, addon): + """ +Concatenates two lists as columns and returns the result. '2D' lists +are also accomodated for either argument (source or addon). This DOES NOT +repeat either list to make the 2 lists of equal length. Beware of list pairs +with different lengths ... the resulting list will be the length of the +FIRST list passed. 
+ +Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists) +Returns: a list of lists as long as source, with source on the 'left' and + addon on the 'right' +""" + if type(source) not in [ListType,TupleType]: + source = [source] + if type(addon) not in [ListType,TupleType]: + addon = [addon] + minlen = min(len(source),len(addon)) + list = copy.deepcopy(source) # start abut process + if type(source[0]) not in [ListType,TupleType]: + if type(addon[0]) not in [ListType,TupleType]: + for i in range(minlen): + list[i] = [source[i]] + [addon[i]] # source/addon = column + else: + for i in range(minlen): + list[i] = [source[i]] + addon[i] # addon=list-of-lists + else: + if type(addon[0]) not in [ListType,TupleType]: + for i in range(minlen): + list[i] = source[i] + [addon[i]] # source=list-of-lists + else: + for i in range(minlen): + list[i] = source[i] + addon[i] # source/addon = list-of-lists + source = list + return source + + +def colex (listoflists,cnums): + """ +Extracts from listoflists the columns specified in the list 'cnums' +(cnums can be an integer, a sequence of integers, or a string-expression that +corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex +columns 3 onward from the listoflists). + +Usage: colex (listoflists,cnums) +Returns: a list-of-lists corresponding to the columns from listoflists + specified by cnums, in the order the column numbers appear in cnums +""" + global index + column = 0 + if type(cnums) in [ListType,TupleType]: # if multiple columns to get + index = cnums[0] + column = map(lambda x: x[index], listoflists) + for col in cnums[1:]: + index = col + column = abut(column,map(lambda x: x[index], listoflists)) + elif type(cnums) == StringType: # if an 'x[3:]' type expr. 
+ evalstring = 'map(lambda x: x'+cnums+', listoflists)' + column = eval(evalstring) + else: # else it's just 1 col to get + index = cnums + column = map(lambda x: x[index], listoflists) + return column + + +def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): + """ +Averages data in collapsecol, keeping all unique items in keepcols +(using unique, which keeps unique LISTS of column numbers), retaining the +unique sets of values in keepcols, the mean for each. Setting fcn1 +and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len) +will append those results (e.g., the sterr, N) after each calculated mean. +cfcn is the collapse function to apply (defaults to mean, defined here in the +pstat module to avoid circular imports with stats.py, but harmonicmean or +others could be passed). + +Usage: collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) +Returns: a list of lists with all unique permutations of entries appearing in + columns ("conditions") specified by keepcols, abutted with the result of + cfcn (if cfcn=None, defaults to the mean) of each column specified by + collapsecols. 
+""" + def collmean (inlist): + s = 0 + for item in inlist: + s = s + item + return s/float(len(inlist)) + + if type(keepcols) not in [ListType,TupleType]: + keepcols = [keepcols] + if type(collapsecols) not in [ListType,TupleType]: + collapsecols = [collapsecols] + if cfcn == None: + cfcn = collmean + if keepcols == []: + means = [0]*len(collapsecols) + for i in range(len(collapsecols)): + avgcol = colex(listoflists,collapsecols[i]) + means[i] = cfcn(avgcol) + if fcn1: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + means[i] = [means[i], test] + if fcn2: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + try: + means[i] = means[i] + [len(avgcol)] + except TypeError: + means[i] = [means[i],len(avgcol)] + return means + else: + values = colex(listoflists,keepcols) + uniques = unique(values) + uniques.sort() + newlist = [] + if type(keepcols) not in [ListType,TupleType]: keepcols = [keepcols] + for item in uniques: + if type(item) not in [ListType,TupleType]: item =[item] + tmprows = linexand(listoflists,keepcols,item) + for col in collapsecols: + avgcol = colex(tmprows,col) + item.append(cfcn(avgcol)) + if fcn1 <> None: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + item.append(test) + if fcn2 <> None: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + item.append(test) + newlist.append(item) + return newlist + + +def dm (listoflists,criterion): + """ +Returns rows from the passed list of lists that meet the criteria in +the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9' +will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows +with column 2 equal to the string 'N'). + +Usage: dm (listoflists, criterion) +Returns: rows from listoflists that meet the specified criterion. +""" + function = 'filter(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def flat(l): + """ +Returns the flattened version of a '2D' list. 
List-correlate to the a.flat() +method of NumPy arrays. + +Usage: flat(l) +""" + newl = [] + for i in range(len(l)): + for j in range(len(l[i])): + newl.append(l[i][j]) + return newl + + +def linexand (listoflists,columnlist,valuelist): + """ +Returns the rows of a list of lists where col (from columnlist) = val +(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]). +len(columnlist) must equal len(valuelist). + +Usage: linexand (listoflists,columnlist,valuelist) +Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i +""" + if type(columnlist) not in [ListType,TupleType]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType]: + valuelist = [valuelist] + criterion = '' + for i in range(len(columnlist)): + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' + criterion = criterion[0:-3] # remove the "and" after the last crit + function = 'filter(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def linexor (listoflists,columnlist,valuelist): + """ +Returns the rows of a list of lists where col (from columnlist) = val +(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[). +One value is required for each column in columnlist. If only one value +exists for columnlist but multiple values appear in valuelist, the +valuelist values are all assumed to pertain to the same column. 
+ +Usage: linexor (listoflists,columnlist,valuelist) +Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i +""" + if type(columnlist) not in [ListType,TupleType]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType]: + valuelist = [valuelist] + criterion = '' + if len(columnlist) == 1 and len(valuelist) > 1: + columnlist = columnlist*len(valuelist) + for i in range(len(columnlist)): # build an exec string + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' + criterion = criterion[0:-2] # remove the "or" after the last crit + function = 'filter(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def linedelimited (inlist,delimiter): + """ +Returns a string composed of elements in inlist, with each element +separated by 'delimiter.' Used by function writedelimited. Use '\t' +for tab-delimiting. + +Usage: linedelimited (inlist,delimiter) +""" + outstr = '' + for item in inlist: + if type(item) <> StringType: + item = str(item) + outstr = outstr + item + delimiter + outstr = outstr[0:-1] + return outstr + + +def lineincols (inlist,colsize): + """ +Returns a string composed of elements in inlist, with each element +right-aligned in columns of (fixed) colsize. + +Usage: lineincols (inlist,colsize) where colsize is an integer +""" + outstr = '' + for item in inlist: + if type(item) <> StringType: + item = str(item) + size = len(item) + if size <= colsize: + for i in range(colsize-size): + outstr = outstr + ' ' + outstr = outstr + item + else: + outstr = outstr + item[0:colsize+1] + return outstr + + +def lineincustcols (inlist,colsizes): + """ +Returns a string composed of elements in inlist, with each element +right-aligned in a column of width specified by a sequence colsizes. 
The +length of colsizes must be greater than or equal to the number of columns +in inlist. + +Usage: lineincustcols (inlist,colsizes) +Returns: formatted string created from inlist +""" + outstr = '' + for i in range(len(inlist)): + if type(inlist[i]) <> StringType: + item = str(inlist[i]) + else: + item = inlist[i] + size = len(item) + if size <= colsizes[i]: + for j in range(colsizes[i]-size): + outstr = outstr + ' ' + outstr = outstr + item + else: + outstr = outstr + item[0:colsizes[i]+1] + return outstr + + +def list2string (inlist,delimit=' '): + """ +Converts a 1D list to a single long string for file output, using +the string.join function. + +Usage: list2string (inlist,delimit=' ') +Returns: the string created from inlist +""" + stringlist = map(makestr,inlist) + return string.join(stringlist,delimit) + + +def makelol(inlist): + """ +Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you +want to use put() to write a 1D list one item per line in the file. + +Usage: makelol(inlist) +Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc. +""" + x = [] + for item in inlist: + x.append([item]) + return x + + +def makestr (x): + if type(x) <> StringType: + x = str(x) + return x + + +def printcc (lst,extra=2): + """ +Prints a list of lists in columns, customized by the max size of items +within the columns (max size of items in col, plus 'extra' number of spaces). +Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines, +respectively. 
+ +Usage: printcc (lst,extra=2) +Returns: None +""" + if type(lst[0]) not in [ListType,TupleType]: + lst = [lst] + rowstokill = [] + list2print = copy.deepcopy(lst) + for i in range(len(lst)): + if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']: + rowstokill = rowstokill + [i] + rowstokill.reverse() # delete blank rows from the end + for row in rowstokill: + del list2print[row] + maxsize = [0]*len(list2print[0]) + for col in range(len(list2print[0])): + items = colex(list2print,col) + items = map(makestr,items) + maxsize[col] = max(map(len,items)) + extra + for row in lst: + if row == ['\n'] or row == '\n' or row == '' or row == ['']: + print + elif row == ['dashes'] or row == 'dashes': + dashes = [0]*len(maxsize) + for j in range(len(maxsize)): + dashes[j] = '-'*(maxsize[j]-2) + print lineincustcols(dashes,maxsize) + else: + print lineincustcols(row,maxsize) + return None + + +def printincols (listoflists,colsize): + """ +Prints a list of lists in columns of (fixed) colsize width, where +colsize is an integer. + +Usage: printincols (listoflists,colsize) +Returns: None +""" + for row in listoflists: + print lineincols(row,colsize) + return None + + +def pl (listoflists): + """ +Prints a list of lists, 1 list (row) at a time. + +Usage: pl(listoflists) +Returns: None +""" + for row in listoflists: + if row[-1] == '\n': + print row, + else: + print row + return None + + +def printl(listoflists): + """Alias for pl.""" + pl(listoflists) + return + + +def replace (inlst,oldval,newval): + """ +Replaces all occurrences of 'oldval' with 'newval', recursively. + +Usage: replace (inlst,oldval,newval) +""" + lst = inlst*1 + for i in range(len(lst)): + if type(lst[i]) not in [ListType,TupleType]: + if lst[i]==oldval: lst[i]=newval + else: + lst[i] = replace(lst[i],oldval,newval) + return lst + + +def recode (inlist,listmap,cols=None): + """ +Changes the values in a list to a new set of values (useful when +you need to recode data from (e.g.) 
strings to numbers. cols defaults +to None (meaning all columns are recoded). + +Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list +Returns: inlist with the appropriate values replaced with new ones +""" + lst = copy.deepcopy(inlist) + if cols != None: + if type(cols) not in [ListType,TupleType]: + cols = [cols] + for col in cols: + for row in range(len(lst)): + try: + idx = colex(listmap,0).index(lst[row][col]) + lst[row][col] = listmap[idx][1] + except ValueError: + pass + else: + for row in range(len(lst)): + for col in range(len(lst)): + try: + idx = colex(listmap,0).index(lst[row][col]) + lst[row][col] = listmap[idx][1] + except ValueError: + pass + return lst + + +def remap (listoflists,criterion): + """ +Remaps values in a given column of a 2D list (listoflists). This requires +a criterion as a function of 'x' so that the result of the following is +returned ... map(lambda x: 'criterion',listoflists). + +Usage: remap(listoflists,criterion) criterion=string +Returns: remapped version of listoflists +""" + function = 'map(lambda x: '+criterion+',listoflists)' + lines = eval(function) + return lines + + +def roundlist (inlist,digits): + """ +Goes through each element in a 1D or 2D inlist, and applies the following +function to all elements of FloatType ... round(element,digits). + +Usage: roundlist(inlist,digits) +Returns: list with rounded floats +""" + if type(inlist[0]) in [IntType, FloatType]: + inlist = [inlist] + l = inlist*1 + for i in range(len(l)): + for j in range(len(l[i])): + if type(l[i][j])==FloatType: + l[i][j] = round(l[i][j],digits) + return l + + +def sortby(listoflists,sortcols): + """ +Sorts a list of lists on the column(s) specified in the sequence +sortcols. 
+ +Usage: sortby(listoflists,sortcols) +Returns: sorted list, unchanged column ordering +""" + newlist = abut(colex(listoflists,sortcols),listoflists) + newlist.sort() + try: + numcols = len(sortcols) + except TypeError: + numcols = 1 + crit = '[' + str(numcols) + ':]' + newlist = colex(newlist,crit) + return newlist + + +def unique (inlist): + """ +Returns all unique items in the passed list. If the a list-of-lists +is passed, unique LISTS are found (i.e., items in the first dimension are +compared). + +Usage: unique (inlist) +Returns: the unique elements (or rows) in inlist +""" + uniques = [] + for item in inlist: + if item not in uniques: + uniques.append(item) + return uniques + +def duplicates(inlist): + """ +Returns duplicate items in the FIRST dimension of the passed list. + +Usage: duplicates (inlist) +""" + dups = [] + for i in range(len(inlist)): + if inlist[i] in inlist[i+1:]: + dups.append(inlist[i]) + return dups + + +def nonrepeats(inlist): + """ +Returns items that are NOT duplicated in the first dim of the passed list. + +Usage: nonrepeats (inlist) +""" + nonrepeats = [] + for i in range(len(inlist)): + if inlist.count(inlist[i]) == 1: + nonrepeats.append(inlist[i]) + return nonrepeatstry: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE + import Numeric + N = Numeric + + def aabut (source, *args): + """ +Like the |Stat abut command. It concatenates two arrays column-wise +and returns the result. CAUTION: If one array is shorter, it will be +repeated until it is as long as the other. + +Usage: aabut (source, args) where args=any # of arrays +Returns: an array as long as the LONGEST array past, source appearing on the + 'left', arrays in attached on the 'right'. 
+""" + if len(source.shape)==1: + width = 1 + source = N.resize(source,[source.shape[0],width]) + else: + width = source.shape[1] + for addon in args: + if len(addon.shape)==1: + width = 1 + addon = N.resize(addon,[source.shape[0],width]) + else: + width = source.shape[1] + if len(addon) < len(source): + addon = N.resize(addon,[source.shape[0],addon.shape[1]]) + elif len(source) < len(addon): + source = N.resize(source,[addon.shape[0],source.shape[1]]) + source = N.concatenate((source,addon),1) + return source + + + def acolex (a,indices,axis=1): + """ +Extracts specified indices (a list) from passed array, along passed +axis (column extraction is default). BEWARE: A 1D array is presumed to be a +column-array (and that the whole array will be returned as a column). + +Usage: acolex (a,indices,axis=1) +Returns: the columns of a specified by indices +""" + if type(indices) not in [ListType,TupleType,N.ArrayType]: + indices = [indices] + if len(N.shape(a)) == 1: + cols = N.resize(a,[a.shape[0],1]) + else: + cols = N.take(a,indices,axis) + return cols + + + def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None): + """ +Averages data in collapsecol, keeping all unique items in keepcols +(using unique, which keeps unique LISTS of column numbers), retaining +the unique sets of values in keepcols, the mean for each. If stderror or +N of the mean are desired, set either or both parameters to 1. 
+ +Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) +Returns: unique 'conditions' specified by the contents of columns specified + by keepcols, abutted with the mean(s) of column(s) specified by + collapsecols +""" + def acollmean (inarray): + return N.sum(N.ravel(inarray)) + + if cfcn == None: + cfcn = acollmean + if keepcols == []: + avgcol = acolex(a,collapsecols) + means = N.sum(avgcol)/float(len(avgcol)) + if fcn1<>None: + try: + test = fcn1(avgcol) + except: + test = N.array(['N/A']*len(means)) + means = aabut(means,test) + if fcn2<>None: + try: + test = fcn2(avgcol) + except: + test = N.array(['N/A']*len(means)) + means = aabut(means,test) + return means + else: + if type(keepcols) not in [ListType,TupleType,N.ArrayType]: + keepcols = [keepcols] + values = colex(a,keepcols) # so that "item" can be appended (below) + uniques = unique(values) # get a LIST, so .sort keeps rows intact + uniques.sort() + newlist = [] + for item in uniques: + if type(item) not in [ListType,TupleType,N.ArrayType]: + item =[item] + tmprows = alinexand(a,keepcols,item) + for col in collapsecols: + avgcol = acolex(tmprows,col) + item.append(acollmean(avgcol)) + if fcn1<>None: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + item.append(test) + if fcn2<>None: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + item.append(test) + newlist.append(item) + try: + new_a = N.array(newlist) + except TypeError: + new_a = N.array(newlist,'O') + return new_a + + + def adm (a,criterion): + """ +Returns rows from the passed list of lists that meet the criteria in +the passed criterion expression (a string as a function of x). 
+ +Usage: adm (a,criterion) where criterion is like 'x[2]==37' +""" + function = 'filter(lambda x: '+criterion+',a)' + lines = eval(function) + try: + lines = N.array(lines) + except: + lines = N.array(lines,'O') + return lines + + + def isstring(x): + if type(x)==StringType: + return 1 + else: + return 0 + + + def alinexand (a,columnlist,valuelist): + """ +Returns the rows of an array where col (from columnlist) = val +(from valuelist). One value is required for each column in columnlist. + +Usage: alinexand (a,columnlist,valuelist) +Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i +""" + if type(columnlist) not in [ListType,TupleType,N.ArrayType]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType,N.ArrayType]: + valuelist = [valuelist] + criterion = '' + for i in range(len(columnlist)): + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' + criterion = criterion[0:-3] # remove the "and" after the last crit + return adm(a,criterion) + + + def alinexor (a,columnlist,valuelist): + """ +Returns the rows of an array where col (from columnlist) = val (from +valuelist). One value is required for each column in columnlist. +The exception is if either columnlist or valuelist has only 1 value, +in which case that item will be expanded to match the length of the +other list. 
+ +Usage: alinexor (a,columnlist,valuelist) +Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i +""" + if type(columnlist) not in [ListType,TupleType,N.ArrayType]: + columnlist = [columnlist] + if type(valuelist) not in [ListType,TupleType,N.ArrayType]: + valuelist = [valuelist] + criterion = '' + if len(columnlist) == 1 and len(valuelist) > 1: + columnlist = columnlist*len(valuelist) + elif len(valuelist) == 1 and len(columnlist) > 1: + valuelist = valuelist*len(columnlist) + for i in range(len(columnlist)): + if type(valuelist[i])==StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' + criterion = criterion[0:-2] # remove the "or" after the last crit + return adm(a,criterion) + + + def areplace (a,oldval,newval): + """ +Replaces all occurrences of oldval with newval in array a. + +Usage: areplace(a,oldval,newval) +""" + newa = N.not_equal(a,oldval)*a + return newa+N.equal(a,oldval)*newval + + + def arecode (a,listmap,col='all'): + """ +Remaps the values in an array to a new set of values (useful when +you need to recode data from (e.g.) strings to numbers as most stats +packages require. Can work on SINGLE columns, or 'all' columns at once. 
+ +Usage: arecode (a,listmap,col='all') +Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1] +""" + ashape = a.shape + if col == 'all': + work = a.flat + else: + work = acolex(a,col) + work = work.flat + for pair in listmap: + if type(pair[1]) == StringType or work.typecode()=='O' or a.typecode()=='O': + work = N.array(work,'O') + a = N.array(a,'O') + for i in range(len(work)): + if work[i]==pair[0]: + work[i] = pair[1] + if col == 'all': + return N.reshape(work,ashape) + else: + return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1) + else: # must be a non-Object type array and replacement + work = N.where(N.equal(work,pair[0]),pair[1],work) + return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1) + + + def arowcompare(row1, row2): + """ +Compares two rows from an array, regardless of whether it is an +array of numbers or of python objects (which requires the cmp function). + +Usage: arowcompare(row1,row2) +Returns: an array of equal length containing 1s where the two rows had + identical elements and 0 otherwise +""" + if row1.typecode()=='O' or row2.typecode=='O': + cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1 + else: + cmpvect = N.equal(row1,row2) + return cmpvect + + + def arowsame(row1, row2): + """ +Compares two rows from an array, regardless of whether it is an +array of numbers or of python objects (which requires the cmp function). + +Usage: arowsame(row1,row2) +Returns: 1 if the two rows are identical, 0 otherwise. +""" + cmpval = N.alltrue(arowcompare(row1,row2)) + return cmpval + + + def asortrows(a,axis=0): + """ +Sorts an array "by rows". This differs from the Numeric.sort() function, +which sorts elements WITHIN the given axis. Instead, this function keeps +the elements along the given axis intact, but shifts them 'up or down' +relative to one another. 
+ +Usage: asortrows(a,axis=0) +Returns: sorted version of a +""" + if axis != 0: + a = N.swapaxes(a, axis, 0) + l = a.tolist() + l.sort() # or l.sort(_sort) + y = N.array(l) + if axis != 0: + y = N.swapaxes(y, axis, 0) + return y + + + def aunique(inarray): + """ +Returns unique items in the FIRST dimension of the passed array. Only +works on arrays NOT including string items. + +Usage: aunique (inarray) +""" + uniques = N.array([inarray[0]]) + if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY + for item in inarray[1:]: + if N.add.reduce(N.equal(uniques,item).flat) == 0: + try: + uniques = N.concatenate([uniques,N.array[N.NewAxis,:]]) + except TypeError: + uniques = N.concatenate([uniques,N.array([item])]) + else: # IT MUST BE A 2+D ARRAY + if inarray.typecode() != 'O': # not an Object array + for item in inarray[1:]: + if not N.sum(N.alltrue(N.equal(uniques,item),1)): + try: + uniques = N.concatenate( [uniques,item[N.NewAxis,:]] ) + except TypeError: # the item to add isn't a list + uniques = N.concatenate([uniques,N.array([item])]) + else: + pass # this item is already in the uniques array + else: # must be an Object array, alltrue/equal functions don't work + for item in inarray[1:]: + newflag = 1 + for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=> + test = N.sum(abs(N.array(map(cmp,item,unq)))) + if test == 0: # if item identical to any 1 row in uniques + newflag = 0 # then not a novel item to add + break + if newflag == 1: + try: + uniques = N.concatenate( [uniques,item[N.NewAxis,:]] ) + except TypeError: # the item to add isn't a list + uniques = N.concatenate([uniques,N.array([item])]) + return uniques + + + def aduplicates(inarray): + """ +Returns duplicate items in the FIRST dimension of the passed array. Only +works on arrays NOT including string items. 
+ +Usage: aunique (inarray) +""" + inarray = N.array(inarray) + if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY + dups = [] + inarray = inarray.tolist() + for i in range(len(inarray)): + if inarray[i] in inarray[i+1:]: + dups.append(inarray[i]) + dups = aunique(dups) + else: # IT MUST BE A 2+D ARRAY + dups = [] + aslist = inarray.tolist() + for i in range(len(aslist)): + if aslist[i] in aslist[i+1:]: + dups.append(aslist[i]) + dups = unique(dups) + dups = N.array(dups) + return dups + +except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs + pass diff --git a/workflows/bioinformatics/orngBioinformatics/stats.py b/workflows/bioinformatics/orngBioinformatics/stats.py new file mode 100644 index 0000000000000000000000000000000000000000..1cebe4e97f1b7cbebbe1338b68338b875e7569c5 --- /dev/null +++ b/workflows/bioinformatics/orngBioinformatics/stats.py @@ -0,0 +1,4347 @@ +# Copyright (c) 1999-2002 Gary Strangman; All Rights Reserved. +# +# This software is distributable under the terms of the GNU +# General Public License (GPL) v2, the text of which can be found at +# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise +# using this module constitutes acceptance of the terms of this License. +# +# Disclaimer +# +# This software is provided "as-is". There are no expressed or implied +# warranties of any kind, including, but not limited to, the warranties +# of merchantability and fittness for a given application. In no event +# shall Gary Strangman be liable for any direct, indirect, incidental, +# special, exemplary or consequential damages (including, but not limited +# to, loss of use, data or profits, or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability or tort (including negligence or otherwise) arising in any way +# out of the use of this software, even if advised of the possibility of +# such damage. 
+# +# Comments and/or additions are welcome (send e-mail to: +# strang@nmr.mgh.harvard.edu). +# +""" +stats.py module + +(Requires pstat.py module.) + +################################################# +####### Written by: Gary Strangman ########### +####### Last modified: May 10, 2002 ########### +################################################# + +A collection of basic statistical functions for python. The function +names appear below. + +IMPORTANT: There are really *3* sets of functions. The first set has an 'l' +prefix, which can be used with list or tuple arguments. The second set has +an 'a' prefix, which can accept NumPy array arguments. These latter +functions are defined only when NumPy is available on the system. The third +type has NO prefix (i.e., has the name that appears below). Functions of +this set are members of a "Dispatch" class, c/o David Ascher. This class +allows different functions to be called depending on the type of the passed +arguments. Thus, stats.mean is a member of the Dispatch class and +stats.mean(range(20)) will call stats.lmean(range(20)) while +stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)). +This is a handy way to keep consistent function names when different +argument types require different functions to be called. Having +implemented the Dispatch class, however, means that to get info on +a given function, you must use the REAL function name ... that is +"print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine, +while "print stats.mean.__doc__" will print the doc for the Dispatch +class. NUMPY FUNCTIONS ('a' prefix) generally have more argument options +but should otherwise be consistent with the corresponding list functions. + +Disclaimers: The function list is obviously incomplete and, worse, the +functions are not optimized. All functions have been tested (some more +so than others), but they are far from bulletproof.
Thus, as with any +free software, no warranty or guarantee is expressed or implied. :-) A +few extra functions that don't appear in the list below can be found by +interested treasure-hunters. These functions don't necessarily have +both list and array versions but were deemed useful + +CENTRAL TENDENCY: geometricmean + harmonicmean + mean + median + medianscore + mode + +MOMENTS: moment + variation + skew + kurtosis + skewtest (for Numpy arrays only) + kurtosistest (for Numpy arrays only) + normaltest (for Numpy arrays only) + +ALTERED VERSIONS: tmean (for Numpy arrays only) + tvar (for Numpy arrays only) + tmin (for Numpy arrays only) + tmax (for Numpy arrays only) + tstdev (for Numpy arrays only) + tsem (for Numpy arrays only) + describe + +FREQUENCY STATS: itemfreq + scoreatpercentile + percentileofscore + histogram + cumfreq + relfreq + +VARIABILITY: obrientransform + samplevar + samplestdev + signaltonoise (for Numpy arrays only) + var + stdev + sterr + sem + z + zs + zmap (for Numpy arrays only) + +TRIMMING FCNS: threshold (for Numpy arrays only) + trimboth + trim1 + round (round all vals to 'n' decimals; Numpy only) + +CORRELATION FCNS: covariance (for Numpy arrays only) + correlation (for Numpy arrays only) + paired + pearsonr + spearmanr + pointbiserialr + kendalltau + linregress + +INFERENTIAL STATS: ttest_1samp + ttest_ind + ttest_rel + chisquare + ks_2samp + mannwhitneyu + ranksums + wilcoxont + kruskalwallish + friedmanchisquare + +PROBABILITY CALCS: chisqprob + erfcc + zprob + ksprob + fprob + betacf + gammln + betai + +ANOVA FUNCTIONS: F_oneway + F_value + +SUPPORT FUNCTIONS: writecc + incr + sign (for Numpy arrays only) + sum + cumsum + ss + summult + sumdiffsquared + square_of_sums + shellsort + rankdata + outputpairedstats + findwithin +""" +## CHANGE LOG: +## =========== +## 02-11-19 ... fixed attest_ind and attest_rel for div-by-zero Overflows +## 02-05-10 ... fixed lchisqprob indentation (failed when df=even) +## 00-12-28 ... 
removed aanova() to separate module, fixed licensing to +## match Python License, fixed doc string & imports +## 00-04-13 ... pulled all "global" statements, except from aanova() +## added/fixed lots of documentation, removed io.py dependency +## changed to version 0.5 +## 99-11-13 ... added asign() function +## 99-11-01 ... changed version to 0.4 ... enough incremental changes now +## 99-10-25 ... added acovariance and acorrelation functions +## 99-10-10 ... fixed askew/akurtosis to avoid divide-by-zero errors +## added aglm function (crude, but will be improved) +## 99-10-04 ... upgraded acumsum, ass, asummult, asamplevar, avar, etc. to +## all handle lists of 'dimension's and keepdims +## REMOVED ar0, ar2, ar3, ar4 and replaced them with around +## reinserted fixes for abetai to avoid math overflows +## 99-09-05 ... rewrote achisqprob/aerfcc/aksprob/afprob/abetacf/abetai to +## handle multi-dimensional arrays (whew!) +## 99-08-30 ... fixed l/amoment, l/askew, l/akurtosis per D'Agostino (1990) +## added anormaltest per same reference +## re-wrote azprob to calc arrays of probs all at once +## 99-08-22 ... edited attest_ind printing section so arrays could be rounded +## 99-08-19 ... fixed amean and aharmonicmean for non-error(!) overflow on +## short/byte arrays (mean of #s btw 100-300 = -150??) +## 99-08-09 ... fixed asum so that the None case works for Byte arrays +## 99-08-08 ... fixed 7/3 'improvement' to handle t-calcs on N-D arrays +## 99-07-03 ... improved attest_ind, attest_rel (zero-division errortrap) +## 99-06-24 ... fixed bug(?) in attest_ind (n1=a.shape[0]) +## 04/11/99 ... added asignaltonoise, athreshold functions, changed all +## max/min in array section to N.maximum/N.minimum, +## fixed square_of_sums to prevent integer overflow +## 04/10/99 ... !!! Changed function name ... sumsquared ==> square_of_sums +## 03/18/99 ... Added ar0, ar2, ar3 and ar4 rounding functions +## 02/28/99 ... 
Fixed aobrientransform to return an array rather than a list +## 01/15/99 ... Essentially ceased updating list-versions of functions (!!!) +## 01/13/99 ... CHANGED TO VERSION 0.3 +## fixed bug in a/lmannwhitneyu p-value calculation +## 12/31/98 ... fixed variable-name bug in ldescribe +## 12/19/98 ... fixed bug in findwithin (fcns needed pstat. prefix) +## 12/16/98 ... changed amedianscore to return float (not array) for 1 score +## 12/14/98 ... added atmin and atmax functions +## removed umath from import line (not needed) +## l/ageometricmean modified to reduce chance of overflows (take +## nth root first, then multiply) +## 12/07/98 ... added __version__variable (now 0.2) +## removed all 'stats.' from anova() fcn +## 12/06/98 ... changed those functions (except shellsort) that altered +## arguments in-place ... cumsum, ranksort, ... +## updated (and fixed some) doc-strings +## 12/01/98 ... added anova() function (requires NumPy) +## incorporated Dispatch class +## 11/12/98 ... added functionality to amean, aharmonicmean, ageometricmean +## added 'asum' function (added functionality to N.add.reduce) +## fixed both moment and amoment (two errors) +## changed name of skewness and askewness to skew and askew +## fixed (a)histogram (which sometimes counted points =len(inlist)/2.0: + cfbin = i + break + LRL = smallest + binsize*cfbin # get lower read limit of that bin + cfbelow = cumhist[cfbin-1] + freq = float(hist[cfbin]) # frequency IN the 50%ile bin + median = LRL + ((len(inlist)/2.0 - cfbelow)/float(freq))*binsize # median formula + return median + + +def lmedianscore (inlist): + """ +Returns the 'middle' score of the passed list. If there is an even +number of scores, the mean of the 2 middle scores is returned. 
+ +Usage: lmedianscore(inlist) +""" + + newlist = copy.deepcopy(inlist) + newlist.sort() + if len(newlist) % 2 == 0: # if even number of scores, average middle 2 + index = len(newlist)/2 # integer division correct + median = float(newlist[index] + newlist[index-1]) /2 + else: + index = len(newlist)/2 # int divsion gives mid value when count from 0 + median = newlist[index] + return median + + +def lmode(inlist): + """ +Returns a list of the modal (most common) score(s) in the passed +list. If there is more than one such score, all are returned. The +bin-count for the mode(s) is also returned. + +Usage: lmode(inlist) +Returns: bin-count for mode(s), a list of modal value(s) +""" + + scores = pstat.unique(inlist) + scores.sort() + freq = [] + for item in scores: + freq.append(inlist.count(item)) + maxfreq = max(freq) + mode = [] + stillmore = 1 + while stillmore: + try: + indx = freq.index(maxfreq) + mode.append(scores[indx]) + del freq[indx] + del scores[indx] + except ValueError: + stillmore=0 + return maxfreq, mode + + +#################################### +############ MOMENTS ############# +#################################### + +def lmoment(inlist,moment=1): + """ +Calculates the nth moment about the mean for a sample (defaults to +the 1st moment). Used to calculate coefficients of skewness and kurtosis. + +Usage: lmoment(inlist,moment=1) +Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r) +""" + if moment == 1: + return 0.0 + else: + mn = mean(inlist) + n = len(inlist) + s = 0 + for x in inlist: + s = s + (x-mn)**moment + return s/float(n) + + +def lvariation(inlist): + """ +Returns the coefficient of variation, as defined in CRC Standard +Probability and Statistics, p.6. + +Usage: lvariation(inlist) +""" + return 100.0*samplestdev(inlist)/float(mean(inlist)) + + +def lskew(inlist): + """ +Returns the skewness of a distribution, as defined in Numerical +Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) 
+ +Usage: lskew(inlist) +""" + return moment(inlist,3)/pow(moment(inlist,2),1.5) + + +def lkurtosis(inlist): + """ +Returns the kurtosis of a distribution, as defined in Numerical +Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) + +Usage: lkurtosis(inlist) +""" + return moment(inlist,4)/pow(moment(inlist,2),2.0) + + +def ldescribe(inlist): + """ +Returns some descriptive statistics of the passed list (assumed to be 1D). + +Usage: ldescribe(inlist) +Returns: n, mean, standard deviation, skew, kurtosis +""" + n = len(inlist) + mm = (min(inlist),max(inlist)) + m = mean(inlist) + sd = stdev(inlist) + sk = skew(inlist) + kurt = kurtosis(inlist) + return n, mm, m, sd, sk, kurt + + +#################################### +####### FREQUENCY STATS ########## +#################################### + +def litemfreq(inlist): + """ +Returns a list of pairs. Each pair consists of one of the scores in inlist +and it's frequency count. Assumes a 1D list is passed. + +Usage: litemfreq(inlist) +Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) +""" + scores = pstat.unique(inlist) + scores.sort() + freq = [] + for item in scores: + freq.append(inlist.count(item)) + return pstat.abut(scores, freq) + + +def lscoreatpercentile (inlist, percent): + """ +Returns the score at a given percentile relative to the distribution +given by inlist. + +Usage: lscoreatpercentile(inlist,percent) +""" + if percent > 1: + print "\nDividing percent>1 by 100 in lscoreatpercentile().\n" + percent = percent / 100.0 + targetcf = percent*len(inlist) + h, lrl, binsize, extras = histogram(inlist) + cumhist = cumsum(copy.deepcopy(h)) + for i in range(len(cumhist)): + if cumhist[i] >= targetcf: + break + score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i) + return score + + +def lpercentileofscore (inlist, score,histbins=10,defaultlimits=None): + """ +Returns the percentile value of a score relative to the distribution +given by inlist. 
Formula depends on the values used to histogram the data(!). + +Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None) +""" + + h, lrl, binsize, extras = histogram(inlist,histbins,defaultlimits) + cumhist = cumsum(copy.deepcopy(h)) + i = int((score - lrl)/float(binsize)) + pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inlist)) * 100 + return pct + + +def lhistogram (inlist,numbins=10,defaultreallimits=None,printextras=0): + """ +Returns (i) a list of histogram bin counts, (ii) the smallest value +of the histogram binning, and (iii) the bin width (the last 2 are not +necessarily integers). Default number of bins is 10. If no sequence object +is given for defaultreallimits, the routine picks (usually non-pretty) bins +spanning all the numbers in the inlist. + +Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0) +Returns: list of bin values, lowerreallimit, binsize, extrapoints +""" + if (defaultreallimits <> None): + if type(defaultreallimits) not in [ListType,TupleType] or len(defaultreallimits)==1: # only one limit given, assumed to be lower one & upper is calc'd + lowerreallimit = defaultreallimits + upperreallimit = 1.0001 * max(inlist) + else: # assume both limits given + lowerreallimit = defaultreallimits[0] + upperreallimit = defaultreallimits[1] + binsize = (upperreallimit-lowerreallimit)/float(numbins) + else: # no limits given for histogram, both must be calc'd + estbinwidth=(max(inlist)-min(inlist))/float(numbins) + 1 # 1=>cover all + binsize = ((max(inlist)-min(inlist)+estbinwidth))/float(numbins) + lowerreallimit = min(inlist) - binsize/2 #lower real limit,1st bin + bins = [0]*(numbins) + extrapoints = 0 + for num in inlist: + try: + if (num-lowerreallimit) < 0: + extrapoints = extrapoints + 1 + else: + bintoincrement = int((num-lowerreallimit)/float(binsize)) + bins[bintoincrement] = bins[bintoincrement] + 1 + except: + extrapoints = extrapoints + 1 + if (extrapoints > 0 and 
printextras == 1): + print '\nPoints outside given histogram range =',extrapoints + return (bins, lowerreallimit, binsize, extrapoints) + + +def lcumfreq(inlist,numbins=10,defaultreallimits=None): + """ +Returns a cumulative frequency histogram, using the histogram function. + +Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None) +Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h,l,b,e = histogram(inlist,numbins,defaultreallimits) + cumhist = cumsum(copy.deepcopy(h)) + return cumhist,l,b,e + + +def lrelfreq(inlist,numbins=10,defaultreallimits=None): + """ +Returns a relative frequency histogram, using the histogram function. + +Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None) +Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h,l,b,e = histogram(inlist,numbins,defaultreallimits) + for i in range(len(h)): + h[i] = h[i]/float(len(inlist)) + return h,l,b,e + + +#################################### +##### VARIABILITY FUNCTIONS ###### +#################################### + +def lobrientransform(*args): + """ +Computes a transform on input data (any number of columns). Used to +test for homogeneity of variance prior to running one-way stats. From +Maxwell and Delaney, p.112. + +Usage: lobrientransform(*args) +Returns: transformed data for use in an ANOVA +""" + TINY = 1e-10 + k = len(args) + n = [0.0]*k + v = [0.0]*k + m = [0.0]*k + nargs = [] + for i in range(k): + nargs.append(copy.deepcopy(args[i])) + n[i] = float(len(nargs[i])) + v[i] = var(nargs[i]) + m[i] = mean(nargs[i]) + for j in range(k): + for i in range(n[j]): + t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2 + t2 = 0.5*v[j]*(n[j]-1.0) + t3 = (n[j]-1.0)*(n[j]-2.0) + nargs[j][i] = (t1-t2) / float(t3) + check = 1 + for j in range(k): + if v[j] - mean(nargs[j]) > TINY: + check = 0 + if check <> 1: + raise ValueError, 'Problem in obrientransform.' 
+ else: + return nargs + + +def lsamplevar (inlist): + """ +Returns the variance of the values in the passed list using +N for the denominator (i.e., DESCRIBES the sample variance only). + +Usage: lsamplevar(inlist) +""" + n = len(inlist) + mn = mean(inlist) + deviations = [] + for item in inlist: + deviations.append(item-mn) + return ss(deviations)/float(n) + + +def lsamplestdev (inlist): + """ +Returns the standard deviation of the values in the passed list using +N for the denominator (i.e., DESCRIBES the sample stdev only). + +Usage: lsamplestdev(inlist) +""" + return math.sqrt(samplevar(inlist)) + + +def lvar (inlist): + """ +Returns the variance of the values in the passed list using N-1 +for the denominator (i.e., for estimating population variance). + +Usage: lvar(inlist) +""" + n = len(inlist) + mn = mean(inlist) + deviations = [0]*len(inlist) + for i in range(len(inlist)): + deviations[i] = inlist[i] - mn + return ss(deviations)/float(n-1) + + +def lstdev (inlist): + """ +Returns the standard deviation of the values in the passed list +using N-1 in the denominator (i.e., to estimate population stdev). + +Usage: lstdev(inlist) +""" + return math.sqrt(var(inlist)) + + +def lsterr(inlist): + """ +Returns the standard error of the values in the passed list using N-1 +in the denominator (i.e., to estimate population standard error). + +Usage: lsterr(inlist) +""" + return stdev(inlist) / float(math.sqrt(len(inlist))) + + +def lsem (inlist): + """ +Returns the estimated standard error of the mean (sx-bar) of the +values in the passed list. sem = stdev / sqrt(n) + +Usage: lsem(inlist) +""" + sd = stdev(inlist) + n = len(inlist) + return sd/math.sqrt(n) + + +def lz (inlist, score): + """ +Returns the z-score for a given input score, given that score and the +list from which that score came. Not appropriate for population calculations. 
+ +Usage: lz(inlist, score) +""" + z = (score-mean(inlist))/samplestdev(inlist) + return z + + +def lzs (inlist): + """ +Returns a list of z-scores, one for each score in the passed list. + +Usage: lzs(inlist) +""" + zscores = [] + for item in inlist: + zscores.append(z(inlist,item)) + return zscores + + +#################################### +####### TRIMMING FUNCTIONS ####### +#################################### + +def ltrimboth (l,proportiontocut): + """ +Slices off the passed proportion of items from BOTH ends of the passed +list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost' +10% of scores. Assumes list is sorted by magnitude. Slices off LESS if +proportion results in a non-integer slice index (i.e., conservatively +slices off proportiontocut). + +Usage: ltrimboth (l,proportiontocut) +Returns: trimmed version of list l +""" + lowercut = int(proportiontocut*len(l)) + uppercut = len(l) - lowercut + return l[lowercut:uppercut] + + +def ltrim1 (l,proportiontocut,tail='right'): + """ +Slices off the passed proportion of items from ONE end of the passed +list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' +10% of scores). Slices off LESS if proportion results in a non-integer +slice index (i.e., conservatively slices off proportiontocut). + +Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left' +Returns: trimmed version of list l +""" + if tail == 'right': + lowercut = 0 + uppercut = len(l) - int(proportiontocut*len(l)) + elif tail == 'left': + lowercut = int(proportiontocut*len(l)) + uppercut = len(l) + return l[lowercut:uppercut] + + +#################################### +##### CORRELATION FUNCTIONS ###### +#################################### + +def lpaired(x,y): + """ +Interactively determines the type of data and then runs the +appropriated statistic for paired group data. 
+ +Usage: lpaired(x,y) +Returns: appropriate statistic name, value, and probability +""" + samples = '' + while samples not in ['i','r','I','R','c','C']: + print '\nIndependent or related samples, or correlation (i,r,c): ', + samples = raw_input() + + if samples in ['i','I','r','R']: + print '\nComparing variances ...', +# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 + r = obrientransform(x,y) + f,p = F_oneway(pstat.colex(r,0),pstat.colex(r,1)) + if p<0.05: + vartype='unequal, p='+str(round(p,4)) + else: + vartype='equal' + print vartype + if samples in ['i','I']: + if vartype[0]=='e': + t,p = ttest_ind(x,y,0) + print '\nIndependent samples t-test: ', round(t,4),round(p,4) + else: + if len(x)>20 or len(y)>20: + z,p = ranksums(x,y) + print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4) + else: + u,p = mannwhitneyu(x,y) + print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4) + + else: # RELATED SAMPLES + if vartype[0]=='e': + t,p = ttest_rel(x,y,0) + print '\nRelated samples t-test: ', round(t,4),round(p,4) + else: + t,p = ranksums(x,y) + print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4) + else: # CORRELATION ANALYSIS + corrtype = '' + while corrtype not in ['c','C','r','R','d','D']: + print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ', + corrtype = raw_input() + if corrtype in ['c','C']: + m,b,r,p,see = linregress(x,y) + print '\nLinear regression for continuous variables ...' + lol = [['Slope','Intercept','r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]] + pstat.printcc(lol) + elif corrtype in ['r','R']: + r,p = spearmanr(x,y) + print '\nCorrelation for ranked variables ...' + print "Spearman's r: ",round(r,4),round(p,4) + else: # DICHOTOMOUS + r,p = pointbiserialr(x,y) + print '\nAssuming x contains a dichotomous variable ...' 
+ print 'Point Biserial r: ',round(r,4),round(p,4) + print '\n\n' + return None + + +def lpearsonr(x,y): + """ +Calculates a Pearson correlation coefficient and the associated +probability value. Taken from Heiman's Basic Statistics for the Behav. +Sci (2nd), p.195. + +Usage: lpearsonr(x,y) where x and y are equal-length lists +Returns: Pearson's r value, two-tailed p-value +""" + TINY = 1.0e-30 + if len(x) <> len(y): + raise ValueError, 'Input values not paired in pearsonr. Aborting.' + n = len(x) + x = map(float,x) + y = map(float,y) + xmean = mean(x) + ymean = mean(y) + r_num = n*(summult(x,y)) - sum(x)*sum(y) + r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y))) + r = (r_num / r_den) # denominator already a float + df = n-2 + t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) + prob = betai(0.5*df,0.5,df/float(df+t*t)) + return r, prob + + +def lspearmanr(x,y): + """ +Calculates a Spearman rank-order correlation coefficient. Taken +from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. + +Usage: lspearmanr(x,y) where x and y are equal-length lists +Returns: Spearman's r, two-tailed p-value +""" + TINY = 1e-30 + if len(x) <> len(y): + raise ValueError, 'Input values not paired in spearmanr. Aborting.' + n = len(x) + rankx = rankdata(x) + ranky = rankdata(y) + dsq = sumdiffsquared(rankx,ranky) + rs = 1 - 6*dsq / float(n*(n**2-1)) + t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs))) + df = n-2 + probrs = betai(0.5*df,0.5,df/(df+t*t)) # t already a float +# probability values for rs are from part 2 of the spearman function in +# Numerical Recipies, p.510. They are close to tables, but not exact. (?) + return rs, probrs + + +def lpointbiserialr(x,y): + """ +Calculates a point-biserial correlation coefficient and the associated +probability value. Taken from Heiman's Basic Statistics for the Behav. +Sci (1st), p.194. 
+ +Usage: lpointbiserialr(x,y) where x,y are equal-length lists +Returns: Point-biserial r, two-tailed p-value +""" + TINY = 1e-30 + if len(x) <> len(y): + raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.' + data = pstat.abut(x,y) + categories = pstat.unique(x) + if len(categories) <> 2: + raise ValueError, "Exactly 2 categories required for pointbiserialr()." + else: # there are 2 categories, continue + codemap = pstat.abut(categories,range(2)) + recoded = pstat.recode(data,codemap,0) + x = pstat.linexand(data,0,categories[0]) + y = pstat.linexand(data,0,categories[1]) + xmean = mean(pstat.colex(x,1)) + ymean = mean(pstat.colex(y,1)) + n = len(data) + adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n))) + rpb = (ymean - xmean)/samplestdev(pstat.colex(data,1))*adjust + df = n-2 + t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY))) + prob = betai(0.5*df,0.5,df/(df+t*t)) # t already a float + return rpb, prob + + +def lkendalltau(x,y): + """ +Calculates Kendall's tau ... correlation of ordinal data. Adapted +from function kendl1 in Numerical Recipies. Needs good test-routine.@@@ + +Usage: lkendalltau(x,y) +Returns: Kendall's tau, two-tailed p-value +""" + n1 = 0 + n2 = 0 + iss = 0 + for j in range(len(x)-1): + for k in range(j,len(y)): + a1 = x[j] - x[k] + a2 = y[j] - y[k] + aa = a1 * a2 + if (aa): # neither list has a tie + n1 = n1 + 1 + n2 = n2 + 1 + if aa > 0: + iss = iss + 1 + else: + iss = iss -1 + else: + if (a1): + n1 = n1 + 1 + else: + n2 = n2 + 1 + tau = iss / math.sqrt(n1*n2) + svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1)) + z = tau / math.sqrt(svar) + prob = erfcc(abs(z)/1.4142136) + return tau, prob + + +def llinregress(x,y): + """ +Calculates a regression line on x,y pairs. + +Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates +Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate +""" + TINY = 1.0e-20 + if len(x) <> len(y): + raise ValueError, 'Input values not paired in linregress. 
Aborting.' + n = len(x) + x = map(float,x) + y = map(float,y) + xmean = mean(x) + ymean = mean(y) + r_num = float(n*(summult(x,y)) - sum(x)*sum(y)) + r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y))) + r = r_num / r_den + z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY)) + df = n-2 + t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) + prob = betai(0.5*df,0.5,df/(df+t*t)) + slope = r_num / float(n*ss(x) - square_of_sums(x)) + intercept = ymean - slope*xmean + sterrest = math.sqrt(1-r*r)*samplestdev(y) + return slope, intercept, r, prob, sterrest + + +#################################### +##### INFERENTIAL STATISTICS ##### +#################################### + +def lttest_1samp(a,popmean,printit=0,name='Sample',writemode='a'): + """ +Calculates the t-obtained for the independent samples T-test on ONE group +of scores a, given a population mean. If printit=1, results are printed +to the screen. If printit='filename', the results are output to 'filename' +using the given writemode (default=append). Returns t-value, and prob. + +Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') +Returns: t-value, two-tailed prob +""" + x = mean(a) + v = var(a) + n = len(a) + df = n-1 + svar = ((n-1)*v)/float(df) + t = (x-popmean)/math.sqrt(svar*(1.0/n)) + prob = betai(0.5*df,0.5,float(df)/(df+t*t)) + + if printit <> 0: + statname = 'Single-sample T-test.' + outputpairedstats(printit,writemode, + 'Population','--',popmean,0,0,0, + name,n,x,v,min(a),max(a), + statname,t,prob) + return t,prob + + +def lttest_ind (a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'): + """ +Calculates the t-obtained T-test on TWO INDEPENDENT samples of +scores a, and b. From Numerical Recipies, p.483. If printit=1, results +are printed to the screen. If printit='filename', the results are output +to 'filename' using the given writemode (default=append). Returns t-value, +and prob. 
+ +Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a') +Returns: t-value, two-tailed prob +""" + x1 = mean(a) + x2 = mean(b) + v1 = stdev(a)**2 + v2 = stdev(b)**2 + n1 = len(a) + n2 = len(b) + df = n1+n2-2 + svar = ((n1-1)*v1+(n2-1)*v2)/float(df) + t = (x1-x2)/math.sqrt(svar*(1.0/n1 + 1.0/n2)) + prob = betai(0.5*df,0.5,df/(df+t*t)) + + if printit <> 0: + statname = 'Independent samples T-test.' + outputpairedstats(printit,writemode, + name1,n1,x1,v1,min(a),max(a), + name2,n2,x2,v2,min(b),max(b), + statname,t,prob) + return t,prob + + +def lttest_rel (a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a'): + """ +Calculates the t-obtained T-test on TWO RELATED samples of scores, +a and b. From Numerical Recipies, p.483. If printit=1, results are +printed to the screen. If printit='filename', the results are output to +'filename' using the given writemode (default=append). Returns t-value, +and prob. + +Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a') +Returns: t-value, two-tailed prob +""" + if len(a)<>len(b): + raise ValueError, 'Unequal length lists in ttest_rel.' + x1 = mean(a) + x2 = mean(b) + v1 = var(a) + v2 = var(b) + n = len(a) + cov = 0 + for i in range(len(a)): + cov = cov + (a[i]-x1) * (b[i]-x2) + df = n-1 + cov = cov / float(df) + sd = math.sqrt((v1+v2 - 2.0*cov)/float(n)) + t = (x1-x2)/sd + prob = betai(0.5*df,0.5,df/(df+t*t)) + + if printit <> 0: + statname = 'Related samples T-test.' + outputpairedstats(printit,writemode, + name1,n,x1,v1,min(a),max(a), + name2,n,x2,v2,min(b),max(b), + statname,t,prob) + return t, prob + + +def lchisquare(f_obs,f_exp=None): + """ +Calculates a one-way chi square for list of observed frequencies and returns +the result. If no expected frequencies are given, the total N is assumed to +be equally distributed across all groups. + +Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq. 
+Returns: chisquare-statistic, associated p-value +""" + k = len(f_obs) # number of groups + if f_exp == None: + f_exp = [sum(f_obs)/float(k)] * len(f_obs) # create k bins with = freq. + chisq = 0 + for i in range(len(f_obs)): + chisq = chisq + (f_obs[i]-f_exp[i])**2 / float(f_exp[i]) + return chisq, chisqprob(chisq, k-1) + + +def lks_2samp (data1,data2): + """ +Computes the Kolmogorov-Smirnof statistic on 2 samples. From +Numerical Recipies in C, page 493. + +Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions +Returns: KS D-value, associated p-value +""" + j1 = 0 + j2 = 0 + fn1 = 0.0 + fn2 = 0.0 + n1 = len(data1) + n2 = len(data2) + en1 = n1 + en2 = n2 + d = 0.0 + data1.sort() + data2.sort() + while j1 < n1 and j2 < n2: + d1=data1[j1] + d2=data2[j2] + if d1 <= d2: + fn1 = (j1)/float(en1) + j1 = j1 + 1 + if d2 <= d1: + fn2 = (j2)/float(en2) + j2 = j2 + 1 + dt = (fn2-fn1) + if math.fabs(dt) > math.fabs(d): + d = dt + try: + en = math.sqrt(en1*en2/float(en1+en2)) + prob = ksprob((en+0.12+0.11/en)*abs(d)) + except: + prob = 1.0 + return d, prob + + +def lmannwhitneyu(x,y): + """ +Calculates a Mann-Whitney U statistic on the provided scores and +returns the result. Use only when the n in each condition is < 20 and +you have 2 independent samples of ranks. NOTE: Mann-Whitney U is +significant if the u-obtained is LESS THAN or equal to the critical +value of U found in the tables. Equivalent to Kruskal-Wallis H with +just 2 groups. 
+ +Usage: lmannwhitneyu(data) +Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) +""" + n1 = len(x) + n2 = len(y) + ranked = rankdata(x+y) + rankx = ranked[0:n1] # get the x-ranks + ranky = ranked[n1:] # the rest are y-ranks + u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x + u2 = n1*n2 - u1 # remainder is U for y + bigu = max(u1,u2) + smallu = min(u1,u2) + T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores + if T == 0: + raise ValueError, 'All numbers are identical in lmannwhitneyu' + sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0) + z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc + return smallu, 1.0 - zprob(z) + + +def ltiecorrect(rankvals): + """ +Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See +Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences. +New York: McGraw-Hill. Code adapted from |Stat rankind.c code. + +Usage: ltiecorrect(rankvals) +Returns: T correction factor for U or H +""" + sorted,posn = shellsort(rankvals) + n = len(sorted) + T = 0.0 + i = 0 + while (i 20 and you +have 2 independent samples of ranks. + +Usage: lranksums(x,y) +Returns: a z-statistic, two-tailed p-value +""" + n1 = len(x) + n2 = len(y) + alldata = x+y + ranked = rankdata(alldata) + x = ranked[:n1] + y = ranked[n1:] + s = sum(x) + expected = n1*(n1+n2+1) / 2.0 + z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0) + prob = 2*(1.0 -zprob(abs(z))) + return z, prob + + +def lwilcoxont(x,y): + """ +Calculates the Wilcoxon T-test for related samples and returns the +result. A non-parametric T-test. + +Usage: lwilcoxont(x,y) +Returns: a t-statistic, two-tail probability estimate +""" + if len(x) <> len(y): + raise ValueError, 'Unequal N in wilcoxont. Aborting.' 
def lfriedmanchisquare(*args):
    """
    Friedman Chi-Square is a non-parametric, one-way within-subjects
    ANOVA.  This function calculates the Friedman Chi-square test for
    repeated measures and returns the result, along with the associated
    probability value.  It assumes 3 or more repeated measures.  Only 3
    levels requires a minimum of 10 subjects in the study.  Four levels
    requires 5 subjects per level(??).

    Each positional argument holds the scores for one level (condition),
    in the same subject order.  Scores are ranked within each subject and
    the statistic is built from the per-level rank sums:

        chisq = 12/(n*k*(k+1)) * sum_j(R_j**2) - 3*n*(k+1)

    Usage:   lfriedmanchisquare(*args)
    Returns: chi-square statistic, associated p-value
    Raises:  ValueError if fewer than 3 levels are supplied
    """
    k = len(args)
    if k < 3:
        raise ValueError('Less than 3 levels. Friedman test not appropriate.')
    n = len(args[0])
    # NOTE(review): relies on pstat.abut placing the levels side by side,
    # giving one row per subject with k columns -- confirm against pstat.
    data = pstat.abut(*args)
    for i in range(len(data)):
        data[i] = rankdata(data[i])     # rank each subject's scores across levels
    ssbn = 0
    for j in range(k):
        # Sum the RANKS for level j (the previous code summed the raw scores
        # in args[j], so the ranking above had no effect on the result).
        # A real list is built because the module-level sum() dispatches on
        # list/tuple types only.
        ssbn = ssbn + sum([row[j] for row in data])**2
    chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
    return chisq, chisqprob(chisq,k-1)
def lksprob(alam):
    """
    Computes a Kolmogorov-Smirnov significance level by summing the
    alternating series  2 * sum_j (-1)**(j-1) * exp(-2 * j**2 * alam**2).
    Adapted from Numerical Recipies.

    Usage:   lksprob(alam)
    Returns: the partial sum once a term becomes negligible, or 1.0 if the
             series has not converged after 200 terms
    """
    coefficient = 2.0                    # flips sign on every term
    total = 0.0
    previous_size = 0.0                  # |term| from the preceding iteration
    exponent_scale = -2.0*alam*alam
    for j in range(1,201):
        term = coefficient*math.exp(exponent_scale*j*j)
        total = total + term
        size = math.fabs(term)
        # Stop when the term is tiny relative to the previous term or to
        # the running total.
        if size <= (0.001*previous_size) or size < (1.0e-8*total):
            return total
        coefficient = -coefficient
        previous_size = size
    return 1.0             # Get here only if fails to converge; was 0.0!!
def lbetacf(a,b,x):
    """
    This function evaluates the continued fraction form of the incomplete
    Beta function, betai.  (Adapted from: Numerical Recipies in C.)

    The recurrence is iterated until two successive estimates agree to a
    relative tolerance of EPS.  If that does not happen within ITMAX+1
    iterations, a warning is printed and None is returned.

    Usage:   lbetacf(a,b,x)
    Returns: value of the continued fraction, or None on non-convergence
    """
    ITMAX = 200
    EPS = 3.0e-7

    bm = az = am = 1.0
    qab = a+b
    qap = a+1.0
    qam = a-1.0
    bz = 1.0-qab*x/qap
    for i in range(ITMAX+1):
        em = float(i+1)
        tem = em + em
        # even step of the recurrence
        d = em*(b-em)*x/((qam+tem)*(a+tem))
        ap = az + d*am
        bp = bz+d*bm
        # odd step of the recurrence
        d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))
        app = ap+d*az
        bpp = bp+d*bz
        aold = az
        # renormalise by bpp so the new denominator bz is 1
        am = ap/bpp
        bm = bp/bpp
        az = app/bpp
        bz = 1.0
        if (abs(az-aold)<(EPS*abs(az))):
            return az
    # Call form of print works on both Python 2 and 3; the bare print
    # statement made the module unimportable on Python 3.
    print('a or b too big, or ITMAX too small in Betacf.')
    return None
def lF_value (ER,EF,dfnum,dfden):
    """
    Returns an F-statistic given the following:
        ER      = error associated with the null hypothesis (the Restricted model)
        EF      = error associated with the alternate hypothesis (the Full model)
        dfR-dfF = degrees of freedom of the numerator
        dfF     = degrees of freedom associated with the denominator/Full model

    Usage:   lF_value(ER,EF,dfnum,dfden)
    Returns: ((ER-EF)/dfnum) / (EF/dfden)
    """
    # error reduction per numerator degree of freedom
    numerator = (ER-EF)/float(dfnum)
    # full-model error per denominator degree of freedom
    denominator = EF/float(dfden)
    return numerator / denominator
def lincr(l,cap):        # to increment a list up to a max-list of 'cap'
    """
    Simulate a counting system from an n-dimensional list.  Position 0 is
    the fastest-moving digit; a digit that exceeds its cap is reset to 0
    and carries into the next position.  The list is modified in place.

    Usage:   lincr(l,cap)   l=list to increment, cap=max values for each list pos'n
    Returns: next set of values for list l, OR -1 (if overflow)
    """
    last = len(l)-1
    l[0] = l[0] + 1     # e.g., [0,0,0] --> [2,4,3] (=cap)
    for pos in range(len(l)):
        if not l[pos] > cap[pos]:
            continue                    # this digit is within its cap
        if pos < last:
            # carry into the next position
            l[pos] = 0
            l[pos+1] = l[pos+1] + 1
        else:
            # overflow past last column, must be finished
            return -1
    return l
def lshellsort(inlist):
    """
    Shellsort algorithm.  Sorts a 1D-list (or tuple) into ascending order
    without modifying the input.

    Usage:   lshellsort(inlist)
    Returns: sorted-inlist, sorting-index-vector (for original list), both
             as new lists, such that sorted[i] == inlist[index[i]]
    """
    n = len(inlist)
    # Work on fresh lists: a copied tuple stays a tuple, and on Python 3
    # range() is immutable, so neither could be swapped in place.  Elements
    # are only moved, never mutated, so a shallow copy is sufficient.
    svec = list(inlist)
    ivec = list(range(n))
    gap = n // 2   # integer division needed
    while gap > 0:
        for i in range(gap,n):
            for j in range(i-gap,-1,-gap):
                # Swap an out-of-order pair, mirroring the swap in ivec so
                # it keeps tracking each element's original position.
                if svec[j] > svec[j+gap]:
                    svec[j], svec[j+gap] = svec[j+gap], svec[j]
                    ivec[j], ivec[j+gap] = ivec[j+gap], ivec[j]
        gap = gap // 2  # integer division needed
    # svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]]
    return svec, ivec
def lfindwithin (data):
    """
    Returns an integer representing a binary vector, where 1=within-
    subject factor, 0=between.  Input equals the entire data 2D list (i.e.,
    column 0=random factor, column -1=measured values; those two are
    skipped).  Bit number `col` of the result is set when factor column
    `col` is judged to be within-subject.
    Note: input data is in |Stat format ... a list of lists ("2D list") with
    one row per measured value, first column=subject identifier, last column=
    score, one in-between column per factor (these columns contain level
    designations on each factor).  See also stats.anova.__doc__.

    Usage:   lfindwithin(data)     data in |Stat format
    Returns: integer bit-vector of within-subject factors
    """

    numfact = len(data[0])-1
    withinvec = 0
    # Walk the factor columns only: column 0 (subject) and the last column
    # (score) are excluded by the range bounds.
    for col in range(1,numfact):
        # NOTE(review): presumably pstat.colex extracts a column and
        # pstat.unique de-duplicates it -- confirm against pstat.
        examplelevel = pstat.unique(pstat.colex(data,col))[0]
        rows = pstat.linexand(data,col,examplelevel)  # get 1 level of this factor
        factsubjs = pstat.unique(pstat.colex(rows,0))
        allsubjs = pstat.unique(pstat.colex(data,0))
        # Same number of distinct subjects at this single level as in the
        # whole data set --> treat the factor as within-subject.
        if len(factsubjs) == len(allsubjs):
            withinvec = withinvec + (1 << col)
    return withinvec
TupleType)), ) +samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)), ) +samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)), ) +var = Dispatch ( (lvar, (ListType, TupleType)), ) +stdev = Dispatch ( (lstdev, (ListType, TupleType)), ) +sterr = Dispatch ( (lsterr, (ListType, TupleType)), ) +sem = Dispatch ( (lsem, (ListType, TupleType)), ) +z = Dispatch ( (lz, (ListType, TupleType)), ) +zs = Dispatch ( (lzs, (ListType, TupleType)), ) + +## TRIMMING FCNS: +trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)), ) +trim1 = Dispatch ( (ltrim1, (ListType, TupleType)), ) + +## CORRELATION FCNS: +paired = Dispatch ( (lpaired, (ListType, TupleType)), ) +pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)), ) +spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)), ) +pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)), ) +kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)), ) +linregress = Dispatch ( (llinregress, (ListType, TupleType)), ) + +## INFERENTIAL STATS: +ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)), ) +ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)), ) +ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)), ) +chisquare = Dispatch ( (lchisquare, (ListType, TupleType)), ) +ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)), ) +mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)), ) +ranksums = Dispatch ( (lranksums, (ListType, TupleType)), ) +tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)), ) +wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)), ) +kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)), ) +friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)), ) + +## PROBABILITY CALCS: +chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)), ) +zprob = Dispatch ( (lzprob, (IntType, FloatType)), ) +ksprob = Dispatch ( (lksprob, (IntType, FloatType)), ) +fprob = Dispatch ( (lfprob, (IntType, FloatType)), 
) +betacf = Dispatch ( (lbetacf, (IntType, FloatType)), ) +betai = Dispatch ( (lbetai, (IntType, FloatType)), ) +erfcc = Dispatch ( (lerfcc, (IntType, FloatType)), ) +gammln = Dispatch ( (lgammln, (IntType, FloatType)), ) + +## ANOVA FUNCTIONS: +F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)), ) +F_value = Dispatch ( (lF_value, (ListType, TupleType)), ) + +## SUPPORT FUNCTIONS: +incr = Dispatch ( (lincr, (ListType, TupleType)), ) +sum = Dispatch ( (lsum, (ListType, TupleType)), ) +cumsum = Dispatch ( (lcumsum, (ListType, TupleType)), ) +ss = Dispatch ( (lss, (ListType, TupleType)), ) +summult = Dispatch ( (lsummult, (ListType, TupleType)), ) +square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)), ) +sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)), ) +shellsort = Dispatch ( (lshellsort, (ListType, TupleType)), ) +rankdata = Dispatch ( (lrankdata, (ListType, TupleType)), ) +findwithin = Dispatch ( (lfindwithin, (ListType, TupleTypetry: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE + import Numeric + N = Numeric + import LinearAlgebra + LA = LinearAlgebra + + +##################################### +######## ACENTRAL TENDENCY ######## +##################################### + + def ageometricmean (inarray,dimension=None,keepdims=0): + """ +Calculates the geometric mean of the values in the passed array. +That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in +the passed array. Use dimension=None to flatten array first. REMEMBER: if +dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and +if dimension is a sequence, it collapses over all specified dimensions. If +keepdims is set to 1, the resulting array will have as many dimensions as +inarray, with only 1 'level' per dim that was collapsed over. 
+ +Usage: ageometricmean(inarray,dimension=None,keepdims=0) +Returns: geometric mean computed over dim(s) listed in dimension +""" + inarray = N.array(inarray,N.Float) + if dimension == None: + inarray = N.ravel(inarray) + size = len(inarray) + mult = N.power(inarray,1.0/size) + mult = N.multiply.reduce(mult) + elif type(dimension) in [IntType,FloatType]: + size = inarray.shape[dimension] + mult = N.power(inarray,1.0/size) + mult = N.multiply.reduce(mult,dimension) + if keepdims == 1: + shp = list(inarray.shape) + shp[dimension] = 1 + sum = N.reshape(sum,shp) + else: # must be a SEQUENCE of dims to average over + dims = list(dimension) + dims.sort() + dims.reverse() + size = N.array(N.multiply.reduce(N.take(inarray.shape,dims)),N.Float) + mult = N.power(inarray,1.0/size) + for dim in dims: + mult = N.multiply.reduce(mult,dim) + if keepdims == 1: + shp = list(inarray.shape) + for dim in dims: + shp[dim] = 1 + mult = N.reshape(mult,shp) + return mult + + + def aharmonicmean (inarray,dimension=None,keepdims=0): + """ +Calculates the harmonic mean of the values in the passed array. +That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in +the passed array. Use dimension=None to flatten array first. REMEMBER: if +dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and +if dimension is a sequence, it collapses over all specified dimensions. If +keepdims is set to 1, the resulting array will have as many dimensions as +inarray, with only 1 'level' per dim that was collapsed over. 
    def amean (inarray,dimension=None,keepdims=0):
        """
        Calculates the arithmetic mean of the values in the passed array.
        That is:  1/n * (x1 + x2 + ... + xn).  Defaults to ALL values in the
        passed array.  Use dimension=None to flatten array first.  REMEMBER: if
        dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
        if dimension is a sequence, it collapses over all specified dimensions.  If
        keepdims is set to 1, the resulting array will have as many dimensions as
        inarray, with only 1 'level' per dim that was collapsed over.

        Usage:   amean(inarray,dimension=None,keepdims=0)
        Returns: arithmetic mean calculated over dim(s) in dimension
        """
        # Arrays with typecode 'l', 's' or 'b' are converted to Float before
        # any arithmetic is done on them.
        if inarray.typecode() in ['l','s','b']:
            inarray = inarray.astype(N.Float)
        if dimension == None:
            # flatten, then average everything
            inarray = N.ravel(inarray)
            sum = N.add.reduce(inarray)
            denom = float(len(inarray))
        elif type(dimension) in [IntType,FloatType]:
            # single dimension: sum along it, divide by its length
            sum = asum(inarray,dimension)
            denom = float(inarray.shape[dimension])
            if keepdims == 1:
                shp = list(inarray.shape)
                shp[dimension] = 1
                sum = N.reshape(sum,shp)
        else: # must be a TUPLE of dims to average over
            dims = list(dimension)
            dims.sort()
            dims.reverse()      # reduce over the highest dim first
            sum = inarray *1.0
            for dim in dims:
                sum = N.add.reduce(sum,dim)
            # number of elements collapsed = product of the reduced dim sizes
            denom = N.array(N.multiply.reduce(N.take(inarray.shape,dims)),N.Float)
            if keepdims == 1:
                shp = list(inarray.shape)
                for dim in dims:
                    shp[dim] = 1
                sum = N.reshape(sum,shp)
        return sum/denom
    def amedianscore (inarray,dimension=None):
        """
        Returns the 'middle' score of the passed array.  If there is an even
        number of scores, the mean of the 2 middle scores is returned.  Can function
        with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can
        be None, to pre-flatten the array, or else dimension must equal 0).

        Usage:   amedianscore(inarray,dimension=None)
        Returns: 'middle' score of the array, or the mean of the 2 middle scores
        """
        if dimension == None:
            inarray = N.ravel(inarray)
            dimension = 0
        inarray = N.sort(inarray,dimension)
        if inarray.shape[dimension] % 2 == 0:   # if even number of elements
            indx = inarray.shape[dimension]/2   # integer division correct
            # NOTE(review): plain indexing selects along the first axis,
            # which matches the documented dimension==0 restriction.
            median = N.asarray(inarray[indx]+inarray[indx-1]) / 2.0
        else:
            indx = inarray.shape[dimension] / 2 # integer division correct
            median = N.take(inarray,[indx],dimension)
            # unwrap a one-element result so a scalar is returned
            if median.shape == (1,):
                median = median[0]
        return median
+ +Usage: amode(a, dimension=None) +Returns: array of bin-counts for mode(s), array of corresponding modal values +""" + + if dimension == None: + a = N.ravel(a) + dimension = 0 + scores = pstat.aunique(N.ravel(a)) # get ALL unique values + testshape = list(a.shape) + testshape[dimension] = 1 + oldmostfreq = N.zeros(testshape) + oldcounts = N.zeros(testshape) + for score in scores: + template = N.equal(a,score) + counts = asum(template,dimension,1) + mostfrequent = N.where(N.greater(counts,oldcounts),score,oldmostfreq) + oldcounts = N.where(N.greater(counts,oldcounts),counts,oldcounts) + oldmostfreq = mostfrequent + return oldcounts, mostfrequent + + + def atmean(a,limits=None,inclusive=(1,1)): + """ +Returns the arithmetic mean of all values in an array, ignoring values +strictly outside the sequence passed to 'limits'. Note: either limit +in the sequence, or the value of limits itself, can be set to None. The +inclusive list/tuple determines whether the lower and upper limiting bounds +(respectively) are open/exclusive (0) or closed/inclusive (1). + +Usage: atmean(a,limits=None,inclusive=(1,1)) +""" + if a.typecode() in ['l','s','b']: + a = a.astype(N.Float) + if limits == None: + return mean(a) + assert type(limits) in [ListType,TupleType,N.ArrayType], "Wrong type for limits in atmean" + if inclusive[0]: lowerfcn = N.greater_equal + else: lowerfcn = N.greater + if inclusive[1]: upperfcn = N.less_equal + else: upperfcn = N.less + if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)): + raise ValueError, "No array values within given limits (atmean)." 
+ elif limits[0]==None and limits[1]<>None: + mask = upperfcn(a,limits[1]) + elif limits[0]<>None and limits[1]==None: + mask = lowerfcn(a,limits[0]) + elif limits[0]<>None and limits[1]<>None: + mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1]) + s = float(N.add.reduce(N.ravel(a*mask))) + n = float(N.add.reduce(N.ravel(mask))) + return s/n + + + def atvar(a,limits=None,inclusive=(1,1)): + """ +Returns the sample variance of values in an array, (i.e., using N-1), +ignoring values strictly outside the sequence passed to 'limits'. +Note: either limit in the sequence, or the value of limits itself, +can be set to None. The inclusive list/tuple determines whether the lower +and upper limiting bounds (respectively) are open/exclusive (0) or +closed/inclusive (1). + +Usage: atvar(a,limits=None,inclusive=(1,1)) +""" + a = a.astype(N.Float) + if limits == None or limits == [None,None]: + term1 = N.add.reduce(N.ravel(a*a)) + n = float(len(N.ravel(a))) - 1 + term2 = N.add.reduce(N.ravel(a))**2 / n + print term1, term2, n + return (term1 - term2) / n + assert type(limits) in [ListType,TupleType,N.ArrayType], "Wrong type for limits in atvar" + if inclusive[0]: lowerfcn = N.greater_equal + else: lowerfcn = N.greater + if inclusive[1]: upperfcn = N.less_equal + else: upperfcn = N.less + if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)): + raise ValueError, "No array values within given limits (atvar)." 
+ elif limits[0]==None and limits[1]<>None: + mask = upperfcn(a,limits[1]) + elif limits[0]<>None and limits[1]==None: + mask = lowerfcn(a,limits[0]) + elif limits[0]<>None and limits[1]<>None: + mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1]) + term1 = N.add.reduce(N.ravel(a*a*mask)) + n = float(N.add.reduce(N.ravel(mask))) - 1 + term2 = N.add.reduce(N.ravel(a*mask))**2 / n + print term1, term2, n + return (term1 - term2) / n + + + def atmin(a,lowerlimit=None,dimension=None,inclusive=1): + """ +Returns the minimum value of a, along dimension, including only values less +than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None, +all values in the array are used. + +Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1) +""" + if inclusive: lowerfcn = N.greater + else: lowerfcn = N.greater_equal + if dimension == None: + a = N.ravel(a) + dimension = 0 + if lowerlimit == None: + lowerlimit = N.minimum.reduce(N.ravel(a))-11 + biggest = N.maximum.reduce(N.ravel(a)) + ta = N.where(lowerfcn(a,lowerlimit),a,biggest) + return N.minimum.reduce(ta,dimension) + + + def atmax(a,upperlimit,dimension=None,inclusive=1): + """ +Returns the maximum value of a, along dimension, including only values greater +than (or equal to, if inclusive=1) upperlimit. If the limit is set to None, +a limit larger than the max value in the array is used. + +Usage: atmax(a,upperlimit,dimension=None,inclusive=1) +""" + if inclusive: upperfcn = N.less + else: upperfcn = N.less_equal + if dimension == None: + a = N.ravel(a) + dimension = 0 + if upperlimit == None: + upperlimit = N.maximum.reduce(N.ravel(a))+1 + smallest = N.minimum.reduce(N.ravel(a)) + ta = N.where(upperfcn(a,upperlimit),a,smallest) + return N.maximum.reduce(ta,dimension) + + + def atstdev(a,limits=None,inclusive=(1,1)): + """ +Returns the standard deviation of all values in an array, ignoring values +strictly outside the sequence passed to 'limits'. 
Note: either limit +in the sequence, or the value of limits itself, can be set to None. The +inclusive list/tuple determines whether the lower and upper limiting bounds +(respectively) are open/exclusive (0) or closed/inclusive (1). + +Usage: atstdev(a,limits=None,inclusive=(1,1)) +""" + return N.sqrt(tvar(a,limits,inclusive)) + + + def atsem(a,limits=None,inclusive=(1,1)): + """ +Returns the standard error of the mean for the values in an array, +(i.e., using N for the denominator), ignoring values strictly outside +the sequence passed to 'limits'. Note: either limit in the sequence, +or the value of limits itself, can be set to None. The inclusive list/tuple +determines whether the lower and upper limiting bounds (respectively) are +open/exclusive (0) or closed/inclusive (1). + +Usage: atsem(a,limits=None,inclusive=(1,1)) +""" + sd = tstdev(a,limits,inclusive) + if limits == None or limits == [None,None]: + n = float(len(N.ravel(a))) + assert type(limits) in [ListType,TupleType,N.ArrayType], "Wrong type for limits in atsem" + if inclusive[0]: lowerfcn = N.greater_equal + else: lowerfcn = N.greater + if inclusive[1]: upperfcn = N.less_equal + else: upperfcn = N.less + if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)): + raise ValueError, "No array values within given limits (atsem)." + elif limits[0]==None and limits[1]<>None: + mask = upperfcn(a,limits[1]) + elif limits[0]<>None and limits[1]==None: + mask = lowerfcn(a,limits[0]) + elif limits[0]<>None and limits[1]<>None: + mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1]) + term1 = N.add.reduce(N.ravel(a*a*mask)) + n = float(N.add.reduce(N.ravel(mask))) + return sd/math.sqrt(n) + + +##################################### +############ AMOMENTS ############# +##################################### + + def amoment(a,moment=1,dimension=None): + """ +Calculates the nth moment about the mean for a sample (defaults to the +1st moment). 
Generally used to calculate coefficients of skewness and +kurtosis. Dimension can equal None (ravel array first), an integer +(the dimension over which to operate), or a sequence (operate over +multiple dimensions). + +Usage: amoment(a,moment=1,dimension=None) +Returns: appropriate moment along given dimension +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + if moment == 1: + return 0.0 + else: + mn = amean(a,dimension,1) # 1=keepdims + s = N.power((a-mn),moment) + return amean(s,dimension) + + + def avariation(a,dimension=None): + """ +Returns the coefficient of variation, as defined in CRC Standard +Probability and Statistics, p.6. Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). + +Usage: avariation(a,dimension=None) +""" + return 100.0*asamplestdev(a,dimension)/amean(a,dimension) + + + def askew(a,dimension=None): + """ +Returns the skewness of a distribution (normal ==> 0.0; >0 means extra +weight in left tail). Use askewtest() to see if it's close enough. +Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). + +Usage: askew(a, dimension=None) +Returns: skew of vals in a along dimension, returning ZERO where all vals equal +""" + denom = N.power(amoment(a,2,dimension),1.5) + zero = N.equal(denom,0) + if type(denom) == N.ArrayType and asum(zero) <> 0: + print "Number of zeros in askew: ",asum(zero) + denom = denom + zero # prevent divide-by-zero + return N.where(zero, 0, amoment(a,3,dimension)/denom) + + + def akurtosis(a,dimension=None): + """ +Returns the kurtosis of a distribution (normal ==> 3.0; >3 means +heavier in the tails, and usually more peaked). Use akurtosistest() +to see if it's close enough. Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). 
+ +Usage: akurtosis(a,dimension=None) +Returns: kurtosis of values in a along dimension, and ZERO where all vals equal +""" + denom = N.power(amoment(a,2,dimension),2) + zero = N.equal(denom,0) + if type(denom) == N.ArrayType and asum(zero) <> 0: + print "Number of zeros in akurtosis: ",asum(zero) + denom = denom + zero # prevent divide-by-zero + return N.where(zero,0,amoment(a,4,dimension)/denom) + + + def adescribe(inarray,dimension=None): + """ +Returns several descriptive statistics of the passed array. Dimension +can equal None (ravel array first), an integer (the dimension over +which to operate), or a sequence (operate over multiple dimensions). + +Usage: adescribe(inarray,dimension=None) +Returns: n, (min,max), mean, standard deviation, skew, kurtosis +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + n = inarray.shape[dimension] + mm = (N.minimum.reduce(inarray),N.maximum.reduce(inarray)) + m = amean(inarray,dimension) + sd = astdev(inarray,dimension) + skew = askew(inarray,dimension) + kurt = akurtosis(inarray,dimension) + return n, mm, m, sd, skew, kurt + + +##################################### +######## NORMALITY TESTS ########## +##################################### + + def askewtest(a,dimension=None): + """ +Tests whether the skew is significantly different from a normal +distribution. Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). 
+ +Usage: askewtest(a,dimension=None) +Returns: z-score and 2-tail z-probability +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + b2 = askew(a,dimension) + n = float(a.shape[dimension]) + y = b2 * N.sqrt(((n+1)*(n+3)) / (6.0*(n-2)) ) + beta2 = ( 3.0*(n*n+27*n-70)*(n+1)*(n+3) ) / ( (n-2.0)*(n+5)*(n+7)*(n+9) ) + W2 = -1 + N.sqrt(2*(beta2-1)) + delta = 1/N.sqrt(N.log(N.sqrt(W2))) + alpha = N.sqrt(2/(W2-1)) + y = N.where(N.equal(y,0),1,y) + Z = delta*N.log(y/alpha + N.sqrt((y/alpha)**2+1)) + return Z, (1.0-zprob(Z))*2 + + + def akurtosistest(a,dimension=None): + """ +Tests whether a dataset has normal kurtosis (i.e., +kurtosis=3(n-1)/(n+1)) Valid only for n>20. Dimension can equal None +(ravel array first), an integer (the dimension over which to operate), +or a sequence (operate over multiple dimensions). + +Usage: akurtosistest(a,dimension=None) +Returns: z-score and 2-tail z-probability, returns 0 for bad pixels +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + n = float(a.shape[dimension]) + if n<20: + print "akurtosistest only valid for n>=20 ... continuing anyway, n=",n + b2 = akurtosis(a,dimension) + E = 3.0*(n-1) /(n+1) + varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1)*(n+3)*(n+5)) + x = (b2-E)/N.sqrt(varb2) + sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * N.sqrt((6.0*(n+3)*(n+5))/ + (n*(n-2)*(n-3))) + A = 6.0 + 8.0/sqrtbeta1 *(2.0/sqrtbeta1 + N.sqrt(1+4.0/(sqrtbeta1**2))) + term1 = 1 -2/(9.0*A) + denom = 1 +x*N.sqrt(2/(A-4.0)) + denom = N.where(N.less(denom,0), 99, denom) + term2 = N.where(N.equal(denom,0), term1, N.power((1-2.0/A)/denom,1/3.0)) + Z = ( term1 - term2 ) / N.sqrt(2/(9.0*A)) + Z = N.where(N.equal(denom,99), 0, Z) + return Z, (1.0-zprob(Z))*2 + + + def anormaltest(a,dimension=None): + """ +Tests whether skew and/OR kurtosis of dataset differs from normal +curve. Can operate over multiple dimensions. 
Dimension can equal +None (ravel array first), an integer (the dimension over which to +operate), or a sequence (operate over multiple dimensions). + +Usage: anormaltest(a,dimension=None) +Returns: z-score and 2-tail probability +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + s,p = askewtest(a,dimension) + k,p = akurtosistest(a,dimension) + k2 = N.power(s,2) + N.power(k,2) + return k2, achisqprob(k2,2) + + +##################################### +###### AFREQUENCY FUNCTIONS ####### +##################################### + + def aitemfreq(a): + """ +Returns a 2D array of item frequencies. Column 1 contains item values, +column 2 contains their respective counts. Assumes a 1D array is passed. + +Usage: aitemfreq(a) +Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) +""" + scores = pstat.aunique(a) + scores = N.sort(scores) + freq = N.zeros(len(scores)) + for i in range(len(scores)): + freq[i] = N.add.reduce(N.equal(a,scores[i])) + return N.array(pstat.aabut(scores, freq)) + + + def ascoreatpercentile (inarray, percent): + """ +Usage: ascoreatpercentile(inarray,percent) 0= targetcf: + break + score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i) + return score + + + def apercentileofscore (inarray,score,histbins=10,defaultlimits=None): + """ +Note: result of this function depends on the values used to histogram +the data(!). 
+ +Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None) +Returns: percentile-position of score (0-100) relative to inarray +""" + h, lrl, binsize, extras = histogram(inarray,histbins,defaultlimits) + cumhist = cumsum(h*1) + i = int((score - lrl)/float(binsize)) + pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inarray)) * 100 + return pct + + + def ahistogram (inarray,numbins=10,defaultlimits=None,printextras=1): + """ +Returns (i) an array of histogram bin counts, (ii) the smallest value +of the histogram binning, and (iii) the bin width (the last 2 are not +necessarily integers). Default number of bins is 10. Defaultlimits +can be None (the routine picks bins spanning all the numbers in the +inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the +following: array of bin values, lowerreallimit, binsize, extrapoints. + +Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1) +Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range) +""" + inarray = N.ravel(inarray) # flatten any >1D arrays + if (defaultlimits <> None): + lowerreallimit = defaultlimits[0] + upperreallimit = defaultlimits[1] + binsize = (upperreallimit-lowerreallimit) / float(numbins) + else: + Min = N.minimum.reduce(inarray) + Max = N.maximum.reduce(inarray) + estbinwidth = float(Max - Min)/float(numbins) + 1 + binsize = (Max-Min+estbinwidth)/float(numbins) + lowerreallimit = Min - binsize/2.0 #lower real limit,1st bin + bins = N.zeros(numbins) + extrapoints = 0 + for num in inarray: + try: + if (num-lowerreallimit) < 0: + extrapoints = extrapoints + 1 + else: + bintoincrement = int((num-lowerreallimit) / float(binsize)) + bins[bintoincrement] = bins[bintoincrement] + 1 + except: # point outside lower/upper limits + extrapoints = extrapoints + 1 + if (extrapoints > 0 and printextras == 1): + print '\nPoints outside given histogram range =',extrapoints + return (bins, lowerreallimit, binsize, 
extrapoints) + + + def acumfreq(a,numbins=10,defaultreallimits=None): + """ +Returns a cumulative frequency histogram, using the histogram function. +Defaultreallimits can be None (use all data), or a 2-sequence containing +lower and upper limits on values to include. + +Usage: acumfreq(a,numbins=10,defaultreallimits=None) +Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h,l,b,e = histogram(a,numbins,defaultreallimits) + cumhist = cumsum(h*1) + return cumhist,l,b,e + + + def arelfreq(a,numbins=10,defaultreallimits=None): + """ +Returns a relative frequency histogram, using the histogram function. +Defaultreallimits can be None (use all data), or a 2-sequence containing +lower and upper limits on values to include. + +Usage: arelfreq(a,numbins=10,defaultreallimits=None) +Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h,l,b,e = histogram(a,numbins,defaultreallimits) + h = N.array(h/float(a.shape[0])) + return h,l,b,e + + +##################################### +###### AVARIABILITY FUNCTIONS ##### +##################################### + + def aobrientransform(*args): + """ +Computes a transform on input data (any number of columns). Used to +test for homogeneity of variance prior to running one-way stats. Each +array in *args is one level of a factor. If an F_oneway() run on the +transformed data and found significant, variances are unequal. From +Maxwell and Delaney, p.112. 
+ +Usage: aobrientransform(*args) *args = 1D arrays, one per level of factor +Returns: transformed data for use in an ANOVA +""" + TINY = 1e-10 + k = len(args) + n = N.zeros(k,N.Float) + v = N.zeros(k,N.Float) + m = N.zeros(k,N.Float) + nargs = [] + for i in range(k): + nargs.append(args[i].astype(N.Float)) + n[i] = float(len(nargs[i])) + v[i] = var(nargs[i]) + m[i] = mean(nargs[i]) + for j in range(k): + for i in range(n[j]): + t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2 + t2 = 0.5*v[j]*(n[j]-1.0) + t3 = (n[j]-1.0)*(n[j]-2.0) + nargs[j][i] = (t1-t2) / float(t3) + check = 1 + for j in range(k): + if v[j] - mean(nargs[j]) > TINY: + check = 0 + if check <> 1: + raise ValueError, 'Lack of convergence in obrientransform.' + else: + return N.array(nargs) + + + def asamplevar (inarray,dimension=None,keepdims=0): + """ +Returns the sample standard deviation of the values in the passed +array (i.e., using N). Dimension can equal None (ravel array first), +an integer (the dimension over which to operate), or a sequence +(operate over multiple dimensions). Set keepdims=1 to return an array +with the same number of dimensions as inarray. + +Usage: asamplevar(inarray,dimension=None,keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + if dimension == 1: + mn = amean(inarray,dimension)[:,N.NewAxis] + else: + mn = amean(inarray,dimension,keepdims=1) + deviations = inarray - mn + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n*inarray.shape[d] + else: + n = inarray.shape[dimension] + svar = ass(deviations,dimension,keepdims) / float(n) + return svar + + + def asamplestdev (inarray, dimension=None, keepdims=0): + """ +Returns the sample standard deviation of the values in the passed +array (i.e., using N). Dimension can equal None (ravel array first), +an integer (the dimension over which to operate), or a sequence +(operate over multiple dimensions). 
Set keepdims=1 to return an array +with the same number of dimensions as inarray. + +Usage: asamplestdev(inarray,dimension=None,keepdims=0) +""" + return N.sqrt(asamplevar(inarray,dimension,keepdims)) + + + def asignaltonoise(instack,dimension=0): + """ +Calculates signal-to-noise. Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). + +Usage: asignaltonoise(instack,dimension=0): +Returns: array containing the value of (mean/stdev) along dimension, + or 0 when stdev=0 +""" + m = mean(instack,dimension) + sd = stdev(instack,dimension) + return N.where(N.equal(sd,0),0,m/sd) + + + def avar (inarray, dimension=None,keepdims=0): + """ +Returns the estimated population variance of the values in the passed +array (i.e., N-1). Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). Set keepdims=1 to return an array with the +same number of dimensions as inarray. + +Usage: avar(inarray,dimension=None,keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + mn = amean(inarray,dimension,1) + deviations = inarray - mn + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n*inarray.shape[d] + else: + n = inarray.shape[dimension] + var = ass(deviations,dimension,keepdims)/float(n-1) + return var + + + def astdev (inarray, dimension=None, keepdims=0): + """ +Returns the estimated population standard deviation of the values in +the passed array (i.e., N-1). Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). Set keepdims=1 to return +an array with the same number of dimensions as inarray. 
+ +Usage: astdev(inarray,dimension=None,keepdims=0) +""" + return N.sqrt(avar(inarray,dimension,keepdims)) + + + def asterr (inarray, dimension=None, keepdims=0): + """ +Returns the estimated population standard error of the values in the +passed array (i.e., N-1). Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). Set keepdims=1 to return +an array with the same number of dimensions as inarray. + +Usage: asterr(inarray,dimension=None,keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + return astdev(inarray,dimension,keepdims) / float(N.sqrt(inarray.shape[dimension])) + + + def asem (inarray, dimension=None, keepdims=0): + """ +Returns the standard error of the mean (i.e., using N) of the values +in the passed array. Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). Set keepdims=1 to return an array with the +same number of dimensions as inarray. + +Usage: asem(inarray,dimension=None, keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n*inarray.shape[d] + else: + n = inarray.shape[dimension] + s = asamplestdev(inarray,dimension,keepdims) / N.sqrt(n-1) + return s + + + def az (a, score): + """ +Returns the z-score of a given input score, given thearray from which +that score came. Not appropriate for population calculations, nor for +arrays > 1D. + +Usage: az(a, score) +""" + z = (score-amean(a)) / asamplestdev(a) + return z + + + def azs (a): + """ +Returns a 1D array of z-scores, one for each score in the passed array, +computed relative to the passed array. 
+ +Usage: azs(a) +""" + zscores = [] + for item in a: + zscores.append(z(a,item)) + return N.array(zscores) + + + def azmap (scores, compare, dimension=0): + """ +Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to +array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0 +of the compare array. + +Usage: azs(scores, compare, dimension=0) +""" + mns = amean(compare,dimension) + sstd = asamplestdev(compare,0) + return (scores - mns) / sstd + + +##################################### +####### ATRIMMING FUNCTIONS ####### +##################################### + + def around(a,digits=1): + """ +Rounds all values in array a to 'digits' decimal places. + +Usage: around(a,digits) +Returns: a, where each value is rounded to 'digits' decimals +""" + def ar(x,d=digits): + return round(x,d) + + if type(a) <> N.ArrayType: + try: + a = N.array(a) + except: + a = N.array(a,'O') + shp = a.shape + if a.typecode() in ['f','F','d','D']: + b = N.ravel(a) + b = N.array(map(ar,b)) + b.shape = shp + elif a.typecode() in ['o','O']: + b = N.ravel(a)*1 + for i in range(len(b)): + if type(b[i]) == FloatType: + b[i] = round(b[i],digits) + b.shape = shp + else: # not a float, double or Object array + b = a*1 + return b + + + def athreshold(a,threshmin=None,threshmax=None,newval=0): + """ +Like Numeric.clip() except that values threshmax are replaced +by newval instead of by threshmin/threshmax (respectively). 
+ +Usage: athreshold(a,threshmin=None,threshmax=None,newval=0) +Returns: a, with values threshmax replaced with newval +""" + mask = N.zeros(a.shape) + if threshmin <> None: + mask = mask + N.where(N.less(a,threshmin),1,0) + if threshmax <> None: + mask = mask + N.where(N.greater(a,threshmax),1,0) + mask = N.clip(mask,0,1) + return N.where(mask,newval,a) + + + def atrimboth (a,proportiontocut): + """ +Slices off the passed proportion of items from BOTH ends of the passed +array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND +'rightmost' 10% of scores. You must pre-sort the array if you want +"proper" trimming. Slices off LESS if proportion results in a +non-integer slice index (i.e., conservatively slices off +proportiontocut). + +Usage: atrimboth (a,proportiontocut) +Returns: trimmed version of array a +""" + lowercut = int(proportiontocut*len(a)) + uppercut = len(a) - lowercut + return a[lowercut:uppercut] + + + def atrim1 (a,proportiontocut,tail='right'): + """ +Slices off the passed proportion of items from ONE end of the passed +array (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' +10% of scores). Slices off LESS if proportion results in a non-integer +slice index (i.e., conservatively slices off proportiontocut). + +Usage: atrim1(a,proportiontocut,tail='right') or set tail='left' +Returns: trimmed version of array a +""" + if string.lower(tail) == 'right': + lowercut = 0 + uppercut = len(a) - int(proportiontocut*len(a)) + elif string.lower(tail) == 'left': + lowercut = int(proportiontocut*len(a)) + uppercut = len(a) + return a[lowercut:uppercut] + + +##################################### +##### ACORRELATION FUNCTIONS ###### +##################################### + + def acovariance(X): + """ +Computes the covariance matrix of a matrix X. Requires a 2D matrix input. 
+ +Usage: acovariance(X) +Returns: covariance matrix of X +""" + if len(X.shape) <> 2: + raise TypeError, "acovariance requires 2D matrices" + n = X.shape[0] + mX = amean(X,0) + return N.dot(N.transpose(X),X) / float(n) - N.multiply.outer(mX,mX) + + + def acorrelation(X): + """ +Computes the correlation matrix of a matrix X. Requires a 2D matrix input. + +Usage: acorrelation(X) +Returns: correlation matrix of X +""" + C = acovariance(X) + V = N.diagonal(C) + return C / N.sqrt(N.multiply.outer(V,V)) + + + def apaired(x,y): + """ +Interactively determines the type of data in x and y, and then runs the +appropriated statistic for paired group data. + +Usage: apaired(x,y) x,y = the two arrays of values to be compared +Returns: appropriate statistic name, value, and probability +""" + samples = '' + while samples not in ['i','r','I','R','c','C']: + print '\nIndependent or related samples, or correlation (i,r,c): ', + samples = raw_input() + + if samples in ['i','I','r','R']: + print '\nComparing variances ...', +# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 + r = obrientransform(x,y) + f,p = F_oneway(pstat.colex(r,0),pstat.colex(r,1)) + if p<0.05: + vartype='unequal, p='+str(round(p,4)) + else: + vartype='equal' + print vartype + if samples in ['i','I']: + if vartype[0]=='e': + t,p = ttest_ind(x,y,None,0) + print '\nIndependent samples t-test: ', round(t,4),round(p,4) + else: + if len(x)>20 or len(y)>20: + z,p = ranksums(x,y) + print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4) + else: + u,p = mannwhitneyu(x,y) + print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4) + + else: # RELATED SAMPLES + if vartype[0]=='e': + t,p = ttest_rel(x,y,0) + print '\nRelated samples t-test: ', round(t,4),round(p,4) + else: + t,p = ranksums(x,y) + print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4) + else: # CORRELATION ANALYSIS + corrtype = '' + while corrtype not in ['c','C','r','R','d','D']: + 
print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ', + corrtype = raw_input() + if corrtype in ['c','C']: + m,b,r,p,see = linregress(x,y) + print '\nLinear regression for continuous variables ...' + lol = [['Slope','Intercept','r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]] + pstat.printcc(lol) + elif corrtype in ['r','R']: + r,p = spearmanr(x,y) + print '\nCorrelation for ranked variables ...' + print "Spearman's r: ",round(r,4),round(p,4) + else: # DICHOTOMOUS + r,p = pointbiserialr(x,y) + print '\nAssuming x contains a dichotomous variable ...' + print 'Point Biserial r: ',round(r,4),round(p,4) + print '\n\n' + return None + + + def apearsonr(x,y,verbose=1): + """ +Calculates a Pearson correlation coefficient and returns p. Taken +from Heiman's Basic Statistics for the Behav. Sci (2nd), p.195. + +Usage: apearsonr(x,y,verbose=1) where x,y are equal length arrays +Returns: Pearson's r, two-tailed p-value +""" + TINY = 1.0e-20 + n = len(x) + xmean = amean(x) + ymean = amean(y) + r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y) + r_den = math.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y)-asquare_of_sums(y))) + r = (r_num / r_den) + df = n-2 + t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) + prob = abetai(0.5*df,0.5,df/(df+t*t),verbose) + return r,prob + + + def aspearmanr(x,y): + """ +Calculates a Spearman rank-order correlation coefficient. Taken +from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. + +Usage: aspearmanr(x,y) where x,y are equal-length arrays +Returns: Spearman's r, two-tailed p-value +""" + TINY = 1e-30 + n = len(x) + rankx = rankdata(x) + ranky = rankdata(y) + dsq = N.add.reduce((rankx-ranky)**2) + rs = 1 - 6*dsq / float(n*(n**2-1)) + t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs))) + df = n-2 + probrs = abetai(0.5*df,0.5,df/(df+t*t)) +# probability values for rs are from part 2 of the spearman function in +# Numerical Recipies, p.510. 
They close to tables, but not exact.(?) + return rs, probrs + + + def apointbiserialr(x,y): + """ +Calculates a point-biserial correlation coefficient and the associated +probability value. Taken from Heiman's Basic Statistics for the Behav. +Sci (1st), p.194. + +Usage: apointbiserialr(x,y) where x,y are equal length arrays +Returns: Point-biserial r, two-tailed p-value +""" + TINY = 1e-30 + categories = pstat.aunique(x) + data = pstat.aabut(x,y) + if len(categories) <> 2: + raise ValueError, "Exactly 2 categories required (in x) for pointbiserialr()." + else: # there are 2 categories, continue + codemap = pstat.aabut(categories,N.arange(2)) + recoded = pstat.arecode(data,codemap,0) + x = pstat.alinexand(data,0,categories[0]) + y = pstat.alinexand(data,0,categories[1]) + xmean = amean(pstat.acolex(x,1)) + ymean = amean(pstat.acolex(y,1)) + n = len(data) + adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n))) + rpb = (ymean - xmean)/asamplestdev(pstat.acolex(data,1))*adjust + df = n-2 + t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY))) + prob = abetai(0.5*df,0.5,df/(df+t*t)) + return rpb, prob + + + def akendalltau(x,y): + """ +Calculates Kendall's tau ... correlation of ordinal data. Adapted +from function kendl1 in Numerical Recipies. Needs good test-cases.@@@ + +Usage: akendalltau(x,y) +Returns: Kendall's tau, two-tailed p-value +""" + n1 = 0 + n2 = 0 + iss = 0 + for j in range(len(x)-1): + for k in range(j,len(y)): + a1 = x[j] - x[k] + a2 = y[j] - y[k] + aa = a1 * a2 + if (aa): # neither array has a tie + n1 = n1 + 1 + n2 = n2 + 1 + if aa > 0: + iss = iss + 1 + else: + iss = iss -1 + else: + if (a1): + n1 = n1 + 1 + else: + n2 = n2 + 1 + tau = iss / math.sqrt(n1*n2) + svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1)) + z = tau / math.sqrt(svar) + prob = erfcc(abs(z)/1.4142136) + return tau, prob + + + def alinregress(*args): + """ +Calculates a regression line on two arrays, x and y, corresponding to x,y +pairs. 
If a single 2D array is passed, alinregress finds dim with 2 levels +and splits data into x,y pairs along that dim. + +Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array +Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate +""" + TINY = 1.0e-20 + if len(args) == 1: # more than 1D array? + args = args[0] + if len(args) == 2: + x = args[0] + y = args[1] + else: + x = args[:,0] + y = args[:,1] + else: + x = args[0] + y = args[1] + n = len(x) + xmean = amean(x) + ymean = amean(y) + r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y) + r_den = math.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y)-asquare_of_sums(y))) + r = r_num / r_den + z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY)) + df = n-2 + t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) + prob = abetai(0.5*df,0.5,df/(df+t*t)) + slope = r_num / (float(n)*ass(x) - asquare_of_sums(x)) + intercept = ymean - slope*xmean + sterrest = math.sqrt(1-r*r)*asamplestdev(y) + return slope, intercept, r, prob, sterrest + + +##################################### +##### AINFERENTIAL STATISTICS ##### +##################################### + + def attest_1samp(a,popmean,printit=0,name='Sample',writemode='a'): + """ +Calculates the t-obtained for the independent samples T-test on ONE group +of scores a, given a population mean. If printit=1, results are printed +to the screen. If printit='filename', the results are output to 'filename' +using the given writemode (default=append). Returns t-value, and prob. + +Usage: attest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') +Returns: t-value, two-tailed prob +""" + if type(a) != N.ArrayType: + a = N.array(a) + x = amean(a) + v = avar(a) + n = len(a) + df = n-1 + svar = ((n-1)*v) / float(df) + t = (x-popmean)/math.sqrt(svar*(1.0/n)) + prob = abetai(0.5*df,0.5,df/(df+t*t)) + + if printit <> 0: + statname = 'Single-sample T-test.' 
+ outputpairedstats(printit,writemode, + 'Population','--',popmean,0,0,0, + name,n,x,v,N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), + statname,t,prob) + return t,prob + + + def attest_ind (a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2',writemode='a'): + """ +Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores +a, and b. From Numerical Recipies, p.483. If printit=1, results are +printed to the screen. If printit='filename', the results are output +to 'filename' using the given writemode (default=append). Dimension +can equal None (ravel array first), or an integer (the dimension over +which to operate on a and b). + +Usage: attest_ind (a,b,dimension=None,printit=0, + Name1='Samp1',Name2='Samp2',writemode='a') +Returns: t-value, two-tailed p-value +""" + if dimension == None: + a = N.ravel(a) + b = N.ravel(b) + dimension = 0 + x1 = amean(a,dimension) + x2 = amean(b,dimension) + v1 = avar(a,dimension) + v2 = avar(b,dimension) + n1 = a.shape[dimension] + n2 = b.shape[dimension] + df = n1+n2-2 + svar = ((n1-1)*v1+(n2-1)*v2) / float(df) + zerodivproblem = N.equal(svar,0) + svar = N.where(zerodivproblem,1,svar) # avoid zero-division in 1st place + t = (x1-x2)/N.sqrt(svar*(1.0/n1 + 1.0/n2)) # N-D COMPUTATION HERE!!!!!! + t = N.where(zerodivproblem,1.0,t) # replace NaN/wrong t-values with 1.0 + probs = abetai(0.5*df,0.5,float(df)/(df+t*t)) + + if type(t) == N.ArrayType: + probs = N.reshape(probs,t.shape) + if len(probs) == 1: + probs = probs[0] + + if printit <> 0: + if type(t) == N.ArrayType: + t = t[0] + if type(probs) == N.ArrayType: + probs = probs[0] + statname = 'Independent samples T-test.' 
+ outputpairedstats(printit,writemode, + name1,n1,x1,v1,N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), + name2,n2,x2,v2,N.minimum.reduce(N.ravel(b)), + N.maximum.reduce(N.ravel(b)), + statname,t,probs) + return + return t, probs + + + def attest_rel (a,b,dimension=None,printit=0,name1='Samp1',name2='Samp2',writemode='a'): + """ +Calculates the t-obtained T-test on TWO RELATED samples of scores, a +and b. From Numerical Recipies, p.483. If printit=1, results are +printed to the screen. If printit='filename', the results are output +to 'filename' using the given writemode (default=append). Dimension +can equal None (ravel array first), or an integer (the dimension over +which to operate on a and b). + +Usage: attest_rel(a,b,dimension=None,printit=0, + name1='Samp1',name2='Samp2',writemode='a') +Returns: t-value, two-tailed p-value +""" + if dimension == None: + a = N.ravel(a) + b = N.ravel(b) + dimension = 0 + if len(a)<>len(b): + raise ValueError, 'Unequal length arrays.' + x1 = amean(a,dimension) + x2 = amean(b,dimension) + v1 = avar(a,dimension) + v2 = avar(b,dimension) + n = a.shape[dimension] + df = float(n-1) + d = (a-b).astype('d') + + denom = N.sqrt((n*N.add.reduce(d*d,dimension) - N.add.reduce(d,dimension)**2) /df) + zerodivproblem = N.equal(denom,0) + denom = N.where(zerodivproblem,1,denom) # avoid zero-division in 1st place + t = N.add.reduce(d,dimension) / denom # N-D COMPUTATION HERE!!!!!! + t = N.where(zerodivproblem,1.0,t) # replace NaN/wrong t-values with 1.0 + probs = abetai(0.5*df,0.5,float(df)/(df+t*t)) + if type(t) == N.ArrayType: + probs = N.reshape(probs,t.shape) + if len(probs) == 1: + probs = probs[0] + + if printit <> 0: + statname = 'Related samples T-test.' 
+ outputpairedstats(printit,writemode, + name1,n,x1,v1,N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), + name2,n,x2,v2,N.minimum.reduce(N.ravel(b)), + N.maximum.reduce(N.ravel(b)), + statname,t,probs) + return + return t, probs + + + def achisquare(f_obs,f_exp=None): + """ +Calculates a one-way chi square for array of observed frequencies and returns +the result. If no expected frequencies are given, the total N is assumed to +be equally distributed across all groups. + +Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq. +Returns: chisquare-statistic, associated p-value +""" + + k = len(f_obs) + if f_exp == None: + f_exp = N.array([sum(f_obs)/float(k)] * len(f_obs),N.Float) + f_exp = f_exp.astype(N.Float) + chisq = N.add.reduce((f_obs-f_exp)**2 / f_exp) + return chisq, chisqprob(chisq, k-1) + + + def aks_2samp (data1,data2): + """ +Computes the Kolmogorov-Smirnof statistic on 2 samples. Modified from +Numerical Recipies in C, page 493. Returns KS D-value, prob. Not ufunc- +like. + +Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays +Returns: KS D-value, p-value +""" + j1 = 0 # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE + j2 = 0 # N.zeros(data2.shape[1:]) + fn1 = 0.0 # N.zeros(data1.shape[1:],N.Float) + fn2 = 0.0 # N.zeros(data2.shape[1:],N.Float) + n1 = data1.shape[0] + n2 = data2.shape[0] + en1 = n1*1 + en2 = n2*1 + d = N.zeros(data1.shape[1:],N.Float) + data1 = N.sort(data1,0) + data2 = N.sort(data2,0) + while j1 < n1 and j2 < n2: + d1=data1[j1] + d2=data2[j2] + if d1 <= d2: + fn1 = (j1)/float(en1) + j1 = j1 + 1 + if d2 <= d1: + fn2 = (j2)/float(en2) + j2 = j2 + 1 + dt = (fn2-fn1) + if abs(dt) > abs(d): + d = dt + try: + en = math.sqrt(en1*en2/float(en1+en2)) + prob = aksprob((en+0.12+0.11/en)*N.fabs(d)) + except: + prob = 1.0 + return d, prob + + + def amannwhitneyu(x,y): + """ +Calculates a Mann-Whitney U statistic on the provided scores and +returns the result. 
Use only when the n in each condition is < 20 and +you have 2 independent samples of ranks. REMEMBER: Mann-Whitney U is +significant if the u-obtained is LESS THAN or equal to the critical +value of U. + +Usage: amannwhitneyu(x,y) where x,y are arrays of values for 2 conditions +Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) +""" + n1 = len(x) + n2 = len(y) + ranked = rankdata(N.concatenate((x,y))) + rankx = ranked[0:n1] # get the x-ranks + ranky = ranked[n1:] # the rest are y-ranks + u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x + u2 = n1*n2 - u1 # remainder is U for y + bigu = max(u1,u2) + smallu = min(u1,u2) + T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores + if T == 0: + raise ValueError, 'All numbers are identical in amannwhitneyu' + sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0) + z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc + return smallu, 1.0 - zprob(z) + + + def atiecorrect(rankvals): + """ +Tie-corrector for ties in Mann Whitney U and Kruskal Wallis H tests. +See Siegel, S. (1956) Nonparametric Statistics for the Behavioral +Sciences. New York: McGraw-Hill. Code adapted from |Stat rankind.c +code. + +Usage: atiecorrect(rankvals) +Returns: T correction factor for U or H +""" + sorted,posn = ashellsort(N.array(rankvals)) + n = len(sorted) + T = 0.0 + i = 0 + while (i len(y): + raise ValueError, 'Unequal N in awilcoxont. Aborting.' 
+ d = x-y + d = N.compress(N.not_equal(d,0),d) # Keep all non-zero differences + count = len(d) + absd = abs(d) + absranked = arankdata(absd) + r_plus = 0.0 + r_minus = 0.0 + for i in range(len(absd)): + if d[i] < 0: + r_minus = r_minus + absranked[i] + else: + r_plus = r_plus + absranked[i] + wt = min(r_plus, r_minus) + mn = count * (count+1) * 0.25 + se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0) + z = math.fabs(wt-mn) / se + z = math.fabs(wt-mn) / se + prob = 2*(1.0 -zprob(abs(z))) + return wt, prob + + + def akruskalwallish(*args): + """ +The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more +groups, requiring at least 5 subjects in each group. This function +calculates the Kruskal-Wallis H and associated p-value for 3 or more +independent samples. + +Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions +Returns: H-statistic (corrected for ties), associated p-value +""" + assert len(args) == 3, "Need at least 3 groups in stats.akruskalwallish()" + args = list(args) + n = [0]*len(args) + n = map(len,args) + all = [] + for i in range(len(args)): + all = all + args[i].tolist() + ranked = rankdata(all) + T = tiecorrect(ranked) + for i in range(len(args)): + args[i] = ranked[0:n[i]] + del ranked[0:n[i]] + rsums = [] + for i in range(len(args)): + rsums.append(sum(args[i])**2) + rsums[i] = rsums[i] / float(n[i]) + ssbn = sum(rsums) + totaln = sum(n) + h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1) + df = len(args) - 1 + if T == 0: + raise ValueError, 'All numbers are identical in akruskalwallish' + h = h / float(T) + return h, chisqprob(h,df) + + + def afriedmanchisquare(*args): + """ +Friedman Chi-Square is a non-parametric, one-way within-subjects +ANOVA. This function calculates the Friedman Chi-square test for +repeated measures and returns the result, along with the associated +probability value. It assumes 3 or more repeated measures. Only 3 +levels requires a minimum of 10 subjects in the study. 
Four levels +requires 5 subjects per level(??). + +Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions +Returns: chi-square statistic, associated p-value +""" + k = len(args) + if k < 3: + raise ValueError, '\nLess than 3 levels. Friedman test not appropriate.\n' + n = len(args[0]) + data = apply(pstat.aabut,args) + data = data.astype(N.Float) + for i in range(len(data)): + data[i] = arankdata(data[i]) + ssbn = asum(asum(args,1)**2) + chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1) + return chisq, chisqprob(chisq,k-1) + + +##################################### +#### APROBABILITY CALCULATIONS #### +##################################### + + def achisqprob(chisq,df): + """ +Returns the (1-tail) probability value associated with the provided chi-square +value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can +handle multiple dimensions. + +Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom +""" + BIG = 200.0 + def ex(x): + BIG = 200.0 + exponents = N.where(N.less(x,-BIG),-BIG,x) + return N.exp(exponents) + + if type(chisq) == N.ArrayType: + arrayflag = 1 + else: + arrayflag = 0 + chisq = N.array([chisq]) + if df < 1: + return N.ones(chisq.shape,N.float) + probs = N.zeros(chisq.shape,N.Float) + probs = N.where(N.less_equal(chisq,0),1.0,probs) # set prob=1 for chisq<0 + a = 0.5 * chisq + if df > 1: + y = ex(-a) + if df%2 == 0: + even = 1 + s = y*1 + s2 = s*1 + else: + even = 0 + s = 2.0 * azprob(-N.sqrt(chisq)) + s2 = s*1 + if (df > 2): + chisq = 0.5 * (df - 1.0) + if even: + z = N.ones(probs.shape,N.Float) + else: + z = 0.5 *N.ones(probs.shape,N.Float) + if even: + e = N.zeros(probs.shape,N.Float) + else: + e = N.log(N.sqrt(N.pi)) *N.ones(probs.shape,N.Float) + c = N.log(a) + mask = N.zeros(probs.shape) + a_big = N.greater(a,BIG) + a_big_frozen = -1 *N.ones(probs.shape,N.Float) + totalelements = N.multiply.reduce(N.array(probs.shape)) + while asum(mask)<>totalelements: + e = N.log(z) + e + s = s + ex(c*z-a-e) 
+ z = z + 1.0 +# print z, e, s + newmask = N.greater(z,chisq) + a_big_frozen = N.where(newmask*N.equal(mask,0)*a_big, s, a_big_frozen) + mask = N.clip(newmask+mask,0,1) + if even: + z = N.ones(probs.shape,N.Float) + e = N.ones(probs.shape,N.Float) + else: + z = 0.5 *N.ones(probs.shape,N.Float) + e = 1.0 / N.sqrt(N.pi) / N.sqrt(a) * N.ones(probs.shape,N.Float) + c = 0.0 + mask = N.zeros(probs.shape) + a_notbig_frozen = -1 *N.ones(probs.shape,N.Float) + while asum(mask)<>totalelements: + e = e * (a/z.astype(N.Float)) + c = c + e + z = z + 1.0 +# print '#2', z, e, c, s, c*y+s2 + newmask = N.greater(z,chisq) + a_notbig_frozen = N.where(newmask*N.equal(mask,0)*(1-a_big), + c*y+s2, a_notbig_frozen) + mask = N.clip(newmask+mask,0,1) + probs = N.where(N.equal(probs,1),1, + N.where(N.greater(a,BIG),a_big_frozen,a_notbig_frozen)) + return probs + else: + return s + + + def aerfcc(x): + """ +Returns the complementary error function erfc(x) with fractional error +everywhere less than 1.2e-7. Adapted from Numerical Recipies. Can +handle multiple dimensions. + +Usage: aerfcc(x) +""" + z = abs(x) + t = 1.0 / (1.0+0.5*z) + ans = t * N.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277))))))))) + return N.where(N.greater_equal(x,0), ans, 2.0-ans) + + + def azprob(z): + """ +Returns the area under the normal curve 'to the left of' the given z value. +Thus, + for z<0, zprob(z) = 1-tail probability + for z>0, 1.0-zprob(z) = 1-tail probability + for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability +Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions. 
+ +Usage: azprob(z) where z is a z-value +""" + def yfunc(y): + x = (((((((((((((-0.000045255659 * y + +0.000152529290) * y -0.000019538132) * y + -0.000676904986) * y +0.001390604284) * y + -0.000794620820) * y -0.002034254874) * y + +0.006549791214) * y -0.010557625006) * y + +0.011630447319) * y -0.009279453341) * y + +0.005353579108) * y -0.002141268741) * y + +0.000535310849) * y +0.999936657524 + return x + + def wfunc(w): + x = ((((((((0.000124818987 * w + -0.001075204047) * w +0.005198775019) * w + -0.019198292004) * w +0.059054035642) * w + -0.151968751364) * w +0.319152932694) * w + -0.531923007300) * w +0.797884560593) * N.sqrt(w) * 2.0 + return x + + Z_MAX = 6.0 # maximum meaningful z-value + x = N.zeros(z.shape,N.Float) # initialize + y = 0.5 * N.fabs(z) + x = N.where(N.less(y,1.0),wfunc(y*y),yfunc(y-2.0)) # get x's + x = N.where(N.greater(y,Z_MAX*0.5),1.0,x) # kill those with big Z + prob = N.where(N.greater(z,0),(x+1)*0.5,(1-x)*0.5) + return prob + + + def aksprob(alam): + """ +Returns the probability value for a K-S statistic computed via ks_2samp. +Adapted from Numerical Recipies. Can handle multiple dimensions. + +Usage: aksprob(alam) +""" + if type(alam) == N.ArrayType: + frozen = -1 *N.ones(alam.shape,N.Float64) + alam = alam.astype(N.Float64) + arrayflag = 1 + else: + frozen = N.array(-1.) 
+ alam = N.array(alam,N.Float64) + mask = N.zeros(alam.shape) + fac = 2.0 *N.ones(alam.shape,N.Float) + sum = N.zeros(alam.shape,N.Float) + termbf = N.zeros(alam.shape,N.Float) + a2 = N.array(-2.0*alam*alam,N.Float64) + totalelements = N.multiply.reduce(N.array(mask.shape)) + for j in range(1,201): + if asum(mask) == totalelements: + break + exponents = (a2*j*j) + overflowmask = N.less(exponents,-746) + frozen = N.where(overflowmask,0,frozen) + mask = mask+overflowmask + term = fac*N.exp(exponents) + sum = sum + term + newmask = N.where(N.less_equal(abs(term),(0.001*termbf)) + + N.less(abs(term),1.0e-8*sum), 1, 0) + frozen = N.where(newmask*N.equal(mask,0), sum, frozen) + mask = N.clip(mask+newmask,0,1) + fac = -fac + termbf = abs(term) + if arrayflag: + return N.where(N.equal(frozen,-1), 1.0, frozen) # 1.0 if doesn't converge + else: + return N.where(N.equal(frozen,-1), 1.0, frozen)[0] # 1.0 if doesn't converge + + + def afprob (dfnum, dfden, F): + """ +Returns the 1-tailed significance level (p-value) of an F statistic +given the degrees of freedom for the numerator (dfR-dfF) and the degrees +of freedom for the denominator (dfF). Can handle multiple dims for F. + +Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn +""" + if type(F) == N.ArrayType: + return abetai(0.5*dfden, 0.5*dfnum, dfden/(1.0*dfden+dfnum*F)) + else: + return abetai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F)) + + + def abetacf(a,b,x,verbose=1): + """ +Evaluates the continued fraction form of the incomplete Beta function, +betai. (Adapted from: Numerical Recipies in C.) Can handle multiple +dimensions for x. 
+ +Usage: abetacf(a,b,x,verbose=1) +""" + ITMAX = 200 + EPS = 3.0e-7 + + arrayflag = 1 + if type(x) == N.ArrayType: + frozen = N.ones(x.shape,N.Float) *-1 #start out w/ -1s, should replace all + else: + arrayflag = 0 + frozen = N.array([-1]) + x = N.array([x]) + mask = N.zeros(x.shape) + bm = az = am = 1.0 + qab = a+b + qap = a+1.0 + qam = a-1.0 + bz = 1.0-qab*x/qap + for i in range(ITMAX+1): + if N.sum(N.ravel(N.equal(frozen,-1)))==0: + break + em = float(i+1) + tem = em + em + d = em*(b-em)*x/((qam+tem)*(a+tem)) + ap = az + d*am + bp = bz+d*bm + d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem)) + app = ap+d*az + bpp = bp+d*bz + aold = az*1 + am = ap/bpp + bm = bp/bpp + az = app/bpp + bz = 1.0 + newmask = N.less(abs(az-aold),EPS*abs(az)) + frozen = N.where(newmask*N.equal(mask,0), az, frozen) + mask = N.clip(mask+newmask,0,1) + noconverge = asum(N.equal(frozen,-1)) + if noconverge <> 0 and verbose: + print 'a or b too big, or ITMAX too small in Betacf for ',noconverge,' elements' + if arrayflag: + return frozen + else: + return frozen[0] + + + def agammln(xx): + """ +Returns the gamma function of xx. + Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. +Adapted from: Numerical Recipies in C. Can handle multiple dims ... but +probably doesn't normally have to. + +Usage: agammln(xx) +""" + coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, + 0.120858003e-2, -0.536382e-5] + x = xx - 1.0 + tmp = x + 5.5 + tmp = tmp - (x+0.5)*N.log(tmp) + ser = 1.0 + for j in range(len(coeff)): + x = x + 1 + ser = ser + coeff[j]/x + return -tmp + N.log(2.50662827465*ser) + + + def abetai(a,b,x,verbose=1): + """ +Returns the incomplete beta function: + + I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) + +where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma +function of a. The continued fraction formulation is implemented +here, using the betacf function. (Adapted from: Numerical Recipies in +C.) Can handle multiple dimensions. 
+ +Usage: abetai(a,b,x,verbose=1) +""" + TINY = 1e-15 + if type(a) == N.ArrayType: + if asum(N.less(x,0)+N.greater(x,1)) <> 0: + raise ValueError, 'Bad x in abetai' + x = N.where(N.equal(x,0),TINY,x) + x = N.where(N.equal(x,1.0),1-TINY,x) + + bt = N.where(N.equal(x,0)+N.equal(x,1), 0, -1) + exponents = ( gammln(a+b)-gammln(a)-gammln(b)+a*N.log(x)+b* + N.log(1.0-x) ) + # 746 (below) is the MAX POSSIBLE BEFORE OVERFLOW + exponents = N.where(N.less(exponents,-740),-740,exponents) + bt = N.exp(exponents) + if type(x) == N.ArrayType: + ans = N.where(N.less(x,(a+1)/(a+b+2.0)), + bt*abetacf(a,b,x,verbose)/float(a), + 1.0-bt*abetacf(b,a,1.0-x,verbose)/float(b)) + else: + if x<(a+1)/(a+b+2.0): + ans = bt*abetacf(a,b,x,verbose)/float(a) + else: + ans = 1.0-bt*abetacf(b,a,1.0-x,verbose)/float(b) + return ans + + +##################################### +####### AANOVA CALCULATIONS ####### +##################################### + + import LinearAlgebra, operator + LA = LinearAlgebra + + def aglm(data,para): + """ +Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken +from: + Peterson et al. Statistical limitations in functional neuroimaging + I. Non-inferential methods and statistical models. Phil Trans Royal Soc + Lond B 354: 1239-1260. + +Usage: aglm(data,para) +Returns: statistic, p-value ??? +""" + if len(para) <> len(data): + print "data and para must be same length in aglm" + return + n = len(para) + p = pstat.aunique(para) + x = N.zeros((n,len(p))) # design matrix + for l in range(len(p)): + x[:,l] = N.equal(para,p[l]) + b = N.dot(N.dot(LA.inverse(N.dot(N.transpose(x),x)), # i.e., b=inv(X'X)X'Y + N.transpose(x)), + data) + diffs = (data - N.dot(x,b)) + s_sq = 1./(n-len(p)) * N.dot(N.transpose(diffs), diffs) + + if len(p) == 2: # ttest_ind + c = N.array([1,-1]) + df = n-2 + fact = asum(1.0/asum(x,0)) # i.e., 1/n1 + 1/n2 + 1/n3 ... 
+ t = N.dot(c,b) / N.sqrt(s_sq*fact) + probs = abetai(0.5*df,0.5,float(df)/(df+t*t)) + return t, probs + + + def aF_oneway(*args): + """ +Performs a 1-way ANOVA, returning an F-value and probability given +any number of groups. From Heiman, pp.394-7. + +Usage: aF_oneway (*args) where *args is 2 or more arrays, one per + treatment group +Returns: f-value, probability +""" + na = len(args) # ANOVA on 'na' groups, each in it's own array + means = [0]*na + vars = [0]*na + ns = [0]*na + alldata = [] + tmp = map(N.array,args) + means = map(amean,tmp) + vars = map(avar,tmp) + ns = map(len,args) + alldata = N.concatenate(args) + bign = len(alldata) + sstot = ass(alldata)-(asquare_of_sums(alldata)/float(bign)) + ssbn = 0 + for a in args: + ssbn = ssbn + asquare_of_sums(N.array(a))/float(len(a)) + ssbn = ssbn - (asquare_of_sums(alldata)/float(bign)) + sswn = sstot-ssbn + dfbn = na-1 + dfwn = bign - na + msb = ssbn/float(dfbn) + msw = sswn/float(dfwn) + f = msb/msw + prob = fprob(dfbn,dfwn,f) + return f, prob + + + def aF_value (ER,EF,dfR,dfF): + """ +Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR = degrees of freedom the Restricted model + dfF = degrees of freedom associated with the Restricted model +""" + return ((ER-EF)/float(dfR-dfF) / (EF/float(dfF))) + + + def outputfstats(Enum, Eden, dfnum, dfden, f, prob): + Enum = round(Enum,3) + Eden = round(Eden,3) + dfnum = round(Enum,3) + dfden = round(dfden,3) + f = round(f,3) + prob = round(prob,3) + suffix = '' # for *s after the p-value + if prob < 0.001: suffix = ' ***' + elif prob < 0.01: suffix = ' **' + elif prob < 0.05: suffix = ' *' + title = [['EF/ER','DF','Mean Square','F-value','prob','']] + lofl = title+[[Enum, dfnum, round(Enum/float(dfnum),3), f, prob, suffix], + [Eden, dfden, round(Eden/float(dfden),3),'','','']] + pstat.printcc(lofl) + return + + + def 
F_value_multivariate(ER, EF, dfnum, dfden): + """ +Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR = degrees of freedom the Restricted model + dfF = degrees of freedom associated with the Restricted model +where ER and EF are matrices from a multivariate F calculation. +""" + if type(ER) in [IntType, FloatType]: + ER = N.array([[ER]]) + if type(EF) in [IntType, FloatType]: + EF = N.array([[EF]]) + n_um = (LA.determinant(ER) - LA.determinant(EF)) / float(dfnum) + d_en = LA.determinant(EF) / float(dfden) + return n_um / d_en + + +##################################### +####### ASUPPORT FUNCTIONS ######## +##################################### + + def asign(a): + """ +Usage: asign(a) +Returns: array shape of a, with -1 where a<0 and +1 where a>=0 +""" + a = N.asarray(a) + if ((type(a) == type(1.4)) or (type(a) == type(1))): + return a-a-N.less(a,0)+N.greater(a,0) + else: + return N.zeros(N.shape(a))-N.less(a,0)+N.greater(a,0) + + + def asum (a, dimension=None,keepdims=0): + """ +An alternative to the Numeric.add.reduce function, which allows one to +(1) collapse over multiple dimensions at once, and/or (2) to retain +all dimensions in the original array (squashing one down to size. +Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). If keepdims=1, the resulting array will have as many +dimensions as the input array. 
+ +Usage: asum(a, dimension=None, keepdims=0) +Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1 +""" + if type(a) == N.ArrayType and a.typecode() in ['l','s','b']: + a = a.astype(N.Float) + if dimension == None: + s = N.sum(N.ravel(a)) + elif type(dimension) in [IntType,FloatType]: + s = N.add.reduce(a, dimension) + if keepdims == 1: + shp = list(a.shape) + shp[dimension] = 1 + s = N.reshape(s,shp) + else: # must be a SEQUENCE of dims to sum over + dims = list(dimension) + dims.sort() + dims.reverse() + s = a *1.0 + for dim in dims: + s = N.add.reduce(s,dim) + if keepdims == 1: + shp = list(a.shape) + for dim in dims: + shp[dim] = 1 + s = N.reshape(s,shp) + return s + + + def acumsum (a,dimension=None): + """ +Returns an array consisting of the cumulative sum of the items in the +passed array. Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions, but this last one just barely makes sense). + +Usage: acumsum(a,dimension=None) +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + if type(dimension) in [ListType, TupleType, N.ArrayType]: + dimension = list(dimension) + dimension.sort() + dimension.reverse() + for d in dimension: + a = N.add.accumulate(a,d) + return a + else: + return N.add.accumulate(a,dimension) + + + def ass(inarray, dimension=None, keepdims=0): + """ +Squares each value in the passed array, adds these squares & returns +the result. Unfortunate function name. :-) Defaults to ALL values in +the array. Dimension can equal None (ravel array first), an integer +(the dimension over which to operate), or a sequence (operate over +multiple dimensions). Set keepdims=1 to maintain the original number +of dimensions. 
+ +Usage: ass(inarray, dimension=None, keepdims=0) +Returns: sum-along-'dimension' for (inarray*inarray) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + return asum(inarray*inarray,dimension,keepdims) + + + def asummult (array1,array2,dimension=None,keepdims=0): + """ +Multiplies elements in array1 and array2, element by element, and +returns the sum (along 'dimension') of all resulting multiplications. +Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). A trivial function, but included for completeness. + +Usage: asummult(array1,array2,dimension=None,keepdims=0) +""" + if dimension == None: + array1 = N.ravel(array1) + array2 = N.ravel(array2) + dimension = 0 + return asum(array1*array2,dimension,keepdims) + + + def asquare_of_sums(inarray, dimension=None, keepdims=0): + """ +Adds the values in the passed array, squares that sum, and returns the +result. Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). If keepdims=1, the returned array will have the same +NUMBER of dimensions as the original. + +Usage: asquare_of_sums(inarray, dimension=None, keepdims=0) +Returns: the square of the sum over dim(s) in dimension +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + s = asum(inarray,dimension,keepdims) + if type(s) == N.ArrayType: + return s.astype(N.Float)*s + else: + return float(s)*s + + + def asumdiffsquared(a,b, dimension=None, keepdims=0): + """ +Takes pairwise differences of the values in arrays a and b, squares +these differences, and returns the sum of these squares. Dimension +can equal None (ravel array first), an integer (the dimension over +which to operate), or a sequence (operate over multiple dimensions). 
+keepdims=1 means the return shape = len(a.shape) = len(b.shape) + +Usage: asumdiffsquared(a,b) +Returns: sum[ravel(a-b)**2] +""" + if dimension == None: + inarray = N.ravel(a) + dimension = 0 + return asum((a-b)**2,dimension,keepdims) + + + def ashellsort(inarray): + """ +Shellsort algorithm. Sorts a 1D-array. + +Usage: ashellsort(inarray) +Returns: sorted-inarray, sorting-index-vector (for original array) +""" + n = len(inarray) + svec = inarray *1.0 + ivec = range(n) + gap = n/2 # integer division needed + while gap >0: + for i in range(gap,n): + for j in range(i-gap,-1,-gap): + while j>=0 and svec[j]>svec[j+gap]: + temp = svec[j] + svec[j] = svec[j+gap] + svec[j+gap] = temp + itemp = ivec[j] + ivec[j] = ivec[j+gap] + ivec[j+gap] = itemp + gap = gap / 2 # integer division needed +# svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]] + return svec, ivec + + + def arankdata(inarray): + """ +Ranks the data in inarray, dealing with ties appropritely. Assumes +a 1D inarray. Adapted from Gary Perlman's |Stat ranksort. + +Usage: arankdata(inarray) +Returns: array of length equal to inarray, containing rank scores +""" + n = len(inarray) + svec, ivec = ashellsort(inarray) + sumranks = 0 + dupcount = 0 + newarray = N.zeros(n,N.Float) + for i in range(n): + sumranks = sumranks + i + dupcount = dupcount + 1 + if i==n-1 or svec[i] <> svec[i+1]: + averank = sumranks / float(dupcount) + 1 + for j in range(i-dupcount+1,i+1): + newarray[ivec[j]] = averank + sumranks = 0 + dupcount = 0 + return newarray + + + def afindwithin(data): + """ +Returns a binary vector, 1=within-subject factor, 0=between. Input +equals the entire data array (i.e., column 1=random factor, last +column = measured values. 
+ +Usage: afindwithin(data) data in |Stat format +""" + numfact = len(data[0])-2 + withinvec = [0]*numfact + for col in range(1,numfact+1): + rows = pstat.linexand(data,col,pstat.unique(pstat.colex(data,1))[0]) # get 1 level of this factor + if len(pstat.unique(pstat.colex(rows,0))) < len(rows): # if fewer subjects than scores on this factor + withinvec[col-1] = 1 + return withinvec + + + ######################################################### + ######################################################### + ###### RE-DEFINE DISPATCHES TO INCLUDE ARRAYS ######### + ######################################################### + ######################################################### + +## CENTRAL TENDENCY: + geometricmean = Dispatch ( (lgeometricmean, (ListType, TupleType)), + (ageometricmean, (N.ArrayType,)) ) + harmonicmean = Dispatch ( (lharmonicmean, (ListType, TupleType)), + (aharmonicmean, (N.ArrayType,)) ) + mean = Dispatch ( (lmean, (ListType, TupleType)), + (amean, (N.ArrayType,)) ) + median = Dispatch ( (lmedian, (ListType, TupleType)), + (amedian, (N.ArrayType,)) ) + medianscore = Dispatch ( (lmedianscore, (ListType, TupleType)), + (amedianscore, (N.ArrayType,)) ) + mode = Dispatch ( (lmode, (ListType, TupleType)), + (amode, (N.ArrayType,)) ) + tmean = Dispatch ( (atmean, (N.ArrayType,)) ) + tvar = Dispatch ( (atvar, (N.ArrayType,)) ) + tstdev = Dispatch ( (atstdev, (N.ArrayType,)) ) + tsem = Dispatch ( (atsem, (N.ArrayType,)) ) + +## VARIATION: + moment = Dispatch ( (lmoment, (ListType, TupleType)), + (amoment, (N.ArrayType,)) ) + variation = Dispatch ( (lvariation, (ListType, TupleType)), + (avariation, (N.ArrayType,)) ) + skew = Dispatch ( (lskew, (ListType, TupleType)), + (askew, (N.ArrayType,)) ) + kurtosis = Dispatch ( (lkurtosis, (ListType, TupleType)), + (akurtosis, (N.ArrayType,)) ) + describe = Dispatch ( (ldescribe, (ListType, TupleType)), + (adescribe, (N.ArrayType,)) ) + +## DISTRIBUTION TESTS + + skewtest = Dispatch ( (askewtest, (ListType, 
TupleType)), + (askewtest, (N.ArrayType,)) ) + kurtosistest = Dispatch ( (akurtosistest, (ListType, TupleType)), + (akurtosistest, (N.ArrayType,)) ) + normaltest = Dispatch ( (anormaltest, (ListType, TupleType)), + (anormaltest, (N.ArrayType,)) ) + +## FREQUENCY STATS: + itemfreq = Dispatch ( (litemfreq, (ListType, TupleType)), + (aitemfreq, (N.ArrayType,)) ) + scoreatpercentile = Dispatch ( (lscoreatpercentile, (ListType, TupleType)), + (ascoreatpercentile, (N.ArrayType,)) ) + percentileofscore = Dispatch ( (lpercentileofscore, (ListType, TupleType)), + (apercentileofscore, (N.ArrayType,)) ) + histogram = Dispatch ( (lhistogram, (ListType, TupleType)), + (ahistogram, (N.ArrayType,)) ) + cumfreq = Dispatch ( (lcumfreq, (ListType, TupleType)), + (acumfreq, (N.ArrayType,)) ) + relfreq = Dispatch ( (lrelfreq, (ListType, TupleType)), + (arelfreq, (N.ArrayType,)) ) + +## VARIABILITY: + obrientransform = Dispatch ( (lobrientransform, (ListType, TupleType)), + (aobrientransform, (N.ArrayType,)) ) + samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)), + (asamplevar, (N.ArrayType,)) ) + samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)), + (asamplestdev, (N.ArrayType,)) ) + signaltonoise = Dispatch( (asignaltonoise, (N.ArrayType,)),) + var = Dispatch ( (lvar, (ListType, TupleType)), + (avar, (N.ArrayType,)) ) + stdev = Dispatch ( (lstdev, (ListType, TupleType)), + (astdev, (N.ArrayType,)) ) + sterr = Dispatch ( (lsterr, (ListType, TupleType)), + (asterr, (N.ArrayType,)) ) + sem = Dispatch ( (lsem, (ListType, TupleType)), + (asem, (N.ArrayType,)) ) + z = Dispatch ( (lz, (ListType, TupleType)), + (az, (N.ArrayType,)) ) + zs = Dispatch ( (lzs, (ListType, TupleType)), + (azs, (N.ArrayType,)) ) + +## TRIMMING FCNS: + threshold = Dispatch( (athreshold, (N.ArrayType,)),) + trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)), + (atrimboth, (N.ArrayType,)) ) + trim1 = Dispatch ( (ltrim1, (ListType, TupleType)), + (atrim1, (N.ArrayType,)) ) + +## CORRELATION 
FCNS: + paired = Dispatch ( (lpaired, (ListType, TupleType)), + (apaired, (N.ArrayType,)) ) + pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)), + (apearsonr, (N.ArrayType,)) ) + spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)), + (aspearmanr, (N.ArrayType,)) ) + pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)), + (apointbiserialr, (N.ArrayType,)) ) + kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)), + (akendalltau, (N.ArrayType,)) ) + linregress = Dispatch ( (llinregress, (ListType, TupleType)), + (alinregress, (N.ArrayType,)) ) + +## INFERENTIAL STATS: + ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)), + (attest_1samp, (N.ArrayType,)) ) + ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)), + (attest_ind, (N.ArrayType,)) ) + ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)), + (attest_rel, (N.ArrayType,)) ) + chisquare = Dispatch ( (lchisquare, (ListType, TupleType)), + (achisquare, (N.ArrayType,)) ) + ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)), + (aks_2samp, (N.ArrayType,)) ) + mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)), + (amannwhitneyu, (N.ArrayType,)) ) + tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)), + (atiecorrect, (N.ArrayType,)) ) + ranksums = Dispatch ( (lranksums, (ListType, TupleType)), + (aranksums, (N.ArrayType,)) ) + wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)), + (awilcoxont, (N.ArrayType,)) ) + kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)), + (akruskalwallish, (N.ArrayType,)) ) + friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)), + (afriedmanchisquare, (N.ArrayType,)) ) + +## PROBABILITY CALCS: + chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)), + (achisqprob, (N.ArrayType,)) ) + zprob = Dispatch ( (lzprob, (IntType, FloatType)), + (azprob, (N.ArrayType,)) ) + ksprob = Dispatch ( (lksprob, (IntType, FloatType)), + (aksprob, (N.ArrayType,)) ) + fprob 
= Dispatch ( (lfprob, (IntType, FloatType)), + (afprob, (N.ArrayType,)) ) + betacf = Dispatch ( (lbetacf, (IntType, FloatType)), + (abetacf, (N.ArrayType,)) ) + betai = Dispatch ( (lbetai, (IntType, FloatType)), + (abetai, (N.ArrayType,)) ) + erfcc = Dispatch ( (lerfcc, (IntType, FloatType)), + (aerfcc, (N.ArrayType,)) ) + gammln = Dispatch ( (lgammln, (IntType, FloatType)), + (agammln, (N.ArrayType,)) ) + +## ANOVA FUNCTIONS: + F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)), + (aF_oneway, (N.ArrayType,)) ) + F_value = Dispatch ( (lF_value, (ListType, TupleType)), + (aF_value, (N.ArrayType,)) ) + +## SUPPORT FUNCTIONS: + incr = Dispatch ( (lincr, (ListType, TupleType, N.ArrayType)), ) + sum = Dispatch ( (lsum, (ListType, TupleType)), + (asum, (N.ArrayType,)) ) + cumsum = Dispatch ( (lcumsum, (ListType, TupleType)), + (acumsum, (N.ArrayType,)) ) + ss = Dispatch ( (lss, (ListType, TupleType)), + (ass, (N.ArrayType,)) ) + summult = Dispatch ( (lsummult, (ListType, TupleType)), + (asummult, (N.ArrayType,)) ) + square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)), + (asquare_of_sums, (N.ArrayType,)) ) + sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)), + (asumdiffsquared, (N.ArrayType,)) ) + shellsort = Dispatch ( (lshellsort, (ListType, TupleType)), + (ashellsort, (N.ArrayType,)) ) + rankdata = Dispatch ( (lrankdata, (ListType, TupleType)), + (arankdata, (N.ArrayType,)) ) + findwithin = Dispatch ( (lfindwithin, (ListType, TupleType)), + (afindwithin, (N.ArrayType,)) ) + +###################### END OF NUMERIC FUNCTION BLOCK ##################### + +###################### END OF STATISTICAL FUNCTIONS ###################### + +except ImportError: + pass diff --git a/workflows/bioinformatics/settings.py b/workflows/bioinformatics/settings.py index 76d5c087e84694b74c662cd15e8ca30b9e3899c5..17913d4b566bd34b2d8bbff2ff804aa69d2a98f0 100644 --- a/workflows/bioinformatics/settings.py +++ b/workflows/bioinformatics/settings.py @@ -6,6 
+6,6 @@ package_root = os.path.dirname(__file__) package_statics = os.path.join(os.path.dirname(__file__), 'static', package_name) package_bin = os.path.join(package_root, 'bin') -auto_update_db = True +auto_update_db = False create_backups = True diff --git a/workflows/bioinformatics/urls.py b/workflows/bioinformatics/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..3b7daf89e44bba1e742efd466d15cfe25a48b74b --- /dev/null +++ b/workflows/bioinformatics/urls.py @@ -0,0 +1,6 @@ +from django.conf.urls.defaults import patterns, include, url + +urlpatterns = patterns('', + url(r'^get-new-feature-selection-scores/widget(?P[0-9]+)/(?P\w+)/$', 'workflows.bioinformatics.views.get_new_feature_selection_scores', + name='get new feature selection scores'), +) \ No newline at end of file diff --git a/workflows/bioinformatics/views.py b/workflows/bioinformatics/views.py new file mode 100644 index 0000000000000000000000000000000000000000..421d815d4f8fe465e16e4e093318f834461c9f2f --- /dev/null +++ b/workflows/bioinformatics/views.py @@ -0,0 +1,34 @@ +# helperji, context stvari +from django.shortcuts import render, get_object_or_404, redirect +from django.http import Http404, HttpResponse + +# modeli +from workflows.models import * +# auth fore +from django.contrib.auth.decorators import login_required + +# Orange bioinformatics differential expression +# from orngBioinformatics.obiExpression import * + +# SCORE_METHODS = [("fold change", ExpressionSignificance_FoldChange, twoTailTest, True), +# ("log2 fold change", ExpressionSignificance_Log2FoldChange, twoTailTest, True), +# ("t-test", ExpressionSignificance_TTest_T, twoTailTest, True), +# ("t-test p-value", ExpressionSignificance_TTest_PValue, oneTailTestLow, True), +# ("anova", ExpressionSignificance_ANOVA_F, oneTailTestHi, False), +# ("anova p-value", ExpressionSignificance_ANOVA_PValue, oneTailTestLow, False), +# ("signal to noise ratio", ExpressionSignificance_SignalToNoise, twoTailTest, True), +# 
("info gain", ExpressionSignificance_Info, oneTailTestHi, True), +# ("chi-square", ExpressionSignificance_ChiSquare, oneTailTestHi, True), +# ("mann-whitney", ExpressionSignigicance_MannWhitneyu_U, oneTailTestLow, True), +# ("AREA (timeseries)", ExpressionSignificance_AREA, oneTailTestHi, False), +# ("FC (timeseries)", ExpressionSignificance_FCts, oneTailTestHi, False)] + +@login_required +def get_new_feature_selection_scores(request, widget_id, method = 0): + w = get_object_or_404(Widget, pk=widget_id) + if w.workflow.user == request.user: + data = w.inputs.all()[0] + _, score_method, _, twotailtest = SCORE_METHODS[method] + return HttpResponse(data, mimetype='text/json') + else: + return HttpResponse(status=400) diff --git a/workflows/bioinformatics/visualization_views.py b/workflows/bioinformatics/visualization_views.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bb486b23f843f3f2264fe268421dbc346d8d0b4a 100644 --- a/workflows/bioinformatics/visualization_views.py +++ b/workflows/bioinformatics/visualization_views.py @@ -0,0 +1,14 @@ +''' +Bioinformatics visualization views. 
+ +@author: Anze Vavpetic +''' +from django.shortcuts import render + +def rank_plotter(request, input_dict, output_dict, widget): + #TODO + return render(request, 'interactions/rank_plotter.html', {'widget':widget}) + +def segs_rule_browser(request, input_dict, output_dict, widget): + #TODO + return render(request, 'interactions/segs_rule_browser.html', {'widget':widget}) diff --git a/workflows/cforange/__init__.py b/workflows/cforange/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/cforange/db/package_data.json b/workflows/cforange/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..ec2fb85cdeb38c4600316fe2ed0a7085b20bc6fc --- /dev/null +++ b/workflows/cforange/db/package_data.json @@ -0,0 +1,3626 @@ +[ + { + "pk": 10, + "model": "workflows.category", + "fields": { + "uid": "d2dfe5ca-7d73-4b4d-b922-b389ec610d99", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Orange" + } + }, + { + "pk": 96, + "model": "workflows.abstractwidget", + "fields": { + "category": 10, + "treeview_image": "", + "name": "Prepare results for PR visualization", + "is_streaming": false, + "uid": "b16da039-f997-4d78-baa0-26d5ad8661eb", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_prepare_results", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 182, + "model": "workflows.abstractinput", + "fields": { + "widget": 96, + "name": "List of learners", + "short_name": "lrn", + "uid": "8a484294-ded6-4ed9-8d4c-043fd8cb3eb7", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "learners", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 
183, + "model": "workflows.abstractinput", + "fields": { + "widget": 96, + "name": "List of precisions", + "short_name": "pre", + "uid": "91b5674a-88e7-4954-9117-46c1fbeb2d4c", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "precision", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 184, + "model": "workflows.abstractinput", + "fields": { + "widget": 96, + "name": "List of recalls", + "short_name": "rec", + "uid": "b1f16202-0bf3-4002-b534-3523c615d26f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "recall", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 185, + "model": "workflows.abstractinput", + "fields": { + "widget": 96, + "name": "List of F-measures", + "short_name": "f", + "uid": "706ba66d-3425-4093-815f-8c62cc034e5b", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "f", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 103, + "model": "workflows.abstractoutput", + "fields": { + "widget": 96, + "name": "Algorithm performance", + "short_name": "alp", + "variable": "alp", + "uid": "090071a2-fa72-4195-af38-31885d764207", + "order": 1, + "description": "" + } + }, + { + "pk": 11, + "model": "workflows.category", + "fields": { + "uid": "d0e46009-46b3-43ed-bff0-5fc986ff6b34", + "parent": 10, + "workflow": null, + "user": null, + "order": 1, + "name": "Classification and Regression" + } + }, + { + "pk": 43, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-C45.png", + "name": "C4.5 Tree Learner", + "is_streaming": false, + "uid": "0e3f39fd-3807-49d5-aab5-4ae441cd9f75", + "interaction_view": "", + "image": "images/orange-C45.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "c45_tree", + 
"wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 47, + "model": "workflows.abstractoutput", + "fields": { + "widget": 43, + "name": "C4.5 Tree Learner", + "short_name": "c45", + "variable": "c45out", + "uid": "be5a0f89-b7a8-4e1c-b6d9-48ccd5f4a151", + "order": 1, + "description": "" + } + }, + { + "pk": 45, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-CN2-small.png", + "name": "CN2 Rule Learner", + "is_streaming": false, + "uid": "766ef980-6622-4168-b4c5-60c5001dba8b", + "interaction_view": "", + "image": "images/orange-CN2-small.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cn2", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, + "description": "" + } + }, + { + "pk": 49, + "model": "workflows.abstractoutput", + "fields": { + "widget": 45, + "name": "CN2 Rule Learner", + "short_name": "cn2", + "variable": "cn2out", + "uid": "8fd12925-d2ac-4672-82d9-39da4174c2b4", + "order": 1, + "description": "" + } + }, + { + "pk": 44, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-ClassificationTree.png", + "name": "Classification Tree", + "is_streaming": false, + "uid": "29b5871f-1829-4af6-8463-c6515e2010c6", + "interaction_view": "", + "image": "images/orange-ClassificationTree.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "class_tree", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, + "description": "" + } + }, + { + "pk": 48, + "model": "workflows.abstractoutput", + "fields": { + "widget": 44, + "name": "Classification Tree", + "short_name": "clt", + "variable": "treeout", + 
"uid": "65dc834a-a59c-4821-99c2-37d1142ac592", + "order": 1, + "description": "" + } + }, + { + "pk": 47, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-LogisticRegression.png", + "name": "Logistic Regression", + "is_streaming": false, + "uid": "bb9cbb4e-516d-4856-9ad3-614a7a01cba1", + "interaction_view": "", + "image": "images/orange-LogisticRegression.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "logreg", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 4, + "description": "" + } + }, + { + "pk": 51, + "model": "workflows.abstractoutput", + "fields": { + "widget": 47, + "name": "Logistic Regresion Classifier", + "short_name": "lrc", + "variable": "logregout", + "uid": "bd5fa320-9059-44f4-8186-f5dea9283520", + "order": 1, + "description": "" + } + }, + { + "pk": 48, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/lookup.png", + "name": "Lookup Learner", + "is_streaming": false, + "uid": "1781ee2a-f353-42b4-96d4-3379329c984e", + "interaction_view": "", + "image": "images/lookup.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "lookup_learner", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 5, + "description": "" + } + }, + { + "pk": 52, + "model": "workflows.abstractoutput", + "fields": { + "widget": 48, + "name": "Lookup Classifier", + "short_name": "luc", + "variable": "lookupout", + "uid": "9d104fe8-e95a-4553-a9a5-03a49421d148", + "order": 1, + "description": "" + } + }, + { + "pk": 49, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-Majority.png", + "name": "Majority Learner", + "is_streaming": false, + "uid": 
"f11eaa13-d94b-40e1-adfb-a811ae36bc2d", + "interaction_view": "", + "image": "images/orange-Majority.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "majority_learner", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 6, + "description": "" + } + }, + { + "pk": 53, + "model": "workflows.abstractoutput", + "fields": { + "widget": 49, + "name": "Majority Classifier", + "short_name": "mjc", + "variable": "majorout", + "uid": "95de96fb-f220-4b30-a981-e669f3380e79", + "order": 1, + "description": "" + } + }, + { + "pk": 50, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-NaiveBayes.png", + "name": "Naive Bayes", + "is_streaming": false, + "uid": "bf4a4479-b2c3-47b3-b900-be8d39871ecd", + "interaction_view": "", + "image": "images/orange-NaiveBayes.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "bayes", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 7, + "description": "" + } + }, + { + "pk": 54, + "model": "workflows.abstractoutput", + "fields": { + "widget": 50, + "name": "Bayes Classifier", + "short_name": "bys", + "variable": "bayesout", + "uid": "4557f31d-6877-44c7-96b1-3dc72f628e2e", + "order": 1, + "description": "" + } + }, + { + "pk": 51, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-RandomForest.png", + "name": "Random Forest", + "is_streaming": false, + "uid": "8ac403eb-cf44-41cb-b9a1-84dede03a1d4", + "interaction_view": "", + "image": "images/orange-RandomForest.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "random_forest", + "wsdl_method": "", + "wsdl": "", + 
"interactive": false, + "has_progress_bar": false, + "order": 8, + "description": "" + } + }, + { + "pk": 105, + "model": "workflows.abstractinput", + "fields": { + "widget": 51, + "name": "Number of decision trees", + "short_name": "ndt", + "uid": "d8f62393-dda8-48a0-93c1-ac5bc3c5667f", + "default": "100", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "n", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 55, + "model": "workflows.abstractoutput", + "fields": { + "widget": 51, + "name": "Random Forest Classifier", + "short_name": "rfc", + "variable": "rfout", + "uid": "fe85cc7e-0e13-4f76-841f-ac809d28095f", + "order": 1, + "description": "" + } + }, + { + "pk": 52, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-Rule-Learner.png", + "name": "Rule Induction", + "is_streaming": false, + "uid": "0ce778a2-b40c-45f1-8383-9327b18fc661", + "interaction_view": "", + "image": "images/orange-Rule-Learner.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "rules", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 9, + "description": "" + } + }, + { + "pk": 56, + "model": "workflows.abstractoutput", + "fields": { + "widget": 52, + "name": "Rule Learner", + "short_name": "rll", + "variable": "rulesout", + "uid": "20a9db03-d944-4ba4-9786-d2c8dd688e87", + "order": 1, + "description": "" + } + }, + { + "pk": 53, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-SVM_1.png", + "name": "Support Vector Machine", + "is_streaming": false, + "uid": "6b40b000-3e93-4fe4-bb95-f7ea4822eebf", + "interaction_view": "", + "image": "images/orange-SVM_1.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + 
"action": "svm", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 10, + "description": "" + } + }, + { + "pk": 57, + "model": "workflows.abstractoutput", + "fields": { + "widget": 53, + "name": "Support Vector Machine Classifier", + "short_name": "svm", + "variable": "svmout", + "uid": "f2c9a031-e94f-4b3f-9302-656c46871e46", + "order": 1, + "description": "" + } + }, + { + "pk": 54, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-SVMEasy-new.png", + "name": "Support Vector Machine Easy", + "is_streaming": false, + "uid": "d54a6cee-76d0-4ec6-9aab-556e233be366", + "interaction_view": "", + "image": "images/orange-SVMEasy-new.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "svmeasy", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 11, + "description": "" + } + }, + { + "pk": 58, + "model": "workflows.abstractoutput", + "fields": { + "widget": 54, + "name": "Support Vector Machine Easy Classifier", + "short_name": "sve", + "variable": "svmeasyout", + "uid": "5add6c11-7052-4b5a-975f-b5cf25aada99", + "order": 1, + "description": "" + } + }, + { + "pk": 46, + "model": "workflows.abstractwidget", + "fields": { + "category": 11, + "treeview_image": "treeview/orange-kNN.png", + "name": "k-Nearest Neighbours", + "is_streaming": false, + "uid": "f669618e-108c-4636-8ce5-ec04b26f4642", + "interaction_view": "", + "image": "images/orange-kNN.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "knn", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 12, + "description": "" + } + }, + { + "pk": 50, + "model": "workflows.abstractoutput", + "fields": { + "widget": 46, + "name": "kNN Clasisifier", + 
"short_name": "knn", + "variable": "knnout", + "uid": "423a0b89-5430-437c-a6d5-6725d834a469", + "order": 1, + "description": "" + } + }, + { + "pk": 12, + "model": "workflows.category", + "fields": { + "uid": "583debcb-5041-46b3-b464-e21875896710", + "parent": 10, + "workflow": null, + "user": null, + "order": 2, + "name": "Evaluation" + } + }, + { + "pk": 55, + "model": "workflows.abstractwidget", + "fields": { + "category": 12, + "treeview_image": "treeview/Orange-ApplyClassifier5.png", + "name": "Apply Classifier", + "is_streaming": false, + "uid": "00e860b0-d62d-4e1d-8f58-0a1755c0f0e1", + "interaction_view": "", + "image": "images/Orange-ApplyClassifier5.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "apply_classifier", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 106, + "model": "workflows.abstractinput", + "fields": { + "widget": 55, + "name": "Classifier", + "short_name": "clf", + "uid": "fd103975-883d-4501-9fe3-9ebadb01ae4f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "classifier", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 107, + "model": "workflows.abstractinput", + "fields": { + "widget": 55, + "name": "Dataset", + "short_name": "dat", + "uid": "7c16a50f-c548-4104-afd2-0f3be086fc9e", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 59, + "model": "workflows.abstractoutput", + "fields": { + "widget": 55, + "name": "Dataset", + "short_name": "dat", + "variable": "data", + "uid": "fd7a69ac-a8bf-4681-bf32-328fde34d742", + "order": 1, + "description": "" + } + }, + { + "pk": 56, + "model": "workflows.abstractwidget", + "fields": { + "category": 12, + 
"treeview_image": "treeview/Orange-BuildClassifier.png", + "name": "Build Classifier", + "is_streaming": false, + "uid": "39809f9a-3742-4ca0-908a-1d8a041261af", + "interaction_view": "", + "image": "images/Orange-BuildClassifier.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "build_classifier", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, + "description": "" + } + }, + { + "pk": 108, + "model": "workflows.abstractinput", + "fields": { + "widget": 56, + "name": "learner", + "short_name": "lrn", + "uid": "24b03594-14c8-4534-98de-b4bf703fef63", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "learner", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 109, + "model": "workflows.abstractinput", + "fields": { + "widget": 56, + "name": "data", + "short_name": "dat", + "uid": "3de89884-15aa-40dd-a514-cb7e7d5298c5", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 60, + "model": "workflows.abstractoutput", + "fields": { + "widget": 56, + "name": "classifier", + "short_name": "clf", + "variable": "classifier", + "uid": "d772a4c4-eb0a-4b4c-8386-f1eabd60f371", + "order": 1, + "description": "" + } + }, + { + "pk": 16, + "model": "workflows.category", + "fields": { + "uid": "2c4c8f68-33db-411c-a8ce-176096349eba", + "parent": 12, + "workflow": null, + "user": null, + "order": 1, + "name": "Classification Statistics" + } + }, + { + "pk": 83, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Area under curve (ROC analysis)", + "is_streaming": false, + "uid": "cc2877f0-65d2-4bc2-9868-261546be1447", + "interaction_view": "", + "image": "", + "package": "cforange", + 
"static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_auc", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 163, + "model": "workflows.abstractinput", + "fields": { + "widget": 83, + "name": "Results", + "short_name": "res", + "uid": "51a10241-e26e-4fab-831b-4295ac82081a", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 164, + "model": "workflows.abstractinput", + "fields": { + "widget": 83, + "name": "Method", + "short_name": "met", + "uid": "538f7e3a-8650-4c33-ad49-d00221d86145", + "default": "0", + "required": false, + "multi": false, + "parameter_type": "select", + "variable": "method", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 37, + "model": "workflows.abstractoption", + "fields": { + "uid": "cde2a234-f9be-461a-9298-d3f0698cea8b", + "abstract_input": 164, + "value": "1", + "name": "By Pairs" + } + }, + { + "pk": 36, + "model": "workflows.abstractoption", + "fields": { + "uid": "ad6a3d61-5be9-45f7-b9d6-9826dde0986c", + "abstract_input": 164, + "value": "0", + "name": "By Weighted Pairs" + } + }, + { + "pk": 39, + "model": "workflows.abstractoption", + "fields": { + "uid": "64e1c620-ce14-4c78-9aed-199bf803b6b7", + "abstract_input": 164, + "value": "3", + "name": "One Against All" + } + }, + { + "pk": 38, + "model": "workflows.abstractoption", + "fields": { + "uid": "8f39364a-2698-430e-912e-dd8c547ca0a7", + "abstract_input": 164, + "value": "2", + "name": "Weighted One Against All" + } + }, + { + "pk": 93, + "model": "workflows.abstractoutput", + "fields": { + "widget": 83, + "name": "Area Under Curve", + "short_name": "auc", + "variable": "AUC", + "uid": "c761f9c4-0204-4734-a208-dd8ca3fbd2ba", + "order": 1, + "description": "" + } + }, + { + 
"pk": 77, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Brier Score", + "is_streaming": false, + "uid": "a8e19b70-c295-484c-acca-8ce12c489793", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_brier_score", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 2, + "description": "Computes the Brier's score, defined as the average (over test examples) of sumx(t(x)-p(x))^2, where x is a class, t(x) is 1 for the correct class and 0 for the others, and p(x) is the probability that the classifier assigned to the class x." + } + }, + { + "pk": 148, + "model": "workflows.abstractinput", + "fields": { + "widget": 77, + "name": "Results", + "short_name": "res", + "uid": "dc37ebf6-0148-4aee-945a-857ea3e8e4fb", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 149, + "model": "workflows.abstractinput", + "fields": { + "widget": 77, + "name": "Report Standard Error", + "short_name": "rse", + "uid": "49a8722a-74b9-4cd6-a9bf-aec25ff05120", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "reportSE", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 79, + "model": "workflows.abstractoutput", + "fields": { + "widget": 77, + "name": "Brier Score", + "short_name": "bs", + "variable": "bs", + "uid": "62634c2a-524e-4293-9a37-922a2902afae", + "order": 1, + "description": "" + } + }, + { + "pk": 75, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Classification Accuracy", + "is_streaming": false, + "uid": "c3554695-87ec-43a7-8bac-f6738f462a09", + "interaction_view": "", + "image": "", 
+ "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_classification_accuracy", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, + "description": "Computes classification accuracy, i.e. percentage of matches between predicted and actual class. The function returns a list of classification accuracies of all classifiers tested. If reportSE is set to true, the list will contain tuples with accuracies and standard errors." + } + }, + { + "pk": 144, + "model": "workflows.abstractinput", + "fields": { + "widget": 75, + "name": "Results", + "short_name": "res", + "uid": "844835a9-5473-4eba-ad40-162ec8712088", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 145, + "model": "workflows.abstractinput", + "fields": { + "widget": 75, + "name": "Report Standard Error", + "short_name": "rse", + "uid": "322d1975-8cff-423c-b73a-e7f5a1488e2e", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "reportSE", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 77, + "model": "workflows.abstractoutput", + "fields": { + "widget": 75, + "name": "Classification Accuracy", + "short_name": "ca", + "variable": "ca", + "uid": "236ad57c-2e84-4df4-9043-0f0d34b45b9b", + "order": 1, + "description": "" + } + }, + { + "pk": 81, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Confusion Matrix", + "is_streaming": false, + "uid": "5cab835b-82f8-4146-a4d0-ff3b3f923395", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_confusion_matrix", + "wsdl_method": "", + "wsdl": 
"", + "interactive": false, + "has_progress_bar": false, + "order": 4, + "description": "" + } + }, + { + "pk": 158, + "model": "workflows.abstractinput", + "fields": { + "widget": 81, + "name": "Results", + "short_name": "res", + "uid": "cbce1aeb-69f9-44ab-9ba1-58e7f7c7edd2", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 160, + "model": "workflows.abstractinput", + "fields": { + "widget": 81, + "name": "Cutoff", + "short_name": "ctf", + "uid": "c6fa462e-df86-424e-8803-3ded38ee6f73", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "cutoff", + "parameter": true, + "order": 1, + "description": "We can also add the keyword argument cutoff (e.g. confusionMatrices(results, cutoff=0.3); if we do, confusionMatrices will disregard the classifiers' class predictions and observe the predicted probabilities, and consider the prediction \"positive\" if the predicted probability of the positive class is higher than the cutoff." 
+ } + }, + { + "pk": 159, + "model": "workflows.abstractinput", + "fields": { + "widget": 81, + "name": "Class Index", + "short_name": "cli", + "uid": "93b12c97-c01a-42a9-a934-b9c0a220f67a", + "default": "-1", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "classIndex", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 83, + "model": "workflows.abstractoutput", + "fields": { + "widget": 81, + "name": "Confusion Matrix", + "short_name": "cm", + "variable": "cm", + "uid": "80ffab5b-b464-44e3-b659-ee3cbc6318e3", + "order": 1, + "description": "" + } + }, + { + "pk": 82, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Confusion Matrix Computations", + "is_streaming": false, + "uid": "7ac99069-f563-4580-a0de-2aa79b47ff4b", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_confusion_matrix_computations", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 5, + "description": "With the confusion matrix defined in terms of positive and negative classes, you can also compute the sensitivity [TP/(TP+FN)], specificity [TN/(TN+FP)], positive predictive value [TP/(TP+FP)] and negative predictive value [TN/(TN+FN)]. In information retrieval, positive predictive value is called precision (the ratio of the number of relevant records retrieved to the total number of irrelevant and relevant records retrieved), and sensitivity is called recall (the ratio of the number of relevant records retrieved to the total number of relevant records in the database). 
The harmonic mean of precision and recall is called an F-measure, where, depending on the ratio of the weight between precision and recall is implemented as F1 [2*precision*recall/(precision+recall)] or, for a general case, Falpha [(1+alpha)*precision*recall / (alpha*precision + recall)]." + } + }, + { + "pk": 161, + "model": "workflows.abstractinput", + "fields": { + "widget": 82, + "name": "Confusion Matrix", + "short_name": "cm", + "uid": "5bbce817-42f9-4454-ade9-c6b73a8b4f56", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "cm", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 162, + "model": "workflows.abstractinput", + "fields": { + "widget": 82, + "name": "alpha", + "short_name": "alp", + "uid": "26e4f697-0231-4cf8-8cbd-2cb325441ba7", + "default": "2.0", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "alpha", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 84, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Sensitivity", + "short_name": "sen", + "variable": "sens", + "uid": "4cb7033f-0e4e-425c-96a2-529c8ba17c69", + "order": 1, + "description": "" + } + }, + { + "pk": 85, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Specificity", + "short_name": "spe", + "variable": "spec", + "uid": "0704171d-35c0-4ae5-9be5-e62103c5a4a2", + "order": 2, + "description": "" + } + }, + { + "pk": 86, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Positive Predictive Value", + "short_name": "ppv", + "variable": "PPV", + "uid": "75932ec5-48f7-4d32-bfbf-7054ed6140e5", + "order": 3, + "description": "" + } + }, + { + "pk": 87, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Negative Predictive Value", + "short_name": "npv", + "variable": "NPV", + "uid": "db506973-2b95-4e59-9881-4e365b54c5d9", + "order": 4, + 
"description": "" + } + }, + { + "pk": 88, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Precision", + "short_name": "pre", + "variable": "precision", + "uid": "2d390622-897a-411a-bbbf-b2ca33f84271", + "order": 5, + "description": "" + } + }, + { + "pk": 89, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Recall", + "short_name": "rec", + "variable": "recall", + "uid": "e0b726bb-063d-4759-a1c0-eb8becc7c65b", + "order": 6, + "description": "" + } + }, + { + "pk": 90, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "F1", + "short_name": "f1", + "variable": "F1", + "uid": "4b24d1e9-7a47-4083-ba7c-00c1112ff320", + "order": 7, + "description": "" + } + }, + { + "pk": 91, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "Falpha", + "short_name": "fal", + "variable": "Falpha", + "uid": "8a068c50-6b65-42db-8f95-8c0cfedfeeb0", + "order": 8, + "description": "" + } + }, + { + "pk": 92, + "model": "workflows.abstractoutput", + "fields": { + "widget": 82, + "name": "MCC", + "short_name": "mcc", + "variable": "MCC", + "uid": "83ac8895-17e3-467d-a107-58b112bf1655", + "order": 9, + "description": "" + } + }, + { + "pk": 76, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Correct Class Average Probability", + "is_streaming": false, + "uid": "e21925ad-fdc3-487d-944e-5eb03caa2350", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_average_probability", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 6, + "description": "Computes the average probability assigned to the correct class." 
+ } + }, + { + "pk": 146, + "model": "workflows.abstractinput", + "fields": { + "widget": 76, + "name": "Results", + "short_name": "res", + "uid": "2b1e7d82-9c60-47be-8d27-d88d56de0c5d", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 147, + "model": "workflows.abstractinput", + "fields": { + "widget": 76, + "name": "Report Standard Error", + "short_name": "rse", + "uid": "3dfeb6f5-fe7a-461a-b965-de7daa0b61eb", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "reportSE", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 78, + "model": "workflows.abstractoutput", + "fields": { + "widget": 76, + "name": "Average Probability", + "short_name": "ap", + "variable": "ap", + "uid": "39018d38-5c76-41db-a08b-3d1e02793e8f", + "order": 1, + "description": "" + } + }, + { + "pk": 78, + "model": "workflows.abstractwidget", + "fields": { + "category": 16, + "treeview_image": "", + "name": "Information Score", + "is_streaming": false, + "uid": "5fed0e33-c267-4522-a4ff-faf745e80be6", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_information_score", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 7, + "description": "Computes the information score as defined by Kononenko and Bratko (1991). 
" + } + }, + { + "pk": 150, + "model": "workflows.abstractinput", + "fields": { + "widget": 78, + "name": "Results", + "short_name": "res", + "uid": "d85b2f09-ae60-49b7-9b07-b73e4f2f37f9", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 151, + "model": "workflows.abstractinput", + "fields": { + "widget": 78, + "name": "Report Standard Error", + "short_name": "rse", + "uid": "34afe990-c3e7-40ac-93c9-15283b3e12fc", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "reportSE", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 80, + "model": "workflows.abstractoutput", + "fields": { + "widget": 78, + "name": "Information Score", + "short_name": "is", + "variable": "is", + "uid": "ae1c8ffc-ef32-4ce4-94c3-3a59ba0ca301", + "order": 1, + "description": "" + } + }, + { + "pk": 17, + "model": "workflows.category", + "fields": { + "uid": "d13c01e6-675f-4c48-a211-b24af72229b0", + "parent": 12, + "workflow": null, + "user": null, + "order": 1, + "name": "Regression Statistics" + } + }, + { + "pk": 84, + "model": "workflows.abstractwidget", + "fields": { + "category": 17, + "treeview_image": "", + "name": "Mean Squared Error", + "is_streaming": false, + "uid": "c527ea0f-336b-4334-8288-bce4e742e043", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_MSE", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 165, + "model": "workflows.abstractinput", + "fields": { + "widget": 84, + "name": "Results", + "short_name": "res", + "uid": "1f40bcbb-e919-4078-8f11-fc2462bba84b", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, 
+ "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 94, + "model": "workflows.abstractoutput", + "fields": { + "widget": 84, + "name": "Mean Squared Error", + "short_name": "mse", + "variable": "MSE", + "uid": "b5a01b55-2f97-4401-8183-6f774b7f18cc", + "order": 1, + "description": "" + } + }, + { + "pk": 86, + "model": "workflows.abstractwidget", + "fields": { + "category": 17, + "treeview_image": "", + "name": "Mean absolute error", + "is_streaming": false, + "uid": "feffec1f-247b-4740-9ddc-df549c5fbaf6", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_MAE", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 167, + "model": "workflows.abstractinput", + "fields": { + "widget": 86, + "name": "Results", + "short_name": "res", + "uid": "b8b260d7-3497-42f5-93a0-09a411a03852", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 96, + "model": "workflows.abstractoutput", + "fields": { + "widget": 86, + "name": "Mean Absolute Error", + "short_name": "mse", + "variable": "MSE", + "uid": "bf09d357-d915-4ba6-b970-955bef6ee5c5", + "order": 1, + "description": "" + } + }, + { + "pk": 89, + "model": "workflows.abstractwidget", + "fields": { + "category": 17, + "treeview_image": "", + "name": "R-squared", + "is_streaming": false, + "uid": "2299e615-4d72-4db7-9b32-3566b903c749", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_R2", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + 
"description": "" + } + }, + { + "pk": 170, + "model": "workflows.abstractinput", + "fields": { + "widget": 89, + "name": "Results", + "short_name": "res", + "uid": "e06248e9-dd6d-4746-8972-1cbf6e6f116f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 99, + "model": "workflows.abstractoutput", + "fields": { + "widget": 89, + "name": "R-Squared", + "short_name": "r2", + "variable": "R2", + "uid": "889fcdf7-7475-47f7-b1e0-0c199398f037", + "order": 1, + "description": "" + } + }, + { + "pk": 87, + "model": "workflows.abstractwidget", + "fields": { + "category": 17, + "treeview_image": "", + "name": "Relative Squared Error", + "is_streaming": false, + "uid": "0d32c3a7-1f94-4414-92f0-bb81a9152636", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_RSE", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 168, + "model": "workflows.abstractinput", + "fields": { + "widget": 87, + "name": "Results", + "short_name": "res", + "uid": "64ff64b7-5878-4c9f-8a43-908d3d9b187a", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 97, + "model": "workflows.abstractoutput", + "fields": { + "widget": 87, + "name": "Relative Squared Error", + "short_name": "rse", + "variable": "RSE", + "uid": "66f3b71d-c53f-4653-9cb8-307b9eec70aa", + "order": 1, + "description": "" + } + }, + { + "pk": 88, + "model": "workflows.abstractwidget", + "fields": { + "category": 17, + "treeview_image": "", + "name": "Root Relative Squared Error", + "is_streaming": false, + "uid": "c9ecd12e-eaff-41b6-b104-dff7fb60b124", + 
"interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_RRSE", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 169, + "model": "workflows.abstractinput", + "fields": { + "widget": 88, + "name": "Results", + "short_name": "res", + "uid": "44ac11fb-201e-4d8f-b0b2-4d7668ea49a2", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 98, + "model": "workflows.abstractoutput", + "fields": { + "widget": 88, + "name": "Root Relative Squared Error", + "short_name": "rrs", + "variable": "RRSE", + "uid": "aa8347f5-5243-4336-9c0e-2d9649fc5de2", + "order": 1, + "description": "" + } + }, + { + "pk": 85, + "model": "workflows.abstractwidget", + "fields": { + "category": 17, + "treeview_image": "", + "name": "Root mean-squared error", + "is_streaming": false, + "uid": "dc66f264-14a6-491b-9c39-36bded8f95d0", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_RMSE", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 166, + "model": "workflows.abstractinput", + "fields": { + "widget": 85, + "name": "Results", + "short_name": "res", + "uid": "588d6e48-4365-43e7-8388-09da5fe660ea", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 95, + "model": "workflows.abstractoutput", + "fields": { + "widget": 85, + "name": "Root Mean Squared Error", + "short_name": "rms", + "variable": "RMSE", + 
"uid": "9ff06017-8f60-4456-916a-6261222631db", + "order": 1, + "description": "" + } + }, + { + "pk": 15, + "model": "workflows.category", + "fields": { + "uid": "8b90267f-c82d-4b62-bee3-66184f4db931", + "parent": 12, + "workflow": null, + "user": null, + "order": 1, + "name": "Testing" + } + }, + { + "pk": 74, + "model": "workflows.abstractwidget", + "fields": { + "category": 15, + "treeview_image": "", + "name": "Cross Validation", + "is_streaming": false, + "uid": "51906007-2471-4869-89f9-f3f805358d13", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_cross_validation", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 141, + "model": "workflows.abstractinput", + "fields": { + "widget": 74, + "name": "Dataset", + "short_name": "odt", + "uid": "70f67094-39bb-4dad-b567-049002dabdac", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 142, + "model": "workflows.abstractinput", + "fields": { + "widget": 74, + "name": "Learner", + "short_name": "lrn", + "uid": "a7f392fe-55f7-48df-9afb-1d0044562a2a", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "learner", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 143, + "model": "workflows.abstractinput", + "fields": { + "widget": 74, + "name": "Folds", + "short_name": "fld", + "uid": "2e4561d0-99fd-44fd-99a5-7ac32457aa58", + "default": "10", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "folds", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 76, + "model": "workflows.abstractoutput", + "fields": { + "widget": 74, + "name": 
"Results", + "short_name": "res", + "variable": "results", + "uid": "59a55d53-ca8a-4a52-9cf5-e690116ef6ac", + "order": 1, + "description": "" + } + }, + { + "pk": 73, + "model": "workflows.abstractwidget", + "fields": { + "category": 15, + "treeview_image": "", + "name": "Cross Validation (multiple learners)", + "is_streaming": false, + "uid": "6c168f2e-e47f-409a-97c1-58d50fbbec4d", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_multiple_cross_validation", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 138, + "model": "workflows.abstractinput", + "fields": { + "widget": 73, + "name": "Dataset", + "short_name": "odt", + "uid": "16d1fe96-ce53-4a94-8c3e-90ac444b45f8", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 139, + "model": "workflows.abstractinput", + "fields": { + "widget": 73, + "name": "Learners", + "short_name": "lrn", + "uid": "499f4007-55c3-431c-93ca-5f493a9e592c", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "learners", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 140, + "model": "workflows.abstractinput", + "fields": { + "widget": 73, + "name": "Folds", + "short_name": "fld", + "uid": "2b7ef618-6f7a-4f0c-ad5f-50000aa5ec0b", + "default": "10", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "folds", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 75, + "model": "workflows.abstractoutput", + "fields": { + "widget": 73, + "name": "Results", + "short_name": "res", + "variable": "results", + "uid": "e966fc28-1217-4922-a0d7-d26cdc872872", + "order": 1, + 
"description": "" + } + }, + { + "pk": 80, + "model": "workflows.abstractwidget", + "fields": { + "category": 15, + "treeview_image": "", + "name": "Leave One Out", + "is_streaming": false, + "uid": "0e9514ce-914a-4dd0-9952-9cd14877c427", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_leave_one_out", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Performs a leave-one-out experiment with the given list of learners and examples. This is equivalent to performing len(examples)-fold cross validation. Function accepts additional keyword arguments for preprocessing, storing classifiers and verbose output." + } + }, + { + "pk": 156, + "model": "workflows.abstractinput", + "fields": { + "widget": 80, + "name": "learners", + "short_name": "lrn", + "uid": "153d8297-ab47-4c51-a540-ed5cf907173f", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "learners", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 157, + "model": "workflows.abstractinput", + "fields": { + "widget": 80, + "name": "dataset", + "short_name": "odt", + "uid": "51ffb874-f2b3-431c-a38b-16177631fd6f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 82, + "model": "workflows.abstractoutput", + "fields": { + "widget": 80, + "name": "Results", + "short_name": "res", + "variable": "results", + "uid": "8d618b48-62db-4f71-8440-5797234117af", + "order": 1, + "description": "" + } + }, + { + "pk": 79, + "model": "workflows.abstractwidget", + "fields": { + "category": 15, + "treeview_image": "", + "name": "Proportion Test", + "is_streaming": false, + "uid": "c605a0f4-a3b0-4394-8410-9da8854416a2", + 
"interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_proportion_test", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Splits the data with a number of examples in the learning and the rest in the testing set. The test is repeated for a given number of times (default 10). Division is stratified by default. Function also accepts keyword arguments for randomization and storing classifiers." + } + }, + { + "pk": 152, + "model": "workflows.abstractinput", + "fields": { + "widget": 79, + "name": "learners", + "short_name": "lrn", + "uid": "b77f51b3-6c21-4d6a-922a-bbbb4679155d", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "learners", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 153, + "model": "workflows.abstractinput", + "fields": { + "widget": 79, + "name": "dataset", + "short_name": "odt", + "uid": "2b2037f4-b240-4b26-81e5-8ca01a215193", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 154, + "model": "workflows.abstractinput", + "fields": { + "widget": 79, + "name": "Proportion", + "short_name": "prp", + "uid": "ff523b52-e08b-4da5-bb2c-7050731dcf16", + "default": "0.7", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "learnProp", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 155, + "model": "workflows.abstractinput", + "fields": { + "widget": 79, + "name": "Number of repetitions", + "short_name": "rep", + "uid": "da5ab14e-9e1c-4395-b6ce-8ff0d53de4c9", + "default": "100", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "times", + "parameter": true, + "order": 4, 
+ "description": "" + } + }, + { + "pk": 81, + "model": "workflows.abstractoutput", + "fields": { + "widget": 79, + "name": "Results", + "short_name": "res", + "variable": "results", + "uid": "3e0d529d-53f2-4cbe-9df9-37903837eba5", + "order": 1, + "description": "" + } + }, + { + "pk": 13, + "model": "workflows.category", + "fields": { + "uid": "aa3ed779-1e0b-4c6b-883e-24d85b9a6009", + "parent": 10, + "workflow": null, + "user": null, + "order": 3, + "name": "Utilities" + } + }, + { + "pk": 99, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "ARFF string to Orange Data Table", + "is_streaming": false, + "uid": "d978d9ba-40dc-427e-b16f-066f13ada8e5", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "load_dataset_from_arff_string", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 189, + "model": "workflows.abstractinput", + "fields": { + "widget": 99, + "name": "Arff string", + "short_name": "arf", + "uid": "", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "arff", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 111, + "model": "workflows.abstractoutput", + "fields": { + "widget": 99, + "name": "Orange Data Table", + "short_name": "odt", + "variable": "dataset", + "uid": "", + "order": 1, + "description": "" + } + }, + { + "pk": 57, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "treeview/add_noise3.png", + "name": "Add Class Noise", + "is_streaming": false, + "uid": "4c603285-3aa7-4b55-8d57-cc8285e212c2", + "interaction_view": "", + "image": "images/add_noise3.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + 
"visualization_view": "", + "action": "add_class_noise", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Adds class noise to a specified percentage of data instances." + } + }, + { + "pk": 110, + "model": "workflows.abstractinput", + "fields": { + "widget": 57, + "name": "Random Seed", + "short_name": "rns", + "uid": "89c20239-c4e0-4368-9828-ab73155e2457", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "rnd_seed", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 111, + "model": "workflows.abstractinput", + "fields": { + "widget": 57, + "name": "Dataset", + "short_name": "ds", + "uid": "aa5d2630-534f-4662-8da5-342b2e87995b", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 112, + "model": "workflows.abstractinput", + "fields": { + "widget": 57, + "name": "Noise Level (%)", + "short_name": "nlv", + "uid": "9c5ec335-76a0-4208-81a0-76b4a0457d5d", + "default": "5", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "noise_level", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 61, + "model": "workflows.abstractoutput", + "fields": { + "widget": 57, + "name": "Noisy Dataset", + "short_name": "nds", + "variable": "noisy_data", + "uid": "3b5b1e3c-a589-47f5-bd88-e65ccfcfc988", + "order": 1, + "description": "" + } + }, + { + "pk": 62, + "model": "workflows.abstractoutput", + "fields": { + "widget": 57, + "name": "Noise Indices", + "short_name": "nid", + "variable": "noise_inds", + "uid": "4ce3dc8e-7947-4ab4-825b-2b5afe379e34", + "order": 2, + "description": "" + } + }, + { + "pk": 58, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "Alter table", + "is_streaming": false, + "uid": 
"6f73d7a0-ed32-4ff8-9743-6a5d0867c693", + "interaction_view": "alter_table", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "alter_table_finished", + "user": null, + "visualization_view": "", + "action": "alter_table", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 113, + "model": "workflows.abstractinput", + "fields": { + "widget": 58, + "name": "Table", + "short_name": "tab", + "uid": "1aff9f6a-6d0b-4cf2-ae5c-6de3301dc931", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 63, + "model": "workflows.abstractoutput", + "fields": { + "widget": 58, + "name": "Table", + "short_name": "tab", + "variable": "altered_data", + "uid": "27079127-e4e6-45bd-a2e2-11aade432583", + "order": 1, + "description": "Altered table" + } + }, + { + "pk": 59, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "treeview/Load-ODT.png", + "name": "Load Dataset to Orange Data Table", + "is_streaming": false, + "uid": "776564a9-74ae-4651-8340-c52c055cecbc", + "interaction_view": "", + "image": "images/Load-ODT.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "load_dataset", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 114, + "model": "workflows.abstractinput", + "fields": { + "widget": 59, + "name": "File", + "short_name": "fil", + "uid": "748e5dff-6c21-4feb-af71-aaf955f73893", + "default": "", + "required": false, + "multi": false, + "parameter_type": "file", + "variable": "file", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 64, + "model": "workflows.abstractoutput", + 
"fields": { + "widget": 59, + "name": "Dataset", + "short_name": "ds", + "variable": "dataset", + "uid": "91783e70-d1df-4467-939f-d0800108850c", + "order": 1, + "description": "" + } + }, + { + "pk": 61, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "treeview/ODT-to-ARFF-string.png", + "name": "Orange Data Table to ARFF String", + "is_streaming": false, + "uid": "e85e8eee-a6c1-464c-ab11-fecf4240fdd0", + "interaction_view": "", + "image": "images/ODT-to-ARFF-string.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "odt_to_arff", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Transforms an orange data table to an arff string." + } + }, + { + "pk": 116, + "model": "workflows.abstractinput", + "fields": { + "widget": 61, + "name": "Orange data table", + "short_name": "odt", + "uid": "052788be-6a3d-4c8f-be34-b957ff445c04", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "odt", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 65, + "model": "workflows.abstractoutput", + "fields": { + "widget": 61, + "name": "ARFF", + "short_name": "arf", + "variable": "arff", + "uid": "02062c11-09e0-4bfd-b712-ad1c655a5e02", + "order": 1, + "description": "" + } + }, + { + "pk": 60, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "Orange Data Table to ARFF file", + "is_streaming": false, + "uid": "c5a545bb-e0ac-4b73-aa76-4add863e3252", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "odt_to_arff", + "action": "string_to_file", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" 
+ } + }, + { + "pk": 115, + "model": "workflows.abstractinput", + "fields": { + "widget": 60, + "name": "Data", + "short_name": "dat", + "uid": "1eb8eb33-61d6-4b77-93ef-abfe6fd2c15f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 62, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "Orange Data Table to CSV file", + "is_streaming": false, + "uid": "dae920fc-c301-4285-9707-9aa8dbb9b9f7", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "odt_to_csv", + "action": "string_to_file", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 117, + "model": "workflows.abstractinput", + "fields": { + "widget": 62, + "name": "Data", + "short_name": "dat", + "uid": "a9f98093-9488-483a-bfd2-4e484b1c1ccb", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 63, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "Orange Data Table to TAB file", + "is_streaming": false, + "uid": "6f475ead-0c0e-4b77-bc1a-ccfa7614962c", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "odt_to_tab", + "action": "string_to_file", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 118, + "model": "workflows.abstractinput", + "fields": { + "widget": 63, + "name": "Dataset", + "short_name": "dat", + "uid": "1b6f9c59-36f1-4deb-bec7-c9633f6c7fd8", + 
"default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 64, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "treeview/Orange-SelectAttributes_60.png", + "name": "Select Attributes", + "is_streaming": false, + "uid": "7b3455ff-5b2a-4c51-aa09-38dab4dfc2dc", + "interaction_view": "select_attrs", + "image": "images/Orange-SelectAttributes_60.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "select_attrs_post", + "user": null, + "visualization_view": "", + "action": "select_attrs", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 119, + "model": "workflows.abstractinput", + "fields": { + "widget": 64, + "name": "Data table", + "short_name": "tab", + "uid": "d0f1d1e2-b3e8-4f79-bb88-50c2f3812e79", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "Data table in form of a .tab file." + } + }, + { + "pk": 66, + "model": "workflows.abstractoutput", + "fields": { + "widget": 64, + "name": "Data table", + "short_name": "tab", + "variable": "data", + "uid": "59a32aa6-0c2d-41d4-a0f0-690eef9ee3c1", + "order": 1, + "description": "Modified data table, with designated Class Attribute." 
+ } + }, + { + "pk": 65, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "treeview/orange-SelectData.png", + "name": "Select Data", + "is_streaming": false, + "uid": "2039c5fe-48a1-426c-b418-f0a16fdf1b18", + "interaction_view": "select_data", + "image": "images/orange-SelectData.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "select_data_post", + "user": null, + "visualization_view": "", + "action": "select_data", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 1, + "description": "Select data according to the filter provided by user" + } + }, + { + "pk": 120, + "model": "workflows.abstractinput", + "fields": { + "widget": 65, + "name": "data", + "short_name": "dat", + "uid": "042b574e-c08a-43ad-87db-9030a8a74cb8", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "Data to be filtered." + } + }, + { + "pk": 67, + "model": "workflows.abstractoutput", + "fields": { + "widget": 65, + "name": "Matching Data", + "short_name": "dat", + "variable": "data", + "uid": "93e88b7e-048a-412d-b93f-552659e62ecb", + "order": 1, + "description": "Data matching the filter provided" + } + }, + { + "pk": 68, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "Split dataset", + "is_streaming": false, + "uid": "af4010a8-836c-41c7-adf6-50118295cf8e", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_split_dataset", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Splits a dataset to a train and test dataset." 
+ } + }, + { + "pk": 123, + "model": "workflows.abstractinput", + "fields": { + "widget": 68, + "name": "Dataset", + "short_name": "odt", + "uid": "fe354f76-d6ed-4907-9ab6-d8784b3c7620", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "A dataset that will be split into two parts" + } + }, + { + "pk": 124, + "model": "workflows.abstractinput", + "fields": { + "widget": 68, + "name": "Split factor", + "short_name": "p", + "uid": "6a637f14-7ba4-4c33-90e0-6cc260587260", + "default": "0.5", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "p", + "parameter": true, + "order": 1, + "description": "A number between 0 and 1 that determines the sizes of the two datasets" + } + }, + { + "pk": 69, + "model": "workflows.abstractoutput", + "fields": { + "widget": 68, + "name": "Train data", + "short_name": "tra", + "variable": "train_data", + "uid": "665e0af4-6e6e-4616-8e96-2d8866497a40", + "order": 1, + "description": "Train data" + } + }, + { + "pk": 70, + "model": "workflows.abstractoutput", + "fields": { + "widget": 68, + "name": "Test data", + "short_name": "tes", + "variable": "test_data", + "uid": "514abf26-b0d5-4483-9273-6b930bf4eb68", + "order": 1, + "description": "Test data" + } + }, + { + "pk": 66, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "", + "name": "Table viewer", + "is_streaming": false, + "uid": "5becb9ab-93a8-4811-aa37-66ca85a7c0f3", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "table_viewer", + "action": "table_viewer", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 121, + "model": "workflows.abstractinput", + "fields": { + "widget": 66, + "name": "Table", + 
"short_name": "tab", + "uid": "8b1571c3-dc4d-401b-a634-560236572496", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 67, + "model": "workflows.abstractwidget", + "fields": { + "category": 13, + "treeview_image": "treeview/UCI-2-ODT.png", + "name": "UCI Dataset to Orange Data Table", + "is_streaming": false, + "uid": "4e8683ae-c571-413c-a592-2093de96e8d1", + "interaction_view": "", + "image": "images/UCI-2-ODT.png", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "uci_to_odt", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 122, + "model": "workflows.abstractinput", + "fields": { + "widget": 67, + "name": "Dataset", + "short_name": "ds", + "uid": "268452b1-6016-404d-bdb8-01f03df4890f", + "default": "iris.tab", + "required": false, + "multi": false, + "parameter_type": "select", + "variable": "filename", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 10, + "model": "workflows.abstractoption", + "fields": { + "uid": "0808be1d-5602-4277-9571-96ecb1faf58b", + "abstract_input": 122, + "value": "adult_sample.tab", + "name": "Adult (sample)" + } + }, + { + "pk": 11, + "model": "workflows.abstractoption", + "fields": { + "uid": "46fca181-b956-47c6-8281-eb60b69052d8", + "abstract_input": 122, + "value": "balance-scale.tab", + "name": "Balance Scale" + } + }, + { + "pk": 12, + "model": "workflows.abstractoption", + "fields": { + "uid": "84129065-b876-4ce7-ac3a-09d5dd19989b", + "abstract_input": 122, + "value": "breast-cancer.tab", + "name": "Breast Cancer" + } + }, + { + "pk": 13, + "model": "workflows.abstractoption", + "fields": { + "uid": "dd097a8f-01c7-4ec6-b00e-22efe6aeeb61", + "abstract_input": 122, + "value": 
"breast-cancer-wisconsin.tab", + "name": "Breast Cancer Wisconsin" + } + }, + { + "pk": 14, + "model": "workflows.abstractoption", + "fields": { + "uid": "45898880-5a9e-4b8f-9a04-d04c77a0ab28", + "abstract_input": 122, + "value": "car.tab", + "name": "Car Evaluation" + } + }, + { + "pk": 15, + "model": "workflows.abstractoption", + "fields": { + "uid": "2b7b385d-b1c5-4242-86eb-ce77602f2c8b", + "abstract_input": 122, + "value": "voting.tab", + "name": "Congressional Voting Records" + } + }, + { + "pk": 16, + "model": "workflows.abstractoption", + "fields": { + "uid": "e2e36bac-e538-4783-9821-81d57a5eba58", + "abstract_input": 122, + "value": "crx.tab", + "name": "Credit Approval" + } + }, + { + "pk": 17, + "model": "workflows.abstractoption", + "fields": { + "uid": "33892420-6b22-4262-93d8-45f8c0fb05f8", + "abstract_input": 122, + "value": "glass.tab", + "name": "Glass Identification" + } + }, + { + "pk": 18, + "model": "workflows.abstractoption", + "fields": { + "uid": "346d63f4-53ec-40d1-8080-e97e1f8b7663", + "abstract_input": 122, + "value": "heart_disease.tab", + "name": "Heart Disease" + } + }, + { + "pk": 19, + "model": "workflows.abstractoption", + "fields": { + "uid": "6edc1a09-369f-4465-8b0e-0c6f192f0d82", + "abstract_input": 122, + "value": "horse-colic.tab", + "name": "Horse Colic" + } + }, + { + "pk": 40, + "model": "workflows.abstractoption", + "fields": { + "uid": "1e765b4f-0c6f-43d6-a4c8-630faa528030", + "abstract_input": 122, + "value": "housing.tab", + "name": "Housing" + } + }, + { + "pk": 20, + "model": "workflows.abstractoption", + "fields": { + "uid": "e14dbdbb-5849-4a56-b8be-8fc2d75d280d", + "abstract_input": 122, + "value": "ionosphere.tab", + "name": "Ionosphere" + } + }, + { + "pk": 21, + "model": "workflows.abstractoption", + "fields": { + "uid": "bfd3ffc2-f47c-487f-bfa1-4331a1c5aa3c", + "abstract_input": 122, + "value": "iris.tab", + "name": "Iris" + } + }, + { + "pk": 22, + "model": "workflows.abstractoption", + "fields": { + "uid": 
"695fadad-4433-4d51-88d9-c6ca9ccf0730", + "abstract_input": 122, + "value": "kr-vs-kp.tab", + "name": "King Rook vs. King Pawn" + } + }, + { + "pk": 23, + "model": "workflows.abstractoption", + "fields": { + "uid": "27f607c8-651a-4637-a4ab-4f7f12a0a37f", + "abstract_input": 122, + "value": "lenses.tab", + "name": "Lenses" + } + }, + { + "pk": 24, + "model": "workflows.abstractoption", + "fields": { + "uid": "66a974be-f910-4710-a18a-87a98136ae7d", + "abstract_input": 122, + "value": "bupa.tab", + "name": "Liver Disorder (BUPA)" + } + }, + { + "pk": 25, + "model": "workflows.abstractoption", + "fields": { + "uid": "338f8871-e237-4c35-a34c-03228248deac", + "abstract_input": 122, + "value": "lung-cancer.tab", + "name": "Lung Cancer" + } + }, + { + "pk": 26, + "model": "workflows.abstractoption", + "fields": { + "uid": "b0a9f5aa-1b16-4dba-981d-eb9145883700", + "abstract_input": 122, + "value": "promoters.tab", + "name": "Molecular Biology (Promoter Gene Sequences)" + } + }, + { + "pk": 27, + "model": "workflows.abstractoption", + "fields": { + "uid": "a316eaf4-053b-4b02-9c96-c5f80f06d02f", + "abstract_input": 122, + "value": "monks-1_test.tab", + "name": "Monks 1 Test" + } + }, + { + "pk": 28, + "model": "workflows.abstractoption", + "fields": { + "uid": "9ab37819-cdd4-4dba-8a38-21bf7d3076f0", + "abstract_input": 122, + "value": "monks-2_test.tab", + "name": "Monks 2 Test" + } + }, + { + "pk": 29, + "model": "workflows.abstractoption", + "fields": { + "uid": "0d832f9c-6fa6-4af9-bbb5-b6bfdabbbe5e", + "abstract_input": 122, + "value": "monks-3_test.tab", + "name": "Monks 3 Test" + } + }, + { + "pk": 30, + "model": "workflows.abstractoption", + "fields": { + "uid": "f498f469-904c-4e36-88d1-9748495e6cf3", + "abstract_input": 122, + "value": "mushroom.tab", + "name": "Mushroom" + } + }, + { + "pk": 31, + "model": "workflows.abstractoption", + "fields": { + "uid": "136a2c2e-a3b6-4abb-88f1-641ca837804b", + "abstract_input": 122, + "value": "post-operative.tab", + "name": 
"Post-Operative Patient" + } + }, + { + "pk": 32, + "model": "workflows.abstractoption", + "fields": { + "uid": "8ecb78b9-17d4-48d7-8bdf-09c09445a569", + "abstract_input": 122, + "value": "tic_tac_toe.tab", + "name": "Tic Tac Toe" + } + }, + { + "pk": 33, + "model": "workflows.abstractoption", + "fields": { + "uid": "d7033002-c7de-4295-84ff-c9a9b2e32cb6", + "abstract_input": 122, + "value": "wine.tab", + "name": "Wine" + } + }, + { + "pk": 34, + "model": "workflows.abstractoption", + "fields": { + "uid": "1b451375-329b-46f2-b471-9e4d7d0bc6ca", + "abstract_input": 122, + "value": "yeast.tab", + "name": "Yeast" + } + }, + { + "pk": 35, + "model": "workflows.abstractoption", + "fields": { + "uid": "d1464e56-53fc-47fd-b1b4-43d635159c45", + "abstract_input": 122, + "value": "zoo.tab", + "name": "Zoo" + } + }, + { + "pk": 68, + "model": "workflows.abstractoutput", + "fields": { + "widget": 67, + "name": "Orange Data Table", + "short_name": "odt", + "variable": "data", + "uid": "2150098d-bcd0-480a-afac-78a833e76eb5", + "order": 1, + "description": "" + } + }, + { + "pk": 14, + "model": "workflows.category", + "fields": { + "uid": "81efce93-b6c5-4270-bde9-ec946a394b10", + "parent": 10, + "workflow": null, + "user": null, + "order": 4, + "name": "Feature Subset Selection" + } + }, + { + "pk": 72, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": "", + "name": "Filter ReliefF", + "is_streaming": false, + "uid": "7547eb6d-e83d-4033-b0cb-1e8319c2508f", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_filter_relieff", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Takes the data set data and a measure for score of attributes measure. 
Repeats the process of estimating attributes and removing the worst attribute if its measure is lower than margin. Stops when no attribute score is below this margin. The default for measure is relief(k=20, m=50), and margin defaults to 0.0" + } + }, + { + "pk": 134, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "Dataset", + "short_name": "odt", + "uid": "2dc5a9e7-06c8-494f-9f4b-80b3c5eb31ff", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 135, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "k", + "short_name": "k", + "uid": "3ef628d4-03af-4d46-b90b-6c4475054d31", + "default": "20", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "k", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 136, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "m", + "short_name": "m", + "uid": "9b3082b4-5903-41cb-a4c0-63d471f80f57", + "default": "50", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "m", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 137, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "margin", + "short_name": "mrg", + "uid": "1cce36a4-167d-4a23-83ee-eae626a3b492", + "default": "0.0", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "margin", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 74, + "model": "workflows.abstractoutput", + "fields": { + "widget": 72, + "name": "New Dataset", + "short_name": "odt", + "variable": "new_dataset", + "uid": "60efae56-9748-471d-b85d-9c9a3bc84f52", + "order": 1, + "description": "" + } + }, + { + "pk": 69, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": "", + 
"name": "Score estimation (Relief)", + "is_streaming": false, + "uid": "d7f0e9b0-aa2a-4cdb-b4a8-cfb3b0bafa43", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_score_estimation", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 125, + "model": "workflows.abstractinput", + "fields": { + "widget": 69, + "name": "Dataset", + "short_name": "odt", + "uid": "964fbb76-a749-4775-a665-ff0242b7dc2c", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 126, + "model": "workflows.abstractinput", + "fields": { + "widget": 69, + "name": "k", + "short_name": "k", + "uid": "d6f4067c-39c3-4ea5-80f8-77759db7b2dc", + "default": "20", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "k", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 127, + "model": "workflows.abstractinput", + "fields": { + "widget": 69, + "name": "m", + "short_name": "m", + "uid": "cc9cff1e-75d7-474c-8408-dc39aec692af", + "default": "50", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "m", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 71, + "model": "workflows.abstractoutput", + "fields": { + "widget": 69, + "name": "Scores", + "short_name": "sco", + "variable": "results", + "uid": "45a4eb83-dc9d-4800-b9eb-ad145e3abf6c", + "order": 1, + "description": "" + } + }, + { + "pk": 71, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": "", + "name": "Select Attributes Above Threshold", + "is_streaming": false, + "uid": "660c49cb-21ca-4e06-97ca-893946340b7f", + "interaction_view": "", + "image": "", + 
"package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_atts_above_thresh", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Constructs and returns a new data set that includes a class and attributes from the list returned by function attMeasure that have the score above or equal to a specified threshold. data is used to pass an original data set. Parameter threshold is optional and defaults to 0.0." + } + }, + { + "pk": 131, + "model": "workflows.abstractinput", + "fields": { + "widget": 71, + "name": "Dataset", + "short_name": "odt", + "uid": "ca1fcc61-3c66-4908-a381-0974fecda1a1", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 132, + "model": "workflows.abstractinput", + "fields": { + "widget": 71, + "name": "Scores", + "short_name": "sco", + "uid": "2e60fc2e-3470-46d3-a64f-ab1e7c75487b", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "scores", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 133, + "model": "workflows.abstractinput", + "fields": { + "widget": 71, + "name": "Threshold", + "short_name": "thr", + "uid": "2ee5a729-2e13-49c0-805b-120833d80909", + "default": "0.0", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "thresh", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 73, + "model": "workflows.abstractoutput", + "fields": { + "widget": 71, + "name": "New Dataset", + "short_name": "odt", + "variable": "new_dataset", + "uid": "5fe21797-fc14-4bb3-b7fb-de879279a533", + "order": 1, + "description": "" + } + }, + { + "pk": 70, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": "", + "name": 
"Select best N attributes", + "is_streaming": false, + "uid": "68ce4c2e-baf7-4a2e-843b-3b5622c20448", + "interaction_view": "", + "image": "", + "package": "cforange", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "cforange_best_natts", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Constructs and returns a new data set that includes a class and only N best attributes from a list scores. data is used to pass an original data set." + } + }, + { + "pk": 128, + "model": "workflows.abstractinput", + "fields": { + "widget": 70, + "name": "Dataset", + "short_name": "odt", + "uid": "b41c9bbf-7405-451d-8dda-794ce850d452", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "dataset", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 129, + "model": "workflows.abstractinput", + "fields": { + "widget": 70, + "name": "Scores", + "short_name": "sco", + "uid": "a19e140e-4aab-4491-9536-992152a3d56f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "scores", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 130, + "model": "workflows.abstractinput", + "fields": { + "widget": 70, + "name": "N", + "short_name": "n", + "uid": "af3d33a6-674f-42c6-a092-abfd65196617", + "default": "3", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "n", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 72, + "model": "workflows.abstractoutput", + "fields": { + "widget": 70, + "name": "New Dataset", + "short_name": "odt", + "variable": "new_dataset", + "uid": "27db7439-6424-4565-b8ce-711adfe7fcf2", + "order": 1, + "description": "" + } + } +] \ No newline at end of file diff --git a/workflows/cforange/interaction_views.py b/workflows/cforange/interaction_views.py new 
file mode 100644 index 0000000000000000000000000000000000000000..80cf866171802d64a5bdab2d173acb9501094960 --- /dev/null +++ b/workflows/cforange/interaction_views.py @@ -0,0 +1,4 @@ +from django.shortcuts import render + +def cforange_filter_integers(request,input_dict,output_dict,widget): + return render(request, 'interactions/cforange_filter_integers.html',{'widget':widget,'intList':input_dict['intList']}) \ No newline at end of file diff --git a/workflows/cforange/library.py b/workflows/cforange/library.py new file mode 100644 index 0000000000000000000000000000000000000000..04f0fa09ecb04eab57063e5d8d81ce3932662d0d --- /dev/null +++ b/workflows/cforange/library.py @@ -0,0 +1,285 @@ +def cforange_split_dataset(input_dict): + import orange + output_dict = {} + data = input_dict['dataset'] + selection = orange.MakeRandomIndices2(data,float(input_dict['p'])) + train_data = data.select(selection,0) + test_data = data.select(selection,1) + output_dict['train_data']=train_data + output_dict['test_data']=test_data + return output_dict + +def cforange_score_estimation(input_dict): + import orange + import orngFSS + data = input_dict['dataset'] + ma = orngFSS.attMeasure(data,orange.MeasureAttribute_relief(k=int(input_dict['k']), m=int(input_dict['m']))) + output_string = "" + output_dict = {} + output_dict['results'] = ma + return output_dict + +def cforange_best_natts(input_dict): + import orange + import orngFSS + data = input_dict['dataset'] + scores = input_dict['scores'] + n = int(input_dict['n']) + new_dataset = orngFSS.selectBestNAtts(data,scores,n) + output_dict={} + output_dict['new_dataset'] = new_dataset + return output_dict + +def cforange_atts_above_thresh(input_dict): + import orange + import orngFSS + data = input_dict['dataset'] + scores = input_dict['scores'] + thresh = float(input_dict['thresh']) + new_dataset = orngFSS.selectAttsAboveThresh(data,scores,thresh) + output_dict={} + output_dict['new_dataset'] = new_dataset + return output_dict + +def 
cforange_filter_relieff(input_dict): + import orange + import orngFSS + data = input_dict['dataset'] + measure = orange.MeasureAttribute_relief(k=int(input_dict['k']), m=int(input_dict['m'])) + margin = float(input_dict['margin']) + new_dataset = orngFSS.filterRelieff(data,measure,margin) + output_dict = {} + output_dict['new_dataset'] = new_dataset + return output_dict + +def cforange_multiple_cross_validation(input_dict): + import orange, orngTest, orngStat + learners = input_dict['learners'] + data = input_dict['dataset'] + folds = int(input_dict['folds']) + results = orngTest.crossValidation(learners, data, folds=folds) + output_dict = {} + output_dict['results']=results + return output_dict + +def cforange_proportion_test(input_dict): + import orange, orngTest, orngStat + learners = input_dict['learners'] + data = input_dict['dataset'] + learnProp = float(input_dict['learnProp']) + times = int(input_dict['times']) + results = orngTest.proportionTest(learners, data, learnProp, times=times) + output_dict = {} + output_dict['results']=results + return output_dict + +def cforange_leave_one_out(input_dict): + import orange, orngTest, orngStat + learners = input_dict['learners'] + data = input_dict['dataset'] + results = orngTest.leaveOneOut(learners, data) + output_dict = {} + output_dict['results']=results + return output_dict + +def cforange_cross_validation(input_dict): + import orange, orngTest, orngStat + learners = [input_dict['learner']] + data = input_dict['dataset'] + folds = int(input_dict['folds']) + results = orngTest.crossValidation(learners, data, folds=folds) + output_dict = {} + output_dict['results']=results + return output_dict + +def cforange_classification_accuracy(input_dict): + import orngStat + results = input_dict['results'] + CAs = orngStat.CA(results) + if len(CAs)==1: + CAs = CAs[0] + output_dict = {} + output_dict['ca']=CAs + return output_dict + +def cforange_classification_accuracy(input_dict): + import orngStat + results = 
input_dict['results'] + if input_dict['reportSE']=='true': + reportSE = True + else: + reportSE = False + CAs = orngStat.CA(results,reportSE=reportSE) + if len(CAs)==1: + CAs = CAs[0] + output_dict = {} + output_dict['ca']=CAs + return output_dict + +def cforange_average_probability(input_dict): + import orngStat + results = input_dict['results'] + if input_dict['reportSE']=='true': + reportSE = True + else: + reportSE = False + APs = orngStat.AP(results,reportSE=reportSE) + if len(APs)==1: + APs = APs[0] + output_dict = {} + output_dict['ap']=APs + return output_dict + +def cforange_brier_score(input_dict): + import orngStat + results = input_dict['results'] + if input_dict['reportSE']=='true': + reportSE = True + else: + reportSE = False + BSs = orngStat.BrierScore(results,reportSE=reportSE) + if len(BSs)==1: + BSs = BSs[0] + output_dict = {} + output_dict['bs']=BSs + return output_dict + +def cforange_information_score(input_dict): + import orngStat + results = input_dict['results'] + if input_dict['reportSE']=='true': + reportSE = True + else: + reportSE = False + ISs = orngStat.IS(results,apriori=None,reportSE=reportSE) + if len(ISs)==1: + ISs = ISs[0] + output_dict = {} + output_dict['is']=ISs + return output_dict + +def cforange_confusion_matrix(input_dict): + import orngStat + results = input_dict['results'] + classIndex = int(input_dict['classIndex']) + if input_dict['cutoff']!='': + cutoff = float(input_dict['cutoff']) + cm = orngStat.confusionMatrices(results,classIndex=classIndex,cutoff=cutoff) + else: + cm = orngStat.confusionMatrices(results,classIndex=classIndex) + if len(cm)==1: + cm = cm[0] + print cm + output_dict = {} + output_dict['cm']=cm + return output_dict + +def cforange_confusion_matrix_computations(input_dict): + import orngStat + cm = input_dict['cm'] + alpha = float(input_dict['alpha']) + output_dict = {} + output_dict['sens']=orngStat.sens(cm) + output_dict['spec']=orngStat.spec(cm) + output_dict['PPV']=orngStat.PPV(cm) + 
output_dict['NPV']=orngStat.NPV(cm) + output_dict['precision']=orngStat.precision(cm) + output_dict['recall']=orngStat.recall(cm) + output_dict['F1']=orngStat.F1(cm) + output_dict['Falpha']=orngStat.Falpha(cm,alpha=alpha) + output_dict['MCC']=orngStat.MCC(cm) + return output_dict + +def cforange_auc(input_dict): + import orngStat + results = input_dict['results'] + method = int(input_dict['method']) + auc = orngStat.AUC(results,method) + output_dict = {} + output_dict['AUC']=auc + return output_dict + +def cforange_MSE(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.MSE(results) + if len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['MSE']=errors + return output_dict + +def cforange_RMSE(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.RMSE(results) + if len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['RMSE']=errors + return output_dict + +def cforange_MAE(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.MAE(results) + if len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['MAE']=errors + return output_dict + +def cforange_RSE(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.RSE(results) + if len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['RSE']=errors + return output_dict + +def cforange_RRSE(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.RRSE(results) + if len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['RRSE']=errors + return output_dict + +def cforange_RAE(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.RAE(results) + if len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['RAE']=errors + return output_dict + +def cforange_R2(input_dict): + import orngStat + results = input_dict['results'] + errors = orngStat.R2(results) + if 
len(errors)==1: + errors = errors[0] + output_dict = {} + output_dict['R2']=errors + return output_dict + +def cforange_prepare_results(input_dict): + output_dict = {} + learners = input_dict['learners'] + newlist = [] + for i in range(0,len(learners)): + newdict = {} + newdict['name']=str(learners[i]) + newdict['fbeta']=0.5 + newdict['precision']=input_dict['precision'][i] + newdict['recall']=input_dict['recall'][i] + newdict['fscore']=input_dict['f'][i] + newlist.append(newdict) + output_dict['alp']=newlist + return output_dict \ No newline at end of file diff --git a/workflows/cforange/settings.py b/workflows/cforange/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..9745180faf827602884c7849987d83209070f121 --- /dev/null +++ b/workflows/cforange/settings.py @@ -0,0 +1,13 @@ +import os + +# === STANDARD PACKAGE SETTINGS === +PACKAGE_ROOT = os.path.dirname(__file__) + +# === AUTO IMPORT OPTIONS === +#If auto_import_package_data is true then given data file is automatically imported when ClowdFlows project is newly deployed or refreshed from git +AUTO_IMPORT_DB = True +#For auto_import_package_data_replace_option description see the 'replace' option in workflows/import_package command +AUTO_IMPORT_DB_REPLACE_OPTION = True +#If file(s) other than ./db/package_data.json should be imported, auto_import_package_data_files should be corrected +AUTO_IMPORT_DB_FILES = [os.path.join(PACKAGE_ROOT,'db/package_data.json')] + diff --git a/workflows/cforange/static/cforange/icons/treeview/construction_work .png b/workflows/cforange/static/cforange/icons/treeview/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..69bd351914a95f81eb1941f6e7908474916f6116 Binary files /dev/null and b/workflows/cforange/static/cforange/icons/treeview/construction_work .png differ diff --git a/workflows/cforange/static/cforange/icons/widget/construction_work .png 
b/workflows/cforange/static/cforange/icons/widget/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..bc16d376995c1545972b60487ee8cd653177b407 Binary files /dev/null and b/workflows/cforange/static/cforange/icons/widget/construction_work .png differ diff --git a/workflows/cforange/templates/interactions/cforange_filter_integers.html b/workflows/cforange/templates/interactions/cforange_filter_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..33cb05ab4661eb9823647f3fa19b6fa3ce710dcb --- /dev/null +++ b/workflows/cforange/templates/interactions/cforange_filter_integers.html @@ -0,0 +1,8 @@ +
+
+{% for i in intList %} +{{i}}
+{% endfor %} + +
+
\ No newline at end of file diff --git a/workflows/cforange/templates/visualizations/cforange_display_integers.html b/workflows/cforange/templates/visualizations/cforange_display_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..e05b226ccb2a3d578b4de3c436957deb0694a9b1 --- /dev/null +++ b/workflows/cforange/templates/visualizations/cforange_display_integers.html @@ -0,0 +1,28 @@ +
+
+ + + {% for i in input_dict.intList %} + + + + + {% endfor %} + + + + +
+ {% if forloop.first %} {% else %}+{% endif %} + + {{ i }} +
+ = + + {{ input_dict.sum }} +
+
+{{ check }} + +
+
\ No newline at end of file diff --git a/workflows/cforange/urls.py b/workflows/cforange/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..7c56d8e8861188bdd8b72151f491c761a654e179 --- /dev/null +++ b/workflows/cforange/urls.py @@ -0,0 +1,8 @@ +from django.conf.urls.defaults import patterns, include, url + +urlpatterns = patterns('', + #url(r'^get-adc-index/widget(?P[0-9]+)/nx/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index(?P[0-9]+)-(?P[0-9]+).html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Document(?P[0-9]+).html', 'workflows.latino.views.get_adc_page', name='get adc page'), +) \ No newline at end of file diff --git a/workflows/cforange/views.py b/workflows/cforange/views.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/cforange/visualization_views.py b/workflows/cforange/visualization_views.py new file mode 100644 index 0000000000000000000000000000000000000000..f99b6f363ad341a3bac37080d0c211b9a67fbd47 --- /dev/null +++ b/workflows/cforange/visualization_views.py @@ -0,0 +1,8 @@ +from django.shortcuts import render + +def cforange_display_summation(request,input_dict,output_dict,widget): + if sum(input_dict['intList']) == input_dict['sum']: + check = 'The calculation appears correct.' + else: + check = 'The calculation appears incorrect!' 
+ return render(request, 'visualizations/cforange_display_integers.html',{'widget':widget,'input_dict':input_dict, 'output_dict':output_dict, 'check':check}) diff --git a/workflows/decision_support/db/package_data.json b/workflows/decision_support/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..f325b66f2686c99e65651977dc3f90f4617958ee --- /dev/null +++ b/workflows/decision_support/db/package_data.json @@ -0,0 +1,169 @@ +[ + { + "pk": 51, + "model": "workflows.category", + "fields": { + "uid": "", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Decision Support" + } + }, + { + "pk": 291, + "model": "workflows.abstractwidget", + "fields": { + "category": 51, + "treeview_image": "", + "name": "Decision support visualization", + "is_streaming": false, + "uid": "", + "interaction_view": "", + "image": "", + "package": "decision_support", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "ds_charts_viewer", + "action": "ds_charts", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 979, + "model": "workflows.abstractinput", + "fields": { + "widget": 291, + "name": "DS Model", + "short_name": "mdl", + "uid": "", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "model", + "parameter": false, + "order": 1, + "description": "Decision Support model" + } + }, + { + "pk": 228, + "model": "workflows.abstractwidget", + "fields": { + "category": 51, + "treeview_image": "", + "name": "Kepner-Tregoe", + "is_streaming": false, + "uid": "", + "interaction_view": "kepner_tregoe", + "image": "", + "package": "decision_support", + "static_image": "", + "post_interact_action": "kepner_tregoe_finished", + "user": null, + "visualization_view": "", + "action": "kepner_tregoe", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + 
"has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 660, + "model": "workflows.abstractinput", + "fields": { + "widget": 228, + "name": "Orange data table", + "short_name": "odt", + "uid": "", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "Orange data table" + } + }, + { + "pk": 239, + "model": "workflows.abstractoutput", + "fields": { + "widget": 228, + "name": "Orange data table", + "short_name": "odt", + "variable": "data", + "uid": "", + "order": 1, + "description": "Orange data table" + } + }, + { + "pk": 346, + "model": "workflows.abstractoutput", + "fields": { + "widget": 228, + "name": "K-T model", + "short_name": "mdl", + "variable": "model", + "uid": "", + "order": 2, + "description": "K-T model" + } + }, + { + "pk": 290, + "model": "workflows.abstractwidget", + "fields": { + "category": 51, + "treeview_image": "", + "name": "Sensitivity analysis", + "is_streaming": false, + "uid": "", + "interaction_view": "", + "image": "", + "package": "decision_support", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "sensitivity_analysis_viewer", + "action": "sensitivity_analysis", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 978, + "model": "workflows.abstractinput", + "fields": { + "widget": 290, + "name": "DS model", + "short_name": "mdl", + "uid": "", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "model", + "parameter": false, + "order": 1, + "description": "model" + } + } +] \ No newline at end of file diff --git a/workflows/latino/bin/LatinoClowdFlows.dll b/workflows/latino/bin/LatinoClowdFlows.dll index 9d5c2f32c05c408fda887660ad5a3a4a70d7a947..1382af07c84fb9f2c6a6839199ea70cf11daf4d5 100644 Binary files 
a/workflows/latino/bin/LatinoClowdFlows.dll and b/workflows/latino/bin/LatinoClowdFlows.dll differ diff --git a/workflows/latino/bin/LatinoClowdFlows.pdb b/workflows/latino/bin/LatinoClowdFlows.pdb index 74778b02cb8bf87ab36fa3588a39e67ca8048c54..7ca35aca74d593d61e36348073894a27e3024901 100644 Binary files a/workflows/latino/bin/LatinoClowdFlows.pdb and b/workflows/latino/bin/LatinoClowdFlows.pdb differ diff --git a/workflows/latino/bin/SmoothInterfaces.exe b/workflows/latino/bin/SmoothInterfaces.exe index ff625dff0a6c271139c22d3ef93108d7a9476a8a..4079e5ff4a1f6be4f826b9664b688489ee45bd91 100644 Binary files a/workflows/latino/bin/SmoothInterfaces.exe and b/workflows/latino/bin/SmoothInterfaces.exe differ diff --git a/workflows/latino/bin/SmoothInterfaces.pdb b/workflows/latino/bin/SmoothInterfaces.pdb index 04dcf25728322d09394217d08ed80d8dacfaadb9..feb8cc8881b49da16bcc73975fb86f44cbb814fd 100644 Binary files a/workflows/latino/bin/SmoothInterfaces.pdb and b/workflows/latino/bin/SmoothInterfaces.pdb differ diff --git a/workflows/latino/db/package_data.json b/workflows/latino/db/package_data.json index 47b321898fab518e5ccb4390ed47ded2f1205234..3e0a212a0e4873152750d6073f67580b4e55f258 100644 --- a/workflows/latino/db/package_data.json +++ b/workflows/latino/db/package_data.json @@ -23,162 +23,6 @@ "name": "Sentiment Analysis" } }, - { - "pk": 3, - "model": "workflows.category", - "fields": { - "uid": "1eed1020-c423-4831-80ce-50f65ae276e1", - "parent": 1, - "workflow": null, - "user": null, - "order": 1, - "name": "Data In/Out" - } - }, - { - "pk": 4, - "model": "workflows.category", - "fields": { - "uid": "48fb3ce2-11ef-4afc-ab1e-aec6f1d6d055", - "parent": 1, - "workflow": null, - "user": null, - "order": 2, - "name": "Document Corpus" - } - }, - { - "pk": 5, - "model": "workflows.category", - "fields": { - "uid": "5a4286c8-d182-45a5-896b-cc3c6ed84f22", - "parent": 1, - "workflow": null, - "user": null, - "order": 3, - "name": "Tokenization" - } - }, - { - "pk": 6, - 
"model": "workflows.category", - "fields": { - "uid": "64780432-b114-4589-a24c-2331aed23502", - "parent": 5, - "workflow": null, - "user": null, - "order": 1, - "name": "Advanced" - } - }, - { - "pk": 7, - "model": "workflows.category", - "fields": { - "uid": "05c26564-86b5-4a73-aae9-f4dc14bc75d7", - "parent": 1, - "workflow": null, - "user": null, - "order": 4, - "name": "Tagging" - } - }, - { - "pk": 8, - "model": "workflows.category", - "fields": { - "uid": "94237b9c-8e88-460c-abc5-9c8108acb821", - "parent": 7, - "workflow": null, - "user": null, - "order": 1, - "name": "Advanced" - } - }, - { - "pk": 9, - "model": "workflows.category", - "fields": { - "uid": "b5964b0d-5acb-4dbf-a49d-4331a5a44f9c", - "parent": 1, - "workflow": null, - "user": null, - "order": 5, - "name": "Bag of Words" - } - }, - { - "pk": 10, - "model": "workflows.category", - "fields": { - "uid": "0378e3a8-b71e-47b5-96b0-84dca2680f4d", - "parent": 9, - "workflow": null, - "user": null, - "order": 1, - "name": "Advanced" - } - }, - { - "pk": 11, - "model": "workflows.category", - "fields": { - "uid": "a8348101-116c-4e04-b2a7-af1c991c3927", - "parent": 1, - "workflow": null, - "user": null, - "order": 6, - "name": "Dataset" - } - }, - { - "pk": 12, - "model": "workflows.category", - "fields": { - "uid": "c7a26cb0-65af-4c6e-887b-7c6a7a162327", - "parent": 1, - "workflow": null, - "user": null, - "order": 7, - "name": "Similarity Matrix" - } - }, - { - "pk": 13, - "model": "workflows.category", - "fields": { - "uid": "74457dca-9fba-4d66-98dd-633df385a22b", - "parent": 1, - "workflow": null, - "user": null, - "order": 8, - "name": "Clustering" - } - }, - { - "pk": 14, - "model": "workflows.category", - "fields": { - "uid": "c8fd491e-75ad-487d-b42f-b5ca07ec045a", - "parent": 1, - "workflow": null, - "user": null, - "order": 9, - "name": "Classification" - } - }, - { - "pk": 15, - "model": "workflows.category", - "fields": { - "uid": "438cb7e7-d0e5-4bb9-9cad-10f6907ec568", - "parent": 1, - 
"workflow": null, - "user": null, - "order": 10, - "name": "Helpers" - } - }, { "pk": 1, "model": "workflows.abstractwidget", @@ -259,7 +103,7 @@ "fields": { "category": 2, "treeview_image": null, - "name": "Kr1 NOVA Funkcija", + "name": "Krena NOVA Funkcija", "is_streaming": false, "uid": "temporary_uid:68e5b5ed-bcdb-e27e-61bb-efc9ba337d07", "interaction_view": "", @@ -385,524 +229,449 @@ }, { "pk": 3, + "model": "workflows.category", + "fields": { + "uid": "1eed1020-c423-4831-80ce-50f65ae276e1", + "parent": 1, + "workflow": null, + "user": null, + "order": 1, + "name": "Data In/Out" + } + }, + { + "pk": 8, "model": "workflows.abstractwidget", "fields": { - "category": 15, + "category": 3, "treeview_image": null, - "name": "Flatten String Hierarchy", + "name": "Load Document Corpus From File", "is_streaming": false, - "uid": "1d9f109e-8490-4c98-8957-4b0f698ce1bd", + "uid": "60e1a6ee-1d9f-4ae6-a5a9-fd06d30d4348", "interaction_view": "", "image": null, "package": "latino", - "static_image": "flatten_string_hierarchy_image.png", + "static_image": "load_adc_from_file_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_flatten_object_to_string_array", + "action": "latino_load_adc", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, "order": 4, - "description": "Automatically generated widget from function FlattenObjectToStringArray in package latino. The original function signature: FlattenObjectToStringArray." + "description": "This widges processes raw text file and loads the texts into ADC (Annotated Document Corpus) structure. The input file contains one document per line - the whole line represents text from the body of a document. In case lines contain more document properties (i.e.: ids, titles, labels,...) than other widgets should be used to load ADC structure." 
} }, { - "pk": 7, + "pk": 16, "model": "workflows.abstractinput", "fields": { - "widget": 3, - "name": "data", - "short_name": "obj", - "uid": "26dc986a-e8c4-ee4b-ac32-5ce895f3383a", + "widget": 8, + "name": "Raw Text File", + "short_name": "fil", + "uid": "d66b7ce4-8aa4-d26e-db69-762683ee0e3e", "default": "", - "required": true, + "required": false, "multi": false, "parameter_type": null, - "variable": "data", + "variable": "file", "parameter": false, "order": 1, - "description": "System.Object" + "description": "Input Text File: Contains one document per line - the whole line represents text from the body of a document." } }, { - "pk": 3, + "pk": 17, + "model": "workflows.abstractinput", + "fields": { + "widget": 8, + "name": "First words in line with preceding exclamation (!) present labels", + "short_name": "bol", + "uid": "cb7ddfe9-08ad-6b8a-d1fa-76cd91888d5e", + "default": "false", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "leadingLabels", + "parameter": true, + "order": 2, + "description": "System.Boolean" + } + }, + { + "pk": 7, "model": "workflows.abstractoutput", "fields": { - "widget": 3, - "name": "flatData", - "short_name": "obj", - "variable": "flatData", - "uid": "a16d6a3b-e656-9b50-10c8-8359a5174193", + "widget": 8, + "name": "Annotated Document Corpus", + "short_name": "adc", + "variable": "adc", + "uid": "ec3c5eae-d5b8-16e9-deeb-ce175eaab9e0", "order": 1, "description": "" } }, { - "pk": 4, + "pk": 9, "model": "workflows.abstractwidget", "fields": { - "category": 15, + "category": 3, "treeview_image": null, - "name": "Display Table", + "name": "Load Document Corpus From String", "is_streaming": false, - "uid": "bb74ec59-dba8-461b-ae66-de35a5c3fdea", + "uid": "c35c429d-9d4b-40c4-b699-b7dee25f97a3", "interaction_view": "", "image": null, "package": "latino", - "static_image": "table_view_image.png", + "static_image": "load_adc_from_file_image.png", "post_interact_action": "", "user": null, - 
"visualization_view": "show_table", - "action": "show_table", + "visualization_view": "", + "action": "latino_load_adcfrom_string", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 3, - "description": "Automatically generated widget from function ShowTable_PYTHON in package latino. The original function signature: ShowTable_PYTHON." + "order": 5, + "description": "This widges processes raw text file and loads the texts into ADC (Annotated Document Corpus) structure. The input file contains one document per line - the whole line represents text from the body of a document. In case lines contain more document properties (i.e.: ids, titles, labels,...) than other widgets should be used to load ADC structure." } }, { - "pk": 8, + "pk": 18, "model": "workflows.abstractinput", "fields": { - "widget": 4, - "name": "Table", - "short_name": "tbl", - "uid": "5800c1e5-44ec-5843-e9cf-fde725c7b521", + "widget": 9, + "name": "String", + "short_name": "str", + "uid": "91e6fdd5-0969-dc3f-395e-3f2827a9c57c", "default": "", "required": true, "multi": false, - "parameter_type": null, - "variable": "tbl", + "parameter_type": "textarea", + "variable": "plainString", "parameter": false, "order": 1, - "description": "System.Object" - } - }, - { - "pk": 5, - "model": "workflows.abstractwidget", - "fields": { - "category": 15, - "treeview_image": null, - "name": "Generate Integer Range", - "is_streaming": false, - "uid": "f7d78342-9aea-4e01-b46b-8d20631ee5bf", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "range_create_integers_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "create_range", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 5, - "description": "Automatically generated widget from function GenerateIntegerRange_PYTHON in package latino. The original function signature: GenerateIntegerRange_PYTHON." 
- } - }, - { - "pk": 9, - "model": "workflows.abstractinput", - "fields": { - "widget": 5, - "name": "Start", - "short_name": "int", - "uid": "4c165f6b-7698-65d2-f8fb-2d2cf104ba47", - "default": "0", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "start", - "parameter": true, - "order": 1, - "description": "System.Int32" + "description": "Input Text String: Contains one document per line - the whole line represents text from the body of a document." } }, { - "pk": 10, + "pk": 19, "model": "workflows.abstractinput", "fields": { - "widget": 5, - "name": "Stop", - "short_name": "int", - "uid": "c707764e-6ee4-8318-565e-3c4aab14cc45", - "default": "10", + "widget": 9, + "name": "First words in line with preceding exclamation (!) present labels", + "short_name": "bol", + "uid": "18f37a99-5141-f7d2-108e-49ee6505c3da", + "default": "false", "required": true, "multi": false, - "parameter_type": "text", - "variable": "stop", + "parameter_type": "checkbox", + "variable": "leadingLabels", "parameter": true, "order": 2, - "description": "System.Int32" - } - }, - { - "pk": 11, - "model": "workflows.abstractinput", - "fields": { - "widget": 5, - "name": "Step", - "short_name": "int", - "uid": "64dbf120-d027-0a31-a88d-e78bd15fb0c1", - "default": "1", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "step", - "parameter": true, - "order": 3, - "description": "System.Int32" + "description": "System.Boolean" } }, { - "pk": 4, + "pk": 8, "model": "workflows.abstractoutput", "fields": { - "widget": 5, - "name": "Range", - "short_name": "ary", - "variable": "range", - "uid": "8eb96f55-f038-e517-5e11-8dbda6295b0f", + "widget": 9, + "name": "Annotated Document Corpus", + "short_name": "adc", + "variable": "adc", + "uid": "52165ac9-d42a-be5c-f24d-d6f02ab32e77", "order": 1, "description": "" } }, { - "pk": 6, + "pk": 10, "model": "workflows.abstractwidget", "fields": { - "category": 15, + "category": 3, "treeview_image": null, 
- "name": "Python Snippet", + "name": "Convert Corpus to XML String", "is_streaming": false, - "uid": "b6164644-ff14-4f8f-bdee-ca55da77b57c", + "uid": "aa63a521-88c5-42c3-a85e-87c0be6288d5", "interaction_view": "", "image": null, "package": "latino", - "static_image": "python_snippet_image.png", + "static_image": "adc_to_xml_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "python_snippet", + "action": "latino_save_adcto_xml", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 6, - "description": "Runs python snippet. You can use variable which is provided on the input by the name \"in1\" .. \"inN\". Whatever you want to otput needs to be asigned to the variable \"out1\" before the code is terminated" + "order": 1, + "description": "Automatically generated widget from function SaveADCtoXml in package latino. The original function signature: SaveADCtoXml." } }, { - "pk": 12, + "pk": 20, "model": "workflows.abstractinput", "fields": { - "widget": 6, - "name": "in", - "short_name": "in", - "uid": "16ca773f-1d2d-823f-6e9c-1be0dd267369", + "widget": 10, + "name": "Annotated Document Corpus", + "short_name": "adc", + "uid": "bb23f353-d88a-ae54-fd44-1249badd8f28", "default": "", - "required": false, - "multi": true, + "required": true, + "multi": false, "parameter_type": null, - "variable": "in", + "variable": "adc", "parameter": false, "order": 1, - "description": "input can be accesed as variable \"in1\" .. \"inN\" inside the code" - } - }, - { - "pk": 13, - "model": "workflows.abstractinput", - "fields": { - "widget": 6, - "name": "Python Snippet Code", - "short_name": "py", - "uid": "1991f19a-463b-d851-aab6-72facc87a2f1", - "default": "# This is the Python Code Snippet where you can modify the data however is needed.\n# Varaible \"in1\" .. 
\"inN\" contains whatever you connected to the input port\n# Whatever is assigned to the variable \"out1\" will be transfered to the output port.\n\nout1 = in1", - "required": true, - "multi": false, - "parameter_type": "textarea", - "variable": "pycode", - "parameter": true, - "order": 2, - "description": "Input can be accesed as variable \"in1\" .. \"inN\" inside the code and output can be accesed/assigned as variable \"out1\" inside the code." + "description": "LatinoClowdFlows.DocumentCorpus" } }, { - "pk": 5, + "pk": 9, "model": "workflows.abstractoutput", "fields": { - "widget": 6, - "name": "out", - "short_name": "out", - "variable": "out", - "uid": "455b1583-c2d4-13c5-1036-435dfe160152", + "widget": 10, + "name": "XML String", + "short_name": "xml", + "variable": "string", + "uid": "d8a5c93d-9a82-ff1a-7d5a-a6e287074ca5", "order": 1, - "description": "output can be accesed/assigned as variable \"out1\" inside the code" + "description": "" } }, { - "pk": 7, + "pk": 11, "model": "workflows.abstractwidget", "fields": { - "category": 15, + "category": 3, "treeview_image": null, - "name": "Split Object", + "name": "Convert XML String to Corpus", "is_streaming": false, - "uid": "4ae60fcc-ae3a-4609-9ec1-a7724d5ac0c4", + "uid": "136d957e-57e1-47da-a6f6-05423d5abb9e", "interaction_view": "", "image": null, "package": "latino", - "static_image": "object_split_image.png", + "static_image": "xml_to_adc_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "split_object", + "action": "latino_load_adcfrom_xml", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 7, - "description": "Automatically generated widget from function SplitObject_PYTHON in package latino. The original function signature: SplitObject_PYTHON." + "order": 2, + "description": "Automatically generated widget from function LoadADCfromXml in package latino. The original function signature: LoadADCfromXml." 
} }, { - "pk": 14, + "pk": 21, "model": "workflows.abstractinput", "fields": { - "widget": 7, - "name": "object", - "short_name": "obj", - "uid": "298ce522-7061-993e-a940-069d9c9b739d", + "widget": 11, + "name": "XML String", + "short_name": "xml", + "uid": "7a3acca8-7b22-8d31-2dab-8dd606665cc1", "default": "", "required": true, "multi": false, "parameter_type": null, - "variable": "object", + "variable": "xml", "parameter": false, "order": 1, - "description": "System.Object" - } - }, - { - "pk": 15, - "model": "workflows.abstractinput", - "fields": { - "widget": 7, - "name": "Object Modifier", - "short_name": "atr", - "uid": "bf822d07-096d-1aeb-e6da-3670d6aa9c53", - "default": "", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "attribute", - "parameter": true, - "order": 2, - "description": "if one wants to extract object's attributes, leading dot should be used." + "description": "System.String" } }, { - "pk": 6, + "pk": 10, "model": "workflows.abstractoutput", "fields": { - "widget": 7, - "name": "object", - "short_name": "obj", - "variable": "object", - "uid": "6817c1ee-8e0f-8e63-f4d6-7c43522b7f10", + "widget": 11, + "name": "Annotated Document Corpus", + "short_name": "adc", + "variable": "adc", + "uid": "2933b0e7-2639-9cf0-dc73-c22e77e3263c", "order": 1, "description": "" } }, { - "pk": 8, + "pk": 12, "model": "workflows.abstractwidget", "fields": { "category": 3, "treeview_image": null, - "name": "Load Document Corpus From File", + "name": "Get Plain Texts", "is_streaming": false, - "uid": "60e1a6ee-1d9f-4ae6-a5a9-fd06d30d4348", + "uid": "54f29d7e-a8a6-4cbb-a042-8cdf89328a2b", "interaction_view": "", "image": null, "package": "latino", - "static_image": "load_adc_from_file_image.png", + "static_image": "adc_to_text_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_load_adc", + "action": "latino_get_doc_strings", "wsdl_method": "", "wsdl": "", "interactive": false, 
"has_progress_bar": false, - "order": 4, - "description": "This widges processes raw text file and loads the texts into ADC (Annotated Document Corpus) structure. The input file contains one document per line - the whole line represents text from the body of a document. In case lines contain more document properties (i.e.: ids, titles, labels,...) than other widgets should be used to load ADC structure." + "order": 3, + "description": "Automatically generated widget from function GetDocStrings in package latino. The original function signature: GetDocStrings." } }, { - "pk": 16, + "pk": 22, "model": "workflows.abstractinput", "fields": { - "widget": 8, - "name": "Raw Text File", - "short_name": "fil", - "uid": "d66b7ce4-8aa4-d26e-db69-762683ee0e3e", + "widget": 12, + "name": "Annotated Document Corpus", + "short_name": "adc", + "uid": "3367601a-8a96-20f1-7bdb-b2610fa4eb18", "default": "", - "required": false, + "required": true, "multi": false, - "parameter_type": null, - "variable": "file", + "parameter_type": "text", + "variable": "adc", "parameter": false, "order": 1, - "description": "Input Text File: Contains one document per line - the whole line represents text from the body of a document." + "description": "LatinoClowdFlows.DocumentCorpus" } }, { - "pk": 17, + "pk": 23, "model": "workflows.abstractinput", "fields": { - "widget": 8, - "name": "First words in line with preceding exclamation (!) 
present labels", - "short_name": "bol", - "uid": "cb7ddfe9-08ad-6b8a-d1fa-76cd91888d5e", - "default": "false", + "widget": 12, + "name": "Token Annotation", + "short_name": "str", + "uid": "4d5f3e18-4eff-0fd7-5961-27e5aae71e74", + "default": "TextBlock", "required": true, "multi": false, - "parameter_type": "checkbox", - "variable": "leadingLabels", + "parameter_type": null, + "variable": "elementAnnotation", "parameter": true, "order": 2, - "description": "System.Boolean" - } - }, - { - "pk": 7, - "model": "workflows.abstractoutput", - "fields": { - "widget": 8, - "name": "Annotated Document Corpus", - "short_name": "adc", - "variable": "adc", - "uid": "ec3c5eae-d5b8-16e9-deeb-ce175eaab9e0", - "order": 1, - "description": "" + "description": "System.String" } }, { - "pk": 9, - "model": "workflows.abstractwidget", + "pk": 24, + "model": "workflows.abstractinput", "fields": { - "category": 3, - "treeview_image": null, - "name": "Load Document Corpus From String", - "is_streaming": false, - "uid": "c35c429d-9d4b-40c4-b699-b7dee25f97a3", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "load_adc_from_file_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_load_adcfrom_string", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 5, - "description": "This widges processes raw text file and loads the texts into ADC (Annotated Document Corpus) structure. The input file contains one document per line - the whole line represents text from the body of a document. In case lines contain more document properties (i.e.: ids, titles, labels,...) than other widgets should be used to load ADC structure." 
+ "widget": 12, + "name": "Feature Condition", + "short_name": "str", + "uid": "a7486c8d-0b1c-3a32-999f-15107c13c3f4", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "elementFeatureConditions", + "parameter": true, + "order": 3, + "description": "Condition which tokens to include based on their features.\nFormat examples:\n-Feature1 (don't include tokens with Feature1 set ta any value)\n-Feature1=Value1 (don't include tokens with Feature1 set to the value Value1)\n-Feature1 +Feature2 (don't include tokens with Feature1 set unless it has also Feature2 set)\n-Feature1=Value1 +Feature2 (don't include tokens with Feature1 set to Value1 unless it has also Feature2 set to any value)..." } }, { - "pk": 18, + "pk": 25, "model": "workflows.abstractinput", "fields": { - "widget": 9, - "name": "String", + "widget": 12, + "name": "Delimiter for token concatenation", "short_name": "str", - "uid": "91e6fdd5-0969-dc3f-395e-3f2827a9c57c", + "uid": "5ebae072-0a90-6b4c-a7dc-47bb6bd71d51", "default": "", "required": true, "multi": false, - "parameter_type": "textarea", - "variable": "plainString", - "parameter": false, - "order": 1, - "description": "Input Text String: Contains one document per line - the whole line represents text from the body of a document." + "parameter_type": "text", + "variable": "delimiter", + "parameter": true, + "order": 4, + "description": "System.String" } }, { - "pk": 19, + "pk": 26, "model": "workflows.abstractinput", "fields": { - "widget": 9, - "name": "First words in line with preceding exclamation (!) 
present labels", + "widget": 12, + "name": "Include Document Identifier", "short_name": "bol", - "uid": "18f37a99-5141-f7d2-108e-49ee6505c3da", - "default": "false", + "uid": "507de999-1802-700d-27e5-ea5f972bc8e7", + "default": "", "required": true, "multi": false, "parameter_type": "checkbox", - "variable": "leadingLabels", + "variable": "includeDocId", "parameter": true, - "order": 2, + "order": 5, "description": "System.Boolean" } }, { - "pk": 8, + "pk": 11, "model": "workflows.abstractoutput", "fields": { - "widget": 9, - "name": "Annotated Document Corpus", - "short_name": "adc", - "variable": "adc", - "uid": "52165ac9-d42a-be5c-f24d-d6f02ab32e77", + "widget": 12, + "name": "Texts", + "short_name": "str", + "variable": "strings", + "uid": "bbcbbfc9-8a0f-bf3f-4b2d-7bf018ea4ff9", "order": 1, "description": "" } }, { - "pk": 10, + "pk": 4, + "model": "workflows.category", + "fields": { + "uid": "48fb3ce2-11ef-4afc-ab1e-aec6f1d6d055", + "parent": 1, + "workflow": null, + "user": null, + "order": 2, + "name": "Document Corpus" + } + }, + { + "pk": 13, "model": "workflows.abstractwidget", "fields": { - "category": 3, + "category": 4, "treeview_image": null, - "name": "Convert Corpus to XML String", + "name": "Extract Feature", "is_streaming": false, - "uid": "aa63a521-88c5-42c3-a85e-87c0be6288d5", + "uid": "ec9955a3-263b-49b4-a0ca-b62598962b76", "interaction_view": "", "image": null, "package": "latino", - "static_image": "adc_to_xml_image.png", + "static_image": "adc_extract_feature_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_save_adcto_xml", + "action": "latino_extract_documents_features", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 1, - "description": "Automatically generated widget from function SaveADCtoXml in package latino. The original function signature: SaveADCtoXml." 
+ "order": 3, + "description": "Automatically generated widget from function ExtractDocumentsFeatures in package latino. The original function signature: ExtractDocumentsFeatures." } }, { - "pk": 20, + "pk": 27, "model": "workflows.abstractinput", "fields": { - "widget": 10, + "widget": 13, "name": "Annotated Document Corpus", "short_name": "adc", - "uid": "bb23f353-d88a-ae54-fd44-1249badd8f28", + "uid": "3f1837e3-2997-a696-2e4e-48d1166f314f", "default": "", "required": true, "multi": false, @@ -914,111 +683,73 @@ } }, { - "pk": 9, - "model": "workflows.abstractoutput", - "fields": { - "widget": 10, - "name": "XML String", - "short_name": "xml", - "variable": "string", - "uid": "d8a5c93d-9a82-ff1a-7d5a-a6e287074ca5", - "order": 1, - "description": "" - } - }, - { - "pk": 11, - "model": "workflows.abstractwidget", - "fields": { - "category": 3, - "treeview_image": null, - "name": "Convert XML String to Corpus", - "is_streaming": false, - "uid": "136d957e-57e1-47da-a6f6-05423d5abb9e", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "xml_to_adc_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_load_adcfrom_xml", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 2, - "description": "Automatically generated widget from function LoadADCfromXml in package latino. The original function signature: LoadADCfromXml." 
- } - }, - { - "pk": 21, + "pk": 28, "model": "workflows.abstractinput", "fields": { - "widget": 11, - "name": "XML String", - "short_name": "xml", - "uid": "7a3acca8-7b22-8d31-2dab-8dd606665cc1", + "widget": 13, + "name": "Extracted Feature Name", + "short_name": "str", + "uid": "6a42ceda-c81e-6257-1b95-4aa8ae605bef", "default": "", "required": true, "multi": false, - "parameter_type": null, - "variable": "xml", - "parameter": false, - "order": 1, + "parameter_type": "text", + "variable": "featureName", + "parameter": true, + "order": 2, "description": "System.String" } }, { - "pk": 10, + "pk": 12, "model": "workflows.abstractoutput", "fields": { - "widget": 11, - "name": "Annotated Document Corpus", - "short_name": "adc", - "variable": "adc", - "uid": "2933b0e7-2639-9cf0-dc73-c22e77e3263c", + "widget": 13, + "name": "List of Extracted Features", + "short_name": "str", + "variable": "strings", + "uid": "55c95429-7b4f-7c47-54da-a5273ff4d490", "order": 1, "description": "" } }, { - "pk": 12, + "pk": 14, "model": "workflows.abstractwidget", "fields": { - "category": 3, + "category": 4, "treeview_image": null, - "name": "Get Plain Texts", + "name": "Add Feature", "is_streaming": false, - "uid": "54f29d7e-a8a6-4cbb-a042-8cdf89328a2b", + "uid": "cbae1835-bbea-43f2-b698-dc02ba106462", "interaction_view": "", "image": null, "package": "latino", - "static_image": "adc_to_text_image.png", + "static_image": "adc_add_feature_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_get_doc_strings", + "action": "latino_add_documents_features", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 3, - "description": "Automatically generated widget from function GetDocStrings in package latino. The original function signature: GetDocStrings." + "order": 1, + "description": "Automatically generated widget from function AddDocumentsFeatures in package latino. 
The original function signature: AddDocumentsFeatures." } }, { - "pk": 22, + "pk": 29, "model": "workflows.abstractinput", "fields": { - "widget": 12, + "widget": 14, "name": "Annotated Document Corpus", "short_name": "adc", - "uid": "3367601a-8a96-20f1-7bdb-b2610fa4eb18", + "uid": "a3c0bde8-766f-5e30-2b2e-1fdcbaa8d913", "default": "", "required": true, "multi": false, - "parameter_type": "text", + "parameter_type": null, "variable": "adc", "parameter": false, "order": 1, @@ -1026,231 +757,29 @@ } }, { - "pk": 23, + "pk": 30, "model": "workflows.abstractinput", "fields": { - "widget": 12, - "name": "Token Annotation", + "widget": 14, + "name": "Feature Values (Array of Labels)", "short_name": "str", - "uid": "4d5f3e18-4eff-0fd7-5961-27e5aae71e74", - "default": "TextBlock", + "uid": "1219e584-0660-0d7b-4d31-288f5e99df89", + "default": "", "required": true, "multi": false, "parameter_type": null, - "variable": "elementAnnotation", - "parameter": true, + "variable": "featureValues", + "parameter": false, "order": 2, - "description": "System.String" + "description": "System.Collections.Generic.List`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" } }, { - "pk": 24, + "pk": 31, "model": "workflows.abstractinput", "fields": { - "widget": 12, - "name": "Feature Condition", - "short_name": "str", - "uid": "a7486c8d-0b1c-3a32-999f-15107c13c3f4", - "default": "", - "required": false, - "multi": false, - "parameter_type": "text", - "variable": "elementFeatureConditions", - "parameter": true, - "order": 3, - "description": "Condition which tokens to include based on their features.\nFormat examples:\n-Feature1 (don't include tokens with Feature1 set ta any value)\n-Feature1=Value1 (don't include tokens with Feature1 set to the value Value1)\n-Feature1 +Feature2 (don't include tokens with Feature1 set unless it has also Feature2 set)\n-Feature1=Value1 +Feature2 (don't include tokens with Feature1 set to Value1 unless it has also 
Feature2 set to any value)..." - } - }, - { - "pk": 25, - "model": "workflows.abstractinput", - "fields": { - "widget": 12, - "name": "Delimiter for token concatenation", - "short_name": "str", - "uid": "5ebae072-0a90-6b4c-a7dc-47bb6bd71d51", - "default": "", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "delimiter", - "parameter": true, - "order": 4, - "description": "System.String" - } - }, - { - "pk": 26, - "model": "workflows.abstractinput", - "fields": { - "widget": 12, - "name": "Include Document Identifier", - "short_name": "bol", - "uid": "507de999-1802-700d-27e5-ea5f972bc8e7", - "default": "", - "required": true, - "multi": false, - "parameter_type": "checkbox", - "variable": "includeDocId", - "parameter": true, - "order": 5, - "description": "System.Boolean" - } - }, - { - "pk": 11, - "model": "workflows.abstractoutput", - "fields": { - "widget": 12, - "name": "Texts", - "short_name": "str", - "variable": "strings", - "uid": "bbcbbfc9-8a0f-bf3f-4b2d-7bf018ea4ff9", - "order": 1, - "description": "" - } - }, - { - "pk": 13, - "model": "workflows.abstractwidget", - "fields": { - "category": 4, - "treeview_image": null, - "name": "Extract Feature", - "is_streaming": false, - "uid": "ec9955a3-263b-49b4-a0ca-b62598962b76", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "adc_extract_feature_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_extract_documents_features", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 3, - "description": "Automatically generated widget from function ExtractDocumentsFeatures in package latino. The original function signature: ExtractDocumentsFeatures." 
- } - }, - { - "pk": 27, - "model": "workflows.abstractinput", - "fields": { - "widget": 13, - "name": "Annotated Document Corpus", - "short_name": "adc", - "uid": "3f1837e3-2997-a696-2e4e-48d1166f314f", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "adc", - "parameter": false, - "order": 1, - "description": "LatinoClowdFlows.DocumentCorpus" - } - }, - { - "pk": 28, - "model": "workflows.abstractinput", - "fields": { - "widget": 13, - "name": "Extracted Feature Name", - "short_name": "str", - "uid": "6a42ceda-c81e-6257-1b95-4aa8ae605bef", - "default": "", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "featureName", - "parameter": true, - "order": 2, - "description": "System.String" - } - }, - { - "pk": 12, - "model": "workflows.abstractoutput", - "fields": { - "widget": 13, - "name": "List of Extracted Features", - "short_name": "str", - "variable": "strings", - "uid": "55c95429-7b4f-7c47-54da-a5273ff4d490", - "order": 1, - "description": "" - } - }, - { - "pk": 14, - "model": "workflows.abstractwidget", - "fields": { - "category": 4, - "treeview_image": null, - "name": "Add Feature", - "is_streaming": false, - "uid": "cbae1835-bbea-43f2-b698-dc02ba106462", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "adc_add_feature_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_add_documents_features", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, - "description": "Automatically generated widget from function AddDocumentsFeatures in package latino. The original function signature: AddDocumentsFeatures." 
- } - }, - { - "pk": 29, - "model": "workflows.abstractinput", - "fields": { - "widget": 14, - "name": "Annotated Document Corpus", - "short_name": "adc", - "uid": "a3c0bde8-766f-5e30-2b2e-1fdcbaa8d913", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "adc", - "parameter": false, - "order": 1, - "description": "LatinoClowdFlows.DocumentCorpus" - } - }, - { - "pk": 30, - "model": "workflows.abstractinput", - "fields": { - "widget": 14, - "name": "Feature Values (Array of Labels)", - "short_name": "str", - "uid": "1219e584-0660-0d7b-4d31-288f5e99df89", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "featureValues", - "parameter": false, - "order": 2, - "description": "System.Collections.Generic.List`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" - } - }, - { - "pk": 31, - "model": "workflows.abstractinput", - "fields": { - "widget": 14, - "name": "New Feature Name", + "widget": 14, + "name": "New Feature Name", "short_name": "str", "uid": "a69d8966-d9a4-8557-c845-12740520163d", "default": "feature", @@ -1670,6 +1199,18 @@ "description": "LatinoClowdFlows.DocumentCorpus" } }, + { + "pk": 5, + "model": "workflows.category", + "fields": { + "uid": "5a4286c8-d182-45a5-896b-cc3c6ed84f22", + "parent": 1, + "workflow": null, + "user": null, + "order": 3, + "name": "Tokenization" + } + }, { "pk": 19, "model": "workflows.abstractwidget", @@ -2448,6 +1989,18 @@ "description": "" } }, + { + "pk": 6, + "model": "workflows.category", + "fields": { + "uid": "64780432-b114-4589-a24c-2331aed23502", + "parent": 5, + "workflow": null, + "user": null, + "order": 1, + "name": "Advanced" + } + }, { "pk": 26, "model": "workflows.abstractwidget", @@ -2706,6 +2259,18 @@ "description": "" } }, + { + "pk": 7, + "model": "workflows.category", + "fields": { + "uid": "05c26564-86b5-4a73-aae9-f4dc14bc75d7", + "parent": 1, + "workflow": null, + "user": 
null, + "order": 4, + "name": "Tagging" + } + }, { "pk": 29, "model": "workflows.abstractwidget", @@ -2891,124 +2456,32 @@ } }, { - "pk": 31, + "pk": 32, "model": "workflows.abstractwidget", "fields": { - "category": 8, + "category": 7, "treeview_image": null, - "name": "POS Tagger Hub (Text)", + "name": "Get Stopword Set", "is_streaming": false, - "uid": "33307dbf-08b5-4768-ba21-ebe1aa68c69b", + "uid": "773960d9-ccca-41bc-8b13-0c0f62c62f0e", "interaction_view": "", "image": null, "package": "latino", - "static_image": "tag_pos_do_image.png", + "static_image": "get_stop_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_pos_tag_string", + "action": "latino_get_stop_words", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 2, - "description": "Automatically generated widget from function PosTagString in package latino. The original function signature: PosTagString." + "order": 6, + "description": "Automatically generated widget from function GetStopWords in package latino. The original function signature: GetStopWords." 
} }, { - "pk": 82, - "model": "workflows.abstractinput", - "fields": { - "widget": 31, - "name": "Text", - "short_name": "str", - "uid": "ed41747a-dc93-d818-291c-62d791613083", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "text", - "parameter": false, - "order": 1, - "description": "System.Object" - } - }, - { - "pk": 83, - "model": "workflows.abstractinput", - "fields": { - "widget": 31, - "name": "POS Tagger", - "short_name": "pst", - "uid": "981e7201-35ab-7b25-ae8f-29e5b03cb407", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "posTagger", - "parameter": false, - "order": 2, - "description": "OpenNLP.Tools.PosTagger.EnglishMaximumEntropyPosTagger" - } - }, - { - "pk": 84, - "model": "workflows.abstractinput", - "fields": { - "widget": 31, - "name": "Output Feature Name", - "short_name": "str", - "uid": "02d347c6-12c9-9892-5d69-38548474b35a", - "default": "posTag", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "outputFeature", - "parameter": true, - "order": 3, - "description": "System.String" - } - }, - { - "pk": 32, - "model": "workflows.abstractoutput", - "fields": { - "widget": 31, - "name": "String", - "short_name": "str", - "variable": "string", - "uid": "b773304e-55a1-d06f-363c-e10850b7bd56", - "order": 1, - "description": "" - } - }, - { - "pk": 32, - "model": "workflows.abstractwidget", - "fields": { - "category": 7, - "treeview_image": null, - "name": "Get Stopword Set", - "is_streaming": false, - "uid": "773960d9-ccca-41bc-8b13-0c0f62c62f0e", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "get_stop_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_get_stop_words", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 6, - "description": "Automatically generated widget from function 
GetStopWords in package latino. The original function signature: GetStopWords." - } - }, - { - "pk": 85, + "pk": 85, "model": "workflows.abstractinput", "fields": { "widget": 32, @@ -3644,98 +3117,6 @@ "description": "" } }, - { - "pk": 36, - "model": "workflows.abstractwidget", - "fields": { - "category": 8, - "treeview_image": null, - "name": "Condition Tagger", - "is_streaming": false, - "uid": "9de84740-c4dc-464f-9388-fd736598829c", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "tag_condition_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_construct_condition_tagger", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, - "description": "Automatically generated widget from function ConstructConditionTagger in package latino. The original function signature: ConstructConditionTagger." - } - }, - { - "pk": 90, - "model": "workflows.abstractinput", - "fields": { - "widget": 36, - "name": "Feature Condition", - "short_name": "str", - "uid": "93ce61ae-f7af-ee3a-26d0-1b0af81bcc03", - "default": "", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "featureCondition", - "parameter": true, - "order": 1, - "description": "Condition which tokens to include based on their features.\nFormat examples:\n-Feature1 (don't include tokens with Feature1 set ta any value)\n-Feature1=Value1 (don't include tokens with Feature1 set to the value Value1)\n-Feature1 +Feature2 (don't include tokens with Feature1 set unless it has also Feature2 set)\n-Feature1=Value1 +Feature2 (don't include tokens with Feature1 set to Value1 unless it has also Feature2 set to any value)..." 
- } - }, - { - "pk": 91, - "model": "workflows.abstractinput", - "fields": { - "widget": 36, - "name": "output Feature Value", - "short_name": "str", - "uid": "96c4c439-c645-d2b4-cd7d-8524f3c3b2bf", - "default": "true", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "outputFeatureValue", - "parameter": true, - "order": 2, - "description": "System.String" - } - }, - { - "pk": 92, - "model": "workflows.abstractinput", - "fields": { - "widget": 36, - "name": "Put token/feature text as the output feature value", - "short_name": "bol", - "uid": "b2e17caf-f2d7-b587-b76e-83bdf12109ae", - "default": "", - "required": true, - "multi": false, - "parameter_type": "checkbox", - "variable": "elementsTextToFeatureValue", - "parameter": true, - "order": 3, - "description": "If set to true than token or token's feature text is asigned as output feature value" - } - }, - { - "pk": 37, - "model": "workflows.abstractoutput", - "fields": { - "widget": 36, - "name": "Tagger", - "short_name": "tgr", - "variable": "tagger", - "uid": "f1d9b4f2-62a1-add4-3f66-cb599081a33c", - "order": 1, - "description": "" - } - }, { "pk": 37, "model": "workflows.abstractwidget", @@ -3957,86 +3338,282 @@ } }, { - "pk": 39, + "pk": 8, + "model": "workflows.category", + "fields": { + "uid": "94237b9c-8e88-460c-abc5-9c8108acb821", + "parent": 7, + "workflow": null, + "user": null, + "order": 1, + "name": "Advanced" + } + }, + { + "pk": 31, "model": "workflows.abstractwidget", "fields": { "category": 8, "treeview_image": null, - "name": "Universal Multiple Tagger Hub", + "name": "POS Tagger Hub (Text)", "is_streaming": false, - "uid": "5bb5799b-00d3-449a-b79a-42e31a36ec8c", + "uid": "33307dbf-08b5-4768-ba21-ebe1aa68c69b", "interaction_view": "", "image": null, "package": "latino", - "static_image": "tag_multiple_do_image.png", + "static_image": "tag_pos_do_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_tag_adcmultiple", + 
"action": "latino_pos_tag_string", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 5, - "description": "Automatically generated widget from function TagADCMultiple in package latino. The original function signature: TagADCMultiple." + "order": 2, + "description": "Automatically generated widget from function PosTagString in package latino. The original function signature: PosTagString." } }, { - "pk": 101, + "pk": 82, "model": "workflows.abstractinput", "fields": { - "widget": 39, - "name": "Annotated Document Corpus", - "short_name": "adc", - "uid": "da1abf2d-4055-a35e-be5f-51113e7af728", + "widget": 31, + "name": "Text", + "short_name": "str", + "uid": "ed41747a-dc93-d818-291c-62d791613083", "default": "", "required": true, "multi": false, "parameter_type": null, - "variable": "adc", + "variable": "text", "parameter": false, "order": 1, - "description": "LatinoClowdFlows.DocumentCorpus" + "description": "System.Object" } }, { - "pk": 102, + "pk": 83, "model": "workflows.abstractinput", "fields": { - "widget": 39, - "name": "Token Tagger", - "short_name": "tgr", - "uid": "07a9308b-9a84-28db-7d6d-d7ba80587882", + "widget": 31, + "name": "POS Tagger", + "short_name": "pst", + "uid": "981e7201-35ab-7b25-ae8f-29e5b03cb407", "default": "", "required": true, - "multi": true, + "multi": false, "parameter_type": null, - "variable": "tagger", + "variable": "posTagger", "parameter": false, "order": 2, - "description": "System.Object" + "description": "OpenNLP.Tools.PosTagger.EnglishMaximumEntropyPosTagger" } }, { - "pk": 103, + "pk": 84, "model": "workflows.abstractinput", "fields": { - "widget": 39, - "name": "Token Annotation [ / Feature Name ] (one line per each tagger)", + "widget": 31, + "name": "Output Feature Name", "short_name": "str", - "uid": "8081e46e-e87f-d112-5008-c99825c5ff12", - "default": "Token", + "uid": "02d347c6-12c9-9892-5d69-38548474b35a", + "default": "posTag", "required": true, "multi": false, - 
"parameter_type": "textarea", - "variable": "elementAnnotation", + "parameter_type": "text", + "variable": "outputFeature", "parameter": true, "order": 3, - "description": "Token Annotation of the token to be tagged. If also the feature name is used than the feature value of selected token will be tagged.\nUsage: \n1. TokenName\n2. TokenName/FatureName\nIf multiple taggers are used then one line per tagger must be specified." + "description": "System.String" } }, { - "pk": 104, + "pk": 32, + "model": "workflows.abstractoutput", + "fields": { + "widget": 31, + "name": "String", + "short_name": "str", + "variable": "string", + "uid": "b773304e-55a1-d06f-363c-e10850b7bd56", + "order": 1, + "description": "" + } + }, + { + "pk": 36, + "model": "workflows.abstractwidget", + "fields": { + "category": 8, + "treeview_image": null, + "name": "Condition Tagger", + "is_streaming": false, + "uid": "9de84740-c4dc-464f-9388-fd736598829c", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "tag_condition_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_construct_condition_tagger", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Automatically generated widget from function ConstructConditionTagger in package latino. The original function signature: ConstructConditionTagger." 
+ } + }, + { + "pk": 90, + "model": "workflows.abstractinput", + "fields": { + "widget": 36, + "name": "Feature Condition", + "short_name": "str", + "uid": "93ce61ae-f7af-ee3a-26d0-1b0af81bcc03", + "default": "", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "featureCondition", + "parameter": true, + "order": 1, + "description": "Condition which tokens to include based on their features.\nFormat examples:\n-Feature1 (don't include tokens with Feature1 set ta any value)\n-Feature1=Value1 (don't include tokens with Feature1 set to the value Value1)\n-Feature1 +Feature2 (don't include tokens with Feature1 set unless it has also Feature2 set)\n-Feature1=Value1 +Feature2 (don't include tokens with Feature1 set to Value1 unless it has also Feature2 set to any value)..." + } + }, + { + "pk": 91, + "model": "workflows.abstractinput", + "fields": { + "widget": 36, + "name": "output Feature Value", + "short_name": "str", + "uid": "96c4c439-c645-d2b4-cd7d-8524f3c3b2bf", + "default": "true", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "outputFeatureValue", + "parameter": true, + "order": 2, + "description": "System.String" + } + }, + { + "pk": 92, + "model": "workflows.abstractinput", + "fields": { + "widget": 36, + "name": "Put token/feature text as the output feature value", + "short_name": "bol", + "uid": "b2e17caf-f2d7-b587-b76e-83bdf12109ae", + "default": "", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "elementsTextToFeatureValue", + "parameter": true, + "order": 3, + "description": "If set to true than token or token's feature text is asigned as output feature value" + } + }, + { + "pk": 37, + "model": "workflows.abstractoutput", + "fields": { + "widget": 36, + "name": "Tagger", + "short_name": "tgr", + "variable": "tagger", + "uid": "f1d9b4f2-62a1-add4-3f66-cb599081a33c", + "order": 1, + "description": "" + } + }, + { + "pk": 39, + "model": 
"workflows.abstractwidget", + "fields": { + "category": 8, + "treeview_image": null, + "name": "Universal Multiple Tagger Hub", + "is_streaming": false, + "uid": "5bb5799b-00d3-449a-b79a-42e31a36ec8c", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "tag_multiple_do_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_tag_adcmultiple", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 5, + "description": "Automatically generated widget from function TagADCMultiple in package latino. The original function signature: TagADCMultiple." + } + }, + { + "pk": 101, + "model": "workflows.abstractinput", + "fields": { + "widget": 39, + "name": "Annotated Document Corpus", + "short_name": "adc", + "uid": "da1abf2d-4055-a35e-be5f-51113e7af728", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "adc", + "parameter": false, + "order": 1, + "description": "LatinoClowdFlows.DocumentCorpus" + } + }, + { + "pk": 102, + "model": "workflows.abstractinput", + "fields": { + "widget": 39, + "name": "Token Tagger", + "short_name": "tgr", + "uid": "07a9308b-9a84-28db-7d6d-d7ba80587882", + "default": "", + "required": true, + "multi": true, + "parameter_type": null, + "variable": "tagger", + "parameter": false, + "order": 2, + "description": "System.Object" + } + }, + { + "pk": 103, + "model": "workflows.abstractinput", + "fields": { + "widget": 39, + "name": "Token Annotation [ / Feature Name ] (one line per each tagger)", + "short_name": "str", + "uid": "8081e46e-e87f-d112-5008-c99825c5ff12", + "default": "Token", + "required": true, + "multi": false, + "parameter_type": "textarea", + "variable": "elementAnnotation", + "parameter": true, + "order": 3, + "description": "Token Annotation of the token to be tagged. 
If also the feature name is used than the feature value of selected token will be tagged.\nUsage: \n1. TokenName\n2. TokenName/FatureName\nIf multiple taggers are used then one line per tagger must be specified." + } + }, + { + "pk": 104, "model": "workflows.abstractinput", "fields": { "widget": 39, @@ -4251,14 +3828,26 @@ } }, { - "pk": 42, + "pk": 9, + "model": "workflows.category", + "fields": { + "uid": "b5964b0d-5acb-4dbf-a49d-4331a5a44f9c", + "parent": 1, + "workflow": null, + "user": null, + "order": 5, + "name": "Bag of Words" + } + }, + { + "pk": 43, "model": "workflows.abstractwidget", "fields": { - "category": 10, + "category": 9, "treeview_image": null, - "name": "BOW Space (Text)", + "name": "BOW Space", "is_streaming": false, - "uid": "471eb047-8d39-4eac-8cc1-6c768c42d897", + "uid": "070a465b-ea02-4238-b16c-9df74fbcbba8", "interaction_view": "", "image": null, "package": "latino", @@ -4266,7 +3855,7 @@ "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_construct_bow_space_1", + "action": "latino_construct_bow_space_2", "wsdl_method": "", "wsdl": "", "interactive": false, @@ -4276,303 +3865,50 @@ } }, { - "pk": 111, + "pk": 120, "model": "workflows.abstractinput", "fields": { - "widget": 42, - "name": "Textual Documents (Array of strings)", - "short_name": "str", - "uid": "e6029afb-4598-b3fb-d0a1-2cfd5ea0178b", + "widget": 43, + "name": "Annotated Document Corpus", + "short_name": "adc", + "uid": "0abb5f7e-8690-60c4-b5c7-85c1d1731d8e", "default": "", "required": true, "multi": false, "parameter_type": "textarea", - "variable": "documents", + "variable": "adc", "parameter": false, "order": 1, - "description": "System.Object" + "description": "LatinoClowdFlows.DocumentCorpus" } }, { - "pk": 112, + "pk": 121, "model": "workflows.abstractinput", "fields": { - "widget": 42, - "name": "Tokenizer", - "short_name": "tkn", - "uid": "df8d5b1f-200e-86dd-76ea-cdc3a65e2b02", - "default": "", + "widget": 43, + "name": "Token 
Annotation", + "short_name": "str", + "uid": "03cb77fe-4b71-3f3b-b52c-7ce359cd671a", + "default": "Token", "required": true, "multi": false, - "parameter_type": null, - "variable": "tokenizer", - "parameter": false, + "parameter_type": "text", + "variable": "tokenId", + "parameter": true, "order": 2, - "description": "Latino.TextMining.ITokenizer" + "description": "System.String" } }, { - "pk": 113, + "pk": 122, "model": "workflows.abstractinput", "fields": { - "widget": 42, - "name": "Stemmer or Lemmatizer (Tagger)", - "short_name": "tgr", - "uid": "4cfdfa8f-e335-791d-72f0-f4f51666450d", - "default": "", - "required": true, - "multi": false, - "parameter_type": null, - "variable": "stemmer", - "parameter": false, - "order": 3, - "description": "Latino.TextMining.IStemmer" - } - }, - { - "pk": 114, - "model": "workflows.abstractinput", - "fields": { - "widget": 42, - "name": "Stopwords (Array of Stopwords)", - "short_name": "str", - "uid": "b183ddfb-9854-a13d-58e5-4943bb461192", - "default": "", - "required": false, - "multi": false, - "parameter_type": "textarea", - "variable": "stopwords", - "parameter": false, - "order": 4, - "description": "System.Collections.Generic.List`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" - } - }, - { - "pk": 115, - "model": "workflows.abstractinput", - "fields": { - "widget": 42, - "name": "Maximum N-Gram Length", - "short_name": "int", - "uid": "9f7a8787-5d03-122a-97a3-540ef7e2ce9d", - "default": "2", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "maxNGramLen", - "parameter": true, - "order": 5, - "description": "System.Int32" - } - }, - { - "pk": 116, - "model": "workflows.abstractinput", - "fields": { - "widget": 42, - "name": "Minimum Word Freqency", - "short_name": "dbl", - "uid": "10e4ef28-62b4-c59e-2f6c-7188bfac4a17", - "default": "5", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "minWordFreq", - 
"parameter": true, - "order": 6, - "description": "System.Int32" - } - }, - { - "pk": 117, - "model": "workflows.abstractinput", - "fields": { - "widget": 42, - "name": "Word Weighting Type", - "short_name": "wwt", - "uid": "76121091-e83a-ec17-bdf2-cd239e7c8b5d", - "default": "TfIdf", - "required": true, - "multi": false, - "parameter_type": "select", - "variable": "wordWeightType", - "parameter": true, - "order": 7, - "description": "Latino.TextMining.WordWeightType" - } - }, - { - "pk": 53, - "model": "workflows.abstractoption", - "fields": { - "uid": "25f63cf8-7cdf-fae2-f661-c69cfb7ec932", - "abstract_input": 117, - "value": "TermFreq", - "name": "Term Freq" - } - }, - { - "pk": 54, - "model": "workflows.abstractoption", - "fields": { - "uid": "699fb42d-3593-80ad-0116-b4401750e866", - "abstract_input": 117, - "value": "TfIdf", - "name": "Tf Idf" - } - }, - { - "pk": 55, - "model": "workflows.abstractoption", - "fields": { - "uid": "6e6fc78e-809b-3f9a-39ec-ae31d1db030f", - "abstract_input": 117, - "value": "LogDfTfIdf", - "name": "Log Df Tf Idf" - } - }, - { - "pk": 56, - "model": "workflows.abstractoption", - "fields": { - "uid": "718c39f4-26d5-b154-0b62-e1a5247f22ca", - "abstract_input": 117, - "value": "Dyakonov", - "name": "Dyakonov" - } - }, - { - "pk": 118, - "model": "workflows.abstractinput", - "fields": { - "widget": 42, - "name": "Cut Low Weights Percentage", - "short_name": "dbl", - "uid": "0160cac5-a82c-a2ea-d8ec-66e3a76b2a02", - "default": "0.2", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "cutLowWeightsPerc", - "parameter": true, - "order": 8, - "description": "System.Double" - } - }, - { - "pk": 119, - "model": "workflows.abstractinput", - "fields": { - "widget": 42, - "name": "Normalize Vectors", - "short_name": "bol", - "uid": "58e02ee9-6308-6ad8-49aa-dce004c8175f", - "default": "true", - "required": true, - "multi": false, - "parameter_type": "checkbox", - "variable": "normalizeVectors", - "parameter": true, - 
"order": 9, - "description": "System.Boolean" - } - }, - { - "pk": 43, - "model": "workflows.abstractoutput", - "fields": { - "widget": 42, - "name": "Bag of Words Space", - "short_name": "bow", - "variable": "bow", - "uid": "6353aa61-77fd-189f-a260-14a8a4195839", - "order": 1, - "description": "" - } - }, - { - "pk": 44, - "model": "workflows.abstractoutput", - "fields": { - "widget": 42, - "name": "Dataset", - "short_name": "ds", - "variable": "ds", - "uid": "03e3ee38-d0a6-a503-1bf1-67dc3b950de4", - "order": 2, - "description": "" - } - }, - { - "pk": 43, - "model": "workflows.abstractwidget", - "fields": { - "category": 9, - "treeview_image": null, - "name": "BOW Space", - "is_streaming": false, - "uid": "070a465b-ea02-4238-b16c-9df74fbcbba8", - "interaction_view": "", - "image": null, - "package": "latino", - "static_image": "bow_space_image.png", - "post_interact_action": "", - "user": null, - "visualization_view": "", - "action": "latino_construct_bow_space_2", - "wsdl_method": "", - "wsdl": "", - "interactive": false, - "has_progress_bar": false, - "order": 1, - "description": "Automatically generated widget from function ConstructBowSpace in package latino. The original function signature: ConstructBowSpace." 
- } - }, - { - "pk": 120, - "model": "workflows.abstractinput", - "fields": { - "widget": 43, - "name": "Annotated Document Corpus", - "short_name": "adc", - "uid": "0abb5f7e-8690-60c4-b5c7-85c1d1731d8e", - "default": "", - "required": true, - "multi": false, - "parameter_type": "textarea", - "variable": "adc", - "parameter": false, - "order": 1, - "description": "LatinoClowdFlows.DocumentCorpus" - } - }, - { - "pk": 121, - "model": "workflows.abstractinput", - "fields": { - "widget": 43, - "name": "Token Annotation", - "short_name": "str", - "uid": "03cb77fe-4b71-3f3b-b52c-7ce359cd671a", - "default": "Token", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "tokenId", - "parameter": true, - "order": 2, - "description": "System.String" - } - }, - { - "pk": 122, - "model": "workflows.abstractinput", - "fields": { - "widget": 43, - "name": "Stem Feature Name", - "short_name": "str", - "uid": "e22eb888-c070-e863-0abf-c8b448917d5f", - "default": "stem", + "widget": 43, + "name": "Stem Feature Name", + "short_name": "str", + "uid": "e22eb888-c070-e863-0abf-c8b448917d5f", + "default": "stem", "required": true, "multi": false, "parameter_type": "text", @@ -4941,11 +4277,276 @@ } }, { - "pk": 46, - "model": "workflows.abstractwidget", + "pk": 10, + "model": "workflows.category", "fields": { - "category": 10, - "treeview_image": null, + "uid": "0378e3a8-b71e-47b5-96b0-84dca2680f4d", + "parent": 9, + "workflow": null, + "user": null, + "order": 1, + "name": "Advanced" + } + }, + { + "pk": 42, + "model": "workflows.abstractwidget", + "fields": { + "category": 10, + "treeview_image": null, + "name": "BOW Space (Text)", + "is_streaming": false, + "uid": "471eb047-8d39-4eac-8cc1-6c768c42d897", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "bow_space_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_construct_bow_space_1", + "wsdl_method": "", + "wsdl": "", 
+ "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "Automatically generated widget from function ConstructBowSpace in package latino. The original function signature: ConstructBowSpace." + } + }, + { + "pk": 111, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Textual Documents (Array of strings)", + "short_name": "str", + "uid": "e6029afb-4598-b3fb-d0a1-2cfd5ea0178b", + "default": "", + "required": true, + "multi": false, + "parameter_type": "textarea", + "variable": "documents", + "parameter": false, + "order": 1, + "description": "System.Object" + } + }, + { + "pk": 112, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Tokenizer", + "short_name": "tkn", + "uid": "df8d5b1f-200e-86dd-76ea-cdc3a65e2b02", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "tokenizer", + "parameter": false, + "order": 2, + "description": "Latino.TextMining.ITokenizer" + } + }, + { + "pk": 113, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Stemmer or Lemmatizer (Tagger)", + "short_name": "tgr", + "uid": "4cfdfa8f-e335-791d-72f0-f4f51666450d", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "stemmer", + "parameter": false, + "order": 3, + "description": "Latino.TextMining.IStemmer" + } + }, + { + "pk": 114, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Stopwords (Array of Stopwords)", + "short_name": "str", + "uid": "b183ddfb-9854-a13d-58e5-4943bb461192", + "default": "", + "required": false, + "multi": false, + "parameter_type": "textarea", + "variable": "stopwords", + "parameter": false, + "order": 4, + "description": "System.Collections.Generic.List`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" + } + }, + { + "pk": 115, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, 
+ "name": "Maximum N-Gram Length", + "short_name": "int", + "uid": "9f7a8787-5d03-122a-97a3-540ef7e2ce9d", + "default": "2", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "maxNGramLen", + "parameter": true, + "order": 5, + "description": "System.Int32" + } + }, + { + "pk": 116, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Minimum Word Freqency", + "short_name": "dbl", + "uid": "10e4ef28-62b4-c59e-2f6c-7188bfac4a17", + "default": "5", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "minWordFreq", + "parameter": true, + "order": 6, + "description": "System.Int32" + } + }, + { + "pk": 117, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Word Weighting Type", + "short_name": "wwt", + "uid": "76121091-e83a-ec17-bdf2-cd239e7c8b5d", + "default": "TfIdf", + "required": true, + "multi": false, + "parameter_type": "select", + "variable": "wordWeightType", + "parameter": true, + "order": 7, + "description": "Latino.TextMining.WordWeightType" + } + }, + { + "pk": 53, + "model": "workflows.abstractoption", + "fields": { + "uid": "25f63cf8-7cdf-fae2-f661-c69cfb7ec932", + "abstract_input": 117, + "value": "TermFreq", + "name": "Term Freq" + } + }, + { + "pk": 54, + "model": "workflows.abstractoption", + "fields": { + "uid": "699fb42d-3593-80ad-0116-b4401750e866", + "abstract_input": 117, + "value": "TfIdf", + "name": "Tf Idf" + } + }, + { + "pk": 55, + "model": "workflows.abstractoption", + "fields": { + "uid": "6e6fc78e-809b-3f9a-39ec-ae31d1db030f", + "abstract_input": 117, + "value": "LogDfTfIdf", + "name": "Log Df Tf Idf" + } + }, + { + "pk": 56, + "model": "workflows.abstractoption", + "fields": { + "uid": "718c39f4-26d5-b154-0b62-e1a5247f22ca", + "abstract_input": 117, + "value": "Dyakonov", + "name": "Dyakonov" + } + }, + { + "pk": 118, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Cut Low Weights Percentage", + 
"short_name": "dbl", + "uid": "0160cac5-a82c-a2ea-d8ec-66e3a76b2a02", + "default": "0.2", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "cutLowWeightsPerc", + "parameter": true, + "order": 8, + "description": "System.Double" + } + }, + { + "pk": 119, + "model": "workflows.abstractinput", + "fields": { + "widget": 42, + "name": "Normalize Vectors", + "short_name": "bol", + "uid": "58e02ee9-6308-6ad8-49aa-dce004c8175f", + "default": "true", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "normalizeVectors", + "parameter": true, + "order": 9, + "description": "System.Boolean" + } + }, + { + "pk": 43, + "model": "workflows.abstractoutput", + "fields": { + "widget": 42, + "name": "Bag of Words Space", + "short_name": "bow", + "variable": "bow", + "uid": "6353aa61-77fd-189f-a260-14a8a4195839", + "order": 1, + "description": "" + } + }, + { + "pk": 44, + "model": "workflows.abstractoutput", + "fields": { + "widget": 42, + "name": "Dataset", + "short_name": "ds", + "variable": "ds", + "uid": "03e3ee38-d0a6-a503-1bf1-67dc3b950de4", + "order": 2, + "description": "" + } + }, + { + "pk": 46, + "model": "workflows.abstractwidget", + "fields": { + "category": 10, + "treeview_image": null, "name": "Process New Documents (Text)", "is_streaming": false, "uid": "f9e2ef30-ec02-49af-8616-94d4cb13404c", @@ -5014,6 +4615,18 @@ "description": "" } }, + { + "pk": 11, + "model": "workflows.category", + "fields": { + "uid": "a8348101-116c-4e04-b2a7-af1c991c3927", + "parent": 1, + "workflow": null, + "user": null, + "order": 6, + "name": "Dataset" + } + }, { "pk": 47, "model": "workflows.abstractwidget", @@ -5410,6 +5023,18 @@ "description": "" } }, + { + "pk": 12, + "model": "workflows.category", + "fields": { + "uid": "c7a26cb0-65af-4c6e-887b-7c6a7a162327", + "parent": 1, + "workflow": null, + "user": null, + "order": 7, + "name": "Similarity Matrix" + } + }, { "pk": 52, "model": "workflows.abstractwidget", @@ -5576,6 
+5201,18 @@ "description": "" } }, + { + "pk": 13, + "model": "workflows.category", + "fields": { + "uid": "74457dca-9fba-4d66-98dd-633df385a22b", + "parent": 1, + "workflow": null, + "user": null, + "order": 8, + "name": "Clustering" + } + }, { "pk": 54, "model": "workflows.abstractwidget", @@ -6124,6 +5761,18 @@ "description": "System.Object" } }, + { + "pk": 14, + "model": "workflows.category", + "fields": { + "uid": "c8fd491e-75ad-487d-b42f-b5ca07ec045a", + "parent": 1, + "workflow": null, + "user": null, + "order": 9, + "name": "Classification" + } + }, { "pk": 60, "model": "workflows.abstractwidget", @@ -7018,125 +6667,355 @@ } }, { - "pk": 72, + "pk": 72, + "model": "workflows.abstractoutput", + "fields": { + "widget": 67, + "name": "Classifier", + "short_name": "csf", + "variable": "classifier", + "uid": "5d3ae75f-7fc8-ab56-8eab-3a4b1fa0a20c", + "order": 1, + "description": "" + } + }, + { + "pk": 68, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": null, + "name": "Knn Fast Classifier", + "is_streaming": false, + "uid": "78f39d45-04bd-4b6a-9fd9-bf22f2a78223", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "classifier_knn_fast_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_construct_knn_classifier_fast", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 9, + "description": "Automatically generated widget from function ConstructKnnClassifierFast in package latino. The original function signature: ConstructKnnClassifierFast." 
+ } + }, + { + "pk": 196, + "model": "workflows.abstractinput", + "fields": { + "widget": 68, + "name": "K (Neighbourhood)", + "short_name": "int", + "uid": "50732a27-8321-b266-db28-85cba65c7f21", + "default": "10", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "k", + "parameter": true, + "order": 1, + "description": "System.Int32" + } + }, + { + "pk": 197, + "model": "workflows.abstractinput", + "fields": { + "widget": 68, + "name": "Soft Voting", + "short_name": "bol", + "uid": "70286a7a-bb2c-f5d7-9ffc-e838a3c880c1", + "default": "true", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "softVoting", + "parameter": true, + "order": 2, + "description": "System.Boolean" + } + }, + { + "pk": 73, + "model": "workflows.abstractoutput", + "fields": { + "widget": 68, + "name": "Classifier", + "short_name": "csf", + "variable": "classifier", + "uid": "efcbe2f3-a6c4-032f-8c21-513b5470bd20", + "order": 1, + "description": "" + } + }, + { + "pk": 69, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": null, + "name": "Train Classifier Hub", + "is_streaming": false, + "uid": "99eb79a4-348c-411a-9bba-a42caf048636", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "classifier_train_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_train_classifier", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 15, + "description": "Automatically generated widget from function TrainClassifier in package latino. The original function signature: TrainClassifier." 
+ } + }, + { + "pk": 198, + "model": "workflows.abstractinput", + "fields": { + "widget": 69, + "name": "Classifier", + "short_name": "csf", + "uid": "e9bac05e-f82f-09cd-497d-4407fcecd056", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "csf", + "parameter": false, + "order": 1, + "description": "Latino.Model.IModel`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" + } + }, + { + "pk": 199, + "model": "workflows.abstractinput", + "fields": { + "widget": 69, + "name": "Dataset", + "short_name": "ds", + "uid": "4df20aa1-64f1-11c1-bd8c-b9c64e5c0437", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "ds", + "parameter": false, + "order": 2, + "description": "Latino.Model.LabeledDataset`2[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089],[Latino.SparseVector`1[[System.Double, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]], Latino, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null]]" + } + }, + { + "pk": 74, + "model": "workflows.abstractoutput", + "fields": { + "widget": 69, + "name": "Classifier", + "short_name": "csf", + "variable": "csf", + "uid": "fcf15ec3-2176-5d23-4c0a-ebe19faf2a92", + "order": 1, + "description": "" + } + }, + { + "pk": 70, + "model": "workflows.abstractwidget", + "fields": { + "category": 14, + "treeview_image": null, + "name": "Predict Classification", + "is_streaming": false, + "uid": "a92685c6-aeca-42bd-af49-c815ebafb573", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "classification_predict_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_predict_classification", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 13, + "description": "Automatically generated widget from function 
PredictClassification in package latino. The original function signature: PredictClassification." + } + }, + { + "pk": 200, + "model": "workflows.abstractinput", + "fields": { + "widget": 70, + "name": "Classifier", + "short_name": "csf", + "uid": "d48016c2-edb0-265e-c40a-858745ab012b", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "csf", + "parameter": false, + "order": 1, + "description": "Latino.Model.IModel`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" + } + }, + { + "pk": 201, + "model": "workflows.abstractinput", + "fields": { + "widget": 70, + "name": "Dataset", + "short_name": "ds", + "uid": "6bfd0b18-b660-feb8-8894-492da5fb7ac9", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "ds", + "parameter": false, + "order": 2, + "description": "Latino.Model.LabeledDataset`2[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089],[Latino.SparseVector`1[[System.Double, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]], Latino, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null]]" + } + }, + { + "pk": 75, "model": "workflows.abstractoutput", "fields": { - "widget": 67, - "name": "Classifier", - "short_name": "csf", - "variable": "classifier", - "uid": "5d3ae75f-7fc8-ab56-8eab-3a4b1fa0a20c", + "widget": 70, + "name": "Prediction(s)", + "short_name": "prd", + "variable": "predictions", + "uid": "efaba4b6-3b66-e7b9-f361-866c6104c3ed", "order": 1, "description": "" } }, { - "pk": 68, + "pk": 76, + "model": "workflows.abstractoutput", + "fields": { + "widget": 70, + "name": "Labeled dataset", + "short_name": "ds", + "variable": "ds", + "uid": "3a72cfc4-179d-8db9-8086-6017f697e3b8", + "order": 2, + "description": "" + } + }, + { + "pk": 71, "model": "workflows.abstractwidget", "fields": { "category": 14, "treeview_image": null, - "name": "Knn Fast Classifier", + 
"name": "Prediction Info", "is_streaming": false, - "uid": "78f39d45-04bd-4b6a-9fd9-bf22f2a78223", + "uid": "04813a15-93d0-48ca-9c64-0bb6a7bc92b4", "interaction_view": "", "image": null, "package": "latino", - "static_image": "classifier_knn_fast_image.png", + "static_image": "clasification_info_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_construct_knn_classifier_fast", + "action": "latino_prediction_info", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 9, - "description": "Automatically generated widget from function ConstructKnnClassifierFast in package latino. The original function signature: ConstructKnnClassifierFast." + "order": 14, + "description": "Automatically generated widget from function PredictionInfo in package latino. The original function signature: PredictionInfo." } }, { - "pk": 196, + "pk": 202, "model": "workflows.abstractinput", "fields": { - "widget": 68, - "name": "K (Neighbourhood)", - "short_name": "int", - "uid": "50732a27-8321-b266-db28-85cba65c7f21", - "default": "10", + "widget": 71, + "name": "Prediction(s)", + "short_name": "prd", + "uid": "4196036a-5c53-8452-8ccc-325f1ac734d6", + "default": "", "required": true, "multi": false, - "parameter_type": "text", - "variable": "k", - "parameter": true, + "parameter_type": null, + "variable": "predictions", + "parameter": false, "order": 1, - "description": "System.Int32" + "description": "System.Collections.Generic.List`1[[Latino.Model.Prediction`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]], Latino, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null]]" } }, { - "pk": 197, - "model": "workflows.abstractinput", + "pk": 77, + "model": "workflows.abstractoutput", "fields": { - "widget": 68, - "name": "Soft Voting", - "short_name": "bol", - "uid": "70286a7a-bb2c-f5d7-9ffc-e838a3c880c1", - "default": "true", - "required": true, - "multi": false, 
- "parameter_type": "checkbox", - "variable": "softVoting", - "parameter": true, - "order": 2, - "description": "System.Boolean" + "widget": 71, + "name": "Lable(s) (Array of Strings)", + "short_name": "str", + "variable": "labels", + "uid": "776fb044-5dfc-18a2-cf20-4e817796f204", + "order": 1, + "description": "" } }, { - "pk": 73, + "pk": 78, "model": "workflows.abstractoutput", "fields": { - "widget": 68, - "name": "Classifier", - "short_name": "csf", - "variable": "classifier", - "uid": "efcbe2f3-a6c4-032f-8c21-513b5470bd20", - "order": 1, + "widget": 71, + "name": "Prediction Info(s)", + "short_name": "obj", + "variable": "predictInfos", + "uid": "8653f42a-e09b-5eaf-795f-587cc48bfc21", + "order": 2, "description": "" } }, { - "pk": 69, + "pk": 72, "model": "workflows.abstractwidget", "fields": { "category": 14, "treeview_image": null, - "name": "Train Classifier Hub", + "name": "Cross Validation", "is_streaming": false, - "uid": "99eb79a4-348c-411a-9bba-a42caf048636", + "uid": "c42c757c-245e-4eb2-a657-e77fcc4f4802", "interaction_view": "", "image": null, "package": "latino", - "static_image": "classifier_train_image.png", + "static_image": "classif_cross_valid_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_train_classifier", + "action": "latino_cross_validation", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 15, - "description": "Automatically generated widget from function TrainClassifier in package latino. The original function signature: TrainClassifier." + "order": 11, + "description": "Automatically generated widget from function CrossValidation in package latino. The original function signature: CrossValidation." 
} }, { - "pk": 198, + "pk": 203, "model": "workflows.abstractinput", "fields": { - "widget": 69, + "widget": 72, "name": "Classifier", "short_name": "csf", - "uid": "e9bac05e-f82f-09cd-497d-4407fcecd056", + "uid": "06a3b7c8-681f-ab02-f171-75362b9ea5a0", "default": "", "required": true, "multi": false, @@ -7148,13 +7027,13 @@ } }, { - "pk": 199, + "pk": 204, "model": "workflows.abstractinput", "fields": { - "widget": 69, + "widget": 72, "name": "Dataset", "short_name": "ds", - "uid": "4df20aa1-64f1-11c1-bd8c-b9c64e5c0437", + "uid": "eeab1227-dfa8-31df-b27b-2a27efeda6b0", "default": "", "required": true, "multi": false, @@ -7166,51 +7045,123 @@ } }, { - "pk": 74, + "pk": 205, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "Num of Sets", + "short_name": "int", + "uid": "21ea2877-e44d-c8db-0612-b9bce6e62f36", + "default": "10", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "numOfSets", + "parameter": true, + "order": 3, + "description": "System.Int32" + } + }, + { + "pk": 206, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "Assign Sets Randomly", + "short_name": "bol", + "uid": "f7bb77e7-327e-5794-f330-186d669d6b1b", + "default": "true", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "random", + "parameter": true, + "order": 4, + "description": "System.Boolean" + } + }, + { + "pk": 207, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "Use Seed for Random", + "short_name": "bol", + "uid": "bdc7e5b8-00f1-d7bd-6927-fc91856a280a", + "default": "false", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "useSeed", + "parameter": true, + "order": 5, + "description": "System.Boolean" + } + }, + { + "pk": 208, + "model": "workflows.abstractinput", + "fields": { + "widget": 72, + "name": "Random Seed", + "short_name": "int", + "uid": "15d0d98c-b25b-e7fc-19f1-d547a0dddd14", + "default": 
"0", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "randomSeed", + "parameter": true, + "order": 6, + "description": "System.Int32" + } + }, + { + "pk": 79, "model": "workflows.abstractoutput", "fields": { - "widget": 69, - "name": "Classifier", - "short_name": "csf", - "variable": "csf", - "uid": "fcf15ec3-2176-5d23-4c0a-ebe19faf2a92", + "widget": 72, + "name": "Data Object with results", + "short_name": "obj", + "variable": "obj", + "uid": "43d23d99-8ac4-5b3e-c89c-43d63db3920e", "order": 1, "description": "" } }, { - "pk": 70, + "pk": 73, "model": "workflows.abstractwidget", "fields": { "category": 14, "treeview_image": null, - "name": "Predict Classification", + "name": "Cross Validation (Predefined Splits)", "is_streaming": false, - "uid": "a92685c6-aeca-42bd-af49-c815ebafb573", + "uid": "72b20813-e1a8-4c2d-a448-b7337ecc2a08", "interaction_view": "", "image": null, "package": "latino", - "static_image": "classification_predict_image.png", + "static_image": "classif_cross_valid_predef_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_predict_classification", + "action": "latino_cross_validation_predef_splits", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 13, - "description": "Automatically generated widget from function PredictClassification in package latino. The original function signature: PredictClassification." + "order": 12, + "description": "Automatically generated widget from function CrossValidationPredefSplits in package latino. The original function signature: CrossValidationPredefSplits." 
} }, { - "pk": 200, + "pk": 209, "model": "workflows.abstractinput", "fields": { - "widget": 70, + "widget": 73, "name": "Classifier", "short_name": "csf", - "uid": "d48016c2-edb0-265e-c40a-858745ab012b", + "uid": "4178be10-5a31-bbfd-dafc-51470e1458bb", "default": "", "required": true, "multi": false, @@ -7222,13 +7173,13 @@ } }, { - "pk": 201, + "pk": 210, "model": "workflows.abstractinput", "fields": { - "widget": 70, + "widget": 73, "name": "Dataset", "short_name": "ds", - "uid": "6bfd0b18-b660-feb8-8894-492da5fb7ac9", + "uid": "657a8f1f-7958-d479-a062-428ea6250620", "default": "", "required": true, "multi": false, @@ -7240,64 +7191,69 @@ } }, { - "pk": 75, - "model": "workflows.abstractoutput", + "pk": 211, + "model": "workflows.abstractinput", "fields": { - "widget": 70, - "name": "Prediction(s)", - "short_name": "prd", - "variable": "predictions", - "uid": "efaba4b6-3b66-e7b9-f361-866c6104c3ed", - "order": 1, - "description": "" + "widget": 73, + "name": "Sets (List with predefined set numbers)", + "short_name": "lst", + "uid": "f51e4a03-3fc8-f5b2-2555-5f07920a4ddb", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "sets", + "parameter": false, + "order": 3, + "description": "System.Collections.Generic.List`1[[System.Int32, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" } }, { - "pk": 76, + "pk": 80, "model": "workflows.abstractoutput", "fields": { - "widget": 70, - "name": "Labeled dataset", - "short_name": "ds", - "variable": "ds", - "uid": "3a72cfc4-179d-8db9-8086-6017f697e3b8", - "order": 2, + "widget": 73, + "name": "Data Object with results", + "short_name": "obj", + "variable": "obj", + "uid": "70558c82-fa96-7f12-c8e4-f5c1297e2cfe", + "order": 1, "description": "" } }, { - "pk": 71, + "pk": 74, "model": "workflows.abstractwidget", "fields": { "category": 14, "treeview_image": null, - "name": "Prediction Info", + "name": "View Classifications", "is_streaming": false, - "uid": 
"04813a15-93d0-48ca-9c64-0bb6a7bc92b4", + "uid": "9899c08b-df30-429a-83ab-fc83ecf0d26a", "interaction_view": "", "image": null, "package": "latino", - "static_image": "clasification_info_image.png", + "static_image": "classif_result_view_image.png", "post_interact_action": "", "user": null, - "visualization_view": "", - "action": "latino_prediction_info", + "visualization_view": "show_classifications", + "action": "show_classifications", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 14, - "description": "Automatically generated widget from function PredictionInfo in package latino. The original function signature: PredictionInfo." + "order": 16, + "description": "Automatically generated widget from function ViewClasssifications_PYTHON in package latino. The original function signature: ViewClasssifications_PYTHON." } }, { - "pk": 202, + "pk": 212, "model": "workflows.abstractinput", "fields": { - "widget": 71, + "widget": 74, "name": "Prediction(s)", "short_name": "prd", - "uid": "4196036a-5c53-8452-8ccc-325f1ac734d6", + "uid": "1f322a3d-2653-00f7-557e-f13bd0f9a118", "default": "", "required": true, "multi": false, @@ -7305,400 +7261,444 @@ "variable": "predictions", "parameter": false, "order": 1, - "description": "System.Collections.Generic.List`1[[Latino.Model.Prediction`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]], Latino, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null]]" - } - }, - { - "pk": 77, - "model": "workflows.abstractoutput", - "fields": { - "widget": 71, - "name": "Lable(s) (Array of Strings)", - "short_name": "str", - "variable": "labels", - "uid": "776fb044-5dfc-18a2-cf20-4e817796f204", - "order": 1, - "description": "" - } - }, - { - "pk": 78, - "model": "workflows.abstractoutput", - "fields": { - "widget": 71, - "name": "Prediction Info(s)", - "short_name": "obj", - "variable": "predictInfos", - "uid": "8653f42a-e09b-5eaf-795f-587cc48bfc21", - 
"order": 2, - "description": "" + "description": "System.Object" } }, { - "pk": 72, + "pk": 75, "model": "workflows.abstractwidget", "fields": { "category": 14, "treeview_image": null, - "name": "Cross Validation", + "name": "Accuracy Claculation", "is_streaming": false, - "uid": "c42c757c-245e-4eb2-a657-e77fcc4f4802", + "uid": "990b6a22-0d62-431a-aea8-03d3a934f4f2", "interaction_view": "", "image": null, "package": "latino", - "static_image": "classif_cross_valid_image.png", + "static_image": "accuracy_calc_from_labels_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_cross_validation", + "action": "compare_lists", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 11, - "description": "Automatically generated widget from function CrossValidation in package latino. The original function signature: CrossValidation." + "order": 10, + "description": "Automatically generated widget from function AccuracyClaculation_PYTHON in package latino. The original function signature: AccuracyClaculation_PYTHON." 
} }, { - "pk": 203, + "pk": 213, "model": "workflows.abstractinput", "fields": { - "widget": 72, - "name": "Classifier", - "short_name": "csf", - "uid": "06a3b7c8-681f-ab02-f171-75362b9ea5a0", + "widget": 75, + "name": "List 1", + "short_name": "lst", + "uid": "4337f7d3-5113-be91-9dcd-7f23d955ca3e", "default": "", "required": true, "multi": false, "parameter_type": null, - "variable": "csf", + "variable": "list1", "parameter": false, "order": 1, - "description": "Latino.Model.IModel`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" + "description": "System.Object" } }, { - "pk": 204, + "pk": 214, "model": "workflows.abstractinput", "fields": { - "widget": 72, - "name": "Dataset", - "short_name": "ds", - "uid": "eeab1227-dfa8-31df-b27b-2a27efeda6b0", + "widget": 75, + "name": "List 2", + "short_name": "lst", + "uid": "3865d2f0-cf4f-cacc-78dc-77a6b0071e45", "default": "", "required": true, "multi": false, "parameter_type": null, - "variable": "ds", + "variable": "list2", "parameter": false, "order": 2, - "description": "Latino.Model.LabeledDataset`2[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089],[Latino.SparseVector`1[[System.Double, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]], Latino, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null]]" + "description": "System.Object" } }, { - "pk": 205, - "model": "workflows.abstractinput", + "pk": 81, + "model": "workflows.abstractoutput", "fields": { - "widget": 72, - "name": "Num of Sets", - "short_name": "int", - "uid": "21ea2877-e44d-c8db-0612-b9bce6e62f36", - "default": "10", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "numOfSets", - "parameter": true, - "order": 3, - "description": "System.Int32" + "widget": 75, + "name": "Accuracy", + "short_name": "dbl", + "variable": "accuracy", + "uid": "0d48ff94-0a42-3bfd-5b92-14c8d96a96e8", + "order": 1, + "description": 
"" } }, { - "pk": 206, - "model": "workflows.abstractinput", + "pk": 82, + "model": "workflows.abstractoutput", "fields": { - "widget": 72, - "name": "Assign Sets Randomly", - "short_name": "bol", - "uid": "f7bb77e7-327e-5794-f330-186d669d6b1b", - "default": "true", - "required": true, - "multi": false, - "parameter_type": "checkbox", - "variable": "random", - "parameter": true, + "widget": 75, + "name": "Statistics", + "short_name": "obj", + "variable": "statistics", + "uid": "4b9c7c60-639b-3ca5-682b-43b367f67cc3", + "order": 2, + "description": "" + } + }, + { + "pk": 15, + "model": "workflows.category", + "fields": { + "uid": "438cb7e7-d0e5-4bb9-9cad-10f6907ec568", + "parent": 1, + "workflow": null, + "user": null, + "order": 10, + "name": "Helpers" + } + }, + { + "pk": 3, + "model": "workflows.abstractwidget", + "fields": { + "category": 15, + "treeview_image": null, + "name": "Flatten String Hierarchy", + "is_streaming": false, + "uid": "1d9f109e-8490-4c98-8957-4b0f698ce1bd", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "flatten_string_hierarchy_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "latino_flatten_object_to_string_array", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, "order": 4, - "description": "System.Boolean" + "description": "Automatically generated widget from function FlattenObjectToStringArray in package latino. The original function signature: FlattenObjectToStringArray." 
} }, { - "pk": 207, + "pk": 7, "model": "workflows.abstractinput", "fields": { - "widget": 72, - "name": "Use Seed for Random", - "short_name": "bol", - "uid": "bdc7e5b8-00f1-d7bd-6927-fc91856a280a", - "default": "false", + "widget": 3, + "name": "data", + "short_name": "obj", + "uid": "26dc986a-e8c4-ee4b-ac32-5ce895f3383a", + "default": "", "required": true, "multi": false, - "parameter_type": "checkbox", - "variable": "useSeed", - "parameter": true, - "order": 5, - "description": "System.Boolean" + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "System.Object" } }, { - "pk": 208, - "model": "workflows.abstractinput", + "pk": 3, + "model": "workflows.abstractoutput", "fields": { - "widget": 72, - "name": "Random Seed", - "short_name": "int", - "uid": "15d0d98c-b25b-e7fc-19f1-d547a0dddd14", - "default": "0", - "required": true, - "multi": false, - "parameter_type": "text", - "variable": "randomSeed", - "parameter": true, - "order": 6, - "description": "System.Int32" + "widget": 3, + "name": "flatData", + "short_name": "obj", + "variable": "flatData", + "uid": "a16d6a3b-e656-9b50-10c8-8359a5174193", + "order": 1, + "description": "" + } + }, + { + "pk": 4, + "model": "workflows.abstractwidget", + "fields": { + "category": 15, + "treeview_image": null, + "name": "Display Table", + "is_streaming": false, + "uid": "bb74ec59-dba8-461b-ae66-de35a5c3fdea", + "interaction_view": "", + "image": null, + "package": "latino", + "static_image": "table_view_image.png", + "post_interact_action": "", + "user": null, + "visualization_view": "show_table", + "action": "show_table", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, + "description": "Automatically generated widget from function ShowTable_PYTHON in package latino. The original function signature: ShowTable_PYTHON." 
} }, { - "pk": 79, - "model": "workflows.abstractoutput", + "pk": 8, + "model": "workflows.abstractinput", "fields": { - "widget": 72, - "name": "Data Object with results", - "short_name": "obj", - "variable": "obj", - "uid": "43d23d99-8ac4-5b3e-c89c-43d63db3920e", + "widget": 4, + "name": "Table", + "short_name": "tbl", + "uid": "5800c1e5-44ec-5843-e9cf-fde725c7b521", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "tbl", + "parameter": false, "order": 1, - "description": "" + "description": "System.Object" } }, { - "pk": 73, + "pk": 5, "model": "workflows.abstractwidget", "fields": { - "category": 14, + "category": 15, "treeview_image": null, - "name": "Cross Validation (Predefined Splits)", + "name": "Generate Integer Range", "is_streaming": false, - "uid": "72b20813-e1a8-4c2d-a448-b7337ecc2a08", + "uid": "f7d78342-9aea-4e01-b46b-8d20631ee5bf", "interaction_view": "", "image": null, "package": "latino", - "static_image": "classif_cross_valid_predef_image.png", + "static_image": "range_create_integers_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "latino_cross_validation_predef_splits", + "action": "create_range", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 12, - "description": "Automatically generated widget from function CrossValidationPredefSplits in package latino. The original function signature: CrossValidationPredefSplits." + "order": 5, + "description": "Automatically generated widget from function GenerateIntegerRange_PYTHON in package latino. The original function signature: GenerateIntegerRange_PYTHON." 
} }, { - "pk": 209, + "pk": 9, "model": "workflows.abstractinput", "fields": { - "widget": 73, - "name": "Classifier", - "short_name": "csf", - "uid": "4178be10-5a31-bbfd-dafc-51470e1458bb", - "default": "", + "widget": 5, + "name": "Start", + "short_name": "int", + "uid": "4c165f6b-7698-65d2-f8fb-2d2cf104ba47", + "default": "0", "required": true, "multi": false, - "parameter_type": null, - "variable": "csf", - "parameter": false, + "parameter_type": "text", + "variable": "start", + "parameter": true, "order": 1, - "description": "Latino.Model.IModel`1[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" + "description": "System.Int32" } }, { - "pk": 210, + "pk": 10, "model": "workflows.abstractinput", "fields": { - "widget": 73, - "name": "Dataset", - "short_name": "ds", - "uid": "657a8f1f-7958-d479-a062-428ea6250620", - "default": "", + "widget": 5, + "name": "Stop", + "short_name": "int", + "uid": "c707764e-6ee4-8318-565e-3c4aab14cc45", + "default": "10", "required": true, "multi": false, - "parameter_type": null, - "variable": "ds", - "parameter": false, + "parameter_type": "text", + "variable": "stop", + "parameter": true, "order": 2, - "description": "Latino.Model.LabeledDataset`2[[System.String, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089],[Latino.SparseVector`1[[System.Double, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]], Latino, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null]]" + "description": "System.Int32" } }, { - "pk": 211, + "pk": 11, "model": "workflows.abstractinput", "fields": { - "widget": 73, - "name": "Sets (List with predefined set numbers)", - "short_name": "lst", - "uid": "f51e4a03-3fc8-f5b2-2555-5f07920a4ddb", - "default": "", + "widget": 5, + "name": "Step", + "short_name": "int", + "uid": "64dbf120-d027-0a31-a88d-e78bd15fb0c1", + "default": "1", "required": true, "multi": false, - "parameter_type": null, - "variable": "sets", 
- "parameter": false, + "parameter_type": "text", + "variable": "step", + "parameter": true, "order": 3, - "description": "System.Collections.Generic.List`1[[System.Int32, mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089]]" + "description": "System.Int32" } }, { - "pk": 80, + "pk": 4, "model": "workflows.abstractoutput", "fields": { - "widget": 73, - "name": "Data Object with results", - "short_name": "obj", - "variable": "obj", - "uid": "70558c82-fa96-7f12-c8e4-f5c1297e2cfe", + "widget": 5, + "name": "Range", + "short_name": "ary", + "variable": "range", + "uid": "8eb96f55-f038-e517-5e11-8dbda6295b0f", "order": 1, "description": "" } }, { - "pk": 74, + "pk": 6, "model": "workflows.abstractwidget", "fields": { - "category": 14, + "category": 15, "treeview_image": null, - "name": "View Classifications", + "name": "Python Snippet", "is_streaming": false, - "uid": "9899c08b-df30-429a-83ab-fc83ecf0d26a", + "uid": "b6164644-ff14-4f8f-bdee-ca55da77b57c", "interaction_view": "", "image": null, "package": "latino", - "static_image": "classif_result_view_image.png", + "static_image": "python_snippet_image.png", "post_interact_action": "", "user": null, - "visualization_view": "show_classifications", - "action": "show_classifications", + "visualization_view": "", + "action": "python_snippet", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 16, - "description": "Automatically generated widget from function ViewClasssifications_PYTHON in package latino. The original function signature: ViewClasssifications_PYTHON." + "order": 6, + "description": "Runs python snippet. You can use variable which is provided on the input by the name \"in1\" .. \"inN\". 
Whatever you want to otput needs to be asigned to the variable \"out1\" before the code is terminated" } }, { - "pk": 212, + "pk": 12, "model": "workflows.abstractinput", "fields": { - "widget": 74, - "name": "Prediction(s)", - "short_name": "prd", - "uid": "1f322a3d-2653-00f7-557e-f13bd0f9a118", + "widget": 6, + "name": "in", + "short_name": "in", + "uid": "16ca773f-1d2d-823f-6e9c-1be0dd267369", "default": "", - "required": true, - "multi": false, + "required": false, + "multi": true, "parameter_type": null, - "variable": "predictions", + "variable": "in", "parameter": false, "order": 1, - "description": "System.Object" + "description": "input can be accesed as variable \"in1\" .. \"inN\" inside the code" } }, { - "pk": 75, + "pk": 13, + "model": "workflows.abstractinput", + "fields": { + "widget": 6, + "name": "Python Snippet Code", + "short_name": "py", + "uid": "1991f19a-463b-d851-aab6-72facc87a2f1", + "default": "# This is the Python Code Snippet where you can modify the data however is needed.\n# Varaible \"in1\" .. \"inN\" contains whatever you connected to the input port\n# Whatever is assigned to the variable \"out1\" will be transfered to the output port.\n\nout1 = in1", + "required": true, + "multi": false, + "parameter_type": "textarea", + "variable": "pycode", + "parameter": true, + "order": 2, + "description": "Input can be accesed as variable \"in1\" .. \"inN\" inside the code and output can be accesed/assigned as variable \"out1\" inside the code." 
+ } + }, + { + "pk": 5, + "model": "workflows.abstractoutput", + "fields": { + "widget": 6, + "name": "out", + "short_name": "out", + "variable": "out", + "uid": "455b1583-c2d4-13c5-1036-435dfe160152", + "order": 1, + "description": "output can be accesed/assigned as variable \"out1\" inside the code" + } + }, + { + "pk": 7, "model": "workflows.abstractwidget", "fields": { - "category": 14, + "category": 15, "treeview_image": null, - "name": "Accuracy Claculation", + "name": "Split Object", "is_streaming": false, - "uid": "990b6a22-0d62-431a-aea8-03d3a934f4f2", + "uid": "4ae60fcc-ae3a-4609-9ec1-a7724d5ac0c4", "interaction_view": "", "image": null, "package": "latino", - "static_image": "accuracy_calc_from_labels_image.png", + "static_image": "object_split_image.png", "post_interact_action": "", "user": null, "visualization_view": "", - "action": "compare_lists", + "action": "split_object", "wsdl_method": "", "wsdl": "", "interactive": false, "has_progress_bar": false, - "order": 10, - "description": "Automatically generated widget from function AccuracyClaculation_PYTHON in package latino. The original function signature: AccuracyClaculation_PYTHON." + "order": 7, + "description": "Automatically generated widget from function SplitObject_PYTHON in package latino. The original function signature: SplitObject_PYTHON." 
} }, { - "pk": 213, + "pk": 14, "model": "workflows.abstractinput", "fields": { - "widget": 75, - "name": "List 1", - "short_name": "lst", - "uid": "4337f7d3-5113-be91-9dcd-7f23d955ca3e", + "widget": 7, + "name": "object", + "short_name": "obj", + "uid": "298ce522-7061-993e-a940-069d9c9b739d", "default": "", "required": true, "multi": false, "parameter_type": null, - "variable": "list1", + "variable": "object", "parameter": false, "order": 1, "description": "System.Object" } }, { - "pk": 214, + "pk": 15, "model": "workflows.abstractinput", "fields": { - "widget": 75, - "name": "List 2", - "short_name": "lst", - "uid": "3865d2f0-cf4f-cacc-78dc-77a6b0071e45", + "widget": 7, + "name": "Object Modifier", + "short_name": "atr", + "uid": "bf822d07-096d-1aeb-e6da-3670d6aa9c53", "default": "", "required": true, "multi": false, - "parameter_type": null, - "variable": "list2", - "parameter": false, + "parameter_type": "text", + "variable": "attribute", + "parameter": true, "order": 2, - "description": "System.Object" - } - }, - { - "pk": 81, - "model": "workflows.abstractoutput", - "fields": { - "widget": 75, - "name": "Accuracy", - "short_name": "dbl", - "variable": "accuracy", - "uid": "0d48ff94-0a42-3bfd-5b92-14c8d96a96e8", - "order": 1, - "description": "" + "description": "if one wants to extract object's attributes, leading dot should be used." 
} }, { - "pk": 82, + "pk": 6, "model": "workflows.abstractoutput", "fields": { - "widget": 75, - "name": "Statistics", + "widget": 7, + "name": "object", "short_name": "obj", - "variable": "statistics", - "uid": "4b9c7c60-639b-3ca5-682b-43b367f67cc3", - "order": 2, + "variable": "object", + "uid": "6817c1ee-8e0f-8e63-f4d6-7c43522b7f10", + "order": 1, "description": "" } }, diff --git a/workflows/library.py b/workflows/library.py index 77a3365de717f531d770bf6622031600ea3ce89b..8b26598b0b39ce66c0d01865eb7502809e3fbae1 100755 --- a/workflows/library.py +++ b/workflows/library.py @@ -102,7 +102,6 @@ def call_webservice(input_dict): except Exception as e: print e ws_dict[i['name']]='' - print ws_dict results = function_to_call(**ws_dict) output_dict=results return output_dict @@ -564,6 +563,16 @@ def load_dataset(input_dict): output_dict = {} output_dict['dataset'] = orange.ExampleTable(input_dict['file']) return output_dict + +def load_dataset_from_arff_string(input_dict): + import orange + import tempfile + f = tempfile.NamedTemporaryFile(delete=False,suffix='.arff') + f.write(input_dict['arff']) + f.close() + output_dict = {} + output_dict['dataset'] = orange.ExampleTable(f.name) + return output_dict # SATURATION NOISE FILTER diff --git a/workflows/management/commands/new_package_from_template.py b/workflows/management/commands/new_package_from_template.py new file mode 100644 index 0000000000000000000000000000000000000000..013d073909055dfcb49f135d15a50dbd9e6506f6 --- /dev/null +++ b/workflows/management/commands/new_package_from_template.py @@ -0,0 +1,97 @@ +from django.core.management.base import BaseCommand, CommandError +from optparse import make_option +import uuid +from distutils import dir_util +import os +import sys +from mothra.settings import PROJECT_DIR + +class Command(BaseCommand): + args = 'new_package_name' + help = 'Creates new package based on package_template. 
It generates new uids for template database objects, renames functions, renames files and registers package in INSTALLED_APPS.' + + def handle(self, *args, **options): + if (len(args) < 1): + raise CommandError('Argument "new_package_name" is required') + self.stdout.write('Starting package initiation from template.\n') + if (int(options['verbosity'])<2): + self.stdout.write('Tip: use higher verbosity option number to see what is going on in detail.\n') + new_package_from_template(self.stdout.write, args[0], int(options['verbosity'])) + self.stdout.write('Creating new package successfully finished. You might want to use import_package command now.\n') + + +def replace_in_files(files, topPath, fromStr, toStr, verbosity, writeFunc): + if verbosity > 1: + writeFunc("Replacing '%s' with '%s' in:\n" % (fromStr, toStr)) + for f in files: + content = open(f, 'r').read() + contentNew = content.replace(fromStr, toStr) + if content != contentNew: + open(f, 'w').write(contentNew) + if verbosity > 2: + writeFunc(" .%s\n" % (f[len(topPath):],)) + + +def new_package_from_template(writeFunc, package, verbosity): + pckTmpName = "package_template" + pckTmpTitle = "Package Template" + pckTmpPrefix = "pcktmp_" + pckTmpUid = "uid_to_replace" + pckPrefix = package + "_" + subappPlaceholder = " #WORKFLOWS_SUBAPP_PLACEHOLDER" + subapp = " \'workflows.%s\',\n"%package + settFile = os.path.join(PROJECT_DIR,'settings.py') + + wfDir = os.path.abspath(os.path.join(PROJECT_DIR,"..","workflows")) + templateDir = os.path.join(wfDir,pckTmpName) + packageDir = os.path.join(wfDir, package) + if os.path.exists(packageDir): + raise Exception('Directory "%s" for package of given name already exists! 
Terminating procedure of creating new package.'%packageDir) + copied = dir_util.copy_tree(templateDir, packageDir) + + if verbosity>1: + writeFunc("Creating objects:\n") + for f in copied: + if verbosity>2: + writeFunc(" .%s\n"%f[len(wfDir):]) + + replace_in_files(copied, wfDir, pckTmpPrefix, pckPrefix, verbosity, writeFunc) + replace_in_files(copied, wfDir, pckTmpTitle, package[:1].upper()+package[1:], verbosity, writeFunc) + replace_in_files(copied, wfDir, pckTmpName, package, verbosity, writeFunc) + + if verbosity>1: + writeFunc("Replacing temporary uids in:\n") + for f in copied: + content = open(f, 'r').read() + contentNew = content + while contentNew.count(pckTmpUid)>0: + contentNew = contentNew.replace(pckTmpUid, str(uuid.uuid4()), 1) + if content != contentNew: + open(f, 'w').write(contentNew) + if verbosity>2: + writeFunc(" .%s\n"%(f[len(wfDir):],)) + + if verbosity>1: + writeFunc("Renaming files:\n") + for f in copied: + if f[len(wfDir):].count(pckTmpPrefix)>0: + fNew = f.replace(pckTmpPrefix, pckPrefix) + os.rename(f,fNew) + if verbosity>2: + writeFunc(" .%s => .%s\n"%(f[len(wfDir):],fNew[len(wfDir):])) + + if verbosity>1: + writeFunc("Renaming directories:\n") + for f in [x[0] for x in os.walk(packageDir)]: + if f.endswith(pckTmpName): + fNew = f.replace(pckTmpName, package) + os.rename(f,fNew) + if verbosity>2: + writeFunc(" .%s => .%s\n"%(f[len(wfDir):],fNew[len(wfDir):])) + + if verbosity>1: + writeFunc("Adding package to INSTALLED_APPS variable in .%s\n"%settFile[len(PROJECT_DIR):]) + content = open(settFile, 'r').read() + place = content.find(subappPlaceholder) + contentNew = content[:place] + subapp + content[place:] + open(settFile, 'w').write(contentNew) \ No newline at end of file diff --git a/workflows/models.py b/workflows/models.py index f2098506294303e09383a59455629d370b29c814..a030e57096aa84f2c9c258da64950b896baa7908 100644 --- a/workflows/models.py +++ b/workflows/models.py @@ -258,6 +258,20 @@ class AbstractWidget(models.Model): class 
Meta: ordering = ('order','name',) + def set_uid(self,commit=False): + import uuid + self.uid = uuid.uuid4() + if commit: + self.save() + for i in self.inputs.all(): + i.uid = uuid.uuid4() + if commit: + i.save() + for o in self.outputs.all(): + o.uid = uuid.uuid4() + if commit: + o.save() + def __unicode__(self): return unicode(self.name) diff --git a/workflows/nl_toolkit/__init__.py b/workflows/nl_toolkit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/nl_toolkit/db/package_data.json b/workflows/nl_toolkit/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..bc95fb9bbc44918480f5c9d533e1306791cfb2e4 --- /dev/null +++ b/workflows/nl_toolkit/db/package_data.json @@ -0,0 +1,261 @@ +[ + { + "pk": 20, + "model": "workflows.category", + "fields": { + "uid": "1696fd8b-44ed-4a67-91cd-199ba53accec", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Natural Language Toolkit" + } + }, + { + "pk": 98, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Create Integer List", + "is_streaming": false, + "uid": "97a79648-fc15-4fd7-9790-7f9d117b3dda", + "interaction_view": "", + "image": "", + "package": "nl_toolkit", + "static_image": "construction_work .png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "nl_toolkit_create_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 248, + "model": "workflows.abstractinput", + "fields": { + "widget": 98, + "name": "Integer List String", + "short_name": "str", + "uid": "5af7be3e-eb15-4290-9ef5-b9d9a8ade2e2", + "default": "3\r\n2\r\n1\r\n4", + "required": false, + "multi": false, + "parameter_type": "textarea", + "variable": "intStr", + "parameter": true, + "order": 1, + "description": 
"Comma or new-line separated list of integers" + } + }, + { + "pk": 253, + "model": "workflows.abstractinput", + "fields": { + "widget": 98, + "name": "Sort list", + "short_name": "bol", + "uid": "e6d57c61-e24e-47f7-93b8-8f051b37082d", + "default": "true", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "sort", + "parameter": true, + "order": 2, + "description": "Should the list be sorted" + } + }, + { + "pk": 104, + "model": "workflows.abstractoutput", + "fields": { + "widget": 98, + "name": "Integer List", + "short_name": "lst", + "variable": "intList", + "uid": "3bd7a446-4fa9-4342-a255-f2d445b85410", + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 101, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Filter Integers", + "is_streaming": false, + "uid": "61da3a56-735f-41c2-bd0d-1117aee78fca", + "interaction_view": "nl_toolkit_filter_integers", + "image": "", + "package": "nl_toolkit", + "static_image": "construction_work .png", + "post_interact_action": "nl_toolkit_post_filter_integers", + "user": null, + "visualization_view": "", + "action": "nl_toolkit_pre_filter_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 2, + "description": "" + } + }, + { + "pk": 250, + "model": "workflows.abstractinput", + "fields": { + "widget": 101, + "name": "Integer List", + "short_name": "lst", + "uid": "10e909b5-9322-46e3-b199-f7a91d457fb3", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "intList", + "parameter": false, + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 106, + "model": "workflows.abstractoutput", + "fields": { + "widget": 101, + "name": "Filtered Integer List", + "short_name": "lst", + "variable": "intList", + "uid": "f8fd00cd-c52f-48b4-9fd7-01a36ea34db7", + "order": 1, + "description": "Filtered list of integers" + } + }, + 
{ + "pk": 99, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Sum Integers", + "is_streaming": false, + "uid": "8c07de73-e99b-48a0-8853-688aec12e04a", + "interaction_view": "", + "image": "", + "package": "nl_toolkit", + "static_image": "construction_work .png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "nl_toolkit_sum_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, + "description": "" + } + }, + { + "pk": 249, + "model": "workflows.abstractinput", + "fields": { + "widget": 99, + "name": "Integer List", + "short_name": "lst", + "uid": "98dbb24d-1ea7-43b2-9278-1c257312ec2f", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "intList", + "parameter": false, + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 105, + "model": "workflows.abstractoutput", + "fields": { + "widget": 99, + "name": "Sum", + "short_name": "int", + "variable": "sum", + "uid": "189b0b2c-ff0e-4c12-8a7c-75590c9cda43", + "order": 1, + "description": "Sum of integer list" + } + }, + { + "pk": 100, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Display Summation", + "is_streaming": false, + "uid": "851f642a-939b-4e7f-aae1-a281de120277", + "interaction_view": "", + "image": "", + "package": "nl_toolkit", + "static_image": "construction_work .png", + "post_interact_action": "", + "user": null, + "visualization_view": "nl_toolkit_display_summation", + "action": "nl_toolkit_pre_display_summation", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 4, + "description": "" + } + }, + { + "pk": 251, + "model": "workflows.abstractinput", + "fields": { + "widget": 100, + "name": "Integer List", + "short_name": "lst", + "uid": "22e8f1a4-5bd4-470e-8ca1-1fcb73f405ef", + "default": 
"", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "intList", + "parameter": false, + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 252, + "model": "workflows.abstractinput", + "fields": { + "widget": 100, + "name": "Sum", + "short_name": "int", + "uid": "848784b4-5f5f-4291-99e4-1dd00288e496", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "sum", + "parameter": false, + "order": 2, + "description": "Sum (possibly correct) of integer list" + } + } +] \ No newline at end of file diff --git a/workflows/nl_toolkit/interaction_views.py b/workflows/nl_toolkit/interaction_views.py new file mode 100644 index 0000000000000000000000000000000000000000..76dcfce3c6468ec3b3b3fa22d5266fe5ed0e5a29 --- /dev/null +++ b/workflows/nl_toolkit/interaction_views.py @@ -0,0 +1,4 @@ +from django.shortcuts import render + +def nl_toolkit_filter_integers(request,input_dict,output_dict,widget): + return render(request, 'interactions/nl_toolkit_filter_integers.html',{'widget':widget,'intList':input_dict['intList']}) \ No newline at end of file diff --git a/workflows/nl_toolkit/library.py b/workflows/nl_toolkit/library.py new file mode 100644 index 0000000000000000000000000000000000000000..af9280539944782d6b94748deb5f70f77b36952a --- /dev/null +++ b/workflows/nl_toolkit/library.py @@ -0,0 +1,46 @@ +import re + +def nl_toolkit_get_all_synsets(input_dict): + if input_dict['corpus'].lower()=='wordnet': + from nltk.corpus import wordnet as wn + synsets = {} + for word in input_dict['words']: + synsets[word] = wn.synsets(word) + return {'synsets' : synsets} + +def nl_toolkit_get_word_synsets(input_dict): + if input_dict['corpus'].lower()=='wordnet': + from nltk.corpus import wordnet as wn + return {'synsets':wn.synsets(input_dict['word'])} + +def nl_toolkit_create_integers(input_dict): + intStr = input_dict['intStr'] + intList = [] + for i in re.findall(r'\w+', intStr): + try: + 
intList.append(int(i)) + except: + pass + if input_dict['sort'].lower() == "true": + intList.sort() + return {'intList':intList} + +def nl_toolkit_sum_integers(input_dict): + intList = input_dict['intList'] + return {'sum':sum(intList)} + +def nl_toolkit_pre_filter_integers(input_dict): + return input_dict + +def nl_toolkit_post_filter_integers(postdata,input_dict,output_dict): + intListOut = postdata['intListOut'] + intList = [] + for i in intListOut: + try: + intList.append(int(i)) + except: + pass + return {'intList': intList} + +def nl_toolkit_pre_display_summation(input_dict): + return {} \ No newline at end of file diff --git a/workflows/nl_toolkit/settings.py b/workflows/nl_toolkit/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..5a1dca643c7bd0a022acf36f54654bd4756e8b97 --- /dev/null +++ b/workflows/nl_toolkit/settings.py @@ -0,0 +1,13 @@ +import os + +# === STANDARD PACKAGE SETTINGS === +PACKAGE_ROOT = os.path.dirname(__file__) + +# === AUTO IMPORT OPTIONS === +#If auto_import_package_data is true then given data file is automatically imported when ClowdFlows project is newly deployed or refreshed from git +AUTO_IMPORT_DB = False +#For auto_import_package_data_replace_option description see the 'replace' option in workflows/import_package command +AUTO_IMPORT_DB_REPLACE_OPTION = True +#If file(s) other than ./db/package_data.json should be imported, auto_import_package_data_files should be corrected +AUTO_IMPORT_DB_FILES = [os.path.join(PACKAGE_ROOT,'db/package_data.json')] + diff --git a/workflows/nl_toolkit/static/nl_toolkit/icons/treeview/construction_work .png b/workflows/nl_toolkit/static/nl_toolkit/icons/treeview/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..69bd351914a95f81eb1941f6e7908474916f6116 Binary files /dev/null and b/workflows/nl_toolkit/static/nl_toolkit/icons/treeview/construction_work .png differ diff --git 
a/workflows/nl_toolkit/static/nl_toolkit/icons/widget/construction_work .png b/workflows/nl_toolkit/static/nl_toolkit/icons/widget/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..bc16d376995c1545972b60487ee8cd653177b407 Binary files /dev/null and b/workflows/nl_toolkit/static/nl_toolkit/icons/widget/construction_work .png differ diff --git a/workflows/nl_toolkit/templates/interactions/nl_toolkit_filter_integers.html b/workflows/nl_toolkit/templates/interactions/nl_toolkit_filter_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..33cb05ab4661eb9823647f3fa19b6fa3ce710dcb --- /dev/null +++ b/workflows/nl_toolkit/templates/interactions/nl_toolkit_filter_integers.html @@ -0,0 +1,8 @@ +
+
+{% for i in intList %} +{{i}}
+{% endfor %} + +
+
\ No newline at end of file diff --git a/workflows/nl_toolkit/templates/visualizations/nl_toolkit_display_integers.html b/workflows/nl_toolkit/templates/visualizations/nl_toolkit_display_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..e05b226ccb2a3d578b4de3c436957deb0694a9b1 --- /dev/null +++ b/workflows/nl_toolkit/templates/visualizations/nl_toolkit_display_integers.html @@ -0,0 +1,28 @@ +
+
+ + + {% for i in input_dict.intList %} + + + + + {% endfor %} + + + + +
+ {% if forloop.first %} {% else %}+{% endif %} + + {{ i }} +
+ = + + {{ input_dict.sum }} +
+
+{{ check }} + +
+
\ No newline at end of file diff --git a/workflows/nl_toolkit/urls.py b/workflows/nl_toolkit/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..7c56d8e8861188bdd8b72151f491c761a654e179 --- /dev/null +++ b/workflows/nl_toolkit/urls.py @@ -0,0 +1,8 @@ +from django.conf.urls.defaults import patterns, include, url + +urlpatterns = patterns('', + #url(r'^get-adc-index/widget(?P[0-9]+)/nx/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index(?P[0-9]+)-(?P[0-9]+).html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Document(?P[0-9]+).html', 'workflows.latino.views.get_adc_page', name='get adc page'), +) \ No newline at end of file diff --git a/workflows/nl_toolkit/views.py b/workflows/nl_toolkit/views.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/nl_toolkit/visualization_views.py b/workflows/nl_toolkit/visualization_views.py new file mode 100644 index 0000000000000000000000000000000000000000..046c072c9f6d149db05feef0fad43227995e3bed --- /dev/null +++ b/workflows/nl_toolkit/visualization_views.py @@ -0,0 +1,8 @@ +from django.shortcuts import render + +def nl_toolkit_display_summation(request,input_dict,output_dict,widget): + if sum(input_dict['intList']) == input_dict['sum']: + check = 'The calculation appears correct.' + else: + check = 'The calculation appears incorrect!' 
+ return render(request, 'visualizations/nl_toolkit_display_integers.html',{'widget':widget,'input_dict':input_dict, 'output_dict':output_dict, 'check':check}) diff --git a/workflows/nlp/db/nlp.json b/workflows/nlp/db/nlp.json new file mode 100644 index 0000000000000000000000000000000000000000..dee8a4cb09c786cd2c11fe3f1eb819b34eb29523 --- /dev/null +++ b/workflows/nlp/db/nlp.json @@ -0,0 +1,260 @@ +[ + { + "pk": 33, + "model": "workflows.category", + "fields": { + "uid": "15acb469-c510-44f0-8330-60bfe11a463c", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "NLP" + } + }, + { + "pk": 225, + "model": "workflows.abstractwidget", + "fields": { + "category": 33, + "treeview_image": "treeview/nlp_3.png", + "name": "Load corpus", + "is_streaming": false, + "uid": "ce3c4bc7-689c-4eff-8c5c-2aa1aaf82971", + "interaction_view": "", + "image": "images/nlp_9.png", + "package": "nlp", + "static_image": "nlp.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "load_corpus", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 647, + "model": "workflows.abstractinput", + "fields": { + "widget": 225, + "name": "File", + "short_name": "fil", + "uid": "096159a3-ddc8-4a57-b769-dddfb2590065", + "default": "", + "required": true, + "multi": false, + "parameter_type": "file", + "variable": "file", + "parameter": true, + "order": 1, + "description": "File" + } + }, + { + "pk": 234, + "model": "workflows.abstractoutput", + "fields": { + "widget": 225, + "name": "Corpus", + "short_name": "cor", + "variable": "corpus", + "uid": "edcc6852-ba3b-4fd7-a46a-6988a7ff61ad", + "order": 1, + "description": "corpus" + } + }, + { + "pk": 187, + "model": "workflows.abstractwidget", + "fields": { + "category": 33, + "treeview_image": "treeview/nlp.png", + "name": "Merge Sentences", + "is_streaming": false, + "uid": 
"de403444-0ae6-470a-bd7e-f86ac289bf98", + "interaction_view": "", + "image": "images/nlp_6.png", + "package": "nlp", + "static_image": "nlp.png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "merge_sentences", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 501, + "model": "workflows.abstractinput", + "fields": { + "widget": 187, + "name": "Sentences", + "short_name": "sen", + "uid": "15389d3e-d0b9-488e-9db8-038ff1b6e06a", + "default": "", + "required": true, + "multi": true, + "parameter_type": null, + "variable": "sentences", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 502, + "model": "workflows.abstractinput", + "fields": { + "widget": 187, + "name": "Join Method", + "short_name": "met", + "uid": "27c8cd4a-1016-4a88-b463-4713874fc87f", + "default": "union", + "required": true, + "multi": false, + "parameter_type": "select", + "variable": "method", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 98, + "model": "workflows.abstractoption", + "fields": { + "uid": "de9d5d39-53dc-4791-8d1d-2298c678f5b6", + "abstract_input": 502, + "value": "intersection", + "name": "Intersection" + } + }, + { + "pk": 100, + "model": "workflows.abstractoption", + "fields": { + "uid": "8a445f1f-44ad-481f-aa17-46e6222c82be", + "abstract_input": 502, + "value": "intersection_two", + "name": "Intersection by at least two" + } + }, + { + "pk": 99, + "model": "workflows.abstractoption", + "fields": { + "uid": "d0886005-a48a-415f-b767-e670211f6fe9", + "abstract_input": 502, + "value": "union", + "name": "Union" + } + }, + { + "pk": 192, + "model": "workflows.abstractoutput", + "fields": { + "widget": 187, + "name": "Merged Sentences", + "short_name": "sen", + "variable": "merged_sentences", + "uid": "b07f6501-4ce3-4f09-a8ec-1e79eb1652de", + "order": 1, + "description": "" + } + }, + { + "pk": 186, + 
"model": "workflows.abstractwidget", + "fields": { + "category": 33, + "treeview_image": "treeview/nlp_1.png", + "name": "Sentence Viewer", + "is_streaming": false, + "uid": "94485ea5-f1d6-46ae-abaf-fd78988692fd", + "interaction_view": "", + "image": "images/nlp_7.png", + "package": "nlp", + "static_image": "nlp.png", + "post_interact_action": "", + "user": null, + "visualization_view": "definition_sentences_viewer", + "action": "definition_sentences", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 500, + "model": "workflows.abstractinput", + "fields": { + "widget": 186, + "name": "candidates", + "short_name": "can", + "uid": "2ad6546b-5fae-42fa-97ab-e75e91838c19", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "candidates", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 185, + "model": "workflows.abstractwidget", + "fields": { + "category": 33, + "treeview_image": "treeview/nlp_2.png", + "name": "Term Candidates Viewer", + "is_streaming": false, + "uid": "af5ee96e-7307-46f9-8d8c-55cb7c36d111", + "interaction_view": "", + "image": "images/nlp_8.png", + "package": "nlp", + "static_image": "nlp.png", + "post_interact_action": "", + "user": null, + "visualization_view": "term_candidate_viewer", + "action": "term_candidates", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 499, + "model": "workflows.abstractinput", + "fields": { + "widget": 185, + "name": "candidates", + "short_name": "can", + "uid": "32536744-964e-4206-b582-be92ab355e8e", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "candidates", + "parameter": false, + "order": 1, + "description": "" + } + } +] \ No newline at end of file diff --git a/workflows/nlp/library.py b/workflows/nlp/library.py 
index a0efae28acc663ae780f870e642debe0a973268e..44c8cd58a9ce8a200f1e87a8fdc355897f3f31a7 100644 --- a/workflows/nlp/library.py +++ b/workflows/nlp/library.py @@ -2,6 +2,7 @@ import nlp import os.path import base64 from services.webservice import WebService +from workflows.security import safeOpen def merge_sentences(input_dict): """ diff --git a/workflows/nlp/static/nlp/icons/treeview/nlp.png b/workflows/nlp/static/nlp/icons/treeview/nlp.png new file mode 100755 index 0000000000000000000000000000000000000000..2ce25f8a4398753a535f6f95c551ace2eec646b4 Binary files /dev/null and b/workflows/nlp/static/nlp/icons/treeview/nlp.png differ diff --git a/workflows/nlp/static/nlp/icons/treeview/ws.png b/workflows/nlp/static/nlp/icons/treeview/ws.png new file mode 100755 index 0000000000000000000000000000000000000000..f8e76e60f1000f96767de0b6e5986f86a407e58f Binary files /dev/null and b/workflows/nlp/static/nlp/icons/treeview/ws.png differ diff --git a/workflows/nlp/static/nlp/icons/widget/nlp.png b/workflows/nlp/static/nlp/icons/widget/nlp.png new file mode 100755 index 0000000000000000000000000000000000000000..2ce25f8a4398753a535f6f95c551ace2eec646b4 Binary files /dev/null and b/workflows/nlp/static/nlp/icons/widget/nlp.png differ diff --git a/workflows/nlp/static/nlp/icons/widget/ws.png b/workflows/nlp/static/nlp/icons/widget/ws.png new file mode 100755 index 0000000000000000000000000000000000000000..f8e76e60f1000f96767de0b6e5986f86a407e58f Binary files /dev/null and b/workflows/nlp/static/nlp/icons/widget/ws.png differ diff --git a/workflows/nlp/visualization_views.py b/workflows/nlp/visualization_views.py index e2dfe9a9fa58c1758ee1610d2a4f0b62571ac5e2..da4e660590bf6023ec1644dbe5b0bec57ce59340 100644 --- a/workflows/nlp/visualization_views.py +++ b/workflows/nlp/visualization_views.py @@ -1,3 +1,9 @@ +''' +NLP visualization views. 
+ +@author: Anze Vavpetic +''' +from django.shortcuts import render import nlp def definition_sentences_viewer(request, input_dict, output_dict, widget): diff --git a/workflows/package_template/__init__.py b/workflows/package_template/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/package_template/db/package_data.json b/workflows/package_template/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..6245064c1700c8f4c9f0675275978f717c998bfa --- /dev/null +++ b/workflows/package_template/db/package_data.json @@ -0,0 +1,261 @@ +[ + { + "pk": 20, + "model": "workflows.category", + "fields": { + "uid": "uid_to_replace", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Package Template" + } + }, + { + "pk": 98, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Create Integer List", + "is_streaming": false, + "uid": "uid_to_replace", + "interaction_view": "", + "image": "", + "package": "package_template", + "static_image": "construction_work .png", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "pcktmp_create_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 248, + "model": "workflows.abstractinput", + "fields": { + "widget": 98, + "name": "Integer List String", + "short_name": "str", + "uid": "uid_to_replace", + "default": "3\r\n2\r\n1\r\n4", + "required": false, + "multi": false, + "parameter_type": "textarea", + "variable": "intStr", + "parameter": true, + "order": 1, + "description": "Comma or new-line separated list of integers" + } + }, + { + "pk": 253, + "model": "workflows.abstractinput", + "fields": { + "widget": 98, + "name": "Sort list", + "short_name": "bol", + "uid": "uid_to_replace", + "default": 
"true", + "required": true, + "multi": false, + "parameter_type": "checkbox", + "variable": "sort", + "parameter": true, + "order": 2, + "description": "Should the list be sorted" + } + }, + { + "pk": 104, + "model": "workflows.abstractoutput", + "fields": { + "widget": 98, + "name": "Integer List", + "short_name": "lst", + "variable": "intList", + "uid": "uid_to_replace", + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 101, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Filter Integers", + "is_streaming": false, + "uid": "uid_to_replace", + "interaction_view": "pcktmp_filter_integers", + "image": "", + "package": "package_template", + "static_image": "construction_work .png", + "post_interact_action": "pcktmp_post_filter_integers", + "user": null, + "visualization_view": "", + "action": "pcktmp_pre_filter_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 2, + "description": "" + } + }, + { + "pk": 250, + "model": "workflows.abstractinput", + "fields": { + "widget": 101, + "name": "Integer List", + "short_name": "lst", + "uid": "uid_to_replace", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "intList", + "parameter": false, + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 106, + "model": "workflows.abstractoutput", + "fields": { + "widget": 101, + "name": "Filtered Integer List", + "short_name": "lst", + "variable": "intList", + "uid": "uid_to_replace", + "order": 1, + "description": "Filtered list of integers" + } + }, + { + "pk": 99, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Sum Integers", + "is_streaming": false, + "uid": "uid_to_replace", + "interaction_view": "", + "image": "", + "package": "package_template", + "static_image": "construction_work .png", + "post_interact_action": "", + 
"user": null, + "visualization_view": "", + "action": "pcktmp_sum_integers", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 3, + "description": "" + } + }, + { + "pk": 249, + "model": "workflows.abstractinput", + "fields": { + "widget": 99, + "name": "Integer List", + "short_name": "lst", + "uid": "uid_to_replace", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "intList", + "parameter": false, + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 105, + "model": "workflows.abstractoutput", + "fields": { + "widget": 99, + "name": "Sum", + "short_name": "int", + "variable": "sum", + "uid": "uid_to_replace", + "order": 1, + "description": "Sum of integer list" + } + }, + { + "pk": 100, + "model": "workflows.abstractwidget", + "fields": { + "category": 20, + "treeview_image": "", + "name": "Display Summation", + "is_streaming": false, + "uid": "uid_to_replace", + "interaction_view": "", + "image": "", + "package": "package_template", + "static_image": "construction_work .png", + "post_interact_action": "", + "user": null, + "visualization_view": "pcktmp_display_summation", + "action": "pcktmp_pre_display_summation", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 4, + "description": "" + } + }, + { + "pk": 251, + "model": "workflows.abstractinput", + "fields": { + "widget": 100, + "name": "Integer List", + "short_name": "lst", + "uid": "uid_to_replace", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "intList", + "parameter": false, + "order": 1, + "description": "List of integers" + } + }, + { + "pk": 252, + "model": "workflows.abstractinput", + "fields": { + "widget": 100, + "name": "Sum", + "short_name": "int", + "uid": "uid_to_replace", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "sum", + "parameter": 
false, + "order": 2, + "description": "Sum (possibly correct) of integer list" + } + } +] \ No newline at end of file diff --git a/workflows/package_template/interaction_views.py b/workflows/package_template/interaction_views.py new file mode 100644 index 0000000000000000000000000000000000000000..f0a1d7a87c71b47816ee61dda16bc663483dcf6e --- /dev/null +++ b/workflows/package_template/interaction_views.py @@ -0,0 +1,4 @@ +from django.shortcuts import render + +def pcktmp_filter_integers(request,input_dict,output_dict,widget): + return render(request, 'interactions/pcktmp_filter_integers.html',{'widget':widget,'intList':input_dict['intList']}) \ No newline at end of file diff --git a/workflows/package_template/library.py b/workflows/package_template/library.py new file mode 100644 index 0000000000000000000000000000000000000000..b8b6c976b76bb48fbc974352dfb3fc5700ad5c9c --- /dev/null +++ b/workflows/package_template/library.py @@ -0,0 +1,33 @@ +import re + +def pcktmp_create_integers(input_dict): + intStr = input_dict['intStr'] + intList = [] + for i in re.findall(r'\w+', intStr): + try: + intList.append(int(i)) + except: + pass + if input_dict['sort'].lower() == "true": + intList.sort() + return {'intList':intList} + +def pcktmp_sum_integers(input_dict): + intList = input_dict['intList'] + return {'sum':sum(intList)} + +def pcktmp_pre_filter_integers(input_dict): + return input_dict + +def pcktmp_post_filter_integers(postdata,input_dict,output_dict): + intListOut = postdata['intListOut'] + intList = [] + for i in intListOut: + try: + intList.append(int(i)) + except: + pass + return {'intList': intList} + +def pcktmp_pre_display_summation(input_dict): + return {} \ No newline at end of file diff --git a/workflows/package_template/settings.py b/workflows/package_template/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..5a1dca643c7bd0a022acf36f54654bd4756e8b97 --- /dev/null +++ b/workflows/package_template/settings.py @@ -0,0 +1,13 @@ 
+import os + +# === STANDARD PACKAGE SETTINGS === +PACKAGE_ROOT = os.path.dirname(__file__) + +# === AUTO IMPORT OPTIONS === +#If auto_import_package_data is true then given data file is automatically imported when ClowdFlows project is newly deployed or refreshed from git +AUTO_IMPORT_DB = False +#For auto_import_package_data_replace_option description see the 'replace' option in workflows/import_package command +AUTO_IMPORT_DB_REPLACE_OPTION = True +#If file(s) other than ./db/package_data.json should be imported, auto_import_package_data_files should be corrected +AUTO_IMPORT_DB_FILES = [os.path.join(PACKAGE_ROOT,'db/package_data.json')] + diff --git a/workflows/package_template/static/package_template/icons/treeview/construction_work .png b/workflows/package_template/static/package_template/icons/treeview/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..69bd351914a95f81eb1941f6e7908474916f6116 Binary files /dev/null and b/workflows/package_template/static/package_template/icons/treeview/construction_work .png differ diff --git a/workflows/package_template/static/package_template/icons/widget/construction_work .png b/workflows/package_template/static/package_template/icons/widget/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..bc16d376995c1545972b60487ee8cd653177b407 Binary files /dev/null and b/workflows/package_template/static/package_template/icons/widget/construction_work .png differ diff --git a/workflows/package_template/templates/interactions/pcktmp_filter_integers.html b/workflows/package_template/templates/interactions/pcktmp_filter_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..33cb05ab4661eb9823647f3fa19b6fa3ce710dcb --- /dev/null +++ b/workflows/package_template/templates/interactions/pcktmp_filter_integers.html @@ -0,0 +1,8 @@ +
+
+{% for i in intList %} +{{i}}
+{% endfor %} + +
+
\ No newline at end of file diff --git a/workflows/package_template/templates/visualizations/pcktmp_display_integers.html b/workflows/package_template/templates/visualizations/pcktmp_display_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..e05b226ccb2a3d578b4de3c436957deb0694a9b1 --- /dev/null +++ b/workflows/package_template/templates/visualizations/pcktmp_display_integers.html @@ -0,0 +1,28 @@ +
+
+ + + {% for i in input_dict.intList %} + + + + + {% endfor %} + + + + +
+ {% if forloop.first %} {% else %}+{% endif %} + + {{ i }} +
+ = + + {{ input_dict.sum }} +
+
+{{ check }} + +
+
\ No newline at end of file diff --git a/workflows/package_template/urls.py b/workflows/package_template/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..7c56d8e8861188bdd8b72151f491c761a654e179 --- /dev/null +++ b/workflows/package_template/urls.py @@ -0,0 +1,8 @@ +from django.conf.urls.defaults import patterns, include, url + +urlpatterns = patterns('', + #url(r'^get-adc-index/widget(?P[0-9]+)/nx/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index(?P[0-9]+)-(?P[0-9]+).html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Document(?P[0-9]+).html', 'workflows.latino.views.get_adc_page', name='get adc page'), +) \ No newline at end of file diff --git a/workflows/package_template/views.py b/workflows/package_template/views.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/package_template/visualization_views.py b/workflows/package_template/visualization_views.py new file mode 100644 index 0000000000000000000000000000000000000000..4780451e76ea84222cdd3e4b6c0f8f230d231779 --- /dev/null +++ b/workflows/package_template/visualization_views.py @@ -0,0 +1,8 @@ +from django.shortcuts import render + +def pcktmp_display_summation(request,input_dict,output_dict,widget): + if sum(input_dict['intList']) == input_dict['sum']: + check = 'The calculation appears correct.' + else: + check = 'The calculation appears incorrect!' 
+ return render(request, 'visualizations/pcktmp_display_integers.html',{'widget':widget,'input_dict':input_dict, 'output_dict':output_dict, 'check':check}) diff --git a/workflows/perfeval/__init__.py b/workflows/perfeval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/perfeval/db/package_data.json b/workflows/perfeval/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..a46de22758d5744ceb0c5cd8b0d028415427de94 --- /dev/null +++ b/workflows/perfeval/db/package_data.json @@ -0,0 +1,511 @@ +[ + { + "pk": 48, + "model": "workflows.category", + "fields": { + "uid": "ed2728bc-3fd5-4244-9876-f6a757d1b922", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Performance Evaluation" + } + }, + { + "pk": 120, + "model": "workflows.abstractwidget", + "fields": { + "category": 48, + "treeview_image": "treeview/aggregate-icon.png", + "name": "Aggregate Detection Results", + "is_streaming": false, + "uid": "17cf3814-1edd-4c0c-84dd-e1198c033584", + "interaction_view": "", + "image": "images/aggregate-icon.png", + "package": "perfeval", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "aggr_results", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 267, + "model": "workflows.abstractinput", + "fields": { + "widget": 120, + "name": "Positive Indices", + "short_name": "psi", + "uid": "91cfd170-b37a-457e-9077-285c6e88a408", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "pos_inds", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 268, + "model": "workflows.abstractinput", + "fields": { + "widget": 120, + "name": "Detected Instances", + "short_name": "dti", + "uid": 
"da95100a-ec91-46d7-9244-b642aab9bf0d", + "default": "", + "required": false, + "multi": true, + "parameter_type": null, + "variable": "detected_inds", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 125, + "model": "workflows.abstractoutput", + "fields": { + "widget": 120, + "name": "Aggregated Detection Results", + "short_name": "adr", + "variable": "aggr_dict", + "uid": "d5a86b48-af73-486a-9f0b-6d15261fa743", + "order": 1, + "description": "" + } + }, + { + "pk": 107, + "model": "workflows.abstractwidget", + "fields": { + "category": 48, + "treeview_image": "treeview/Evaluate.png", + "name": "Evaluate Detection Algorithms", + "is_streaming": false, + "uid": "0b604a1d-f188-4e74-bcda-188829c507c0", + "interaction_view": "", + "image": "images/Evaluate.png", + "package": "perfeval", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "eval_noise_detection", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 245, + "model": "workflows.abstractinput", + "fields": { + "widget": 107, + "name": "Noisy Instances", + "short_name": "nid", + "uid": "a386be60-c5ad-4565-bbab-3fb41266d233", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "noisy_inds", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 246, + "model": "workflows.abstractinput", + "fields": { + "widget": 107, + "name": "Detected Noise", + "short_name": "dni", + "uid": "345a8672-386a-4bc5-b7ff-d7a3a9ee4b0c", + "default": "", + "required": true, + "multi": true, + "parameter_type": null, + "variable": "detected_noise", + "parameter": false, + "order": 2, + "description": "" + } + }, + { + "pk": 247, + "model": "workflows.abstractinput", + "fields": { + "widget": 107, + "name": "Beta parameter for F-mesure ", + "short_name": "bfm", + "uid": 
"d6d32949-4eb8-47b5-86fc-2d4026ef54b1", + "default": "1", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "f_beta", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 113, + "model": "workflows.abstractoutput", + "fields": { + "widget": 107, + "name": "Noise Detection Performance", + "short_name": "ndp", + "variable": "nd_eval", + "uid": "8254f904-c735-4965-aef4-6ef613ebd5e8", + "order": 1, + "description": "" + } + }, + { + "pk": 119, + "model": "workflows.abstractwidget", + "fields": { + "category": 48, + "treeview_image": "treeview/EvaluateRepeated.png", + "name": "Evaluate Repeated Detection", + "is_streaming": false, + "uid": "5b52c563-3302-429f-9232-4f003e8c11f0", + "interaction_view": "", + "image": "images/EvaluateRepeated.png", + "package": "perfeval", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "eval_batch", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 265, + "model": "workflows.abstractinput", + "fields": { + "widget": 119, + "name": "Algorithm Performances", + "short_name": "aps", + "uid": "3b202ce7-d565-4f45-ae82-3f040f10499c", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "perfs", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 266, + "model": "workflows.abstractinput", + "fields": { + "widget": 119, + "name": "F-measure Beta-parameter", + "short_name": "btp", + "uid": "c584ebf0-a9b4-4877-b503-2abef8c7dd72", + "default": "1", + "required": true, + "multi": false, + "parameter_type": "text", + "variable": "beta", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 124, + "model": "workflows.abstractoutput", + "fields": { + "widget": 119, + "name": "Performance Results", + "short_name": "prs", + "variable": "perf_results", + "uid": 
"1e4b9502-809e-46ab-ab12-728edaf4c3f3", + "order": 1, + "description": "" + } + }, + { + "pk": 121, + "model": "workflows.abstractwidget", + "fields": { + "category": 48, + "treeview_image": "treeview/results-table.png", + "name": "Evaluation Results to Table", + "is_streaming": false, + "uid": "9b6d7a11-1bd9-41b6-ac6d-343fcf31ee1e", + "interaction_view": "", + "image": "images/results-table.png", + "package": "perfeval", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "eval_to_table_view", + "action": "eval_to_table", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 269, + "model": "workflows.abstractinput", + "fields": { + "widget": 121, + "name": "Evaluation Results", + "short_name": "evr", + "uid": "37860143-3d94-4f29-9d73-2e817497c46b", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "eval_results", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 220, + "model": "workflows.abstractwidget", + "fields": { + "category": 48, + "treeview_image": "treeview/Bar-chart-icon.png", + "name": "Performance Chart", + "is_streaming": false, + "uid": "4c667e7a-82fe-4dc3-8f70-48fc261e0eee", + "interaction_view": "", + "image": "images/Bar-chart-icon.png", + "package": "perfeval", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "eval_bar_chart_view", + "action": "eval_bar_chart", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 642, + "model": "workflows.abstractinput", + "fields": { + "widget": 220, + "name": "Evaluation Results", + "short_name": "evr", + "uid": "cf7561c5-a1ff-42bf-905b-92330ff39ae4", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "eval_results", + "parameter": false, 
+ "order": 1, + "description": "" + } + }, + { + "pk": 108, + "model": "workflows.abstractwidget", + "fields": { + "category": 48, + "treeview_image": "treeview/viper600-60_1.png", + "name": "VIPER: Visual Performance Evaluation", + "is_streaming": false, + "uid": "4098a678-2d14-4cbe-808d-2d150a7c2341", + "interaction_view": "", + "image": "images/viper600-60_1.png", + "package": "perfeval", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "pr_space_view", + "action": "pr_space", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 274, + "model": "workflows.abstractinput", + "fields": { + "widget": 108, + "name": "ε-proximity evaluation parameter [%]", + "short_name": "eps", + "uid": "1a17f990-5041-47c3-b47a-492a03fc6d21", + "default": "0.05", + "required": false, + "multi": false, + "parameter_type": "select", + "variable": "eps", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 44, + "model": "workflows.abstractoption", + "fields": { + "uid": "a10c7375-7021-4abb-8b50-3f8700806d9b", + "abstract_input": 274, + "value": "0.01", + "name": " 1" + } + }, + { + "pk": 45, + "model": "workflows.abstractoption", + "fields": { + "uid": "d8a2826b-2dad-4544-bbce-6aa2434c7c02", + "abstract_input": 274, + "value": "0.02", + "name": " 2" + } + }, + { + "pk": 46, + "model": "workflows.abstractoption", + "fields": { + "uid": "ba0813b6-23ba-4626-89df-04ce026a87c4", + "abstract_input": 274, + "value": "0.03", + "name": " 3" + } + }, + { + "pk": 47, + "model": "workflows.abstractoption", + "fields": { + "uid": "cdd0bfe5-1cf4-4988-8ea2-2f93bfffcde8", + "abstract_input": 274, + "value": "0.04", + "name": " 4" + } + }, + { + "pk": 48, + "model": "workflows.abstractoption", + "fields": { + "uid": "9003923c-73e9-4ab2-bb1a-c1cb5af8f289", + "abstract_input": 274, + "value": "0.05", + "name": " 5" + } + }, + { + "pk": 49, + "model": 
"workflows.abstractoption", + "fields": { + "uid": "dcf68d77-c3aa-4044-9800-a3bc568d5b54", + "abstract_input": 274, + "value": "0.06", + "name": " 6" + } + }, + { + "pk": 50, + "model": "workflows.abstractoption", + "fields": { + "uid": "d3b0473c-d8a3-46c9-8d8c-fa5ecd21461a", + "abstract_input": 274, + "value": "0.07", + "name": " 7" + } + }, + { + "pk": 51, + "model": "workflows.abstractoption", + "fields": { + "uid": "c0be8ca7-3377-40bf-91a0-a6f2ce152078", + "abstract_input": 274, + "value": "0.08", + "name": " 8" + } + }, + { + "pk": 52, + "model": "workflows.abstractoption", + "fields": { + "uid": "a383432b-3b92-4293-8d1d-e747bcb4eeba", + "abstract_input": 274, + "value": "0.09", + "name": " 9" + } + }, + { + "pk": 54, + "model": "workflows.abstractoption", + "fields": { + "uid": "60176291-79c8-4042-afc4-55c8a11209d7", + "abstract_input": 274, + "value": "0.1", + "name": "10" + } + }, + { + "pk": 75, + "model": "workflows.abstractoption", + "fields": { + "uid": "a82e638a-928d-4271-89a1-e511e50f43a4", + "abstract_input": 274, + "value": "0", + "name": "Do not use ε-proximity evaluation" + } + }, + { + "pk": 248, + "model": "workflows.abstractinput", + "fields": { + "widget": 108, + "name": "Algorithm Performance", + "short_name": "alp", + "uid": "9bd36b53-7c9a-4ce7-a591-acecfe1bdc14", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "performance", + "parameter": false, + "order": 2, + "description": "" + } + } +] \ No newline at end of file diff --git a/workflows/perfeval/interaction_views.py b/workflows/perfeval/interaction_views.py new file mode 100644 index 0000000000000000000000000000000000000000..c40ffeea1e7a438ec946c9af2a0f1037bed0c090 --- /dev/null +++ b/workflows/perfeval/interaction_views.py @@ -0,0 +1,4 @@ +from django.shortcuts import render + +def perfeval_filter_integers(request,input_dict,output_dict,widget): + return render(request, 
'interactions/perfeval_filter_integers.html',{'widget':widget,'intList':input_dict['intList']}) \ No newline at end of file diff --git a/workflows/perfeval/library.py b/workflows/perfeval/library.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/perfeval/settings.py b/workflows/perfeval/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..9745180faf827602884c7849987d83209070f121 --- /dev/null +++ b/workflows/perfeval/settings.py @@ -0,0 +1,13 @@ +import os + +# === STANDARD PACKAGE SETTINGS === +PACKAGE_ROOT = os.path.dirname(__file__) + +# === AUTO IMPORT OPTIONS === +#If auto_import_package_data is true then given data file is automatically imported when ClowdFlows project is newly deployed or refreshed from git +AUTO_IMPORT_DB = True +#For auto_import_package_data_replace_option description see the 'replace' option in workflows/import_package command +AUTO_IMPORT_DB_REPLACE_OPTION = True +#If file(s) other than ./db/package_data.json should be imported, auto_import_package_data_files should be corrected +AUTO_IMPORT_DB_FILES = [os.path.join(PACKAGE_ROOT,'db/package_data.json')] + diff --git a/workflows/perfeval/static/perfeval/icons/treeview/construction_work .png b/workflows/perfeval/static/perfeval/icons/treeview/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..69bd351914a95f81eb1941f6e7908474916f6116 Binary files /dev/null and b/workflows/perfeval/static/perfeval/icons/treeview/construction_work .png differ diff --git a/workflows/perfeval/static/perfeval/icons/widget/construction_work .png b/workflows/perfeval/static/perfeval/icons/widget/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..bc16d376995c1545972b60487ee8cd653177b407 Binary files /dev/null and b/workflows/perfeval/static/perfeval/icons/widget/construction_work .png differ diff --git 
a/workflows/perfeval/templates/interactions/perfeval_filter_integers.html b/workflows/perfeval/templates/interactions/perfeval_filter_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..33cb05ab4661eb9823647f3fa19b6fa3ce710dcb --- /dev/null +++ b/workflows/perfeval/templates/interactions/perfeval_filter_integers.html @@ -0,0 +1,8 @@ +
+
+{% for i in intList %} +{{i}}
+{% endfor %} + +
+
\ No newline at end of file diff --git a/workflows/perfeval/templates/visualizations/perfeval_display_integers.html b/workflows/perfeval/templates/visualizations/perfeval_display_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..e05b226ccb2a3d578b4de3c436957deb0694a9b1 --- /dev/null +++ b/workflows/perfeval/templates/visualizations/perfeval_display_integers.html @@ -0,0 +1,28 @@ +
+
+ + + {% for i in input_dict.intList %} + + + + + {% endfor %} + + + + +
+ {% if forloop.first %} {% else %}+{% endif %} + + {{ i }} +
+ = + + {{ input_dict.sum }} +
+
+{{ check }} + +
+
\ No newline at end of file diff --git a/workflows/perfeval/urls.py b/workflows/perfeval/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..7c56d8e8861188bdd8b72151f491c761a654e179 --- /dev/null +++ b/workflows/perfeval/urls.py @@ -0,0 +1,8 @@ +from django.conf.urls.defaults import patterns, include, url + +urlpatterns = patterns('', + #url(r'^get-adc-index/widget(?P[0-9]+)/nx/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index.html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Index(?P[0-9]+)-(?P[0-9]+).html$', 'workflows.latino.views.get_adc_index', name='get adc index'), + #url(r'^get-adc-index/widget(?P[0-9]+)/(?Pn?)x/Document(?P[0-9]+).html', 'workflows.latino.views.get_adc_page', name='get adc page'), +) \ No newline at end of file diff --git a/workflows/perfeval/views.py b/workflows/perfeval/views.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/perfeval/visualization_views.py b/workflows/perfeval/visualization_views.py new file mode 100644 index 0000000000000000000000000000000000000000..56a476a31a2bd2e2a74b1b20887ac848e5e40ca7 --- /dev/null +++ b/workflows/perfeval/visualization_views.py @@ -0,0 +1,8 @@ +from django.shortcuts import render + +def perfeval_display_summation(request,input_dict,output_dict,widget): + if sum(input_dict['intList']) == input_dict['sum']: + check = 'The calculation appears correct.' + else: + check = 'The calculation appears incorrect!' 
+ return render(request, 'visualizations/perfeval_display_integers.html',{'widget':widget,'input_dict':input_dict, 'output_dict':output_dict, 'check':check}) diff --git a/workflows/subgroup_discovery/db/package_data.json b/workflows/subgroup_discovery/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..572247d910b0cce465c106088aa1769a6639bb69 --- /dev/null +++ b/workflows/subgroup_discovery/db/package_data.json @@ -0,0 +1,392 @@ +[ + { + "pk": 36, + "model": "workflows.category", + "fields": { + "uid": "54d259aa-2937-4ba2-8e60-017425fc55cf", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Orange" + } + }, + { + "pk": 71, + "model": "workflows.category", + "fields": { + "uid": "b01ed61e-31e6-4f6f-87b1-f93285801d1b", + "parent": 36, + "workflow": null, + "user": null, + "order": 1, + "name": "Subgroup discovery" + } + }, + { + "pk": 294, + "model": "workflows.abstractwidget", + "fields": { + "category": 71, + "treeview_image": "", + "name": "Build subgroups", + "is_streaming": false, + "uid": "d2a5a80b-1d11-4420-b12d-a42ab79e4e12", + "interaction_view": "build_subgroups", + "image": "", + "package": "subgroup_discovery", + "static_image": "builder.png", + "post_interact_action": "build_subgroups_finished", + "user": null, + "visualization_view": "", + "action": "build_subgroups", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 981, + "model": "workflows.abstractinput", + "fields": { + "widget": 294, + "name": "Data table", + "short_name": "dat", + "uid": "3f631385-f579-431d-9b00-887fdfcfe39c", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "" + } + }, + { + "pk": 348, + "model": "workflows.abstractoutput", + "fields": { + "widget": 294, + "name": "SD rules", + "short_name": "rul", + "variable": 
"rules", + "uid": "81a5d62c-9725-4ec1-a352-d18ef75e0a74", + "order": 1, + "description": "" + } + }, + { + "pk": 350, + "model": "workflows.abstractoutput", + "fields": { + "widget": 294, + "name": "SD classifier", + "short_name": "cls", + "variable": "classifier", + "uid": "3095ff52-cc73-4d70-92bf-1bdfa81a5631", + "order": 2, + "description": "classifier" + } + }, + { + "pk": 298, + "model": "workflows.abstractwidget", + "fields": { + "category": 71, + "treeview_image": "", + "name": "Query data with subgroups", + "is_streaming": false, + "uid": "920cd259-b727-4f88-b4a6-bea0c3ea60b1", + "interaction_view": "", + "image": "", + "package": "subgroup_discovery", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "query_with_subgroups", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 985, + "model": "workflows.abstractinput", + "fields": { + "widget": 298, + "name": "Data table", + "short_name": "dat", + "uid": "df3a0ac5-ca5f-483a-91c1-d5a32a404de9", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "data table" + } + }, + { + "pk": 986, + "model": "workflows.abstractinput", + "fields": { + "widget": 298, + "name": "SD Rules", + "short_name": "rul", + "uid": "ca5c3d62-8f8b-4cfd-9be2-200e5c5fd6fc", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "rules", + "parameter": false, + "order": 2, + "description": "rules" + } + }, + { + "pk": 352, + "model": "workflows.abstractoutput", + "fields": { + "widget": 298, + "name": "Data table", + "short_name": "dat", + "variable": "data", + "uid": "240f52b7-74cf-4451-9ca9-27d647a0a7b9", + "order": 1, + "description": "queried data" + } + }, + { + "pk": 354, + "model": "workflows.abstractoutput", + "fields": { + "widget": 298, + 
"name": "Data table", + "short_name": "rem", + "variable": "remaining_data", + "uid": "fb43b33f-8d79-4240-a8fb-2cb49fc28875", + "order": 2, + "description": "remaining data" + } + }, + { + "pk": 297, + "model": "workflows.abstractwidget", + "fields": { + "category": 71, + "treeview_image": "", + "name": "Select subgroups", + "is_streaming": false, + "uid": "6e02a8d4-fe84-4430-8a74-beb1e4c51964", + "interaction_view": "select_subgroups", + "image": "", + "package": "subgroup_discovery", + "static_image": "", + "post_interact_action": "select_subgroups_finished", + "user": null, + "visualization_view": "select_subgroups", + "action": "select_subgroups", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 984, + "model": "workflows.abstractinput", + "fields": { + "widget": 297, + "name": "SD rules", + "short_name": "rul", + "uid": "a9f87b37-1b21-47d3-80fa-687d68f2a5c3", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "rules", + "parameter": false, + "order": 1, + "description": "rules" + } + }, + { + "pk": 351, + "model": "workflows.abstractoutput", + "fields": { + "widget": 297, + "name": "SD rules", + "short_name": "rul", + "variable": "sel_rules", + "uid": "a95fdc85-da40-4fd3-a324-7940bd9132d9", + "order": 1, + "description": "selected rules" + } + }, + { + "pk": 296, + "model": "workflows.abstractwidget", + "fields": { + "category": 71, + "treeview_image": "", + "name": "Subgroup ROC visualization", + "is_streaming": false, + "uid": "b05c5df4-d3fb-44e6-b647-b4c5f78c05cf", + "interaction_view": "", + "image": "", + "package": "subgroup_discovery", + "static_image": "roc.png", + "post_interact_action": "", + "user": null, + "visualization_view": "subgroup_roc_visualization", + "action": "subgroup_roc_visualization", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + 
"description": "" + } + }, + { + "pk": 983, + "model": "workflows.abstractinput", + "fields": { + "widget": 296, + "name": "SD rules", + "short_name": "rul", + "uid": "8ceb4772-878e-45c1-b43c-c3ab8605abd0", + "default": "", + "required": true, + "multi": true, + "parameter_type": null, + "variable": "rules", + "parameter": false, + "order": 1, + "description": "rules" + } + }, + { + "pk": 295, + "model": "workflows.abstractwidget", + "fields": { + "category": 71, + "treeview_image": "", + "name": "Subgroup bar visualization", + "is_streaming": false, + "uid": "b3cb0462-6bae-4922-a67d-504e93fc0fc6", + "interaction_view": "", + "image": "", + "package": "subgroup_discovery", + "static_image": "bar.png", + "post_interact_action": "", + "user": null, + "visualization_view": "subgroup_bar_visualization", + "action": "subgroup_bar_visualization", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 982, + "model": "workflows.abstractinput", + "fields": { + "widget": 295, + "name": "SD rules", + "short_name": "rul", + "uid": "1c212c77-bc15-437c-a69a-1a9e153a35d1", + "default": "", + "required": true, + "multi": false, + "parameter_type": null, + "variable": "rules", + "parameter": false, + "order": 1, + "description": "rules" + } + }, + { + "pk": 39, + "model": "workflows.category", + "fields": { + "uid": "6ff015c3-71f5-4c20-8f4c-e63ea090c586", + "parent": 36, + "workflow": null, + "user": null, + "order": 1, + "name": "Utilities" + } + }, + { + "pk": 299, + "model": "workflows.abstractwidget", + "fields": { + "category": 39, + "treeview_image": "", + "name": "Table from sets of examples", + "is_streaming": false, + "uid": "13af5c58-1384-417e-9cc6-11e1853c19eb", + "interaction_view": "table_from_sets", + "image": "", + "package": "subgroup_discovery", + "static_image": "", + "post_interact_action": "table_from_sets_finished", + "user": null, + "visualization_view": "", + "action": 
"table_from_sets", + "wsdl_method": "", + "wsdl": "", + "interactive": true, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 987, + "model": "workflows.abstractinput", + "fields": { + "widget": 299, + "name": "Data table", + "short_name": "dat", + "uid": "183384b7-3c8a-41e4-becf-25c852689b0a", + "default": "", + "required": true, + "multi": true, + "parameter_type": null, + "variable": "data", + "parameter": false, + "order": 1, + "description": "one set of examples" + } + }, + { + "pk": 353, + "model": "workflows.abstractoutput", + "fields": { + "widget": 299, + "name": "Data table", + "short_name": "dat", + "variable": "merged_data", + "uid": "d95b0b32-fdc9-419b-9f7d-fde45587b4de", + "order": 1, + "description": "new data table" + } + } +] \ No newline at end of file diff --git a/workflows/subgroup_discovery/settings.py b/workflows/subgroup_discovery/settings.py index bf3bb9503f6dea8969ef8608bb244942d5271782..9d88c0bb09e49d983a9ba914732d545b4f8ebd8b 100644 --- a/workflows/subgroup_discovery/settings.py +++ b/workflows/subgroup_discovery/settings.py @@ -6,6 +6,6 @@ package_root = os.path.dirname(__file__) package_statics = os.path.join(os.path.dirname(__file__), 'static', package_name) package_bin = os.path.join(package_root, 'bin') -auto_update_db = True +auto_update_db = False create_backups = True diff --git a/workflows/subgroup_discovery/static/icons/treeview/bar.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/bar.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/treeview/bar.png rename to workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/bar.png diff --git a/workflows/subgroup_discovery/static/icons/treeview/builder.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/builder.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/treeview/builder.png rename to 
workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/builder.png diff --git a/workflows/subgroup_discovery/static/icons/treeview/eval.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/eval.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/treeview/eval.png rename to workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/eval.png diff --git a/workflows/subgroup_discovery/static/icons/treeview/roc.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/roc.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/treeview/roc.png rename to workflows/subgroup_discovery/static/subgroup_discovery/icons/treeview/roc.png diff --git a/workflows/subgroup_discovery/static/icons/widget/bar.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/bar.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/widget/bar.png rename to workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/bar.png diff --git a/workflows/subgroup_discovery/static/icons/widget/builder.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/builder.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/widget/builder.png rename to workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/builder.png diff --git a/workflows/subgroup_discovery/static/icons/widget/eval.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/eval.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/widget/eval.png rename to workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/eval.png diff --git a/workflows/subgroup_discovery/static/icons/widget/roc.png b/workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/roc.png similarity index 100% rename from workflows/subgroup_discovery/static/icons/widget/roc.png rename to 
workflows/subgroup_discovery/static/subgroup_discovery/icons/widget/roc.png diff --git a/workflows/views.py b/workflows/views.py index 1e53d0cde364e497ad6be7e2bf7a18dfda6587b1..919f945289aadf9017a22863538b4887fb79d07e 100644 --- a/workflows/views.py +++ b/workflows/views.py @@ -22,12 +22,13 @@ from django.contrib.auth.decorators import login_required #settings from mothra.settings import DEBUG, FILES_FOLDER - - #ostalo import os -from latino.views import * +from workflows import module_importer +def setattr_local(name, value, package): + setattr(sys.modules[__name__], name, value) +module_importer.import_all_packages_libs("views",setattr_local) @login_required def get_category(request): diff --git a/workflows/weka/__init__.py b/workflows/weka/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/workflows/weka/db/package_data.json b/workflows/weka/db/package_data.json new file mode 100644 index 0000000000000000000000000000000000000000..384a9c9b1a30442ac5f62d8da9b6378ee12f0170 --- /dev/null +++ b/workflows/weka/db/package_data.json @@ -0,0 +1,2220 @@ +[ + { + "pk": 1, + "model": "workflows.category", + "fields": { + "uid": "fe5d5a78-ee52-4434-8a90-1fcfeb0ea63e", + "parent": null, + "workflow": null, + "user": null, + "order": 1, + "name": "Weka" + } + }, + { + "pk": 2, + "model": "workflows.category", + "fields": { + "uid": "74a7063e-9e7f-4305-8b5e-d5f1a4c40250", + "parent": 1, + "workflow": null, + "user": null, + "order": 1, + "name": "Classification" + } + }, + { + "pk": 1, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "IBk", + "is_streaming": false, + "uid": "a6125102-c2b4-4206-842b-8d0c510f7e86", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "IBk", + "wsdl": 
"http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 1, + "model": "workflows.abstractinput", + "fields": { + "widget": 1, + "name": "Timeout", + "short_name": "to", + "uid": "ca7a572b-2662-460b-a788-42e3b82824ac", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 2, + "model": "workflows.abstractinput", + "fields": { + "widget": 1, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "7ac0ea58-70b3-4e75-a146-2abcd9ff81a0", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 3, + "model": "workflows.abstractinput", + "fields": { + "widget": 1, + "name": "params", + "short_name": "par", + "uid": "34b5d0f6-a72f-4d16-a338-1b1da6597c21", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 1, + "model": "workflows.abstractoutput", + "fields": { + "widget": 1, + "name": "IBk_learner", + "short_name": "IBk", + "variable": "IBk_learner", + "uid": "eb204c56-6279-4f3a-b1c0-37a6b0c7b91b", + "order": 1, + "description": "" + } + }, + { + "pk": 2, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "J48", + "is_streaming": false, + "uid": "dc8980c2-df3c-4797-a0b9-297e10e188e4", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "J48", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, 
+ "order": 1, + "description": "" + } + }, + { + "pk": 4, + "model": "workflows.abstractinput", + "fields": { + "widget": 2, + "name": "Timeout", + "short_name": "to", + "uid": "166f1dc1-6cf7-4ddb-9480-e4ec9cad8613", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 5, + "model": "workflows.abstractinput", + "fields": { + "widget": 2, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "44995999-b169-4672-94b4-832cc5afc25f", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 6, + "model": "workflows.abstractinput", + "fields": { + "widget": 2, + "name": "params", + "short_name": "par", + "uid": "d9b876f1-5346-43e4-b6f9-183ba17d04ac", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 2, + "model": "workflows.abstractoutput", + "fields": { + "widget": 2, + "name": "J48_learner", + "short_name": "J48", + "variable": "J48_learner", + "uid": "f853af74-d660-4450-ada4-fb32fc6aae86", + "order": 1, + "description": "" + } + }, + { + "pk": 3, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "JRip", + "is_streaming": false, + "uid": "49e693c9-1ca5-40d0-af31-9d84c0bc4151", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "JRip", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 7, + "model": "workflows.abstractinput", + 
"fields": { + "widget": 3, + "name": "Timeout", + "short_name": "to", + "uid": "abddf5f0-d80d-4595-bda1-a7708831704f", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 8, + "model": "workflows.abstractinput", + "fields": { + "widget": 3, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "7f11ab99-79a8-417c-93ce-022159e4bd8b", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 9, + "model": "workflows.abstractinput", + "fields": { + "widget": 3, + "name": "params", + "short_name": "par", + "uid": "30e65c8c-04a3-4cea-a503-fa836c8f8222", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 3, + "model": "workflows.abstractoutput", + "fields": { + "widget": 3, + "name": "JRip_learner", + "short_name": "JRi", + "variable": "JRip_learner", + "uid": "022a1a8a-ad2b-49bb-9ec9-05f0903bda5d", + "order": 1, + "description": "" + } + }, + { + "pk": 4, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "KStar", + "is_streaming": false, + "uid": "353f931b-fc5f-40b7-9e26-9364990c56eb", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "KStar", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 10, + "model": "workflows.abstractinput", + "fields": { + "widget": 4, + "name": "Timeout", + "short_name": "to", + "uid": 
"72ccc121-19fd-4501-95b1-f66662970c21", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 11, + "model": "workflows.abstractinput", + "fields": { + "widget": 4, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "527e1e1a-3ff7-40c1-add6-f08b84b82a2b", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 12, + "model": "workflows.abstractinput", + "fields": { + "widget": 4, + "name": "params", + "short_name": "par", + "uid": "26fe9a12-9917-4a3f-822e-7f9c64260e9c", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 4, + "model": "workflows.abstractoutput", + "fields": { + "widget": 4, + "name": "KStar_learner", + "short_name": "KSt", + "variable": "KStar_learner", + "uid": "b82ceb0d-5430-40f7-9dbc-e1b981ee959d", + "order": 1, + "description": "" + } + }, + { + "pk": 5, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "LibSVM", + "is_streaming": false, + "uid": "dc1efcee-56bc-40b0-a699-b3750d4651ac", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "LibSVM", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 13, + "model": "workflows.abstractinput", + "fields": { + "widget": 5, + "name": "Timeout", + "short_name": "to", + "uid": "72e6a74a-6213-4821-b114-9eedcb0d3556", + "default": "60", + "required": false, + 
"multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 14, + "model": "workflows.abstractinput", + "fields": { + "widget": 5, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "80a185a0-919e-41ef-9f6d-5696a04508c5", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 15, + "model": "workflows.abstractinput", + "fields": { + "widget": 5, + "name": "params", + "short_name": "par", + "uid": "399e84ce-3ff0-4c15-a367-c9848d3cbb93", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 5, + "model": "workflows.abstractoutput", + "fields": { + "widget": 5, + "name": "LibSVM_learner", + "short_name": "Lib", + "variable": "LibSVM_learner", + "uid": "73e0b8a5-33d6-4b23-b366-b8173028b44d", + "order": 1, + "description": "" + } + }, + { + "pk": 6, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "Multilayer Perceptron", + "is_streaming": false, + "uid": "fa4f24ac-1055-408a-852f-1ce4d90ef3ce", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "Multilayer_Perceptron", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 16, + "model": "workflows.abstractinput", + "fields": { + "widget": 6, + "name": "Timeout", + "short_name": "to", + "uid": "b4ffb057-67de-4df8-ad9c-1fe44fd491bd", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + 
"variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 17, + "model": "workflows.abstractinput", + "fields": { + "widget": 6, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "a084b7c4-cbc1-44eb-b9ae-8265f800d999", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 18, + "model": "workflows.abstractinput", + "fields": { + "widget": 6, + "name": "params", + "short_name": "par", + "uid": "f1f2a5e7-40c7-485f-a592-0d7a13bafa30", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 6, + "model": "workflows.abstractoutput", + "fields": { + "widget": 6, + "name": "Multilayer_Perceptron_learner", + "short_name": "Mul", + "variable": "Multilayer_Perceptron_learner", + "uid": "8174e40c-d895-4db1-a51b-d9b83aa3fa42", + "order": 1, + "description": "" + } + }, + { + "pk": 7, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "Naive Bayes", + "is_streaming": false, + "uid": "8283f38a-33dc-4d0d-812d-1c58c7209469", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "Naive_Bayes", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 19, + "model": "workflows.abstractinput", + "fields": { + "widget": 7, + "name": "Timeout", + "short_name": "to", + "uid": "0d9e4fd6-aae2-42d1-9dc8-8c905cbea8ab", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + 
"order": 1, + "description": "" + } + }, + { + "pk": 20, + "model": "workflows.abstractinput", + "fields": { + "widget": 7, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "9d4e54cc-013a-4997-8804-bc6329901226", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 21, + "model": "workflows.abstractinput", + "fields": { + "widget": 7, + "name": "params", + "short_name": "par", + "uid": "4c034605-af49-4af3-b1b5-ba3feb4fe5f7", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 7, + "model": "workflows.abstractoutput", + "fields": { + "widget": 7, + "name": "Naive_Bayes_learner", + "short_name": "Nai", + "variable": "Naive_Bayes_learner", + "uid": "e399b729-9f81-437d-8037-6aee44155e14", + "order": 1, + "description": "" + } + }, + { + "pk": 10, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "REP Tree", + "is_streaming": false, + "uid": "77f625ba-46fd-4c7c-bcc1-ab279682a02c", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "REPTree", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 28, + "model": "workflows.abstractinput", + "fields": { + "widget": 10, + "name": "Timeout", + "short_name": "to", + "uid": "1f6b1b09-1319-45a8-a785-83afa47649fb", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 29, + "model": 
"workflows.abstractinput", + "fields": { + "widget": 10, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "48127408-7f94-47bc-93d7-723787cccbe9", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 30, + "model": "workflows.abstractinput", + "fields": { + "widget": 10, + "name": "params", + "short_name": "par", + "uid": "0cd58b20-a46d-42ba-b1ea-e63f77bce087", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 10, + "model": "workflows.abstractoutput", + "fields": { + "widget": 10, + "name": "REPTree_learner", + "short_name": "REP", + "variable": "REPTree_learner", + "uid": "6b3e9710-6b9b-492c-808a-52a9f4476c65", + "order": 1, + "description": "" + } + }, + { + "pk": 8, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "treeview/Toolbar_-_Loop.png", + "name": "Random Tree", + "is_streaming": false, + "uid": "0adcbc66-4b1a-475b-b82f-42682050c8f4", + "interaction_view": "", + "image": "images/Toolbar_-_Loop.png", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "RandomTree", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 22, + "model": "workflows.abstractinput", + "fields": { + "widget": 8, + "name": "Timeout", + "short_name": "to", + "uid": "ba293824-883c-463c-aefc-b7f3d6617dec", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 23, + "model": 
"workflows.abstractinput", + "fields": { + "widget": 8, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "4061b687-c92d-4eca-9e2a-b420bb2b8611", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 24, + "model": "workflows.abstractinput", + "fields": { + "widget": 8, + "name": "params", + "short_name": "par", + "uid": "b38f475d-34b7-4892-9ae2-2f1c0fbcd753", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 8, + "model": "workflows.abstractoutput", + "fields": { + "widget": 8, + "name": "RandomTree_learner", + "short_name": "Ran", + "variable": "RandomTree_learner", + "uid": "26150fbf-2b4c-4ab4-b227-df9c941febed", + "order": 1, + "description": "" + } + }, + { + "pk": 9, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "treeview/120px-Gears_icon.png", + "name": "RandomForest", + "is_streaming": false, + "uid": "a86057ba-aa1f-40fe-a548-e6c3259b7a6e", + "interaction_view": "", + "image": "images/120px-Gears_icon.png", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "RandomForest", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 25, + "model": "workflows.abstractinput", + "fields": { + "widget": 9, + "name": "Timeout", + "short_name": "to", + "uid": "d88bcf23-9d76-4ac7-9335-1a480ba93db7", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 26, + "model": 
"workflows.abstractinput", + "fields": { + "widget": 9, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "e91780de-4643-4e7d-86a7-6fe3f4d81324", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 27, + "model": "workflows.abstractinput", + "fields": { + "widget": 9, + "name": "params", + "short_name": "par", + "uid": "7e7e0abe-932b-48e0-937b-cc6285f08ac7", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 9, + "model": "workflows.abstractoutput", + "fields": { + "widget": 9, + "name": "RandomForest_learner", + "short_name": "Ran", + "variable": "RandomForest_learner", + "uid": "5fcca051-bda1-44b3-b8d9-199fef38d0b4", + "order": 1, + "description": "" + } + }, + { + "pk": 11, + "model": "workflows.abstractwidget", + "fields": { + "category": 2, + "treeview_image": "", + "name": "SMO", + "is_streaming": false, + "uid": "9fa88a17-5a6a-4173-88bf-8458dc24baea", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "SMO", + "wsdl": "http://vihar.ijs.si:8092/Classification?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 31, + "model": "workflows.abstractinput", + "fields": { + "widget": 11, + "name": "Timeout", + "short_name": "to", + "uid": "88f3ce4a-fd3e-45a3-bacf-94bb6ae183e1", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 32, + "model": "workflows.abstractinput", + "fields": { + "widget": 11, + "name": "Send empty 
strings to webservices", + "short_name": "ses", + "uid": "69e8a087-14b7-4539-bea2-07450fb40cdd", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 33, + "model": "workflows.abstractinput", + "fields": { + "widget": 11, + "name": "params", + "short_name": "par", + "uid": "fbd439ac-767f-4923-b9d9-7be4c61e2c4a", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "params", + "parameter": true, + "order": 3, + "description": "" + } + }, + { + "pk": 11, + "model": "workflows.abstractoutput", + "fields": { + "widget": 11, + "name": "SMO_learner", + "short_name": "SMO", + "variable": "SMO_learner", + "uid": "fdb2e4fb-85e1-41b7-b1af-96e3779ff751", + "order": 1, + "description": "" + } + }, + { + "pk": 3, + "model": "workflows.category", + "fields": { + "uid": "6d75faa7-c547-475c-8b96-04b78d4ff402", + "parent": 1, + "workflow": null, + "user": null, + "order": 1, + "name": "Evaluation" + } + }, + { + "pk": 12, + "model": "workflows.abstractwidget", + "fields": { + "category": 3, + "treeview_image": "", + "name": "Apply Classifier", + "is_streaming": false, + "uid": "86315b53-900f-4844-b41e-019f91afd955", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "apply_classifier", + "wsdl": "http://vihar.ijs.si:8092/Evaluation?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 34, + "model": "workflows.abstractinput", + "fields": { + "widget": 12, + "name": "Timeout", + "short_name": "to", + "uid": "542b8e26-bc86-4e7c-8595-01911478f235", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 
1, + "description": "" + } + }, + { + "pk": 35, + "model": "workflows.abstractinput", + "fields": { + "widget": 12, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "80e54d5f-d86f-4681-8bad-8b37cc1ec2c3", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 36, + "model": "workflows.abstractinput", + "fields": { + "widget": 12, + "name": "classifier", + "short_name": "cla", + "uid": "09c54f59-0beb-4300-b682-47ebd63b71c9", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "classifier", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 37, + "model": "workflows.abstractinput", + "fields": { + "widget": 12, + "name": "instances", + "short_name": "ins", + "uid": "c22197af-f60b-4b76-b40e-516c17c9e1ab", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "instances", + "parameter": false, + "order": 4, + "description": "" + } + }, + { + "pk": 12, + "model": "workflows.abstractoutput", + "fields": { + "widget": 12, + "name": "classes", + "short_name": "cla", + "variable": "classes", + "uid": "9b29be8c-d5e9-46c5-84a8-530275037210", + "order": 1, + "description": "" + } + }, + { + "pk": 13, + "model": "workflows.abstractwidget", + "fields": { + "category": 3, + "treeview_image": "", + "name": "Apply Clusterer", + "is_streaming": false, + "uid": "c8b8dcb1-9eea-4c94-b42a-fa3eeac1c771", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "apply_clusterer", + "wsdl": "http://vihar.ijs.si:8092/Evaluation?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 38, + "model": 
"workflows.abstractinput", + "fields": { + "widget": 13, + "name": "Timeout", + "short_name": "to", + "uid": "09b6a103-7879-469c-bf8b-712f80de4b52", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 39, + "model": "workflows.abstractinput", + "fields": { + "widget": 13, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "416696a5-40bf-41bb-afd6-dd9f89c4246f", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 40, + "model": "workflows.abstractinput", + "fields": { + "widget": 13, + "name": "clusterer", + "short_name": "clu", + "uid": "5b14518d-51a9-4638-9231-dd73d19b6b3c", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "clusterer", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 41, + "model": "workflows.abstractinput", + "fields": { + "widget": 13, + "name": "instances", + "short_name": "ins", + "uid": "278891e3-2530-4c9b-9871-57e6b6ad4070", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "instances", + "parameter": false, + "order": 4, + "description": "" + } + }, + { + "pk": 13, + "model": "workflows.abstractoutput", + "fields": { + "widget": 13, + "name": "clusters", + "short_name": "clu", + "variable": "clusters", + "uid": "10685da4-1505-46f5-aed3-30e7228eaaa6", + "order": 1, + "description": "" + } + }, + { + "pk": 14, + "model": "workflows.abstractoutput", + "fields": { + "widget": 13, + "name": "number_of_clusters", + "short_name": "num", + "variable": "number_of_clusters", + "uid": "618a6e7b-54a2-4ee0-8356-6819a88451c7", + "order": 2, + "description": "" + } + }, + { + "pk": 15, + "model": "workflows.abstractoutput", + 
"fields": { + "widget": 13, + "name": "centroids", + "short_name": "cen", + "variable": "centroids", + "uid": "39951e7d-8354-49b6-aa26-cbbc5be2285a", + "order": 3, + "description": "" + } + }, + { + "pk": 14, + "model": "workflows.abstractwidget", + "fields": { + "category": 3, + "treeview_image": "", + "name": "Build Classifier", + "is_streaming": false, + "uid": "904dbc2d-1de8-4b97-a48f-e6ea84753f6a", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "build_classifier", + "wsdl": "http://vihar.ijs.si:8092/Evaluation?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 42, + "model": "workflows.abstractinput", + "fields": { + "widget": 14, + "name": "Timeout", + "short_name": "to", + "uid": "e3d4fe8d-8b9d-45ff-a3a4-de7ce9a29327", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 43, + "model": "workflows.abstractinput", + "fields": { + "widget": 14, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "8b63a66a-2a37-41d0-8c55-8b92f98fc79f", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 44, + "model": "workflows.abstractinput", + "fields": { + "widget": 14, + "name": "learner", + "short_name": "lea", + "uid": "17b77977-c7fd-4a1e-8bdd-e57dda0495d9", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "learner", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 45, + "model": "workflows.abstractinput", + "fields": { + "widget": 14, + "name": "instances", + "short_name": "ins", 
+ "uid": "1c36937f-7127-48a5-b086-ab3356e8db7d", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "instances", + "parameter": false, + "order": 4, + "description": "" + } + }, + { + "pk": 16, + "model": "workflows.abstractoutput", + "fields": { + "widget": 14, + "name": "classifier", + "short_name": "cla", + "variable": "classifier", + "uid": "764cd629-5f3a-44b7-ab8f-a71edd9363c7", + "order": 1, + "description": "" + } + }, + { + "pk": 15, + "model": "workflows.abstractwidget", + "fields": { + "category": 3, + "treeview_image": "", + "name": "Build Clusterer", + "is_streaming": false, + "uid": "533bec7f-539a-4a95-b883-a841bf3b0118", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "build_clusterer", + "wsdl": "http://vihar.ijs.si:8092/Evaluation?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 46, + "model": "workflows.abstractinput", + "fields": { + "widget": 15, + "name": "Timeout", + "short_name": "to", + "uid": "db17d2bf-4d1e-49cd-863c-2055246de11c", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 47, + "model": "workflows.abstractinput", + "fields": { + "widget": 15, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "21155ff6-e3dc-4b73-99be-7070bc16a7ac", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 48, + "model": "workflows.abstractinput", + "fields": { + "widget": 15, + "name": "learner", + "short_name": "lea", + "uid": "eec2f308-1c5e-4de3-b60d-59b4f19d7aa6", + "default": "", + 
"required": false, + "multi": false, + "parameter_type": null, + "variable": "learner", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 49, + "model": "workflows.abstractinput", + "fields": { + "widget": 15, + "name": "instances", + "short_name": "ins", + "uid": "da953155-8ed2-416b-b739-bed4fd748dda", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "instances", + "parameter": false, + "order": 4, + "description": "" + } + }, + { + "pk": 17, + "model": "workflows.abstractoutput", + "fields": { + "widget": 15, + "name": "clusterer", + "short_name": "clu", + "variable": "clusterer", + "uid": "777aeed1-65ad-478b-9b2f-058e2441f13f", + "order": 1, + "description": "" + } + }, + { + "pk": 16, + "model": "workflows.abstractwidget", + "fields": { + "category": 3, + "treeview_image": "", + "name": "Cross Validate", + "is_streaming": false, + "uid": "272f5485-8081-4483-8321-118599817882", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "cross_validate", + "wsdl": "http://vihar.ijs.si:8092/Evaluation?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 50, + "model": "workflows.abstractinput", + "fields": { + "widget": 16, + "name": "Timeout", + "short_name": "to", + "uid": "db8ea5c9-738b-44c1-bcd9-9b704a8798c6", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 51, + "model": "workflows.abstractinput", + "fields": { + "widget": 16, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "f5f83294-4688-4244-8b8a-95df71859fb0", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": 
"sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 52, + "model": "workflows.abstractinput", + "fields": { + "widget": 16, + "name": "learner", + "short_name": "lea", + "uid": "5ddb24a4-f7ad-4d31-9a3d-8c18da1361a2", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "learner", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 53, + "model": "workflows.abstractinput", + "fields": { + "widget": 16, + "name": "instances", + "short_name": "ins", + "uid": "91c0ad22-1d1b-4f0b-8ea9-be94be1ade29", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "instances", + "parameter": false, + "order": 4, + "description": "" + } + }, + { + "pk": 54, + "model": "workflows.abstractinput", + "fields": { + "widget": 16, + "name": "folds", + "short_name": "fol", + "uid": "1a99446a-c3bd-40b4-996a-eb1af12fbf65", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "folds", + "parameter": true, + "order": 5, + "description": "" + } + }, + { + "pk": 18, + "model": "workflows.abstractoutput", + "fields": { + "widget": 16, + "name": "accuracy", + "short_name": "acc", + "variable": "accuracy", + "uid": "1030110c-bab6-4dbd-b19e-549ea1c94450", + "order": 1, + "description": "" + } + }, + { + "pk": 19, + "model": "workflows.abstractoutput", + "fields": { + "widget": 16, + "name": "confusion_matrix", + "short_name": "con", + "variable": "confusion_matrix", + "uid": "b6b27c8f-ce52-4e01-bdf2-de6011a42f44", + "order": 2, + "description": "" + } + }, + { + "pk": 20, + "model": "workflows.abstractoutput", + "fields": { + "widget": 16, + "name": "accuracy_by_class", + "short_name": "acc", + "variable": "accuracy_by_class", + "uid": "ca272653-396e-4a43-a94c-dc86029728eb", + "order": 3, + "description": "" + } + }, + { + "pk": 21, + "model": "workflows.abstractoutput", + "fields": { + "widget": 16, + 
"name": "summary", + "short_name": "sum", + "variable": "summary", + "uid": "997d0a9d-273d-47df-8aaf-3e74e35d6b7d", + "order": 4, + "description": "" + } + }, + { + "pk": 4, + "model": "workflows.category", + "fields": { + "uid": "0ae049f7-6040-4992-bb03-f9134b8e63c7", + "parent": 1, + "workflow": null, + "user": null, + "order": 1, + "name": "Utilities" + } + }, + { + "pk": 17, + "model": "workflows.abstractwidget", + "fields": { + "category": 4, + "treeview_image": "", + "name": "Arff to Weka Instances", + "is_streaming": false, + "uid": "3a141e99-a804-4b73-b06b-c75032325aa3", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "arff_to_weka_instances", + "wsdl": "http://vihar.ijs.si:8092/Utilities?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 55, + "model": "workflows.abstractinput", + "fields": { + "widget": 17, + "name": "Timeout", + "short_name": "to", + "uid": "2ec43e59-25e0-40da-a96f-7deb05ad063e", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 56, + "model": "workflows.abstractinput", + "fields": { + "widget": 17, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "0161f92d-e191-4d60-979f-f0a7d69f40cb", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 57, + "model": "workflows.abstractinput", + "fields": { + "widget": 17, + "name": "arff", + "short_name": "arf", + "uid": "b027ab99-adb6-44fb-96ab-08deb7db7f5d", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "arff", + "parameter": 
false, + "order": 3, + "description": "" + } + }, + { + "pk": 58, + "model": "workflows.abstractinput", + "fields": { + "widget": 17, + "name": "class_index", + "short_name": "cla", + "uid": "1e067896-7006-4b44-82af-b03a61727080", + "default": "", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "class_index", + "parameter": true, + "order": 4, + "description": "" + } + }, + { + "pk": 22, + "model": "workflows.abstractoutput", + "fields": { + "widget": 17, + "name": "instances", + "short_name": "ins", + "variable": "instances", + "uid": "110f9834-3e44-419b-a3af-39622417ecd6", + "order": 1, + "description": "" + } + }, + { + "pk": 18, + "model": "workflows.abstractwidget", + "fields": { + "category": 4, + "treeview_image": "", + "name": "Print Model", + "is_streaming": false, + "uid": "5ac9aced-9520-4ea4-ac0d-bc58c8be2aef", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "print_model", + "wsdl": "http://vihar.ijs.si:8092/Utilities?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 59, + "model": "workflows.abstractinput", + "fields": { + "widget": 18, + "name": "Timeout", + "short_name": "to", + "uid": "2a81aa2c-58af-44e1-8b03-c2dc72225c5c", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 60, + "model": "workflows.abstractinput", + "fields": { + "widget": 18, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "00fdd694-785f-4e96-a616-5feea30eec49", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 61, + 
"model": "workflows.abstractinput", + "fields": { + "widget": 18, + "name": "model", + "short_name": "mod", + "uid": "df7c8785-4e7f-4e74-b123-c86b7f380516", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "model", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 23, + "model": "workflows.abstractoutput", + "fields": { + "widget": 18, + "name": "model_as_string", + "short_name": "mod", + "variable": "model_as_string", + "uid": "873ce128-d626-4490-a34e-1276f3048a8b", + "order": 1, + "description": "" + } + }, + { + "pk": 19, + "model": "workflows.abstractwidget", + "fields": { + "category": 4, + "treeview_image": "", + "name": "Print Tree", + "is_streaming": false, + "uid": "bc3756b6-7f76-48ed-ae71-8bdae8870b23", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "print_tree", + "wsdl": "http://vihar.ijs.si:8092/Utilities?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 62, + "model": "workflows.abstractinput", + "fields": { + "widget": 19, + "name": "Timeout", + "short_name": "to", + "uid": "b0aa8640-b529-4433-b795-f1f022939fc3", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 63, + "model": "workflows.abstractinput", + "fields": { + "widget": 19, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "3ce46037-9612-4b55-8297-1a604d545a64", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 64, + "model": "workflows.abstractinput", + "fields": { + "widget": 19, + "name": 
"tree_model", + "short_name": "tre", + "uid": "2910d5a9-1735-416f-8ec4-cc2afbd5e977", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "tree_model", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 24, + "model": "workflows.abstractoutput", + "fields": { + "widget": 19, + "name": "tree", + "short_name": "tre", + "variable": "tree", + "uid": "dc917831-9cb4-457b-9b40-4acb54d23335", + "order": 1, + "description": "" + } + }, + { + "pk": 20, + "model": "workflows.abstractwidget", + "fields": { + "category": 4, + "treeview_image": "", + "name": "Weka Instances To Arff", + "is_streaming": false, + "uid": "4d855dc4-0544-484a-914e-79487e8bb982", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": 1, + "visualization_view": "", + "action": "call_webservice", + "wsdl_method": "weka_instances_to_arff", + "wsdl": "http://vihar.ijs.si:8092/Utilities?wsdl", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 65, + "model": "workflows.abstractinput", + "fields": { + "widget": 20, + "name": "Timeout", + "short_name": "to", + "uid": "8bb6803e-eb93-4809-8b1b-a8d9f74e1910", + "default": "60", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "timeout", + "parameter": true, + "order": 1, + "description": "" + } + }, + { + "pk": 66, + "model": "workflows.abstractinput", + "fields": { + "widget": 20, + "name": "Send empty strings to webservices", + "short_name": "ses", + "uid": "306aa32b-0c75-41e7-8a12-1dbad868124c", + "default": "", + "required": false, + "multi": false, + "parameter_type": "checkbox", + "variable": "sendemptystrings", + "parameter": true, + "order": 2, + "description": "" + } + }, + { + "pk": 67, + "model": "workflows.abstractinput", + "fields": { + "widget": 20, + "name": "instances", + "short_name": "ins", + "uid": 
"0ba276f9-f0c5-4d1e-ada8-7398e3ea3d72", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "instances", + "parameter": false, + "order": 3, + "description": "" + } + }, + { + "pk": 25, + "model": "workflows.abstractoutput", + "fields": { + "widget": 20, + "name": "arff", + "short_name": "arf", + "variable": "arff", + "uid": "bfbd3109-e756-4a38-bcaf-b2d5883eda77", + "order": 1, + "description": "" + } + }, + { + "pk": 97, + "model": "workflows.abstractwidget", + "fields": { + "category": 4, + "treeview_image": "", + "name": "Weka confusion matrix computations", + "is_streaming": false, + "uid": "b62a32e0-ab1b-4bf9-b2f2-ed699d4b05be", + "interaction_view": "", + "image": "", + "package": "weka", + "static_image": "", + "post_interact_action": "", + "user": null, + "visualization_view": "", + "action": "weka_statistics", + "wsdl_method": "", + "wsdl": "", + "interactive": false, + "has_progress_bar": false, + "order": 1, + "description": "" + } + }, + { + "pk": 186, + "model": "workflows.abstractinput", + "fields": { + "widget": 97, + "name": "Summary", + "short_name": "sum", + "uid": "173d8ec1-24ce-48bf-a77b-ce948f20b32f", + "default": "", + "required": false, + "multi": false, + "parameter_type": null, + "variable": "summary", + "parameter": false, + "order": 1, + "description": "The summary output of the Cross Validate widget" + } + }, + { + "pk": 187, + "model": "workflows.abstractinput", + "fields": { + "widget": 97, + "name": "Class Index", + "short_name": "ci", + "uid": "ef5fbfe5-72dc-4e3d-9d88-239caeeed56e", + "default": "-1", + "required": false, + "multi": false, + "parameter_type": "text", + "variable": "classIndex", + "parameter": true, + "order": 2, + "description": "Class index. If -1, it will return a weighted average." 
+ } + }, + { + "pk": 104, + "model": "workflows.abstractoutput", + "fields": { + "widget": 97, + "name": "Precision", + "short_name": "pre", + "variable": "precision", + "uid": "fbd6d60c-251d-41ab-a786-01760a0110ef", + "order": 1, + "description": "" + } + }, + { + "pk": 105, + "model": "workflows.abstractoutput", + "fields": { + "widget": 97, + "name": "Recall", + "short_name": "rec", + "variable": "recall", + "uid": "4f3e18a4-fcc0-44e1-9c37-0f6d53127a94", + "order": 2, + "description": "" + } + }, + { + "pk": 106, + "model": "workflows.abstractoutput", + "fields": { + "widget": 97, + "name": "F-score", + "short_name": "f", + "variable": "f", + "uid": "9ef277a3-ca41-443e-b975-3c6b6d1f4ff2", + "order": 3, + "description": "" + } + }, + { + "pk": 107, + "model": "workflows.abstractoutput", + "fields": { + "widget": 97, + "name": "ROC area", + "short_name": "auc", + "variable": "auc", + "uid": "c602bc74-f0ff-4cac-9a53-900a777fe236", + "order": 4, + "description": "" + } + }, + { + "pk": 108, + "model": "workflows.abstractoutput", + "fields": { + "widget": 97, + "name": "TP rate", + "short_name": "tp", + "variable": "tp_rate", + "uid": "401a36ad-3bbf-49b4-b6a1-1eda084683fc", + "order": 5, + "description": "" + } + }, + { + "pk": 109, + "model": "workflows.abstractoutput", + "fields": { + "widget": 97, + "name": "FP rate", + "short_name": "fp", + "variable": "fp_rate", + "uid": "41a9a618-0874-4665-b580-f181cd20202c", + "order": 6, + "description": "" + } + } +] \ No newline at end of file diff --git a/workflows/weka/interaction_views.py b/workflows/weka/interaction_views.py new file mode 100644 index 0000000000000000000000000000000000000000..2c56b622c0488251452312863917fa6e004a7329 --- /dev/null +++ b/workflows/weka/interaction_views.py @@ -0,0 +1,4 @@ +from django.shortcuts import render + +def weka_filter_integers(request,input_dict,output_dict,widget): + return render(request, 
def weka_statistics(input_dict):
    """Extract one row of statistics from a Weka accuracy report.

    The report text is read from ``input_dict['summary']``.  Its first three
    lines are skipped as headings and trailing blank lines are ignored.  Each
    remaining line is treated as one row of whitespace-separated columns,
    read in the order: TP rate, FP rate, precision, recall, F-score, ROC area.

    ``input_dict['classIndex']`` is converted with ``int()``.  A value of 0 or
    greater selects the row at that position.  Any negative value selects the
    last row, whose first two tokens are skipped as a label (presumably
    "Weighted Avg." -- confirm against the web service output).

    Returns a dict with the keys ``precision``, ``recall``, ``auc``,
    ``tp_rate``, ``fp_rate`` and ``f``.  The values are the column strings
    exactly as they appear in the report; nothing is converted to a number.

    Raises:
        KeyError: if ``summary`` or ``classIndex`` is missing.
        ValueError: if ``classIndex`` cannot be converted to an integer.
        IndexError: if the report has no statistics rows, the class index is
            out of range, or the selected row has fewer than six columns.
    """
    summary = input_dict['summary']
    class_index = int(input_dict['classIndex'])

    rows = summary.split('\n')[3:]
    # Drop only blank trailing lines.  Blindly popping the last line threw
    # away the average row whenever the report had no trailing newline, and
    # left a blank "row" behind when it had more than one.
    while rows and not rows[-1].strip():
        rows.pop()
    if not rows:
        raise IndexError('summary contains no statistics rows')

    if class_index > -1:
        columns = rows[class_index].split()
    else:
        # The last row starts with a two-token label; skip it so the numeric
        # columns line up with the per-class rows.
        columns = rows[-1].split()[2:]

    # Explicit indexing keeps IndexError as the failure mode for short rows.
    return {
        'precision': columns[2],
        'recall': columns[3],
        'auc': columns[5],
        'tp_rate': columns[0],
        'fp_rate': columns[1],
        'f': columns[4],
    }
auto_import_package_data_files should be corrected +AUTO_IMPORT_DB_FILES = [os.path.join(PACKAGE_ROOT,'db/package_data.json')] + diff --git a/workflows/weka/static/weka/icons/treeview/construction_work .png b/workflows/weka/static/weka/icons/treeview/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..69bd351914a95f81eb1941f6e7908474916f6116 Binary files /dev/null and b/workflows/weka/static/weka/icons/treeview/construction_work .png differ diff --git a/workflows/weka/static/weka/icons/widget/construction_work .png b/workflows/weka/static/weka/icons/widget/construction_work .png new file mode 100644 index 0000000000000000000000000000000000000000..bc16d376995c1545972b60487ee8cd653177b407 Binary files /dev/null and b/workflows/weka/static/weka/icons/widget/construction_work .png differ diff --git a/workflows/weka/templates/interactions/weka_filter_integers.html b/workflows/weka/templates/interactions/weka_filter_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..33cb05ab4661eb9823647f3fa19b6fa3ce710dcb --- /dev/null +++ b/workflows/weka/templates/interactions/weka_filter_integers.html @@ -0,0 +1,8 @@ +
+
+{% for i in intList %} +{{i}}
+{% endfor %} + +
+
\ No newline at end of file diff --git a/workflows/weka/templates/visualizations/weka_display_integers.html b/workflows/weka/templates/visualizations/weka_display_integers.html new file mode 100644 index 0000000000000000000000000000000000000000..e05b226ccb2a3d578b4de3c436957deb0694a9b1 --- /dev/null +++ b/workflows/weka/templates/visualizations/weka_display_integers.html @@ -0,0 +1,28 @@ +
+
+ + + {% for i in input_dict.intList %} + + + + + {% endfor %} + + + + +
+ {% if forloop.first %} {% else %}+{% endif %} + + {{ i }} +
+ = + + {{ input_dict.sum }} +
+
+{{ check }} + +
+
def weka_display_summation(request,input_dict,output_dict,widget):
    """Render the integer-summation visualization with a correctness verdict.

    Compares the sum of ``input_dict['intList']`` with ``input_dict['sum']``
    and passes the resulting message to the template as ``check``, together
    with ``widget``, ``input_dict`` and ``output_dict``.
    """
    verdict = (
        'The calculation appears correct.'
        if sum(input_dict['intList']) == input_dict['sum']
        else 'The calculation appears incorrect!'
    )
    context = {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict,
        'check': verdict,
    }
    return render(request, 'visualizations/weka_display_integers.html', context)