Commit 65698c79 authored by Izidorf

scikit-learn

parent 0c7d85bd
K 25
svn:wc:ra_dav:version-url
V 21
/svn/!svn/ver/3/trunk
END
csv2arff.py
K 25
svn:wc:ra_dav:version-url
V 33
/svn/!svn/ver/2/trunk/csv2arff.py
END
10
dir
3
http://csv2arff.googlecode.com/svn/trunk
http://csv2arff.googlecode.com/svn
2008-07-17T09:36:53.100851Z
3
bianchimro
6882ac45-3452-0410-b3ff-f72446878f82
csv2arff.py
file
2013-06-20T08:20:15.000000Z
3cd48889501c569b6bd45ba00a39be7d
2008-07-17T09:32:31.216611Z
2
bianchimro
3968
test_csv2arff
dir
# -*- coding: cp1252 -*-
import csv
import sys
from xml.dom import minidom


def get_attributes(file_xml):
    """Read the <attribute> nodes from the XML description file."""
    out = []
    dom1 = minidom.parse(file_xml)
    for node in dom1.getElementsByTagName('attribute'):
        out.append({
            'name': node.getAttribute('name'),
            'atype': node.getAttribute('atype'),
            'format': node.getAttribute('format'),
            'skip': node.getAttribute('skip')
        })
    return out


def get_relation(file_xml):
    """Return the relation name and CSV delimiter declared in the <csv> node."""
    dom1 = minidom.parse(file_xml)
    out = ''
    delimiter = ''
    for node in dom1.getElementsByTagName('csv'):
        out = node.getAttribute('name')
        delimiter = node.getAttribute('delimiter')
    if len(delimiter) == 0:
        delimiter = ';'
    print delimiter
    return out, delimiter


class csv_arff_converter:
    def __init__(self, csv_file, attribute_file, file_out):
        self.csv_file = csv_file
        self.attribute_file = attribute_file
        self.file_out = file_out

    def run(self):
        classes = []
        # read the attribute description
        self.relation_name, self.delimiter = get_relation(self.attribute_file)
        attributes_list = get_attributes(self.attribute_file)
        arff_data = '@RELATION ' + self.relation_name + '\n\n'
        for i in attributes_list:
            if i['skip'] != 'yes':
                arff_data += '@ATTRIBUTE ' + i['name'] + ' ' + i['atype']
                if i['atype'] == 'date':
                    arff_data += ' ' + i['format']
                if i['atype'] == 'class':
                    # placeholder, replaced below with the collected class values
                    arff_data += ' (#@#' + i['name'] + '#@#)'
                arff_data += '\n'
            # one slot per column, skipped or not, so positions stay aligned
            classes.append('')
        arff_data += '\n@DATA\n'
        print classes
        # open the csv
        reader = csv.reader(open(self.csv_file), delimiter=self.delimiter, quoting=csv.QUOTE_NONE)
        rnum = 0
        for row in reader:
            buff = ''
            pos = 0
            # watch out for the row length: the last field is a trailing empty one
            for j in range(0, len(row) - 1):
                field = row[j]
                if attributes_list[pos]['skip'] != 'yes':
                    if pos > 0:
                        buff += ','
                    if attributes_list[pos]['atype'] == 'string':
                        field = "'" + field + "'"
                    buff += field
                    # if this is a class attribute, collect its values
                    if attributes_list[pos]['atype'] == 'class':
                        if rnum > 0:
                            classes[pos] += ',' + field
                        else:
                            classes[pos] += field
                pos += 1
            buff += '\n'
            arff_data += buff
            rnum += 1
        # substitute each class placeholder with the distinct values seen in the data
        pos = 0
        for a in classes:
            j = a.split(',')
            un = list(set(j))
            if len(un) > 0:
                this_replacement = ",".join(un)
                old_text = '#@#' + attributes_list[pos]['name'] + '#@#'
                arff_data = arff_data.replace(old_text, this_replacement)
            pos += 1
        a = open(self.file_out, 'w')
        a.write(arff_data)
        a.close()


if __name__ == "__main__":
    #csv_file = sys.argv[1]
    #attribute_file = sys.argv[2]
    csv_file = './test_csv2arff/test_dataset_1.csv'
    attribute_file = './test_csv2arff/test_dataset_1.att'
    instance = csv_arff_converter(csv_file, attribute_file, './test_csv2arff/output.arff')
    instance.run()
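The commented-out sys.argv lines in the __main__ block suggest the converter was also meant to be driven from the command line; a minimal sketch of such an entry point, assuming three positional arguments (input CSV, attribute XML, output ARFF) rather than the hard-coded test paths, could look like this:

# Hypothetical CLI wrapper around csv_arff_converter; the argument layout is an
# assumption, only the hard-coded test paths appear in the original script.
import sys
from csv2arff import csv_arff_converter

if __name__ == "__main__":
    if len(sys.argv) != 4:
        print "usage: python csv2arff.py <input.csv> <attributes.att> <output.arff>"
        sys.exit(1)
    instance = csv_arff_converter(sys.argv[1], sys.argv[2], sys.argv[3])
    instance.run()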
K 25
svn:wc:ra_dav:version-url
V 35
/svn/!svn/ver/3/trunk/test_csv2arff
END
test_dataset_1.att
K 25
svn:wc:ra_dav:version-url
V 54
/svn/!svn/ver/3/trunk/test_csv2arff/test_dataset_1.att
END
output.arff
K 25
svn:wc:ra_dav:version-url
V 47
/svn/!svn/ver/3/trunk/test_csv2arff/output.arff
END
test_dataset_1.csv
K 25
svn:wc:ra_dav:version-url
V 54
/svn/!svn/ver/3/trunk/test_csv2arff/test_dataset_1.csv
END
10
dir
3
http://csv2arff.googlecode.com/svn/trunk/test_csv2arff
http://csv2arff.googlecode.com/svn
2008-07-17T09:36:53.100851Z
3
bianchimro
6882ac45-3452-0410-b3ff-f72446878f82
test_dataset_1.att
file
2013-06-20T08:20:15.000000Z
bbc1c0f68459e4837932dd23adc9d878
2008-07-17T09:36:53.100851Z
3
bianchimro
1717
output.arff
file
2013-06-20T08:20:15.000000Z
e13d8ccc4be16ce6f650fd61edc00d9a
2008-07-17T09:36:53.100851Z
3
bianchimro
1030624
test_dataset_1.csv
file
2013-06-20T08:20:15.000000Z
7b517897ae99c70aed31b21030a7ed3e
2008-07-17T09:36:53.100851Z
3
bianchimro
1947734
<?xml version="1.0"?>
<csv name="dataset_1" delimiter=";">
<attribute name="dataora" atype="string" skip="no"/>
<attribute name="eur_usd_aggregates_future_60_diff_massimo" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_usd_aggregates_past_300_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_usd_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="eur_gbp_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_300_close_avg" atype="numeric" />
<attribute name="eur_gbp_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="eur_gbp_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_300_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
</csv>
\ No newline at end of file
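The <csv> element above is exactly what get_relation() and get_attributes() in csv2arff.py consume: name becomes the ARFF @RELATION, delimiter overrides the default ';', and attributes flagged skip="yes" are left out of both the header and the data rows. A short sketch, assuming csv2arff.py is importable and the file sits under ./test_csv2arff/ as in the script's __main__ block, showing what the helpers read from it:

# Inspect what csv2arff's XML helpers extract from the attribute file above.
# Paths follow the test layout used in csv2arff.py; adjust for other setups.
from csv2arff import get_relation, get_attributes

attribute_file = './test_csv2arff/test_dataset_1.att'
name, delimiter = get_relation(attribute_file)      # expected: ('dataset_1', ';')
for att in get_attributes(attribute_file):
    # an empty skip attribute means the column is kept
    print att['name'], att['atype'], att['skip'] or 'no'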
@@ -199,6 +199,7 @@ INSTALLED_APPS_WORKFLOWS_SUB = (
'workflows.mysql',
'workflows.lemmagen',
'workflows.crossbee',
'workflows.scikitAlgorithms',
#WORKFLOWS_SUBAPP_PLACEHOLDER
)
@@ -10,4 +10,10 @@ httplib2==0.7.5
pyparsing==1.5.6
pydot==1.0.28
wsgiref==0.1.2
mysql-connector-python==1.0.9
\ No newline at end of file
mysql-connector-python==1.0.9
# scikit-learn requires
# NumPy>=1.3
# SciPy >= 0.7
# development headers
# working C++ compiler
\ No newline at end of file
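The notes appended to requirements.txt list the scikit-learn prerequisites (NumPy >= 1.3, SciPy >= 0.7, development headers, a working C++ compiler) but the diff shown pins no scikit-learn version itself. A small sketch, with the minimum versions taken from that comment, for checking the installed stack before the scikitAlgorithms package tries to use it:

# Sanity-check the scikit-learn stack named in the requirements comment.
# Minimum versions are those quoted above; the import names are the standard ones.
import numpy
import scipy
import sklearn

print 'NumPy       ', numpy.__version__    # comment asks for >= 1.3
print 'SciPy       ', scipy.__version__    # comment asks for >= 0.7
print 'scikit-learn', sklearn.__version__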
[
    {
        "pk": 20,
        "model": "workflows.category",
        "fields": {
            "uid": "62f6171b-5448-424b-b5cc-88cd63883fd0",
            "parent": null,
            "workflow": null,
            "user": null,
            "order": 1,
            "name": "ScikitAlgorithms"
        }
    },
    {
        "pk": 98,
        "model": "workflows.abstractwidget",
        "fields": {
            "category": 20,
            "treeview_image": "",
            "name": "Create Integer List",
            "is_streaming": false,
            "uid": "d2da92f2-0ea9-409c-910a-78174e70a79d",
            "interaction_view": "",
            "image": "",
            "package": "scikitAlgorithms",
            "static_image": "construction_work .png",
            "post_interact_action": "",
            "user": null,
            "visualization_view": "",
            "action": "scikitAlgorithms_create_integers",
            "wsdl_method": "",
            "wsdl": "",
            "interactive": false,
            "has_progress_bar": false,
            "order": 1,
            "description": ""
        }
    },
    {
        "pk": 248,
        "model": "workflows.abstractinput",
        "fields": {
            "widget": 98,
            "name": "Integer List String",
            "short_name": "str",
            "uid": "d2150953-b5a2-4860-8bcb-c0e96e388dfd",
            "default": "3\r\n2\r\n1\r\n4",
            "required": false,
            "multi": false,
            "parameter_type": "textarea",
            "variable": "intStr",
            "parameter": true,
            "order": 1,
            "description": "Comma or new-line separated list of integers"
        }
    },
    {
        "pk": 253,
        "model": "workflows.abstractinput",
        "fields": {
            "widget": 98,