Coupure prévue mardi 3 août au matin pour maintenance du serveur. Nous faisons au mieux pour que celle-ci soit la plus brève possible.

Commit 65698c79 authored by Izidorf's avatar Izidorf
Browse files

scikit-learn

parent 0c7d85bd
K 25
svn:wc:ra_dav:version-url
V 21
/svn/!svn/ver/3/trunk
END
csv2arff.py
K 25
svn:wc:ra_dav:version-url
V 33
/svn/!svn/ver/2/trunk/csv2arff.py
END
10
dir
3
http://csv2arff.googlecode.com/svn/trunk
http://csv2arff.googlecode.com/svn
2008-07-17T09:36:53.100851Z
3
bianchimro
6882ac45-3452-0410-b3ff-f72446878f82
csv2arff.py
file
2013-06-20T08:20:15.000000Z
3cd48889501c569b6bd45ba00a39be7d
2008-07-17T09:32:31.216611Z
2
bianchimro
3968
test_csv2arff
dir
# -*- coding: cp1252 -*-
import csv
import sys
from xml.dom import minidom
def get_attributes(file_xml):
    """Parse the XML attribute spec and describe every <attribute> node.

    Returns a list of dicts, one per node, each carrying the string
    values of the 'name', 'atype', 'format' and 'skip' XML attributes
    (missing XML attributes come back as '' from minidom).
    """
    document = minidom.parse(file_xml)
    wanted = ('name', 'atype', 'format', 'skip')
    return [
        dict((key, node.getAttribute(key)) for key in wanted)
        for node in document.getElementsByTagName('attribute')
    ]
def get_relation(file_xml):
    """Read the <csv> node of the XML spec.

    Returns a (relation_name, delimiter) tuple.  The delimiter defaults
    to ';' when the 'delimiter' XML attribute is missing or empty.
    """
    relation_name = ''
    delimiter = ''
    document = minidom.parse(file_xml)
    for node in document.getElementsByTagName('csv'):
        relation_name = node.getAttribute('name')
        delimiter = node.getAttribute('delimiter')
    # Empty string means the attribute was absent: fall back to ';'.
    # (Removed the leftover debug print of the delimiter.)
    if not delimiter:
        delimiter = ';'
    return relation_name, delimiter
class csv_arff_converter:
    """Convert a delimited CSV file into Weka ARFF format.

    The conversion is driven by an XML spec (parsed by get_relation /
    get_attributes) that gives the relation name, the CSV delimiter and,
    for every column: its name, its ARFF type ('atype'), an optional
    date 'format', and whether to 'skip' the column.
    """

    def __init__(self, csv_file, attribute_file, file_out):
        # csv_file       -- path of the input CSV file
        # attribute_file -- path of the XML column spec
        # file_out       -- path of the ARFF file to write
        self.csv_file = csv_file
        self.attribute_file = attribute_file
        self.file_out = file_out

    def run(self):
        """Build the ARFF text from the CSV and write it to self.file_out."""
        # BUG FIX: the original read the module-level global
        # 'attribute_file' here instead of the path stored on the instance.
        self.relation_name, self.delimiter = get_relation(self.attribute_file)
        attributes_list = get_attributes(self.attribute_file)

        # Values seen for each 'class' attribute, keyed by column position.
        # BUG FIX: the original kept these in a list appended only for
        # non-skipped columns but indexed by overall position, which
        # misaligned (or overflowed) as soon as a skipped column preceded
        # a class column.
        classes = {}

        header_lines = ['@RELATION ' + self.relation_name, '']
        for pos, attr in enumerate(attributes_list):
            if attr['skip'] == 'yes':
                continue
            line = '@ATTRIBUTE ' + attr['name'] + ' ' + attr['atype']
            if attr['atype'] == 'date':
                line += ' ' + attr['format']
            if attr['atype'] == 'class':
                # Placeholder, replaced below once all values are known.
                line += ' (#@#' + attr['name'] + '#@#)'
                classes[pos] = []
            header_lines.append(line)
        arff_data = '\n'.join(header_lines) + '\n\n@DATA\n'

        # BUG FIX: the file handle passed to csv.reader was never closed.
        with open(self.csv_file) as handle:
            reader = csv.reader(handle, delimiter=self.delimiter,
                                quoting=csv.QUOTE_NONE)
            for row in reader:
                fields = []
                # NOTE(review): the original deliberately ignores the last
                # CSV column (range(len(row)-1), "occhio alla lunghezza
                # riga" = "mind the row length"); behaviour kept as-is.
                for pos in range(len(row) - 1):
                    if attributes_list[pos]['skip'] == 'yes':
                        continue
                    field = row[pos]
                    if attributes_list[pos]['atype'] == 'string':
                        field = "'" + field + "'"
                    fields.append(field)
                    if pos in classes:
                        classes[pos].append(field)
                # BUG FIX: joining here avoids the leading comma the
                # original emitted whenever column 0 was skipped.
                arff_data += ','.join(fields) + '\n'

        # Substitute the collected class values into the header
        # placeholders (sorted for deterministic output).
        for pos, values in classes.items():
            placeholder = '#@#' + attributes_list[pos]['name'] + '#@#'
            replacement = ','.join(sorted(set(values)))
            arff_data = arff_data.replace(placeholder, replacement)

        with open(self.file_out, 'w') as out_handle:
            out_handle.write(arff_data)
if __name__ == "__main__":
    # Use command-line paths when supplied (csv, attribute spec, optional
    # output); otherwise fall back to the bundled test dataset so the
    # previous no-argument invocation keeps working.
    if len(sys.argv) >= 3:
        csv_file = sys.argv[1]
        attribute_file = sys.argv[2]
        file_out = sys.argv[3] if len(sys.argv) > 3 else './test_csv2arff/output.arff'
    else:
        csv_file = './test_csv2arff/test_dataset_1.csv'
        attribute_file = './test_csv2arff/test_dataset_1.att'
        file_out = './test_csv2arff/output.arff'
    instance = csv_arff_converter(csv_file, attribute_file, file_out)
    instance.run()
# -*- coding: cp1252 -*-
import csv
import sys
from xml.dom import minidom
def get_attributes(file_xml):
    """Parse the XML attribute spec and describe every <attribute> node.

    Returns a list of dicts, one per node, each carrying the string
    values of the 'name', 'atype', 'format' and 'skip' XML attributes
    (missing XML attributes come back as '' from minidom).
    """
    document = minidom.parse(file_xml)
    wanted = ('name', 'atype', 'format', 'skip')
    return [
        dict((key, node.getAttribute(key)) for key in wanted)
        for node in document.getElementsByTagName('attribute')
    ]
def get_relation(file_xml):
    """Read the <csv> node of the XML spec.

    Returns a (relation_name, delimiter) tuple.  The delimiter defaults
    to ';' when the 'delimiter' XML attribute is missing or empty.
    """
    relation_name = ''
    delimiter = ''
    document = minidom.parse(file_xml)
    for node in document.getElementsByTagName('csv'):
        relation_name = node.getAttribute('name')
        delimiter = node.getAttribute('delimiter')
    # Empty string means the attribute was absent: fall back to ';'.
    # (Removed the leftover debug print of the delimiter.)
    if not delimiter:
        delimiter = ';'
    return relation_name, delimiter
class csv_arff_converter:
    """Convert a delimited CSV file into Weka ARFF format.

    The conversion is driven by an XML spec (parsed by get_relation /
    get_attributes) that gives the relation name, the CSV delimiter and,
    for every column: its name, its ARFF type ('atype'), an optional
    date 'format', and whether to 'skip' the column.
    """

    def __init__(self, csv_file, attribute_file, file_out):
        # csv_file       -- path of the input CSV file
        # attribute_file -- path of the XML column spec
        # file_out       -- path of the ARFF file to write
        self.csv_file = csv_file
        self.attribute_file = attribute_file
        self.file_out = file_out

    def run(self):
        """Build the ARFF text from the CSV and write it to self.file_out."""
        # BUG FIX: the original read the module-level global
        # 'attribute_file' here instead of the path stored on the instance.
        self.relation_name, self.delimiter = get_relation(self.attribute_file)
        attributes_list = get_attributes(self.attribute_file)

        # Values seen for each 'class' attribute, keyed by column position.
        # BUG FIX: the original kept these in a list appended only for
        # non-skipped columns but indexed by overall position, which
        # misaligned (or overflowed) as soon as a skipped column preceded
        # a class column.
        classes = {}

        header_lines = ['@RELATION ' + self.relation_name, '']
        for pos, attr in enumerate(attributes_list):
            if attr['skip'] == 'yes':
                continue
            line = '@ATTRIBUTE ' + attr['name'] + ' ' + attr['atype']
            if attr['atype'] == 'date':
                line += ' ' + attr['format']
            if attr['atype'] == 'class':
                # Placeholder, replaced below once all values are known.
                line += ' (#@#' + attr['name'] + '#@#)'
                classes[pos] = []
            header_lines.append(line)
        arff_data = '\n'.join(header_lines) + '\n\n@DATA\n'

        # BUG FIX: the file handle passed to csv.reader was never closed.
        with open(self.csv_file) as handle:
            reader = csv.reader(handle, delimiter=self.delimiter,
                                quoting=csv.QUOTE_NONE)
            for row in reader:
                fields = []
                # NOTE(review): the original deliberately ignores the last
                # CSV column (range(len(row)-1), "occhio alla lunghezza
                # riga" = "mind the row length"); behaviour kept as-is.
                for pos in range(len(row) - 1):
                    if attributes_list[pos]['skip'] == 'yes':
                        continue
                    field = row[pos]
                    if attributes_list[pos]['atype'] == 'string':
                        field = "'" + field + "'"
                    fields.append(field)
                    if pos in classes:
                        classes[pos].append(field)
                # BUG FIX: joining here avoids the leading comma the
                # original emitted whenever column 0 was skipped.
                arff_data += ','.join(fields) + '\n'

        # Substitute the collected class values into the header
        # placeholders (sorted for deterministic output).
        for pos, values in classes.items():
            placeholder = '#@#' + attributes_list[pos]['name'] + '#@#'
            replacement = ','.join(sorted(set(values)))
            arff_data = arff_data.replace(placeholder, replacement)

        with open(self.file_out, 'w') as out_handle:
            out_handle.write(arff_data)
if __name__ == "__main__":
    # Use command-line paths when supplied (csv, attribute spec, optional
    # output); otherwise fall back to the bundled test dataset so the
    # previous no-argument invocation keeps working.
    if len(sys.argv) >= 3:
        csv_file = sys.argv[1]
        attribute_file = sys.argv[2]
        file_out = sys.argv[3] if len(sys.argv) > 3 else './test_csv2arff/output.arff'
    else:
        csv_file = './test_csv2arff/test_dataset_1.csv'
        attribute_file = './test_csv2arff/test_dataset_1.att'
        file_out = './test_csv2arff/output.arff'
    instance = csv_arff_converter(csv_file, attribute_file, file_out)
    instance.run()
K 25
svn:wc:ra_dav:version-url
V 35
/svn/!svn/ver/3/trunk/test_csv2arff
END
test_dataset_1.att
K 25
svn:wc:ra_dav:version-url
V 54
/svn/!svn/ver/3/trunk/test_csv2arff/test_dataset_1.att
END
output.arff
K 25
svn:wc:ra_dav:version-url
V 47
/svn/!svn/ver/3/trunk/test_csv2arff/output.arff
END
test_dataset_1.csv
K 25
svn:wc:ra_dav:version-url
V 54
/svn/!svn/ver/3/trunk/test_csv2arff/test_dataset_1.csv
END
10
dir
3
http://csv2arff.googlecode.com/svn/trunk/test_csv2arff
http://csv2arff.googlecode.com/svn
2008-07-17T09:36:53.100851Z
3
bianchimro
6882ac45-3452-0410-b3ff-f72446878f82
test_dataset_1.att
file
2013-06-20T08:20:15.000000Z
bbc1c0f68459e4837932dd23adc9d878
2008-07-17T09:36:53.100851Z
3
bianchimro
1717
output.arff
file
2013-06-20T08:20:15.000000Z
e13d8ccc4be16ce6f650fd61edc00d9a
2008-07-17T09:36:53.100851Z
3
bianchimro
1030624
test_dataset_1.csv
file
2013-06-20T08:20:15.000000Z
7b517897ae99c70aed31b21030a7ed3e
2008-07-17T09:36:53.100851Z
3
bianchimro
1947734
<?xml version="1.0"?>
<csv name="dataset_1" delimiter=";">
<attribute name="dataora" atype="string" skip="no"/>
<attribute name="eur_usd_aggregates_future_60_diff_massimo" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_usd_aggregates_past_300_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_usd_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="eur_gbp_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_300_close_avg" atype="numeric" />
<attribute name="eur_gbp_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="eur_gbp_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_300_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
</csv>
\ No newline at end of file
This diff is collapsed.
<?xml version="1.0"?>
<csv name="dataset_1" delimiter=";">
<attribute name="dataora" atype="string" skip="no"/>
<attribute name="eur_usd_aggregates_future_60_diff_massimo" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_usd_aggregates_past_300_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_usd_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="eur_usd_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="eur_gbp_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_300_close_avg" atype="numeric" />
<attribute name="eur_gbp_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="eur_gbp_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="eur_gbp_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_30_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_30_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_300_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_300_close_var" atype="numeric" skip="yes"/>
<attribute name="gbp_usd_aggregates_past_1200_close_avg" atype="numeric"/>
<attribute name="gbp_usd_aggregates_past_1200_close_var" atype="numeric" skip="yes"/>
</csv>
\ No newline at end of file
This diff is collapsed.
......@@ -199,6 +199,7 @@ INSTALLED_APPS_WORKFLOWS_SUB = (
'workflows.mysql',
'workflows.lemmagen',
'workflows.crossbee',
'workflows.scikitAlgorithms',
#WORKFLOWS_SUBAPP_PLACEHOLDER
)
......
......@@ -10,4 +10,10 @@ httplib2==0.7.5
pyparsing==1.5.6
pydot==1.0.28
wsgiref==0.1.2
mysql-connector-python==1.0.9
\ No newline at end of file
mysql-connector-python==1.0.9
# scikit-learn requires
# NumPy>=1.3
# SciPy >= 0.7
# development headers
# working C++ compiler
\ No newline at end of file
[
{
"pk": 20,
"model": "workflows.category",
"fields": {
"uid": "62f6171b-5448-424b-b5cc-88cd63883fd0",
"parent": null,
"workflow": null,
"user": null,
"order": 1,