Commit c40f4c76 authored by Izidorf's avatar Izidorf

bacon

parent 9f42806f
# naive bayes
# from sklearn import datasets
# iris = datasets.load_iris()
# from sklearn.naive_bayes import GaussianNB
# gnb = GaussianNB()
# y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)
# print "Number of mislabeled points : %d" % (iris.target != y_pred).sum()
#print iris
#print iris.data
# print iris.target
# this code converts data from the csv file into scikit learn dataset and returns it as a tuple
def getDataFromCSV(filename='iris.csv'):
    """Read a numeric CSV file and repackage it as a scikit-learn style dataset.

    Each row is split into its feature values (all columns but the last)
    and its class value (the last column).

    filename: path of the CSV file to read; defaults to 'iris.csv' so
        existing callers keep their old behaviour.
    Returns a dict with one key, "data", holding the tuple
        (n_sample, n_feature, attributes): n_sample is a list of per-row
        feature arrays, n_feature a list of class values, and attributes
        an (always empty) list of attribute names.
    """
    from numpy import genfromtxt
    my_data = genfromtxt(filename, delimiter=',')
    n_sample = []
    n_feature = []
    attributes = []  # no header information is available in the raw CSV
    for row in my_data:
        n_feature.append(row[-1])   # last column is the class value
        n_sample.append(row[:-1])   # remaining columns are the features
    dataset = (n_sample, n_feature, attributes)
    return {"data": dataset}
def naiveBayes(dictionary):
    """Fit a Gaussian naive Bayes model and report its resubstitution error.

    dictionary: {"data": (n_sample, n_feature, ...)} as produced by
        getDataFromCSV().
    Prints the expected and predicted class vectors plus the number of
    mislabeled training points; returns nothing.
    """
    dataset = dictionary["data"]
    n_sample = dataset[0]
    n_feature = dataset[1]
    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB()
    # predicting on the training data measures resubstitution accuracy only
    y_pred = gnb.fit(n_sample, n_feature).predict(n_sample)
    # parenthesised print works under both Python 2 and Python 3
    print("Expected Classification")
    print(n_feature)
    print("Predicted Classification")
    print(y_pred)
    # y_pred is a numpy array, so the comparison broadcasts elementwise
    print("Number of mislabeled points : %d" % (n_feature != y_pred).sum())
#data = getDataFromCSV()
#print data
# ********************* ************************* ***********************
# GENERATE DECISION TREE PDF
# def decisionTreeJ48():
# from sklearn.datasets import load_iris
# from sklearn import tree
# iris = load_iris()
# clf = tree.DecisionTreeClassifier()
# clf = clf.fit(iris.data, iris.target)
# from StringIO import StringIO
# out = StringIO()
# out = tree.export_graphviz(clf, out_file=out)
# print out.getvalue()
# import StringIO, pydot
# dot_data = StringIO.StringIO()
# tree.export_graphviz(clf, out_file=dot_data)
# graph = pydot.graph_from_dot_data(dot_data.getvalue())
# graph.write_pdf("iris.pdf")
# #graph_from_dot_data(out.getvalue()).write_pdf("iris.pdf")
# decisionTreeJ48()
# GENERATE DECISION TREE PNG
def decisionTreeJ48(dictionary):
    """Fit a decision tree on the dataset and render it to a PNG image.

    Writes the tree as Graphviz source to decisionTreeJ48-scikit.dot and
    shells out to `dot` to produce decisionTreeJ48-scikit.png; requires
    the Graphviz `dot` binary on PATH.  Returns nothing.

    dictionary: {"data": (n_sample, n_feature, ...)}.
    """
    dataset = dictionary["data"]
    n_sample = dataset[0]
    n_feature = dataset[1]
    from sklearn import tree
    from os import system
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(n_sample, n_feature)
    # the original also exported into an unused StringIO buffer and imported
    # pydot without using it; only the .dot file export is needed, and the
    # with-block guarantees the handle is closed even on error
    with open("decisionTreeJ48-scikit.dot", 'w') as dotfile:
        tree.export_graphviz(clf, out_file=dotfile)
    # fixed, program-controlled filenames, so the shell command is safe
    system("dot -Tpng decisionTreeJ48-scikit.dot -o decisionTreeJ48-scikit.png")
# data = getDataFromCSV()
# decisionTreeJ48(data)
def ScikitDatasetToCSV(dictionary, filename="foo.csv"):
    """Flatten a (samples, targets, ...) dataset back into a CSV file.

    Each output row is one sample's feature values followed by its class
    value as the last column.

    dictionary: {"data": (n_sample, n_feature, ...)}.
    filename: output path; defaults to "foo.csv" to keep old behaviour.
    Returns nothing; the CSV file is written as a side effect.
    """
    dataset = dictionary["data"]
    n_sample = dataset[0]
    n_feature = dataset[1]
    import numpy
    # join each sample with its class value (replaces the manual counter loop)
    rows = [numpy.append(sample, n_feature[i]) for i, sample in enumerate(n_sample)]
    numpy.savetxt(filename, rows, delimiter=",")
#data = getDataFromCSV()
# def ScikitDatasetToTAB(dictionary):
# dataset = dictionary["data"]
# n_sample = dataset[0]
# n_feature = dataset[1]
# import numpy
# csv=[]
# count=0
# for sample in n_sample:
# csv.append(numpy.append(sample,n_feature[count])) #join n_sample and n_feature array
# count+=1
# numpy.savetxt("foo.csv", csv, delimiter=" ")
# my_data = genfromtxt("foo.csv", delimiter=" ")
# print my_data
# ScikitDatasetToTAB(data)
def NNK(input_dict):
    """Fit a k-nearest-neighbours classifier with default parameters on the
    dataset in input_dict["data"] and print the fitted estimator.

    input_dict: {"data": (n_sample, n_feature, ...)}.  Returns nothing.
    """
    dataset = input_dict["data"]
    n_sample = dataset[0]
    n_feature = dataset[1]
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier().fit(n_sample, n_feature)
    print(knn)  # parenthesised so the code also runs under Python 3
def SVC(input_dict):
    """Fit a support-vector classifier with default parameters on the
    dataset in input_dict["data"] and print the fitted estimator.

    input_dict: {"data": (n_sample, n_feature, ...)}.  Returns nothing.
    """
    dataset = input_dict["data"]
    n_sample = dataset[0]
    n_feature = dataset[1]
    # the local import shadows this function's own name inside the body only
    from sklearn.svm import SVC
    clf = SVC().fit(n_sample, n_feature)
    print(clf)  # parenthesised so the code also runs under Python 3
# SVC(data)
def nearestNeighbour():
    """Nearest-centroid classification demo on the iris dataset.

    Fits sklearn's NearestCentroid on the first two iris features, prints
    the training accuracy for two shrink_threshold settings, and builds a
    decision-boundary plot for each (pl.show() is commented out, so the
    figures are created but never displayed).  Takes no arguments and
    returns nothing.
    """
    import numpy as np
    import pylab as pl
    from matplotlib.colors import ListedColormap
    from sklearn import datasets
    from sklearn.neighbors import NearestCentroid
    n_neighbors = 15  # NOTE(review): unused — leftover from the k-NN variant of this demo
    # import some data to play with
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features. We could
                          # avoid this ugly slicing by using a two-dim dataset
    y = iris.target
    h = .02  # step size in the mesh used for the decision-boundary plot
    # Create color maps (light for regions, bold for training points)
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    for shrinkage in [None, 0.1]:
        # we create an instance of the NearestCentroid classifier and fit the data
        clf = NearestCentroid(shrink_threshold=shrinkage)
        clf.fit(X, y)
        y_pred = clf.predict(X)
        # Python 2 print statement: shrinkage setting and training accuracy
        print shrinkage, np.mean(y == y_pred)
        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        pl.figure()
        pl.pcolormesh(xx, yy, Z, cmap=cmap_light)
        # Plot also the training points
        pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
        pl.title("3-Class classification (shrink_threshold=%r)"
                 % shrinkage)
        pl.axis('tight')
        # pl.show()
#nearestNeighbour()
#
# REGRESSION EXAMPLES
#
#L2-regularized least squares linear model
def RidgeRegression(input_dict):
    """Fit ridge regression (L2-regularised least squares) on the diabetes
    toy dataset and print the data, the model and its predictions.

    input_dict: accepted for interface compatibility but not used — the
        function always loads the built-in diabetes dataset.
    Returns nothing.
    """
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge
    diabetes = load_diabetes()
    n_sample = diabetes.data
    n_feature = diabetes.target
    # parenthesised print works under both Python 2 and Python 3
    print("*******SAMPLES********")
    print(n_sample)
    print("******FEATURES*******")  # typo "FEARTURES" fixed
    print(n_feature)
    rgs = Ridge().fit(n_sample, n_feature)
    print(rgs)
    print(rgs.predict(n_sample))
# L1+L2-regularized least squares linear model trained using Coordinate Descent
def ElasticNetRegression(input_dict):
    """Fit an ElasticNet model (L1+L2-regularised least squares, trained by
    coordinate descent) on the diabetes toy dataset and print the data,
    the model and its predictions.

    input_dict: accepted for interface compatibility but not used — the
        function always loads the built-in diabetes dataset.
    Returns nothing.
    """
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import ElasticNet
    dta = load_diabetes()
    n_sample = dta.data
    n_feature = dta.target
    # parenthesised print works under both Python 2 and Python 3
    print("*******SAMPLES********")
    print(n_sample)
    print("******FEATURES*******")  # typo "FEARTURES" fixed
    print(n_feature)
    rgs = ElasticNet().fit(n_sample, n_feature)
    print(rgs)
    print(rgs.predict(n_sample))
# ElasticNetRegression(data)
def ClusteringKMeans():
    """Cluster the iris samples with k-means (3 clusters) and print the
    assigned cluster labels next to the true class labels.  Returns nothing.
    """
    from sklearn import cluster, datasets
    iris = datasets.load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # pass the cluster count positionally: the keyword was `k` in old
    # scikit-learn releases and `n_clusters` in newer ones
    k_means = cluster.KMeans(3)
    k_means.fit(X_iris)
    print(k_means.labels_)  # parenthesised for Python 2/3 compatibility
    print(y_iris)
#ClusteringKMeans()
def ClusteringMS():
    """Cluster the iris samples with spectral clustering (3 clusters) and
    print the assigned cluster labels next to the true class labels.
    Returns nothing.
    """
    from sklearn import cluster, datasets
    iris = datasets.load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # renamed the local from the misleading `k_means` — this is spectral clustering
    model = cluster.SpectralClustering(3)
    model.fit(X_iris)
    print(model.labels_)  # parenthesised for Python 2/3 compatibility
    print(y_iris)
#ClusteringMS()
def test():
    """Compare KNN and logistic regression on the digits dataset using a
    90/10 train/test split, printing both scores.  Returns nothing."""
    print(__doc__)
    from sklearn import datasets, neighbors, linear_model
    digits = datasets.load_digits()
    X_digits = digits.data
    y_digits = digits.target
    n_samples = len(X_digits)
    # slice bounds must be integers: `X_digits[:.9 * n_samples]` passes a
    # float index, which raises TypeError on modern Python/NumPy
    split = int(.9 * n_samples)
    X_train = X_digits[:split]
    y_train = y_digits[:split]
    X_test = X_digits[split:]
    y_test = y_digits[split:]
    knn = neighbors.KNeighborsClassifier()
    logistic = linear_model.LogisticRegression()
    print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))
    print('LogisticRegression score: %f'
          % logistic.fit(X_train, y_train).score(X_test, y_test))
#test()
def scikitAlgorithms_UCIDataset(var):
    """Return one of scikit-learn's bundled toy datasets by name.

    var: one of "iris", "boston", "diabetes"; any other key raises
        KeyError, as before.
    Returns the loaded dataset object.

    Fixes two defects: the original mapped "boston" to load_diabetes()
    and "diabetes" to load_boston() (swapped), and eagerly loaded every
    dataset just to pick one — here only the requested loader is called.
    """
    from sklearn import datasets
    loaders = {"iris": datasets.load_iris,
               "boston": datasets.load_boston,
               "diabetes": datasets.load_diabetes}
    output_dict = loaders[var]()
    return output_dict
#iris = scikitAlgorithms_UCIDataset("iris")
#print iris
def retrunRightType(value):
    """Coerce a string to int or float when possible.

    Strings containing '.' are tried as float, all others as int; anything
    that fails to parse is kept as the original string.  The parsed value
    is printed and also returned (returning it is new but backward
    compatible — previous callers ignored the implicit None).
    """
    try:
        a = float(value) if '.' in value else int(value)
    except ValueError:
        a = value  # not numeric: keep the raw string
    # %-formatting keeps the output identical under Python 2 and 3
    print("input par %s" % (a,))
    print("is type of %s" % (type(a),))
    return a
retrunRightType("ahas")
#print "input par", a
#print "is type of", type(a)
import re
# def scikitAlgorithms_create_integers(input_dict):
# intStr = input_dict['intStr']
# intList = []
# for i in re.findall(r'\w+', intStr):
# try:
# intList.append(int(i))
# except:
# pass
# if input_dict['sort'].lower() == "true":
# intList.sort()
# return {'intList':intList}
# def scikitAlgorithms_sum_integers(input_dict):
# intList = input_dict['intList']
# return {'sum':sum(intList)}
# def scikitAlgorithms_pre_filter_integers(input_dict):
# return input_dict
# def scikitAlgorithms_post_filter_integers(postdata,input_dict,output_dict):
# intListOut = postdata['intListOut']
# intList = []
# for i in intListOut:
# try:
# intList.append(int(i))
# except:
# pass
# return {'intList': intList}
# def scikitAlgorithms_pre_display_summation(input_dict):
# return {}
# def scikitAlgorithms_sumTwoIntegers(input_dict):
# output_dict = {}
# output_dict['sum'] = int(input_dict['int1']) + int(input_dict['int2'])
# return output_dict
# def scikitAlgorithms_myFirstAction(input_dict):
# output_dict={}
# output_dict['out1'] = input_dict['inp1']
# return output_dict
#
# CLASSIFICATION ALGORITHMS
......@@ -56,35 +14,40 @@ def scikitAlgorithms_naiveBayes(input_dict):
def scikitAlgorithms_J48(input_dict):
    """Build (without fitting) a DecisionTreeClassifier from widget inputs.

    input_dict["featureIn"]: max_features — parsed as float if it contains
        '.', as int otherwise, and kept as the raw string (e.g. "sqrt")
        when it is not numeric.
    input_dict["depthIn"]: max_depth, converted to int.
    Returns {'treeOut': classifier}.
    """
    from sklearn import tree
    # parse the max_features input and determine its type
    try:
        featureValue = float(input_dict["featureIn"]) if '.' in input_dict["featureIn"] else int(input_dict["featureIn"])
    except ValueError:
        featureValue = input_dict["featureIn"]  # keep as string
    # the parameterless DecisionTreeClassifier() the original built first
    # was immediately overwritten — dead code removed
    clf = tree.DecisionTreeClassifier(max_features=featureValue, max_depth=int(input_dict["depthIn"]))
    output_dict = {}
    output_dict['treeOut'] = clf
    return output_dict
def scikitAlgorithms_linearSVC(input_dict):
    """Build (without fitting) a LinearSVC from widget inputs.

    Uses penaltyIn (C, float), lossIn (loss), normIn (penalty) and
    classIn (multi_class).  Returns {'SVCout': classifier}.
    """
    from sklearn.svm import LinearSVC
    # the parameterless LinearSVC() the original built first was dead code
    clf = LinearSVC(C=float(input_dict["penaltyIn"]), loss=input_dict["lossIn"], penalty=input_dict["normIn"], multi_class=input_dict["classIn"])
    output_dict = {}
    output_dict['SVCout'] = clf
    return output_dict
def scikitAlgorithms_SVC(input_dict):
    """Build (without fitting) an SVC from widget inputs.

    Uses penaltyIn (C, float), kernelIn (kernel, str) and degIn
    (degree, int).  Returns {'SVCout': classifier}.
    """
    from sklearn.svm import SVC
    # the parameterless SVC() the original built first was dead code
    clf = SVC(C=float(input_dict["penaltyIn"]), kernel=str(input_dict["kernelIn"]), degree=int(input_dict["degIn"]))
    output_dict = {}
    output_dict['SVCout'] = clf
    return output_dict
def scikitAlgorithms_kNearestNeighbors(input_dict):
    """Build (without fitting) a KNeighborsClassifier from widget inputs.

    Uses numNeib (n_neighbors, int), wgIn (weights) and algIn (algorithm).
    Returns {'KNNout': classifier}.
    """
    from sklearn.neighbors import KNeighborsClassifier
    # the parameterless KNeighborsClassifier() built first was dead code
    knn = KNeighborsClassifier(n_neighbors=int(input_dict['numNeib']), weights=input_dict['wgIn'], algorithm=input_dict['algIn'])
    output_dict = {}
    output_dict['KNNout'] = knn
    return output_dict
def scikitAlgorithms_logiscticRegression(input_dict):
    """Build (without fitting) a LogisticRegression model from widget inputs.

    Uses penIn (penalty, str) and cIn (C, float).  Returns {'LRout': model}.
    (Name kept as-is — "logisctic" typo included — for interface stability.)
    """
    from sklearn.linear_model import LogisticRegression
    # the parameterless LogisticRegression() built first was dead code
    clf = LogisticRegression(penalty=str(input_dict["penIn"]), C=float(input_dict["cIn"]))
    output_dict = {}
    output_dict['LRout'] = clf
    return output_dict
......@@ -108,8 +71,8 @@ def scikitAlgorithms_ElasticNet(input_dict):
return output_dict
def scikitAlgorithms_LassoLARS(input_dict):
    """Build (without fitting) a LassoLars model from widget inputs.

    Uses authIn (alpha, float).  Returns {'out': model}.
    The class is spelled LassoLars — the earlier
    `from sklearn.linear_model import LassoLARS` named a class that does
    not exist and would raise ImportError.
    """
    from sklearn.linear_model import LassoLars
    clf = LassoLars(alpha=float(input_dict["authIn"]))
    output_dict = {}
    output_dict['out'] = clf
    return output_dict
......@@ -123,13 +86,13 @@ def scikitAlgorithms_SGDRegressor(input_dict):
def scikitAlgorithms_ARDRegression(input_dict):
    """Build (without fitting) an ARDRegression model from widget inputs.

    Uses iterIn (n_iter, int).  Returns {'out': model}.
    """
    from sklearn.linear_model import ARDRegression
    # the parameterless ARDRegression() built first was dead code
    clf = ARDRegression(n_iter=int(input_dict["iterIn"]))
    output_dict = {}
    output_dict['out'] = clf
    return output_dict
def scikitAlgorithms_SVR(input_dict):
    """Build (without fitting) a default SVR model.  Returns {'out': model}.

    SVR lives in sklearn.svm; the earlier import from sklearn.linear_model
    would have raised ImportError at call time.
    """
    from sklearn.svm import SVR
    clf = SVR()
    output_dict = {}
    output_dict['out'] = clf
    return output_dict
......@@ -153,10 +116,10 @@ def scikitAlgorithms_KMeans(input_dict):
def scikitAlgorithms_UCIDataset(input_dict):
    """Return a bundled scikit-learn toy dataset chosen by input_dict['dsIn'].

    Valid keys: "iris", "boston", "diabetes" and " linnerud" (the leading
    space is preserved because existing widget definitions use that key).
    Returns {'dtsOut': dataset} with the full dataset object (not just the
    (data, target) pair, which downstream widgets no longer expect).
    """
    from sklearn import datasets
    # map names to loader functions so only the requested dataset is loaded,
    # instead of eagerly loading all four just to pick one
    loaders = {"iris": datasets.load_iris,
               "boston": datasets.load_boston,
               "diabetes": datasets.load_diabetes,
               " linnerud": datasets.load_linnerud}
    dataset = loaders[input_dict['dsIn']]()
    output_dict = {}
    output_dict['dtsOut'] = dataset  # previously (dataset.data, dataset.target)
    return output_dict
def scikitAlgorithms_CSVtoNumpy(input_dict):
......@@ -187,8 +150,8 @@ def scikitAlgorithms_SVMtoScikitDataset(input_dict):
def scikitAlgorithms_buildClassifier(input_dict):
learner = input_dict['learner']
data = input_dict['instances']
n_sample = data[0]
n_feature = data[1]
n_sample = data["data"]
n_feature = data["target"]
classifier = learner.fit(n_sample, n_feature) #.predict(n_sample)
......@@ -199,11 +162,11 @@ def scikitAlgorithms_applyClassifier(input_dict):
classifier = input_dict['classifier']
data = input_dict['data']
data["target"] = classifier.predict(data["data"])
y_pred = (data[0], classifier.predict(data[0]))
new_data = y_pred #"Number of mislabeled points : %d" % (data[0] != y_pred).sum()
output_dict = {'classes':new_data}
new_data = (data["data"], classifier.predict(data["data"]))
output_dict = {'classes':data}
return output_dict
def scikitAlgorithms_scikitDatasetToCSV(input_dict):
......
......@@ -68,9 +68,10 @@ def scikitAlgorithms_displayDS(request,input_dict,output_dict,widget):
def helperDisplayDS(data):
#get data to fill table
data = data['data']
n_sample = data[0]
n_feature = data[1]
info = data['data']
n_sample = info["data"]
n_feature = info["target"]
# join data in the right format
import numpy
csv=[]
......@@ -86,6 +87,31 @@ def helperDisplayDS(data):
return {'attrs':attrs, 'metas':metas, 'data_new':data_new, 'class_var':class_var}
# def helperDisplayDS(data):
# #get data to fill table
# info = data['data']
# n_sample = info["data"]
# n_feature = info["target"]
# attrs = info["feature_names"]
# # join data in the right format
# import numpy
# csv=[]
# count=0
# for sample in n_sample:
# csv.append(numpy.append(sample,n_feature[count])) #join n_sample and n_feature array
# count+=1
# # attrs = ["attribute" for i in range(len(n_sample[0]))] #name of attributes
# class_var = ["class" for i in range(len([n_sample[0]]))]
# metas = ''
# data_new = csv #fill table with data
#
# return {'attrs':attrs, 'metas':metas, 'data_new':data_new, 'class_var':class_var}
def orng_table_to_dict(data):
import Orange
attrs, metas, data_new = [], [], []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment