'''
Created on 28 august 2014
@author: hurstel
'''
import bisect
import math
class DataAnalyzer:
    """
    Abstract base class for signal-specific data analyzers.

    It is a skeleton that sub-classes specialize for a given signal type
    (by overriding 'findCluster()' and, optionally,
    'probableSignalClusterSignificantPos()'), but it already provides the
    generic statistics shared by every analyzer through 'compute()':
    average, median and standard deviation of the sampled data.
    """

    class Error:
        """
        Container class (namespace) for the exceptions raised by
        DataAnalyzer's methods.
        """

        class CallFindButNoCompute(Exception):
            """
            Error that should be raised if the processing
            ('findCluster()' method) is called while the basic values
            have not been computed yet (i.e. 'compute()' was not called).
            """

            def __init__(self):
                """
                Constructor - takes no argument; the message is provided
                by '__str__'.
                """
                Exception.__init__(self)

            def __str__(self):
                """
                Return the fixed, human-readable description of the error.
                """
                return ("Cannot look for cluster if the data information hasn't been compute yet (call "
                        "DataAnalyzer.compute() before calling DataAnalyzer.findCluster()")

    def __init__(self, data):
        """
        Constructor.

        'data' is the sequence of numeric samples recorded for a single
        entry. It is stored as-is (not copied).
        """
        # Private attribute: the sampled data to analyse.
        self._data = data
        # Private attribute: True once 'compute()' has successfully filled
        # in 'average', 'median' and 'std_dev'.
        self._compute = False

    def analyze(self):
        """
        Abstract method - always raises NotImplementedError here.
        Intended for the signal-specific analysis in child classes.
        --- Deprecated, do not use ---
        """
        raise NotImplementedError

    def compute(self):
        """
        Compute the basic statistics of the sampled data set ('_data')
        and store them as public attributes:
            - 'average' : arithmetic mean (float)
            - 'median'  : median value (middle element for an odd number
                          of samples, mean of the two middle elements
                          for an even number)
            - 'std_dev' : population standard deviation (float)

        Sets the private '_compute' flag to True on success.

        Raises ZeroDivisionError if the data set is empty.

        --- IMPORTANT ---
        Good use of this class (and of its sub-classes) is to call this
        method before any other (especially 'findCluster()').
        """
        # Materialize once so that single-pass iterables are supported and
        # the sums are accumulated in the original data order.
        samples = list(self._data)
        count = len(samples)
        if count == 0:
            # Same exception type the unguarded division would produce,
            # but with an explicit explanation.
            raise ZeroDivisionError(
                "cannot compute statistics of an empty data set")

        total = 0
        square_total = 0
        for value in samples:
            total += value
            square_total += value * value

        # Public attribute: the average value of the data set.
        self.average = total / float(count)

        # Median computation: needs the samples in sorted order.
        ordered = sorted(samples)
        middle = count // 2  # floor division: list indices must be ints
        if count % 2 == 0:
            # Even length: mean of the two central values.
            # Public attribute: the median value of the data set.
            self.median = (ordered[middle - 1] + ordered[middle]) / 2.0
        else:
            # Odd length: the central value itself.
            self.median = ordered[middle]

        # Standard deviation computation: sqrt(E[x^2] - E[x]^2).
        variance = (1.0 / count) * square_total - math.pow(self.average, 2)
        # Rounding errors can make the variance of (near-)constant data
        # slightly negative, which would make math.sqrt fail.
        if variance < 0.0:
            variance = 0.0
        # Public attribute: the standard deviation of the data set.
        self.std_dev = math.sqrt(variance)

        self._compute = True

    def findCluster(self):
        """
        Abstract public method - always raises NotImplementedError here.

        Sub-classes implement the search of relevant clusters according
        to the signal type associated with the data set to process.
        Should return a list of said clusters, each cluster being a
        2-tuple (b, e), with b the beginning occurrence of the cluster in
        the data set and e its ending occurrence.
        """
        raise NotImplementedError

    def probableSignalClusterSignificantPos(self, cluster):
        """
        Return the significant position of 'cluster'.

        This default implementation just returns the beginning occurrence
        of the cluster (its first element). Sub-classes should override
        it if there is a more pertinent way to deduce the significant
        position from a cluster for the associated signal type.
        """
        return cluster[0]