DataAnalyzer.py 4.58 KB
Newer Older
Hurstel's avatar
Hurstel committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
'''
Created on 28 August 2014

@author: hurstel
'''

import bisect
import math

class DataAnalyzer(object):
    """
    Abstract base class for signal-specific data analyzers.

    Sub-classes are expected to override 'findCluster()' (and optionally
    'probableSignalClusterSignificantPos()') for the signal type they
    handle. This base class already provides 'compute()', which derives
    the generic statistics (average, median, standard deviation) of the
    sampled data.
    """

    class Error(object):
        """
        Namespace class grouping the exceptions related to DataAnalyzer's
        methods.
        """

        class CallFindButNoCompute(Exception):
            """
            Error meant to be raised when the cluster search
            ('findCluster()') is requested although the basic values
            have not been computed yet (i.e. 'compute()' was not called).
            """

            def __init__(self):
                """
                Constructor - takes no argument.
                """
                Exception.__init__(self)

            def __str__(self):
                """
                Return the human readable description of the error.
                """
                # Adjacent literals are used instead of a backslash
                # continuation inside the string, which would embed the
                # source indentation into the message.
                return ("Cannot look for cluster if the data information "
                        "hasn't been compute yet (call DataAnalyzer.compute() "
                        "before calling DataAnalyzer.findCluster())")

    def __init__(self, data):
        """
        Constructor.

        'data' is the iterable of numeric values sampled for a single
        entry (described by the original author as an 'int array').
        """
        # Private attribute: the sampled data to analyse.
        self._data = data
        # Private attribute: True once 'compute()' has successfully filled
        # 'average', 'median' and 'std_dev'.
        self._compute = False

    def analyze(self):
        """
        Abstract method - always raises NotImplementedError here.
        Intended to hold the signal-specific analysis in child classes.
        --- Deprecated, do not use ---
        """
        raise NotImplementedError

    def compute(self):
        """
        Public method.
        Compute the basic statistics of the sampled dataset ('_data') and
        store them as public attributes:
        - 'average': arithmetic mean (float)
        - 'median' : middle value of the sorted data; for an even number
                     of samples, the mean of the two middle values (float)
        - 'std_dev': population standard deviation (divides by n)

        Sets '_compute' to True on success.

        Raises ZeroDivisionError if the dataset is empty.

        --- IMPORTANT ---
        This method should be called before any other processing method
        (especially 'findCluster()').
        """
        # Materialize once so the data can be traversed several times even
        # if '_data' is a one-shot iterable.
        values = list(self._data)
        count = len(values)

        # Public attribute: the average value of the data set.
        self.average = sum(values) / float(count)

        # Median: needs the values in sorted order.
        ordered = sorted(values)
        # Floor division keeps the index an int on both Python 2 and 3.
        middle = count // 2
        # Public attribute: the median value of the data set.
        if count % 2:
            self.median = ordered[middle]
        else:
            self.median = (ordered[middle - 1] + ordered[middle]) / 2.0

        # Two-pass variance: unlike E[x^2] - E[x]^2 it cannot turn negative
        # through floating point cancellation, so sqrt() never fails.
        mean = self.average
        variance = sum((v - mean) ** 2 for v in values) / float(count)
        # Public attribute: the standard deviation of the data set.
        self.std_dev = math.sqrt(variance)

        self._compute = True

    def findCluster(self):
        """
        Abstract public method - always raises NotImplementedError here.
        Sub-classes implement the search of relevant clusters according
        to the signal type associated to the data set to process.
        Should return a list of said clusters, each cluster being a
        2-tuple (b, e), with b the beginning occurrence of the cluster in
        the dataset and e its ending occurrence.
        """
        raise NotImplementedError

    def probableSignalClusterSignificantPos(self, cluster):
        """
        Public method.
        Return the significant position of 'cluster'; in this base class
        that is simply its first element (the beginning occurrence).
        Override it when the signal type offers a more pertinent way to
        deduce the significant position from a cluster.
        """
        return cluster[0]