library.py 12.4 KB
Newer Older
1
# -*- coding: utf-8 -*-
Janez K's avatar
Janez K committed
2 3 4 5 6 7
'''
Streaming widgets library

@author: Janez Kranjc <janez.kranjc@ijs.si>
'''

Janez K's avatar
Janez K committed
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
from workflows.security import safeOpen

def streaming_add_neutral_zone(input_dict):
    '''
    Relabel low-reliability tweets as "Neutral".

    input_dict['ltw']  -- list of tweet dicts, each with a 'reliability' entry
    input_dict['zone'] -- reliability threshold (anything float() accepts)

    A tweet whose reliability is below the threshold gets its 'sentiment'
    set to "Neutral", unless the reliability is exactly -1.0, in which case
    the tweet is left as it is.  The input tweets are deep-copied first, so
    the caller's list is not modified.

    Returns {'ltw': <list of the copied tweets>}.
    '''
    from copy import deepcopy

    copied_tweets = deepcopy(input_dict['ltw'])
    threshold = float(input_dict['zone'])

    for entry in copied_tweets:
        score = entry['reliability']
        # -1.0 is exempt from relabelling; so is anything not below the zone.
        if score == -1.0 or not (score < threshold):
            continue
        entry['sentiment'] = "Neutral"

    return {'ltw': list(copied_tweets)}

def streaming_remove_words_from_tweets(input_dict):
    '''
    Strip a list of words from the text of every tweet.

    input_dict['ltw']   -- list of tweet dicts, each with a 'text' entry
    input_dict['words'] -- text with one word per line

    Each non-empty line of 'words' is UTF-8 encoded and used as a
    case-insensitive regular expression (it is NOT escaped, so regex
    metacharacters keep their meaning and an invalid expression raises
    re.error).  Every match is removed from tweet['text'].
    NOTE(review): because the words are encoded to UTF-8 bytes, the tweet
    texts are presumably UTF-8 byte strings as well -- confirm with callers.

    The input tweets are deep-copied first, so the caller's list is not
    modified.

    Returns {'ltw': <list of the cleaned tweet copies>}.
    '''
    import copy
    import re

    tweets = copy.deepcopy(input_dict['ltw'])

    # splitlines() also handles "\r\n" line endings; splitting on "\n" alone
    # left a trailing "\r" on every word, which then never matched.
    words = input_dict['words'].encode("utf-8").splitlines()

    # Compile once instead of once per tweet; blank lines would only yield
    # an empty pattern whose substitution is a no-op, so skip them.
    patterns = [re.compile(word, re.IGNORECASE) for word in words if word]

    ltw = []
    for tweet in tweets:
        text = tweet['text']
        for pattern in patterns:
            text = pattern.sub(b'', text)
        tweet['text'] = text
        ltw.append(tweet)

    return {'ltw': ltw}

def streaming_simulate_stream_from_text_file(input_dict,widget,stream=None):
    '''
    Turn every line of a text file into a simulated tweet.

    input_dict['file'] -- file reference handed to safeOpen
    widget, stream     -- accepted for the streaming-widget call signature;
                          not used here

    The file content is stripped of surrounding whitespace and split on
    "\n".  Each line becomes a tweet dict with a running 'id' starting at 1,
    'created_at' set to the current time, the line as 'text', and the fixed
    values "dragi" for 'user' and "bg" for 'lang'.

    Returns {'ltw': <list of tweet dicts>}.
    '''
    import datetime

    textfile = safeOpen(input_dict['file'])
    try:
        tweet_data = textfile.read()
    finally:
        # The handle used to be left open; release it as soon as it is read.
        textfile.close()

    ltw = []
    for number, line in enumerate(tweet_data.strip().split("\n"), 1):
        tweet = {}
        tweet['id']=number
        tweet['created_at']=datetime.datetime.now()
        tweet['text']=line
        tweet['user']="dragi"
        tweet['lang']="bg"
        ltw.append(tweet)

    return {'ltw': ltw}

def streaming_simulate_stream_from_csv(input_dict,widget,stream=None):
    '''
    Replay tweets from a semicolon-delimited CSV file, one batch per call.

    The first CSV row is skipped (treated as a header).  Every other row is
    turned into a tweet dict with the keys 'id' (column 0), 'created_at'
    (column 1, parsed with "%m/%d/%Y %I:%M:%S %p"), 'text' (column 3),
    'user' (column 5) and 'lang' (column 11).

    Emission stops once 50 tweets have been collected.  When a stream is
    given, the id of the 50th tweet is stored in the StreamWidgetData row
    for (stream, widget), and a later call only starts emitting after the
    row whose id equals that stored value.  If the file ends while tweets
    are still being emitted, the stored value is set to "done".

    input_dict -- must contain 'csv', the file reference handed to safeOpen
    widget     -- widget used to look up / store the StreamWidgetData row
    stream     -- stream used for the same lookup; with None nothing is
                  stored and emission starts at the first data row

    Returns {'ltw': <list of emitted tweet dicts>}.
    '''
    from streams.models import StreamWidgetData
    import datetime
    import csv
    # NOTE(review): the handle returned by safeOpen is never closed here.
    csvfile = safeOpen(input_dict['csv'])
    csvreader = csv.reader(csvfile,delimiter=";",quotechar='"')
    rows = []
    ltw = []
    i=0
    counter = 0
    # 'started' says whether rows are currently being emitted.
    started = False
    # Placeholder that is not expected to equal any real tweet id.
    last_id = "not-started-yet"
    if not stream is None:
        try:
            # Resume: remember the id stored by a previous call.
            swd = StreamWidgetData.objects.get(stream=stream,widget=widget)
            last_id = swd.value
        except:
            # No stored position could be read: emit from the first data row.
            started = True
    else:
        # Not running in a stream: emit from the first data row.
        started = True
    for row in csvreader:
        rows.append(row)
        # Row 0 is skipped (header).
        if i!=0:
            rows[i][1] = datetime.datetime.strptime(rows[i][1],"%m/%d/%Y %I:%M:%S %p")
            tweet = {}
            tweet['id'] = rows[i][0]
            tweet['created_at'] = rows[i][1]
            tweet['text'] = rows[i][3].encode('utf-8')
            tweet['user'] = rows[i][5].encode('utf-8')
            tweet['lang'] = rows[i][11]
            if started:
                counter = counter + 1
                ltw.append(tweet)
            if counter == 50 and started:
                # Batch is full: stop emitting and remember where we stopped.
                started = False
                if not stream is None:
                    try:
                        swd = StreamWidgetData.objects.get(stream=stream,widget=widget)
                        swd.value = tweet['id']
                        swd.save()
                    except:
                        # Updating the existing row failed: store a new one.
                        swd = StreamWidgetData()
                        swd.stream = stream
                        swd.widget = widget
                        data = tweet['id']
                        swd.value = data
                        swd.save()
            # Checked after the emit step, so the row matching last_id itself
            # is not emitted again; emission begins with the following row.
            if tweet['id']==last_id:
                started = True
        i = i + 1
    # Still emitting when the file ended: the file is exhausted, mark "done".
    if counter < 51 and not stream is None and started == True:
        try:
            swd = StreamWidgetData.objects.get(stream=stream,widget=widget)
            swd.value = "done"
            swd.save()
        except:
            swd = StreamWidgetData()
            swd.stream = stream
            swd.widget = widget
            data = "done"
            swd.value = data
            swd.save()
    output_dict = {}
    #print ltw
    #print len(ltw)
    output_dict['ltw']=ltw
    return output_dict

136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
def streaming_split_pos_neg(input_dict):
    '''
    Split tweets into positive and negative lists by their 'sentiment'.

    input_dict['ltw'] -- list of tweet dicts

    A tweet goes to the positive list when its sentiment is "Positive".
    It goes to the negative list when its sentiment is "Negative" and its
    'reliability' is not -1.0.  Tweets that lack the needed keys (or are
    not subscriptable at all) are skipped instead of aborting the split.

    Returns {'ptw': <positive tweets>, 'ntw': <negative tweets>}.
    '''
    positive_tweets = []
    negative_tweets = []

    for tweet in input_dict['ltw']:
        try:
            sentiment = tweet['sentiment']
            if sentiment=="Positive":
                positive_tweets.append(tweet)
            elif sentiment=="Negative" and tweet['reliability']!=-1.0:
                negative_tweets.append(tweet)
        except (KeyError, TypeError):
            # Malformed tweet: skip it.  Only the lookup failures are
            # swallowed, so unrelated errors (and KeyboardInterrupt) still
            # surface instead of being silently discarded.
            continue

    return {'ptw': positive_tweets, 'ntw': negative_tweets}

def streaming_filter_tweets_by_language(input_dict):
    '''
    Keep only the tweets written in the requested language.

    input_dict['lang'] -- language value to keep
    input_dict['ltw']  -- list of tweet dicts, each with a 'lang' entry

    Returns {'ltw': <tweets whose 'lang' equals input_dict['lang']>}, in
    their original order.  The tweet dicts themselves are not copied.
    '''
    wanted = input_dict['lang']
    matching = [entry for entry in input_dict['ltw'] if entry['lang']==wanted]
    return {'ltw': matching}

Janez K's avatar
Janez K committed
174
def streaming_display_tweets(input_dict,widget,stream=None):
    '''
    Store the current list of tweets for display.

    input_dict['ltw'] -- value to store
    widget, stream    -- identify the StreamWidgetData row to write

    Outside a stream (stream is None) nothing is stored.  Otherwise the
    existing StreamWidgetData row for (stream, widget) is overwritten with
    the tweets; if that fails for any reason, a new row is created instead.

    Always returns an empty dict.
    '''
    from streams.models import StreamWidgetData

    if stream is None:
        return {}

    try:
        record = StreamWidgetData.objects.get(stream=stream,widget=widget)
        record.value = input_dict['ltw']
        record.save()
    except:
        # Could not update an existing row: fall back to a fresh one.
        record = StreamWidgetData()
        record.stream = stream
        record.widget = widget
        record.value = input_dict['ltw']
        record.save()
    return {}

def streaming_collect_and_display_tweets(input_dict,widget,stream=None):
    '''
    Append every incoming tweet to the stored data of this widget.

    input_dict['ltw'] -- iterable of tweets
    widget, stream    -- attached to every StreamWidgetData row created

    Outside a stream (stream is None) nothing is stored.  Otherwise one
    StreamWidgetData row per tweet is built and all rows are inserted with
    a single bulk_create call.

    Always returns an empty dict.
    '''
    from streams.models import StreamWidgetData

    if stream is not None:
        rows = [
            StreamWidgetData(stream=stream,widget=widget,value=entry)
            for entry in input_dict['ltw']
        ]
        StreamWidgetData.objects.bulk_create(rows)
    return {}

Janez K's avatar
Janez K committed
203 204 205 206 207
def streaming_sentiment_graph(input_dict,widget,stream=None):
    '''
    Record every incoming tweet as a separate data row of this widget.

    input_dict['ltw'] -- iterable of tweets
    widget, stream    -- attached to every StreamWidgetData row created

    When stream is None the function does nothing.  Otherwise it creates
    one StreamWidgetData row per tweet and inserts them all at once via
    bulk_create.

    Always returns an empty dict.
    '''
    from streams.models import StreamWidgetData

    if stream is None:
        return {}

    pending = []
    for item in input_dict['ltw']:
        row = StreamWidgetData(stream=stream,widget=widget,value=item)
        pending.append(row)
    StreamWidgetData.objects.bulk_create(pending)
    return {}

214 215 216
def streaming_tweet_sentiment_service(input_dict,widget,stream=None):
    '''
    Annotate tweets with the result of the remote tweet sentiment service.

    input_dict['ltw'] -- list of tweet dicts with 'id', 'text' and 'lang'
    widget, stream    -- accepted for the streaming-widget call signature;
                         not used here

    The id, text and language of every tweet are pickled and sent to the
    TweetSentimentService SOAP operation.  The unpickled response is matched
    to the input tweets by position, and each tweet receives the returned
    'sentiment', 'lang' (from the returned 'language') and 'reliability'.
    The tweet dicts in input_dict['ltw'] are updated in place.

    Returns {'ltw': <the same, now annotated, list>}.
    '''
    import pickle
    from pysimplesoap.client import SoapClient, SoapFault
    import pysimplesoap

    # Assigned BEFORE the client is built: the original set it afterwards,
    # which cannot influence a client that already exists.
    # NOTE(review): whether SoapClient reads this module value at
    # construction depends on the installed pysimplesoap version -- confirm.
    pysimplesoap.client.TIMEOUT = 60

    client = SoapClient(location = "http://batman.ijs.si:8008/",action = 'http://batman.ijs.si:8008/',namespace = "http://example.com/tweetsentiment.wsdl",soap_ns='soap',trace = False,ns = False)

    list_of_tweets = input_dict['ltw']

    # Only the fields the service needs are sent.
    request_tweets = [
        {'id':tweet['id'],'text':tweet['text'],'language':tweet['lang']}
        for tweet in list_of_tweets
    ]

    response = client.TweetSentimentService(tweets=pickle.dumps(request_tweets))

    # SECURITY NOTE(review): pickle.loads executes whatever the payload
    # tells it to, and this payload arrives over plain HTTP.  Anyone able to
    # answer for (or tamper with) that endpoint can run code in this
    # process.  Left as is because the wire format is dictated by the
    # service; a data-only format would be the proper fix.
    new_ltw = pickle.loads(str(response.TweetSentimentResult))

    # Results are matched to the request by position.
    for position, new_tweet in enumerate(new_ltw):
        target = list_of_tweets[position]
        target['sentiment']=new_tweet['sentiment']
        target['lang']=new_tweet['language']
        target['reliability']=new_tweet['reliability']

    return {'ltw': list_of_tweets}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
def streaming_twitter(input_dict,widget,stream=None):
    '''
    Search Twitter and return the results as a list of tweet dicts.

    input_dict keys:
      'cfauth'  -- "true" to use the built-in credentials; otherwise the
                   credentials are read from 'ck', 'cs', 'at' and 'as'
      'query'   -- search query
      'geocode' -- geocode argument passed to the search call
    widget, stream -- identify the StreamWidgetData row in which the id of
                      the newest tweet seen per query is remembered, so a
                      later call in the same stream only asks for newer
                      tweets (since_id).  With stream None nothing is stored.

    Returns {'ltw': <list of dicts with 'id', 'created_at', 'text', 'user',
    'lang'>}.

    Raises HaltStream when the search rate limit is exhausted, when the
    search call fails, or when a stream run yields no new tweets.
    '''
    import datetime
    import time
    import tweepy
    from streams.models import StreamWidgetData
    from streams.models import HaltStream

    if input_dict['cfauth']=="true":
        # SECURITY NOTE(review): these credentials are committed to source
        # control.  They are kept so existing workflows keep working, but
        # they should be moved to configuration and rotated.
        consumer_key="zmK41mqxU3ZNJTFQpYwTdg"
        consumer_secret="9StnKNAe20ebDOREQjsVjAjBEiz5R9feZJTGUYWqLo"
        access_token="45210078-VydgdJMwhWYjZRvlNbrKj6jfqicUIsdMnRbnaPElL"
        access_token_secret="uLvIN3MMxFSxdK4M8P5RYojjUkbc2reqNydYtpT7Ks"
    else:
        consumer_key = input_dict['ck']
        consumer_secret = input_dict['cs']
        access_token = input_dict['at']
        access_token_secret = input_dict['as']

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    api = tweepy.API(auth)

    query = input_dict['query']

    rate_limit_status = api.rate_limit_status()
    search_limits = rate_limit_status['resources']['search']['/search/tweets']

    if not search_limits['remaining']>0:
        current_time = int(time.mktime(datetime.datetime.now().timetuple()))
        remaining = search_limits['reset']-current_time
        message = "The twitter API limit has been reached. Try again in "+str(remaining)+" seconds."
        if input_dict['cfauth']=="true":
            message = message+" Try using your own credentials."
        raise HaltStream(message)

    def search(**extra):
        # Single place for the search call; any failure (including a missing
        # 'geocode' entry) is reported as a HaltStream.
        try:
            return api.new_search(q=query,geocode=input_dict['geocode'],count=100,**extra)
        except Exception as e:
            raise HaltStream("The Twitter API returned an error: "+str(e))

    if stream is None:
        ltw = search()
    else:
        try:
            swd = StreamWidgetData.objects.get(stream=stream,widget=widget)
            data = swd.value
        except Exception:
            # No usable stored state: start with an empty query -> id map.
            swd = StreamWidgetData()
            swd.stream = stream
            swd.widget = widget
            data = {}
            swd.value = data
            swd.save()
        # "in" replaces dict.has_key, which no longer exists in Python 3.
        if query in data:
            ltw = search(since_id=data[query])
        else:
            ltw = search()
        if len(ltw)>0:
            # Remember the id of the first returned tweet for the next run.
            data[query]=ltw[0].id
            swd.value = data
            swd.save()

    tweets = []

    for tw in ltw:
        tweet = {}
        tweet['id'] = tw.id
        tweet['created_at'] = tw.created_at
        tweet['text'] = unicode(tw.text).encode("utf-8")
        try:
            tweet['user'] = tw.user['screen_name']
        except Exception:
            # Screen name not available in this form: fall back to "".
            tweet['user'] = ""
        tweet['lang'] = tw.lang
        tweets.append(tweet)

    if len(tweets)>0 or stream is None:
        return {'ltw': tweets}
    raise HaltStream("No new results, halting stream.")

Janez K's avatar
Janez K committed
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
def streaming_rss_reader(input_dict,widget,stream=None):
    '''
    Emit the link of one RSS feed item per call.

    input_dict['url'] -- feed address passed to feedparser.parse
    widget, stream    -- identify the StreamWidgetData row that holds the
                         list of links already emitted

    Outside a stream (stream is None) the link of the first feed item is
    returned.  Inside a stream the items are walked in reverse feed order
    and the first link that has not been emitted before is stored and
    returned.

    Returns {'url': <link>}.

    Raises HaltStream inside a stream when every item was already emitted.
    NOTE(review): outside a stream an empty feed raises IndexError.
    '''
    import feedparser
    from streams.models import StreamWidgetData
    from streams.models import HaltStream

    feed = feedparser.parse(input_dict['url'])
    output_dict = {}
    if stream is None:
        output_dict['url'] = feed['items'][0]['link']
    else:
        try:
            swd = StreamWidgetData.objects.get(stream=stream,widget=widget)
            data = swd.value
        except Exception:
            # No usable stored state: start with an empty list of links.
            swd = StreamWidgetData()
            swd.stream = stream
            swd.widget = widget
            data = []
            swd.value = data
            swd.save()
        feed['items'].reverse()
        for item in feed['items']:
            if item['link'] not in data:
                data.append(item['link'])
                swd.value = data
                swd.save()
                output_dict['url'] = item['link']
                break
        else:
            # Loop finished without a break: nothing new in the feed.
            raise HaltStream("Halting stream.")
    return output_dict
Janez K's avatar
Janez K committed
369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397


def streaming_sliding_window(input_dict,widget,stream=None):
    '''
    Keep a window of at most 'size' elements across stream executions.

    input_dict['list'] -- incoming list
    input_dict['size'] -- window size (anything int() accepts)
    widget, stream     -- identify the StreamWidgetData row holding the
                          previous window

    Outside a stream (stream is None) the first 'size' elements of the
    incoming list are returned.  Inside a stream, when the incoming list
    already has 'size' or more elements its first 'size' elements become
    the window; otherwise the incoming elements are put in front of the
    stored window and the result is cut to 'size'.  The new window is
    stored back.

    Returns {'list': <window>}.
    '''
    from streams.models import StreamWidgetData

    if stream is None:
        return {'list': input_dict['list'][:int(input_dict['size'])]}

    try:
        record = StreamWidgetData.objects.get(stream=stream,widget=widget)
        previous = record.value
    except:
        # No usable stored window: start from an empty one.
        record = StreamWidgetData()
        record.stream = stream
        record.widget = widget
        previous = []
        record.value = previous
        record.save()

    window_size = int(input_dict['size'])
    incoming = input_dict['list']
    if len(incoming)>=window_size:
        window = incoming[:window_size]
    else:
        # Newest elements first, then whatever still fits of the old window.
        window = (incoming + previous)[:window_size]

    record.value = window
    record.save()
    return {'list': window}