Commit 962be4d8 authored by Janez K's avatar Janez K

twitter feed and twitter sentiment analysis

parent 30709c05
......@@ -11,4 +11,3 @@ pyparsing==1.5.6
pydot==1.0.28
wsgiref==0.1.2
feedparser==5.1.2
tweepy==2.0
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
"""
Tweepy Twitter API library
"""
__version__ = '2.0'
__author__ = 'Joshua Roesslein'
__license__ = 'MIT'
from tweepy.models import Status, User, DirectMessage, Friendship, SavedSearch, SearchResult, ModelFactory, Category
from tweepy.error import TweepError
from tweepy.api import API
from tweepy.cache import Cache, MemoryCache, FileCache
from tweepy.auth import BasicAuthHandler, OAuthHandler
from tweepy.streaming import Stream, StreamListener
from tweepy.cursor import Cursor
# Global, unauthenticated instance of API
api = API()
def debug(enable=True, level=1):
import httplib
httplib.HTTPConnection.debuglevel = level
This diff is collapsed.
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from urllib2 import Request, urlopen
import base64
from tweepy import oauth
from tweepy.error import TweepError
from tweepy.api import API
class AuthHandler(object):
def apply_auth(self, url, method, headers, parameters):
"""Apply authentication headers to request"""
raise NotImplementedError
def get_username(self):
"""Return the username of the authenticated user"""
raise NotImplementedError
class BasicAuthHandler(AuthHandler):
def __init__(self, username, password):
self.username = username
self._b64up = base64.b64encode('%s:%s' % (username, password))
def apply_auth(self, url, method, headers, parameters):
headers['Authorization'] = 'Basic %s' % self._b64up
def get_username(self):
return self.username
class OAuthHandler(AuthHandler):
"""OAuth authentication handler"""
OAUTH_HOST = 'api.twitter.com'
OAUTH_ROOT = '/oauth/'
def __init__(self, consumer_key, consumer_secret, callback=None, secure=False):
self._consumer = oauth.OAuthConsumer(consumer_key, consumer_secret)
self._sigmethod = oauth.OAuthSignatureMethod_HMAC_SHA1()
self.request_token = None
self.access_token = None
self.callback = callback
self.username = None
self.secure = secure
def _get_oauth_url(self, endpoint, secure=False):
if self.secure or secure:
prefix = 'https://'
else:
prefix = 'http://'
return prefix + self.OAUTH_HOST + self.OAUTH_ROOT + endpoint
def apply_auth(self, url, method, headers, parameters):
request = oauth.OAuthRequest.from_consumer_and_token(
self._consumer, http_url=url, http_method=method,
token=self.access_token, parameters=parameters
)
request.sign_request(self._sigmethod, self._consumer, self.access_token)
headers.update(request.to_header())
def _get_request_token(self):
try:
url = self._get_oauth_url('request_token')
request = oauth.OAuthRequest.from_consumer_and_token(
self._consumer, http_url=url, callback=self.callback
)
request.sign_request(self._sigmethod, self._consumer, None)
resp = urlopen(Request(url, headers=request.to_header()))
return oauth.OAuthToken.from_string(resp.read())
except Exception, e:
raise TweepError(e)
def set_request_token(self, key, secret):
self.request_token = oauth.OAuthToken(key, secret)
def set_access_token(self, key, secret):
self.access_token = oauth.OAuthToken(key, secret)
def get_authorization_url(self, signin_with_twitter=False):
"""Get the authorization URL to redirect the user"""
try:
# get the request token
self.request_token = self._get_request_token()
# build auth request and return as url
if signin_with_twitter:
url = self._get_oauth_url('authenticate')
else:
url = self._get_oauth_url('authorize')
request = oauth.OAuthRequest.from_token_and_callback(
token=self.request_token, http_url=url
)
return request.to_url()
except Exception, e:
raise TweepError(e)
def get_access_token(self, verifier=None):
"""
After user has authorized the request token, get access token
with user supplied verifier.
"""
try:
url = self._get_oauth_url('access_token')
# build request
request = oauth.OAuthRequest.from_consumer_and_token(
self._consumer,
token=self.request_token, http_url=url,
verifier=str(verifier)
)
request.sign_request(self._sigmethod, self._consumer, self.request_token)
# send request
resp = urlopen(Request(url, headers=request.to_header()))
self.access_token = oauth.OAuthToken.from_string(resp.read())
return self.access_token
except Exception, e:
raise TweepError(e)
def get_xauth_access_token(self, username, password):
"""
Get an access token from an username and password combination.
In order to get this working you need to create an app at
http://twitter.com/apps, after that send a mail to api@twitter.com
and request activation of xAuth for it.
"""
try:
url = self._get_oauth_url('access_token', secure=True) # must use HTTPS
request = oauth.OAuthRequest.from_consumer_and_token(
oauth_consumer=self._consumer,
http_method='POST', http_url=url,
parameters = {
'x_auth_mode': 'client_auth',
'x_auth_username': username,
'x_auth_password': password
}
)
request.sign_request(self._sigmethod, self._consumer, None)
resp = urlopen(Request(url, data=request.to_postdata()))
self.access_token = oauth.OAuthToken.from_string(resp.read())
return self.access_token
except Exception, e:
raise TweepError(e)
def get_username(self):
if self.username is None:
api = API(self)
user = api.verify_credentials()
if user:
self.username = user.screen_name
else:
raise TweepError("Unable to get username, invalid oauth token!")
return self.username
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import httplib
import urllib
import time
import re
from tweepy.error import TweepError
from tweepy.utils import convert_to_utf8_str
from tweepy.models import Model
re_path_template = re.compile('{\w+}')
def bind_api(**config):
class APIMethod(object):
path = config['path']
payload_type = config.get('payload_type', None)
payload_list = config.get('payload_list', False)
allowed_param = config.get('allowed_param', [])
method = config.get('method', 'GET')
require_auth = config.get('require_auth', False)
search_api = config.get('search_api', False)
use_cache = config.get('use_cache', True)
def __init__(self, api, args, kargs):
# If authentication is required and no credentials
# are provided, throw an error.
if self.require_auth and not api.auth:
raise TweepError('Authentication required!')
self.api = api
self.post_data = kargs.pop('post_data', None)
self.retry_count = kargs.pop('retry_count', api.retry_count)
self.retry_delay = kargs.pop('retry_delay', api.retry_delay)
self.retry_errors = kargs.pop('retry_errors', api.retry_errors)
self.headers = kargs.pop('headers', {})
self.build_parameters(args, kargs)
# Pick correct URL root to use
if self.search_api:
self.api_root = api.search_root
else:
self.api_root = api.api_root
# Perform any path variable substitution
self.build_path()
if api.secure:
self.scheme = 'https://'
else:
self.scheme = 'http://'
if self.search_api:
self.host = api.search_host
else:
self.host = api.host
# Manually set Host header to fix an issue in python 2.5
# or older where Host is set including the 443 port.
# This causes Twitter to issue 301 redirect.
# See Issue https://github.com/tweepy/tweepy/issues/12
self.headers['Host'] = self.host
def build_parameters(self, args, kargs):
self.parameters = {}
for idx, arg in enumerate(args):
if arg is None:
continue
try:
self.parameters[self.allowed_param[idx]] = convert_to_utf8_str(arg)
except IndexError:
raise TweepError('Too many parameters supplied!')
for k, arg in kargs.items():
if arg is None:
continue
if k in self.parameters:
raise TweepError('Multiple values for parameter %s supplied!' % k)
self.parameters[k] = convert_to_utf8_str(arg)
def build_path(self):
for variable in re_path_template.findall(self.path):
name = variable.strip('{}')
if name == 'user' and 'user' not in self.parameters and self.api.auth:
# No 'user' parameter provided, fetch it from Auth instead.
value = self.api.auth.get_username()
else:
try:
value = urllib.quote(self.parameters[name])
except KeyError:
raise TweepError('No parameter value found for path variable: %s' % name)
del self.parameters[name]
self.path = self.path.replace(variable, value)
def execute(self):
# Build the request URL
url = self.api_root + self.path
if len(self.parameters):
url = '%s?%s' % (url, urllib.urlencode(self.parameters))
# Query the cache if one is available
# and this request uses a GET method.
if self.use_cache and self.api.cache and self.method == 'GET':
cache_result = self.api.cache.get(url)
# if cache result found and not expired, return it
if cache_result:
# must restore api reference
if isinstance(cache_result, list):
for result in cache_result:
if isinstance(result, Model):
result._api = self.api
else:
if isinstance(cache_result, Model):
cache_result._api = self.api
return cache_result
# Continue attempting request until successful
# or maximum number of retries is reached.
retries_performed = 0
while retries_performed < self.retry_count + 1:
# Open connection
# FIXME: add timeout
if self.api.secure:
conn = httplib.HTTPSConnection(self.host)
else:
conn = httplib.HTTPConnection(self.host)
# Apply authentication
if self.api.auth:
self.api.auth.apply_auth(
self.scheme + self.host + url,
self.method, self.headers, self.parameters
)
# Execute request
try:
conn.request(self.method, url, headers=self.headers, body=self.post_data)
resp = conn.getresponse()
except Exception, e:
raise TweepError('Failed to send request: %s' % e)
# Exit request loop if non-retry error code
if self.retry_errors:
if resp.status not in self.retry_errors: break
else:
if resp.status == 200: break
# Sleep before retrying request again
time.sleep(self.retry_delay)
retries_performed += 1
# If an error was returned, throw an exception
self.api.last_response = resp
if resp.status != 200:
try:
error_msg = self.api.parser.parse_error(resp.read())
except Exception:
error_msg = "Twitter error response: status code = %s" % resp.status
raise TweepError(error_msg, resp)
# Parse the response payload
result = self.api.parser.parse(self, resp.read())
conn.close()
# Store result into cache if one is available.
if self.use_cache and self.api.cache and self.method == 'GET' and result:
self.api.cache.store(url, result)
return result
def _call(api, *args, **kargs):
method = APIMethod(api, args, kargs)
return method.execute()
# Set pagination mode
if 'cursor' in APIMethod.allowed_param:
_call.pagination_mode = 'cursor'
elif 'page' in APIMethod.allowed_param:
_call.pagination_mode = 'page'
return _call
This diff is collapsed.
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from tweepy.error import TweepError
class Cursor(object):
"""Pagination helper class"""
def __init__(self, method, *args, **kargs):
if hasattr(method, 'pagination_mode'):
if method.pagination_mode == 'cursor':
self.iterator = CursorIterator(method, args, kargs)
else:
self.iterator = PageIterator(method, args, kargs)
else:
raise TweepError('This method does not perform pagination')
def pages(self, limit=0):
"""Return iterator for pages"""
if limit > 0:
self.iterator.limit = limit
return self.iterator
def items(self, limit=0):
"""Return iterator for items in each page"""
i = ItemIterator(self.iterator)
i.limit = limit
return i
class BaseIterator(object):
def __init__(self, method, args, kargs):
self.method = method
self.args = args
self.kargs = kargs
self.limit = 0
def next(self):
raise NotImplementedError
def prev(self):
raise NotImplementedError
def __iter__(self):
return self
class CursorIterator(BaseIterator):
def __init__(self, method, args, kargs):
BaseIterator.__init__(self, method, args, kargs)
self.next_cursor = -1
self.prev_cursor = 0
self.count = 0
def next(self):
if self.next_cursor == 0 or (self.limit and self.count == self.limit):
raise StopIteration
data, cursors = self.method(
cursor=self.next_cursor, *self.args, **self.kargs
)
self.prev_cursor, self.next_cursor = cursors
if len(data) == 0:
raise StopIteration
self.count += 1
return data
def prev(self):
if self.prev_cursor == 0:
raise TweepError('Can not page back more, at first page')
data, self.next_cursor, self.prev_cursor = self.method(
cursor=self.prev_cursor, *self.args, **self.kargs
)
self.count -= 1
return data
class PageIterator(BaseIterator):
def __init__(self, method, args, kargs):
BaseIterator.__init__(self, method, args, kargs)
self.current_page = 0
def next(self):
self.current_page += 1
items = self.method(page=self.current_page, *self.args, **self.kargs)
if len(items) == 0 or (self.limit > 0 and self.current_page > self.limit):
raise StopIteration
return items
def prev(self):
if (self.current_page == 1):
raise TweepError('Can not page back more, at first page')
self.current_page -= 1
return self.method(page=self.current_page, *self.args, **self.kargs)
class ItemIterator(BaseIterator):
def __init__(self, page_iterator):
self.page_iterator = page_iterator
self.limit = 0
self.current_page = None
self.page_index = -1
self.count = 0
def next(self):
if self.limit > 0 and self.count == self.limit:
raise StopIteration
if self.current_page is None or self.page_index == len(self.current_page) - 1:
# Reached end of current page, get the next page...
self.current_page = self.page_iterator.next()
self.page_index = -1
self.page_index += 1
self.count += 1
return self.current_page[self.page_index]
def prev(self):
if self.current_page is None:
raise TweepError('Can not go back more, at first page')
if self.page_index == 0:
# At the beginning of the current page, move to next...
self.current_page = self.page_iterator.prev()
self.page_index = len(self.current_page)
if self.page_index == 0:
raise TweepError('No more items')
self.page_index -= 1
self.count -= 1
return self.current_page[self.page_index]
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
class TweepError(Exception):
"""Tweepy exception"""
def __init__(self, reason, response=None):
self.reason = unicode(reason)
self.response = response
Exception.__init__(self, reason)
def __str__(self):
return self.reason
This diff is collapsed.
This diff is collapsed.
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from tweepy.models import ModelFactory
from tweepy.utils import import_simplejson
from tweepy.error import TweepError
class Parser(object):
def parse(self, method, payload):
"""
Parse the response payload and return the result.
Returns a tuple that contains the result data and the cursors
(or None if not present).
"""
raise NotImplementedError
def parse_error(self, payload):
"""
Parse the error message from payload.
If unable to parse the message, throw an exception
and default error message will be used.
"""
raise NotImplementedError
class RawParser(Parser):
def __init__(self):
pass
def parse(self, method, payload):
return payload
def parse_error(self, payload):
return payload
class JSONParser(Parser):
payload_format = 'json'
def __init__(self):
self.json_lib = import_simplejson()
def parse(self, method, payload):
try:
json = self.json_lib.loads(payload)
except Exception, e:
raise TweepError('Failed to parse JSON payload: %s' % e)
needsCursors = method.parameters.has_key('cursor')
if needsCursors and isinstance(json, dict) and 'previous_cursor' in json and 'next_cursor' in json:
cursors = json['previous_cursor'], json['next_cursor']
return json, cursors
else:
return json
def parse_error(self, payload):
error = self.json_lib.loads(payload)
if error.has_key('error'):
return error['error']
else:
return error['errors']
class ModelParser(JSONParser):
def __init__(self, model_factory=None):
JSONParser.__init__(self)
self.model_factory = model_factory or ModelFactory
def parse(self, method, payload):
try:
if method.payload_type is None: return
model = getattr(self.model_factory, method.payload_type)
except AttributeError:
raise TweepError('No model for this payload type: %s' % method.payload_type)
json = JSONParser.parse(self, method, payload)
if isinstance(json, tuple):
json, cursors = json
else:
cursors = None
if method.payload_list:
result = model.parse_list(method.api, json)
else:
result = model.parse(method.api, json)
if cursors:
return result, cursors
else:
return result
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import httplib
from socket import timeout
from threading import Thread
from time import sleep
from tweepy.models import Status
from tweepy.api import API
from tweepy.error import TweepError
from tweepy.utils import import_simplejson, urlencode_noplus
json = import_simplejson()
STREAM_VERSION = '1.1'
class StreamListener(object):
def __init__(self, api=None):
self.api = api or API()
def on_data(self, data):
"""Called when raw data is received from connection.
Override this method if you wish to manually handle
the stream data. Return False to stop stream and close connection.
"""
if 'in_reply_to_status_id' in data:
status = Status.parse(self.api, json.loads(data))
if self.on_status(status) is False:
return False
elif 'delete' in data:
delete = json.loads(data)['delete']['status']
if self.on_delete(delete['id'], delete['user_id']) is False:
return False
elif 'limit' in data:
if self.on_limit(json.loads(data)['limit']['track']) is False:
return False
def on_status(self, status):
"""Called when a new status arrives"""
return
def on_delete(self, status_id, user_id):
"""Called when a delete notice arrives for a status"""
return
def on_limit(self, track):
"""Called when a limitation notice arrvies"""
return
def on_error(self, status_code):
"""Called when a non-200 status code is returned"""
return False
def on_timeout(self):
"""Called when stream connection times out"""
return
class Stream(object):
host = 'stream.twitter.com'
def __init__(self, auth, listener, **options):
self.auth = auth
self.listener = listener
self.running = False
self.timeout = options.get("timeout", 300.0)
self.retry_count = options.get("retry_count")
self.retry_time = options.get("retry_time", 10.0)
self.snooze_time = options.get("snooze_time", 5.0)
self.buffer_size = options.get("buffer_size", 1500)
if options.get("secure", True):
self.scheme = "https"
else:
self.scheme = "http"
self.api = API()
self.headers = options.get("headers") or {}
self.parameters = None
self.body = None
def _run(self):
# Authenticate
url = "%s://%s%s" % (self.scheme, self.host, self.url)