Getting reTweets from Python

Sep 22, 2010 · 3 min read · python programming code twitter ·

In this snippet I will show how to use the Tweepy library to get statistics from Twitter.

The script will read a sample feed from the Twitter Streaming API (https://web.archive.org/web/20100723015215/http://dev.twitter.com/pages/streaming_api) and perform the following actions:

Read the sample feed
Note the number of retweets seen
Track the number of times posts have been retweeted, and
Produce an hourly report of the 20 most frequently retweeted posts

  1#!/usr/bin/env python
  2 
  3"""
  4Tweepy library proof of concept
  5 
  6@author: Adrian Deccico
  7"""
  8 
  9from getpass import getpass
 10import hashlib
 11import time
 12import re
 13import operator
 14import logging
 15import codecs
 16import tweepy
 17 
 18logging.basicConfig(level = logging.INFO)
 19 
 20class TweetListener(tweepy.StreamListener):
 21 
 22    #we use this pattern to decide if a post is a retweet or not, given that retweet fields of the feed don't work
 23    __retweet_pattern = "^(rt|retweet).*$"
 24 
 25 
 26    #statistics
 27    count = 0
 28    found = 0
 29    hour_ranking = {}
 30    retweets = {}
 31 
 32    #constants
 33    TOP_TWEETS = 20 #number of tweets to display in each hour
 34 
 35 
 36    def on_status(self, status):
 37        """callback that will process new tweets"""
 38        try:
 39            self.count += 1
 40            text = status.text
 41            #check wether we got a retweet or not
 42            logging.debug('count %s found %s - %s' % (self.count, self.found, text))
 43            if re.match(self.__retweet_pattern, text, re.IGNORECASE) == None:
 44                return
 45 
 46            self.found += 1
 47 
 48            if text not in self.retweets.keys():
 49                twitt_times = 1
 50            else:
 51                twitt_times = self.retweets[text] + 1
 52 
 53            self.retweets[text] = twitt_times
 54 
 55            hour = status.created_at.strftime("%Y%m%d%H")
 56 
 57            logging.info("hour: %s - times: %s - %s" % (hour, twitt_times, text))
 58            logging.info("Number of retweets found:%s" % self.found)
 59 
 60            if hour in self.hour_ranking.keys():
 61                if text in self.hour_ranking[hour].keys():
 62                    number = self.hour_ranking[hour][text] + 1
 63                else:
 64                    number = 1
 65            else:
 66                number = 1
 67                self.hour_ranking[hour] = {}
 68 
 69            logging.debug("adding %s to key %s" %(number,text))
 70            self.hour_ranking[hour][text] = number
 71            self.printHourlyReport()
 72 
 73        except:
 74            logging.exception("error while analyzing tweets")
 75 
 76    def printHourlyReport(self):
 77        """Print an hourly statistic file in results.txt"""
 78        logging.debug("updating statistics file")
 79        with codecs.open("results.txt", "w", "utf-8") as f:
 80            for h in sorted(self.hour_ranking):
 81                logging.debug(h + " " + str(type(h)))
 82                f.write("Top %s tweets at: %s n" % (self.TOP_TWEETS, h))
 83                count = self.TOP_TWEETS
 84                h_dict = self.hour_ranking[h]
 85                #sort retweets by times and then by text
 86                for t in sorted(h_dict, key=lambda k: (-h_dict[k], k)):
 87                    line = "%s time%s - %s n" % (h_dict[t],"s" if h_dict[t]>1 else "",t)
 88                    f.write(line)
 89                    count -= 1
 90                    if count == 0:
 91                        break
 92                f.write("-------------------------------nn")
 93 
 94    def on_error(self, status_code):
 95        logging.error('An error has occured! Status code = %s' % status_code)
 96        return True  # keep stream alive
 97 
 98    def on_timeout(self):
 99        logging.info('Time out event')
100 
101 
def main():
    """Prompt for Twitter credentials and analyze the sample stream.

    Asks for a username and password on the console, builds a
    ``tweepy.Stream`` around a fresh ``TweetListener`` and calls its
    ``sample()`` method.
    """
    # ``raw_input`` only exists on Python 2; fall back to ``input`` elsewhere.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    # Prompt for login credentials and setup stream object
    username = prompt('Twitter username: ')
    password = getpass('Twitter password: ')
    # NOTE(review): this uses the username/password form of tweepy.Stream;
    # confirm that the installed tweepy version still accepts it.
    stream = tweepy.Stream(username, password, TweetListener(), timeout=None)

    logging.info("Analyzing Tweeter sample feed. Results.txt will be updated in order to reflect the top 20 retweets of each hour.\n")
    stream.sample()
110 
if __name__ == '__main__':
    # Treat Ctrl-C as a normal shutdown: log a closing message instead of
    # letting the KeyboardInterrupt traceback reach the console.
    try:
        main()
    except KeyboardInterrupt:
        logging.info('\nExecution finished!')