I wanted to do some research on public tweets using tweepy. Basically, I wanted to save every tweet that met certain criteria to my local MySQL database instance. The criteria (the filter keywords) are stored in a table within that same database.
I decided to use tweepy; you can find it on GitHub here.
Check out Brandon Wood’s article here, which I based this on.
# twitter client
import tweepy

# database interface
import mysql.connector

# Module-level connection and cursor shared by the stream listener and main().
conn = mysql.connector.connect(user='root', password='mypass',
                               host='127.0.0.1', database='analytics')
curs = conn.cursor()

# Parameterized insert: the driver escapes the tweet fields for us.
INSERT_TWEET_SQL = (
    "insert into tweets (username, created_at, content, reply_to, source) "
    "values(%s, %s, %s, %s, %s)"
)


class StreamWatcherHandler(tweepy.StreamListener):
    """Stream listener that stores each received status in the ``tweets`` table."""

    def on_status(self, status):
        """Insert one incoming status into the database and commit it.

        Any exception is printed instead of raised, so a single failed
        insert does not propagate out of the handler.
        """
        try:
            usr = status.author.screen_name.strip()
            txt = status.text.strip()
            in_reply_to = status.in_reply_to_status_id
            src = status.source.strip()
            cat = status.created_at

            # Now that we have our tweet information, let's stow it away in
            # our sql database
            curs.execute(INSERT_TWEET_SQL, (usr, cat, txt, in_reply_to, src))
            conn.commit()
        except Exception as e:
            # Most errors we're going to see relate to the handling of UTF-8
            # messages (sorry)
            print(e)

    def on_error(self, status_code):
        """Report a stream error and return True.

        NOTE(review): tweepy treats a False return as a request to
        disconnect, so True presumably keeps the stream alive -- confirm
        against the installed tweepy version.
        """
        print('An error has occured! Status code = %s' % status_code)
        return True


def main():
    """Authenticate, load the keyword filter from ``mytable`` and stream tweets."""
    # establish stream (the credentials below are placeholders)
    consumer_key = "key"
    consumer_secret = "secret"
    access_token = "token"
    access_token_secret = "token_secret"

    auth1 = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
    auth1.set_access_token(access_token, access_token_secret)

    # This is where we define our filter from our database. Each cell will
    # contain a keyword that I want the stream to look for.
    curs.execute('SELECT name FROM mytable')
    # Skip NULL/empty cells: they cannot be used as track keywords.
    keywords = [item[0] for item in curs.fetchall() if item[0]]
    if not keywords:
        # An empty track list gives the stream nothing to match.
        print("No keywords found in mytable; nothing to track.")
        return

    # flush so the message is visible before the blocking filter() call
    print("Establishing stream...", end=" ", flush=True)
    stream = tweepy.Stream(auth1, StreamWatcherHandler(), timeout=None)
    stream.filter(track=keywords)
    print("Done")


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the stream; fall through to cleanup.
        pass
    finally:
        # Always release the connection, not only when interrupted.
        print("Disconnecting from database... ", end="", flush=True)
        conn.commit()
        conn.close()
        print("Done")
Here is what the database table looks like:
So now the stream only looks for tweets that contain the keywords “test”, “test2”, or “test3”.