I wanted to do some research with public tweets using tweepy. Basically, what I wanted to do was save every tweet that met certain criteria to my local MySQL database instance. The criteria (the filter) come from a table within that same database.
I decided to use tweepy; you can find it on GitHub here.
Check out Brandon Wood’s article here, which I based this on.
# twitter client
import tweepy
# database interface
import mysql.connector
# Shared MySQL connection and cursor used by the stream handler and main().
# charset='utf8mb4' is requested explicitly because tweets routinely contain
# 4-byte UTF-8 characters (emoji); without it those inserts are the usual
# source of encoding errors.
# NOTE(review): the text columns of the `tweets` table must also be utf8mb4
# for such characters to be stored -- confirm against the schema.
conn = mysql.connector.connect(
    user='root',
    password='mypass',
    host='127.0.0.1',
    database='analytics',
    charset='utf8mb4',
    use_unicode=True,
)
curs = conn.cursor()
class StreamWatcherHandler(tweepy.StreamListener):
    """Stream listener that writes each received tweet to the ``tweets`` table.

    Uses the module-level ``conn`` connection and ``curs`` cursor.
    """

    def on_status(self, status):
        """Store one status as a row in ``tweets``.

        Reads the author's screen name, text, reply-to status id, source and
        creation time from ``status``, inserts them with a parameterized
        query, and commits. Any failure is printed instead of raised so that
        one bad tweet does not propagate out of the handler.
        """
        try:
            usr = status.author.screen_name.strip()
            txt = status.text.strip()
            in_reply_to = status.in_reply_to_status_id
            src = status.source.strip()
            cat = status.created_at
            # Now that we have our tweet information, stow it away in the
            # SQL database.
            curs.execute(
                "insert into tweets (username, created_at, content, reply_to, source) values(%s, %s, %s, %s, %s)",
                (usr, cat, txt, in_reply_to, src))
            conn.commit()
        except Exception as e:
            # Most errors we're going to see relate to the handling of
            # UTF-8 messages (sorry)
            print(e)
            # Discard whatever the failed insert/commit left pending so the
            # next tweet starts from a clean transaction. The rollback is
            # guarded so that a broken connection still cannot raise here.
            try:
                conn.rollback()
            except Exception as rollback_error:
                print(rollback_error)

    def on_error(self, status_code):
        """Print the reported status code and return True.

        NOTE(review): returning True presumably tells tweepy to keep the
        stream running -- confirm against the tweepy version in use.
        """
        print('An error has occured! Status code = %s' % status_code)
        return True
def main():
    """Open a Twitter stream filtered by the keywords stored in the database.

    Authenticates with the OAuth credentials defined below, reads the
    ``name`` column of ``mytable`` through the module-level cursor, and
    passes the non-empty values to ``stream.filter`` as the ``track`` list.
    "Done" is printed once ``stream.filter`` returns.

    Returns early, without opening a stream, when the table yields no
    usable keywords.
    """
    # establish stream
    consumer_key = "key"
    consumer_secret = "secret"
    auth1 = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
    access_token = "token"
    access_token_secret = "token_secret"
    auth1.set_access_token(access_token, access_token_secret)

    # This is where we define our filter from our database. Each row holds
    # one keyword that the stream should look for. NULL / empty cells are
    # skipped because they are not valid track terms.
    curs.execute('SELECT name FROM mytable')
    keywords = [item[0] for item in curs.fetchall() if item[0]]
    if not keywords:
        print("No keywords found in mytable; nothing to track")
        return

    # Call form of print, as already used elsewhere in this file; it is
    # valid on both Python 2 and Python 3.
    print("Establishing stream...")
    stream = tweepy.Stream(auth1, StreamWatcherHandler(), timeout=None)
    stream.filter(track=keywords)
    print("Done")
if __name__ == '__main__':
    # Script entry point: run the stream, then always release the database
    # connection -- on Ctrl-C, on a normal return from main(), and when
    # main() fails with another exception (which still propagates).
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the stream; fall through to
        # the cleanup below.
        pass
    finally:
        print("Disconnecting from database... ")
        conn.commit()
        conn.close()
        print("Done")
Here is what the database table looks like:

So now the stream only looks for tweets that contain the keywords “test”, “test2”, or “test3”.