Python Save Facebook Graph data to MySQL database

In this script I wanted to save public Facebook Pages post data into my local database. See source in github

The main.py script will setup the database structure, retrieve the data from the facebook graph api, then store it into your database.

If you would like only today’s post from the Facebook pages you can use the argument –t without this it will get all public posts available. You can also use -n (number of posts) to limit the amount of post to save.

You will also need to include the page names. Here is an example when you want to save all the public  posts data for a page TacoBell.all_graph_api

Here you only get today’s post:

graph_api

To get multiple pages at once no coma is needed just add all the pages with a space in between.

main.py -p tacobell mcdonalds -t

main.py:


from time import sleep
from time import gmtime
from dateutil import parser
import datetime
from facebook import GraphAPI
import argparse, sys
from urlparse import parse_qs, urlparse
import mysql.connector
from mysql.connector import errorcode
from config import config, access_token

cnx = mysql.connector.connect(**config)
cursor = cnx.cursor()
table_name = 'pages'
table = ("CREATE TABLE `pages` ("
"  `id` int(11) NOT NULL AUTO_INCREMENT,"
"  `page_name` varchar(40) NOT NULL,"
"  `post_id` varchar(40) NOT NULL,"
"  `created_time` datetime NOT NULL,"
"  `message` TEXT NOT NULL,"
"  PRIMARY KEY (`id`)"
") ENGINE=InnoDB")
def create_db_table():
cursor.execute('SHOW DATABASES;')
all_dbs = cursor.fetchall()
if all_dbs.count((config['database'],)) == 0:
print("Creating db")
cursor.execute(
"CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(config['database']))

cursor.execute('USE %s;' % config['database'])
cursor.execute("SHOW TABLES;")
all_tables = cursor.fetchall()
if all_tables.count(('pages',)) == 0:
print("creating table")
cursor.execute(table)
add_message = ("INSERT INTO pages "
"(post_id, page_name, created_time, message) "
"VALUES (%s, %s,%s, %s)")
def insert_post(post_id, page_name, created_time, message):
cursor.execute("SELECT * FROM pages WHERE post_id=%s;", (post_id,))
if cursor.fetchall():
print("already saved this post")
return False
print('saving this post')
cursor.execute(add_message, (post_id, page_name, created_time, message))
cnx.commit()
import calendar
from datetime import datetime, timedelta
def make_it_good(s):
res = ""
for ch in s:
try:
res += ch.decode('utf8')
except Exception as e:
pass
return res
def utc_to_local(utc_dt):
# get integer timestamp to avoid precision lost
timestamp = calendar.timegm(utc_dt.timetuple())
local_dt = datetime.fromtimestamp(timestamp)
assert utc_dt.resolution >= timedelta(microseconds=1)
return local_dt.replace(microsecond=utc_dt.microsecond)
def save_posts(page, num_post=None, only_today=False):
if not num_post:
num_post = 1000000000
graph = GraphAPI(access_token)
arguments = {}
total = 0
num_saved = 0
while 1:
try:
posts = graph.get_connections(page, 'posts', **arguments)
except Exception as e:
try:
sleep(3)
posts = graph.get_connections(page, 'posts', **arguments)
except Exception as e:
print("Retrieved a total of %d posts" % total)
print("But some error happened after that")
print("This is the error")
print(e)
print("Breaking...")
break
next = None
if 'paging' in posts:
next = posts['paging']['next']
posts = posts['data']
total += len(posts)
any = False
for post in posts:
if 'message' in post:
created_time = parser.parse(post['created_time'])
created_time = utc_to_local(created_time)
today = datetime.now()
if only_today:
if today.year == created_time.year and today.month == created_time.month and \
today.day == created_time.day:
any = True
else:
continue
num_saved += 1
insert_post(post['id'], page, created_time, make_it_good(post['message']))
if only_today and not any:
break
if next:
qs = parse_qs(urlparse(next).query)
arguments = {'limit': qs['limit'][0], 'until': qs['until'][0]}
else:
break
print('done')
print('Saved total of %d posts' % num_saved)
create_db_table()
def main():
parser = argparse.ArgumentParser(
description='Saving facebook pages post to a mysql db ')
parser.add_argument("-p", nargs='+', help="Please specify the name of page")
parser.add_argument('-n',
help="Please specify number of posts you want to save to db( if no argument is supplied all "
"posts will be saved, note this might take some time")
parser.add_argument('-t', action='store_true', help="Specify if you want to get only today posts")
args = parser.parse_args()
only_today = args.t
pages = args.p
n = args.n
if not pages:
parser.print_usage()
sys.exit(1)
if n:
n = int(n)
if only_today:
print("Saving today posts")
for page in pages:
print("Saving posts for %s" % page)
save_posts(page, n, only_today)
if (__name__ == '__main__'):
main()

 

Config file:


config={
'user':'root',
'host':127.0.0.1,
'password':'mypass',
'database':'analytics'
}
access_token='token'