# A complete solution to the breadth-first search, using a table of links as
# described in Chapter 6, is as follows:
import pymysql
#conn = pymysql.connect(host='127.0.0.1', unix_socket='/tmp/mysql.sock', user='root', passwd=None, db='mysql')
# Open the MySQL connection over TCP and create the single cursor that the
# query helpers in this file share.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file kept out of version control.
conn = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='LlQ54951',
    db='mysql',
    charset='utf8',
)
cur = conn.cursor()
# Switch from the initial 'mysql' schema to the one the queries below target.
cur.execute('USE wikipedia')
def getUrl(pageId):
    """Return the URL stored in the `pages` table for the given page ID.

    Args:
        pageId: The page's ID; it is coerced with int() before being bound
            to the query.

    Returns:
        The first column (`url`) of the first matching row.

    Raises:
        TypeError: If no row matches, because fetchone() then yields None
            and the subscript fails.
    """
    # Query parameters must be a sequence: note the trailing comma that makes
    # this a one-element tuple rather than a parenthesised int.
    cur.execute('SELECT url FROM pages WHERE id=%s', (int(pageId),))
    return cur.fetchone()[0]
def getLinks(fromPageId):
    """Return the IDs of every page that the given page links to.

    Args:
        fromPageId: ID of the page whose outgoing links are wanted; it is
            coerced with int() before being bound to the query.

    Returns:
        A list holding the first column (`toPageId`) of each matching row of
        the `links` table, or an empty list when there are none.
    """
    # Query parameters must be a sequence: note the trailing comma that makes
    # this a one-element tuple rather than a parenthesised int.
    cur.execute(
        'SELECT toPageId FROM links WHERE fromPageId = %s',
        (int(fromPageId),),
    )
    # fetchall() yields an empty sequence when nothing matched, so no separate
    # row-count check is needed to produce [].
    return [row[0] for row in cur.fetchall()]
def searchBreadth(targetPageId, pathNestedList=None):
    """Breadth-first search for a shortest link path ending at targetPageId.

    The search expands one level at a time: every path in the current
    frontier is extended by each link leaving its last page, and the first
    extension that reaches the target is returned.

    Args:
        targetPageId: ID of the page to reach.
        pathNestedList: Frontier to start from, as a list of paths where each
            path is a non-empty list of page IDs. Defaults to [[1]], i.e. a
            single path starting at page ID 1.

    Returns:
        The list of page IDs from the start of the winning path through
        targetPageId, or an empty list if every reachable page has been
        explored without finding the target.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if pathNestedList is None:
        pathNestedList = [[1]]

    # Pages that already have a path in some frontier. The first path found
    # to a page is never longer than a later one, so later duplicates are
    # dropped; this also guarantees termination on cyclic link graphs.
    visited = {pathList[-1] for pathList in pathNestedList}

    # Iterating level by level (instead of recursing once per level) keeps
    # the search independent of the interpreter's recursion limit.
    while pathNestedList:
        newPathNestedList = []
        for pathList in pathNestedList:
            for link in getLinks(pathList[-1]):
                print(link)  # progress trace: every link ID examined
                # Compare with the target before pruning so that a target
                # equal to an already-seen page is still reported.
                if link == targetPageId:
                    return pathList + [link]
                if link not in visited:
                    visited.add(link)
                    newPathNestedList.append(pathList + [link])
        pathNestedList = newPathNestedList

    # Frontier exhausted: the target cannot be reached from the start paths.
    return []
# Driver: search for a link path to the target page, then print the path as
# page IDs followed by the URL of each page on it.
nodes = getLinks(1)  # links leaving page ID 1 (Kevin Bacon); unused in the lines shown here
targetPageId = 28624  # Eric Idle (page ID 28624)
pageIdList = searchBreadth(targetPageId)
print(pageIdList)
# Resolve each page ID on the path to its URL, one per line.
for pageId in pageIdList:
    url = getUrl(pageId)
    print(url)