Downloading a comic…. 9% 12:39:32 PM
There is a solution provided in the book, but this is my own way of approaching the task.
#comic saver - saves each post of a comic page
#USAGE python comic.py
import logging, requests, bs4, os, sys
from urllib.parse import urljoin, urlparse

logging.basicConfig(level=logging.DEBUG, format=" %(asctime)s - %(levelname)s - %(message)s")

# Characters that cannot appear in a file name on common platforms.
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'
# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 30


def _fetch_page(url):
    """Download *url* and return ``(final_url, soup)``.

    Raises ``requests.RequestException`` when the request fails or the
    server answers with an error status.
    """
    res = requests.get(url, timeout=_REQUEST_TIMEOUT)
    res.raise_for_status()
    return res.url, bs4.BeautifulSoup(res.text, "html.parser")


def _save_comic(comic_soup, page_url, directory):
    """Save the comic image found in *comic_soup* into *directory*.

    A page without a ``#comic > img`` element is skipped with a warning,
    and a failed download is logged, so one bad page does not stop the run.
    """
    comic_image = comic_soup.select("#comic > img")
    if not comic_image:
        logging.warning("No comic image found on %s", page_url)
        return
    comic_image_src = comic_image[0].get("src")
    if not comic_image_src:
        logging.warning("Comic image on %s has no src attribute", page_url)
        return

    # The src may be relative, protocol-relative ("//host/...") or absolute;
    # urljoin resolves all three against the page the image was found on.
    comic_image_url = urljoin(page_url, comic_image_src)
    image_name = os.path.basename(urlparse(comic_image_url).path)
    image_stem, image_ext = os.path.splitext(image_name)

    # Fall back to the image's own name when the page has no title element.
    comic_title = comic_soup.select("#ctitle")
    comic_title = comic_title[0].get_text().strip() if comic_title else image_stem
    safe_title = "".join(
        "_" if char in _UNSAFE_FILENAME_CHARS else char for char in comic_title
    )
    file_path = os.path.join(directory, safe_title + (image_ext or ".jpg"))

    try:
        comic_res = requests.get(comic_image_url, timeout=_REQUEST_TIMEOUT)
        comic_res.raise_for_status()
        # "with" closes the file even when a write fails part-way through.
        with open(file_path, "wb") as comic_file:
            for chunk in comic_res.iter_content(10000):
                comic_file.write(chunk)
        logging.info("Comic File created as {}".format(comic_title))
    except (requests.RequestException, OSError) as err:
        logging.error("Comic Download Error: " + str(err))


#TODO: get url
if len(sys.argv) == 1:
    #comic_url = sys.argv[1]
    comic_url = "http://xkcd.com"
    limit = 100  # maximum number of pages visited in one run
    directory = os.path.abspath("comic")
    try:
        # os.mkdir/os.makedirs return None, so keep the path separately.
        os.makedirs(directory, exist_ok=True)
        page_url, comic_soup = _fetch_page(comic_url)
        logging.info("Soup created ")
        for _ in range(limit):
            _save_comic(comic_soup, page_url, directory)
            prev_button = comic_soup.select("a[rel='prev']")
            prev_button_href = prev_button[0].get("href") if prev_button else None
            # A missing link or an href ending in "#" means there is no
            # earlier page, so stop instead of refetching the same page.
            if not prev_button_href or prev_button_href.endswith("#"):
                break
            logging.info("Prev button obtained")
            page_url, comic_soup = _fetch_page(urljoin(page_url, prev_button_href))
            logging.info("Page has been changed")
    except Exception as err:
        # Top-level boundary: report the failure rather than a raw traceback.
        logging.error(str(err))
else:
    logging.error("Script takes no arguments. USAGE: python comic.py")
#TODO: loop over the previous button and make request
#TODO: save image in html
#TODO repeat till no previous button is found
After 2 minutes my Comic directory looked like this:

Just pythoning comic books…
See you soon!