Downloading a comic…. 9% 12:39:32 PM
There is a solution provided in the book, but this is my own way of approaching the task.
# comic saver - saves each post of a comic page
# USAGE: python comic.py
#
# Starting from the front page, the script saves the image of the current
# comic into ./comic, follows the "prev" link, and repeats until there is no
# earlier comic or the page limit is reached.
import logging
import os
import re
import sys
from urllib.parse import urljoin, urlparse

import bs4
import requests

logging.basicConfig(level=logging.DEBUG, format=" %(asctime)s - %(levelname)s - %(message)s")

# Seconds to wait for any single HTTP request before giving up, so that a
# stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Raises requests.RequestException on network errors or a non-2xx status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return bs4.BeautifulSoup(response.text, "html.parser")


def _safe_filename(title):
    """Return *title* with characters that are unsafe in file names replaced."""
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", title).strip(" .")
    return cleaned or "untitled"


def _image_extension(image_url):
    """Return the file extension found in *image_url*, or ".jpg" if it has none."""
    extension = os.path.splitext(urlparse(image_url).path)[1]
    return extension or ".jpg"


def _download_comic(comic_soup, page_url, directory):
    """Save the comic image found in *comic_soup* into *directory*.

    *page_url* is the address the page was loaded from; the image "src" is
    resolved against it. A page without an image is skipped with a warning,
    and any download/write failure is logged so the crawl can continue.
    """
    comic_image = comic_soup.select("#comic > img")
    if not comic_image:
        # Skip instead of asserting: one page without an image should not
        # abort the whole crawl, and asserts vanish under "python -O".
        logging.warning("Image should exist")
        return

    comic_image_src = comic_image[0].get("src")
    if not comic_image_src:
        logging.warning("Image should exist")
        return

    try:
        comic_title = comic_soup.select("#ctitle")[0].get_text()

        # urljoin copes with relative, absolute and protocol-relative "src"
        # values; plain string concatenation only works for the first kind.
        comic_image_url = urljoin(page_url, comic_image_src)
        comic_res = requests.get(comic_image_url, timeout=REQUEST_TIMEOUT)
        comic_res.raise_for_status()

        file_name = _safe_filename(comic_title) + _image_extension(comic_image_url)
        # "with" guarantees the file is closed even if a write fails.
        with open(os.path.join(directory, file_name), "wb") as comic_file:
            logging.info("Comic File created as {}".format(comic_title))
            for chunk in comic_res.iter_content(10000):
                comic_file.write(chunk)
    except Exception as err:
        # Best effort: log and move on to the next comic.
        logging.error("Comic Download Error: " + str(err))


if len(sys.argv) == 1:
    # comic_url = sys.argv[1]
    comic_url = "http://xkcd.com"
    try:
        comic_soup = _fetch_soup(comic_url)
        logging.info("Soup created ")

        # os.mkdir()/os.makedirs() return None, so compute the path first and
        # create the directory separately.
        directory = os.path.abspath("comic")
        os.makedirs(directory, exist_ok=True)

        page_url = comic_url
        limit = 100  # maximum number of pages to visit in one run
        for _ in range(limit):
            _download_comic(comic_soup, page_url, directory)

            prev_button = comic_soup.select("a[rel='prev']")
            prev_button_href = prev_button[0].get("href") if prev_button else None
            # Stop when the page has no usable "prev" link, or when the link
            # ends with "#" (the original script's marker for "no earlier page").
            if not prev_button_href or prev_button_href.endswith("#"):
                break
            logging.info("Prev button obtained")

            page_url = urljoin(page_url, prev_button_href)
            comic_soup = _fetch_soup(page_url)
            logging.info("Page has been changed")
    except Exception as err:
        logging.error(str(err))
else:
    # NOTE(review): this message looks truncated, and this branch runs when
    # arguments ARE supplied - confirm the intended wording and behaviour.
    logging.error("Script requires a ")

# Original outline (all implemented above):
#   - loop over the previous button and make request
#   - save image in html
#   - repeat till no previous button is found
After 2 minutes my Comic directory looked like this:

See you soon!