Coding

"Python Learning: Automate the Boring Stuff with Python" | Chapter 11: My Solution to the Comic Downloader

Downloading a comic…. 9% 12:39:32 PM


There is a solution provided in the book, but this is my own approach to the task.

 


# comic saver - saves each post of a comic page
# USAGE: python comic.py <url>

import logging, requests, bs4, os, sys
from urllib.parse import urljoin

logging.basicConfig(level=logging.DEBUG, format=" %(asctime)s - %(levelname)s - %(message)s")

# Use the URL from the command line when given; default to xkcd otherwise.
# (The original inverted this check, so passing a URL triggered the error path.)
comic_url = sys.argv[1] if len(sys.argv) > 1 else "http://xkcd.com"

try:
    res = requests.get(comic_url)
    res.raise_for_status()
    comic_soup = bs4.BeautifulSoup(res.text, "html.parser")
    logging.info("Soup created ")

    prev_button = comic_soup.select("a[rel='prev']")[0]
    prev_button_href = prev_button.get("href")
    logging.info("Prev button obtained")

    iterator = 0
    limit = 100  # safety cap so we never walk the entire archive

    # Create the output directory once and keep its absolute path.
    # NOTE: os.mkdir() returns None, so the original `dir = os.mkdir(...)`
    # made the os.path.join() below crash on a fresh run.
    save_dir = os.path.abspath("comic")
    os.makedirs(save_dir, exist_ok=True)

    # On xkcd the oldest page's "prev" link ends in "#": that is the stop
    # condition. Compare the href string, not the Tag object.
    while not prev_button_href.endswith("#") and iterator < limit:
        comic_image = comic_soup.select("#comic > img")
        if comic_image:
            comic_image_url = comic_image[0].get("src")
            comic_title = comic_soup.select("#ctitle")[0].get_text()
            # Strip characters that are not legal in file names.
            safe_title = "".join(c for c in comic_title if c not in '\\/:*?"<>|')
            try:
                # The img src is scheme-relative ("//imgs.xkcd.com/...");
                # urljoin resolves it against the page URL, whereas naive
                # concatenation produced a broken URL.
                comic_res = requests.get(urljoin(comic_url, comic_image_url))
                comic_res.raise_for_status()
                # Context manager guarantees the file is closed even if a
                # chunk write fails mid-download.
                with open(os.path.join(save_dir, safe_title + ".jpg"), "wb") as comic_file:
                    logging.info("Comic File created as {}".format(comic_title))
                    for chunk in comic_res.iter_content(10000):
                        comic_file.write(chunk)
            except Exception as err:
                logging.error("Comic Download Error: " + str(err))
        else:
            # Was an assert in the original; asserts vanish under -O and
            # should not be used for runtime validation.
            logging.error("Image should exist")

        # Walk to the previous page.
        res = requests.get(urljoin(comic_url, prev_button_href))
        comic_soup = bs4.BeautifulSoup(res.text, "html.parser")
        prev_button = comic_soup.select("a[rel='prev']")
        if not prev_button:
            break  # no "prev" link at all -> stop instead of looping on a stale href
        prev_button_href = prev_button[0].get("href")
        logging.info("Page has been changed")
        iterator += 1
except Exception as err:
    logging.error(str(err))




After 2 minutes my Comic directory looked like this:

Just pythoning comic books…

See you soon!

error: Content is protected !!