Downloading a comic…. 9% 12:39:32 PM

The book provides a solution, but this is my own way of tackling the task.


#comic saver - saves each post of a comic page
#USAGE python 

import logging, requests, bs4, os, sys
logging.basicConfig(level=logging.DEBUG, format=" %(asctime)s - %(levelname)s - %(message)s")
#TODO: get url

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30


def image_file_name(image_url):
    """Return the file-name component of *image_url*.

    Only the path part of the URL is considered, so query strings and
    fragments are ignored. Returns an empty string when the path does not
    end in a file name (e.g. ``https://host/``).
    """
    import posixpath
    from urllib.parse import urlsplit

    return posixpath.basename(urlsplit(image_url).path)


def save_comic_image(comic_soup, page_url, directory, page_number):
    """Download the comic image found in *comic_soup* into *directory*.

    comic_soup  -- parsed page (BeautifulSoup object) to search for the image
    page_url    -- URL the page was fetched from; relative image links are
                   resolved against it
    directory   -- existing directory the image file is written to
    page_number -- zero-based page counter, used only for a fallback file name

    Returns the path of the written file, or None when the page has no
    usable ``#comic > img`` element. Network and file errors propagate to
    the caller.
    """
    from urllib.parse import urljoin

    comic_image = comic_soup.select("#comic > img")
    image_src = comic_image[0].get("src") if comic_image else None
    if not image_src:
        # Some pages carry no plain image; skip them instead of aborting.
        logging.error("Comic Download Error: no image found on %s", page_url)
        return None

    # urljoin copes with relative, absolute and protocol-relative ("//host/x")
    # links, which plain string concatenation does not.
    image_url = urljoin(page_url, image_src)
    comic_res = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    comic_res.raise_for_status()

    # NOTE(review): the original named files "<comic title>.jpg", but the
    # lookup that produced the title is missing from the source. The image's
    # own file name is used instead -- confirm this is acceptable.
    file_name = image_file_name(image_url) or "comic_{}.jpg".format(page_number)
    file_path = os.path.join(directory, file_name)

    # The context manager guarantees the file is closed even if a chunk fails.
    with open(file_path, "wb") as comic_file:
        for chunk in comic_res.iter_content(10000):
            comic_file.write(chunk)
    logging.debug("Comic File created as {}".format(file_name))
    return file_path


def download_comics(comic_url, directory="comic", limit=100):
    """Save the comic at *comic_url* and then follow its "prev" links.

    Stops after *limit* pages, or earlier when a page has no
    ``a[rel='prev']`` link or that link's href ends with "#".
    Returns the number of images saved.
    """
    from urllib.parse import urljoin

    # Always resolve the target directory, whether or not it already exists.
    directory = os.path.abspath(directory)
    os.makedirs(directory, exist_ok=True)

    saved = 0
    page_url = comic_url
    for page_number in range(limit):
        res = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        comic_soup = bs4.BeautifulSoup(res.text, "html.parser")
        logging.debug("Soup created ")

        # A failure on one comic is logged and must not stop the crawl.
        try:
            if save_comic_image(comic_soup, page_url, directory, page_number):
                saved += 1
        except (requests.RequestException, OSError) as err:
            logging.error("Comic Download Error: " + str(err))

        # The current page is saved BEFORE the "prev" link is inspected, so
        # the oldest comic (whose "prev" link is "#") is not skipped.
        prev_button = comic_soup.select("a[rel='prev']")
        prev_button_href = prev_button[0].get("href") if prev_button else None
        if not prev_button_href or prev_button_href.endswith("#"):
            break
        logging.debug("Prev button obtained")

        page_url = urljoin(page_url, prev_button_href)
        logging.debug("Page has been changed")
    return saved


def main(argv):
    """Run the comic saver; *argv* is the full argument list (``sys.argv``).

    Expects the start URL as the first argument after the script name.
    Returns a process exit status: 0 on success, 1 on a usage error or an
    unrecoverable failure.
    """
    if len(argv) < 2:
        logging.error("Script requires a comic url")
        return 1
    try:
        download_comics(argv[1])
    except Exception as err:  # top-level boundary: report and exit non-zero
        logging.error("Comic Download Error: " + str(err))
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

#TODO: loop over the previous button and make request
#TODO: save image in html
#TODO repeat till no previous button is found

After 2 minutes my Comic directory looked like this:

Just pythoning comic books…

See you soon!

%d bloggers like this: