#! /usr/bin/env python

# This software carries the following license:
#
# Modified BSD license
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# 
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Steinar Knutsen

import markup, urllib, urlparse, sys, string

class comicpage(markup.markupparser):
	"""Parser that re-emits a page with its embedded resources made local.

	Images, frame sources and table backgrounds are fetched and stored under
	a flat file name relative to the working directory, and the referring
	attribute is rewritten to that name.  Anchor targets are rewritten to
	absolute URLs, base tags are dropped and comments are copied through.

	Output fragments are appended to self.document, which is assumed to be
	the output list maintained by markup.markupparser (TODO confirm against
	the markup module).
	"""

	def __init__(self, url):
		"""Set up the parser for the page located at *url*.

		url -- address of the page being parsed; relative references found
		       in the page are resolved against it.
		"""
		markup.markupparser.__init__(self)
		# Presumably these two settings make entity and character
		# references pass through untranslated -- verify against markup.
		self.entitydefs = {}
		self.handle_charref = self.unknown_charref
		self.url = url

	def _local_name(self, value):
		"""Return a flat local file name for the reference *value*.

		The last path segment is URL-quoted and every '%' is replaced by
		'_'.  When that segment is empty (the reference ends in '/'), the
		whole reference is used instead, with '/' replaced by '_' as well.
		"""
		filename = urllib.quote(value.split('/')[-1]).replace('%', '_')
		if not filename:
			quoted = urllib.quote(value)
			filename = quoted.replace('%', '_').replace('/', '_')
		return filename

	def _emit_localised(self, tag, attributes, resource, fetch):
		"""Emit an opening *tag*, storing the *resource* attribute locally.

		tag        -- element name to write, without angle brackets.
		attributes -- sequence of (name, value) pairs for the tag.
		resource   -- name of the attribute that refers to a remote file.
		fetch      -- callable taking (url, filename) that stores the
		              resource at url in the local file filename.

		Every other attribute is copied through unchanged.
		"""
		self.document.append('<' + tag)
		for name, value in attributes:
			if name != resource:
				self.document.append(' %s="%s"' % (name, value))
				continue
			filename = self._local_name(value)
			url = urlparse.urljoin(self.url, value)
			try:
				fetch(url, filename)
			except Exception:
				# Best effort: a resource that cannot be stored is left
				# out of the tag.  Only Exception is caught so that
				# KeyboardInterrupt and SystemExit still stop the run.
				continue
			self.document.append(' %s="%s"' % (resource, filename))
		self.document.append('>')

	def do_img(self, attributes):
		"""Download the image and point src at the local copy."""
		self._emit_localised('img', attributes, 'src', urllib.urlretrieve)

	def do_frame(self, attributes):
		"""Localise the framed page recursively and point src at the copy."""
		# main is looked up at call time; it runs a fresh comicpage over
		# the framed document and writes the result to the local file.
		self._emit_localised('frame', attributes, 'src', main)

	def start_table(self, attributes):
		"""Download the table background and point at the local copy."""
		self._emit_localised('table', attributes, 'background',
			urllib.urlretrieve)

	def end_table(self):
		"""Emit the closing table tag."""
		self.document.append('</table>')

	def do_base(self, attributes):
		"""Strip away base tags by emitting nothing for them."""
		pass

	def start_a(self, attributes):
		"""Emit the anchor with its href resolved against the page URL."""
		self.document.append('<a')
		for name, value in attributes:
			if name == "href":
				value = urlparse.urljoin(self.url, value)
			self.document.append(' %s="%s"' % (name, value))
		self.document.append('>')

	def end_a(self):
		"""Emit the closing anchor tag."""
		self.document.append('</a>')

	def handle_comment(self, comment):
		"""Copy the comment through to the output unchanged."""
		self.document.extend(['<!--', comment, '-->'])

def main(url, filename):
	"""Fetch the page at *url*, localise it and write it to *filename*.

	url      -- address of the page to fetch.
	filename -- path of the file that receives the rewritten page.

	The page is fed line by line to a comicpage parser, which stores the
	referenced resources as a side effect.  Errors from the network, the
	parser or the file system propagate to the caller.
	"""
	comic = comicpage(url)
	response = urllib.urlopen(url)
	try:
		page = response.readlines()
	finally:
		# Release the connection before parsing starts, since parsing
		# triggers further downloads.
		response.close()
	for line in page:
		comic.feed(line)
	text = comic.close()
	output = open(filename, 'w')
	try:
		output.write(''.join(text))
	finally:
		output.close()

# Run only when executed as a script, so that importing the module does not
# start a download.
if __name__ == '__main__':
	if len(sys.argv) < 3:
		# Report the expected arguments instead of failing with IndexError.
		sys.stderr.write('usage: %s URL FILENAME\n' % sys.argv[0])
		sys.exit(2)
	main(sys.argv[1], sys.argv[2])
