#! /usr/bin/env python
"""Convert a small '@tag {group}' markup language, read from stdin, to HTML.

The module has two layers:

* ``lexer`` cuts the raw text into tokens ('@', '{', '}', words, whole
  code blocks).
* ``converter`` consumes those tokens recursively and renders each tag
  through a handler method looked up in a ``DCLMatcher`` table.

NOTE(review): the HTML element names and character entities in the string
literals below were restored by hand from the handler names (the copy of
this file under review had its markup stripped).  Confirm them against the
expected output before relying on the exact tags.
"""

import re
import string
import sys

from DCLMatcher import DCLMatcher


class lexer:
    """Tokenizer for the markup source held in ``self.data``."""

    # Tags that may start a line in the "TAG: rest of line" form.
    linetags = ["TITLE:", "HEADER:", "SUBHEADER:", "INCLUDE:", "CODE:"]

    # Line that terminates a code block, and the in-code include directive.
    _code_end = re.compile(r"(?im)^Text:$")
    _code_include = re.compile(r"(?i)^Include: ")

    def __init__(self, data):
        """Store the source text *data* and reset the lexer state."""
        self.data = data
        # Tokens of the current line, stored reversed so pop() yields them
        # left to right.
        self.elementlist = []
        self.inside_code = 0
        self.about_to_enter_code = 0
        # Number of pending tokens that belong to a "TAG: ..." line and must
        # be returned verbatim.
        self.linebased = 0

    def at_end(self):
        """Return true when no token, code block or non-blank text remains."""
        return not (self.elementlist or self.inside_code or self.data.strip())

    def _next_line(self):
        """Remove and return the next line holding a token; None at the end.

        Blank and whitespace-only lines are skipped.  ``self.data`` is
        always advanced, so the final line (with or without a trailing
        newline) is consumed exactly once.
        """
        while self.data:
            line, _, self.data = self.data.partition("\n")
            if line.strip():
                return line
        return None

    def get_token(self):
        """Return the next token, or "" once the input is exhausted."""
        if self.inside_code:
            return self.get_code_token()

        if self.elementlist:
            tmp = self.elementlist.pop()
            if self.about_to_enter_code:
                # tmp is the CODE tag itself; the next token is the block.
                self.about_to_enter_code = 0
                self.inside_code = 1
                # The tag is one of the line-based tokens: count it, so the
                # counter does not leak into the text after the code block.
                if self.linebased:
                    self.linebased -= 1
                return tmp
            if self.linebased:
                self.linebased -= 1
                return tmp
            if len(tmp) == 1 and tmp in "@{}":
                return tmp
            if tmp[0] == "@":
                # "@name": hand out "@" now and "name" on the next call.
                self.elementlist.append(tmp[1:])
                return tmp[0]
            return self.quote(tmp)

        first = self._next_line()
        if first is None:
            return ""

        tmp = first.split()
        if tmp[0].upper() in self.linetags:
            # "TAG: rest" becomes the tokens '@', 'TAG', 'rest'.
            self.elementlist = first.split(None, 1)
            self.elementlist[0] = self.elementlist[0][:-1]  # drop the ':'
            self.elementlist.reverse()
            self.linebased = len(self.elementlist)
            if self.elementlist[-1].upper() == "CODE":
                self.about_to_enter_code = 1
            if self.elementlist[-1].upper() != "INCLUDE":
                # An INCLUDE argument is a file name and stays unescaped.
                self.elementlist[0] = self.quote(self.elementlist[0])
            return "@"

        self.elementlist = tmp
        self.elementlist.reverse()
        return self.get_token()

    def get_code_token(self):
        """Return the pending code block as one HTML-escaped token.

        The block runs up to a line consisting of ``Text:`` (any case); if
        there is no such line it runs to the end of the input.  Inside the
        block a single leading '@' is removed from a line, and a line
        starting with ``Include: `` is replaced by the contents of the
        named file, minus one trailing newline.

        Raises IOError/OSError when an included file cannot be read.
        """
        match = self._code_end.search(self.data)
        if match is None:
            body, self.data = self.data, ""
        else:
            body = self.data[:match.start()]
            # +1 also skips the newline that ends the "Text:" line.
            self.data = self.data[match.end() + 1:]

        rawlines = body.split("\n")
        for i, line in enumerate(rawlines):
            if not line:
                continue
            if line[0] == "@":
                rawlines[i] = line[1:]
            elif self._code_include.match(line):
                with open(line[len("Include: "):]) as included:
                    text = included.read()
                if text.endswith("\n"):
                    text = text[:-1]
                rawlines[i] = text

        self.inside_code = 0
        return self.quote("\n".join(rawlines))

    def quote(self, element):
        """Return *element* with '&', '<' and '>' replaced by HTML entities."""
        # '&' must be handled first so the entities added for '<' and '>'
        # are not escaped a second time.
        return (element.replace("&", "&amp;")
                       .replace("<", "&lt;")
                       .replace(">", "&gt;"))


class delimiter:
    """Marker object for a structural symbol, distinct from any string."""

    def __init__(self, symbol):
        self.symbol = symbol


# Returned by converter.get_logical() for an unescaped '}'.
EndCurly = delimiter("}")


class converter(lexer):
    """Recursive renderer that turns the token stream into HTML text."""

    def __init__(self, data):
        """Build the tag-name to handler table and start lexing *data*."""
        self.tags = DCLMatcher({
            "TITLE": self.__title,
            "HEADER": self.__header,
            "SUBHEADER": self.__subheader,
            "ITALIC": self.__italic,
            "CODE": self.__code,
            "ORDEREDLIST": self.__ol,
            "UNORDEREDLIST": self.__ul,
            "TAGGEDLIST": self.__tl,
            "TABLE": self.__table,
            "LINK": self.__link,
            "IMAGE": self.__image,
            "PARAGRAPH": self.__paragraph,
            "INCLUDE": self.__include,
            "JOIN": self.__join,
            "{": self.__braceleft,
            "}": self.__braceright,
            "@": self.__at,
        })
        lexer.__init__(self, data)

    def close(self):
        """Parse the whole input and return the generated document.

        Raises SyntaxError for a '}' that closes nothing.
        """
        document = []
        while True:
            logical = self.get_logical()
            if logical is EndCurly:
                raise SyntaxError("unbalanced } at top level")
            # An empty element only means "done" when nothing is left to
            # read; an empty group "{}" must not truncate the document.
            if logical == "" and self.at_end():
                break
            document.append(logical)
        return " ".join(document)

    def get_logical(self):
        """Return the next logical element.

        That is the rendered text of a tag, the joined contents of a
        '{...}' group, the ``EndCurly`` marker for '}', or a plain token.
        """
        token = self.get_token()
        if token == '@':
            return self.alpha()
        elif token == '{':
            return self.grouper()
        elif token == '}':
            return EndCurly
        else:
            return token

    def alpha(self):
        """Look up the tag that follows '@' and return its rendered output.

        An unknown tag prints a message to stdout and exits with status 1.
        """
        command = self.get_token()
        try:
            commandcode = self.tags[command]
        except Exception:
            # The lookup's failure type is defined by DCLMatcher; Exception
            # keeps KeyboardInterrupt and SystemExit out of this handler.
            sys.stdout.write("Unknown command: %s\n" % command)
            sys.exit(1)
        return commandcode()

    def _members(self):
        """Yield logical elements up to, and consuming, the closing '}'.

        Raises SyntaxError when the input ends before the brace is found.
        """
        while True:
            logical = self.get_logical()
            if logical is EndCurly:
                return
            if logical == "" and self.at_end():
                raise SyntaxError("unexpected end of input: } expected")
            yield logical

    def grouper(self):
        """Return the elements of a '{...}' group joined by single spaces."""
        return " ".join(self._members())

    def __title(self):
        title = self.get_logical()
        return "<title>%s</title>\n<h1>%s</h1>\n" % (title, title)

    def __link(self):
        url = self.get_logical()
        text = self.get_logical()
        return '<a href="%s">%s</a>' % (url, text)

    def __image(self):
        url = self.get_logical()
        text = self.get_logical()
        return '<img src="%s" alt="%s">' % (url, text)

    def __paragraph(self):
        return "\n<p>\n%s\n</p>\n" % self.get_logical()

    def __header(self):
        return "\n<h2>%s</h2>\n" % self.get_logical()

    def __subheader(self):
        return "\n<h3>%s</h3>\n" % self.get_logical()

    def __code(self):
        return "\n<pre>\n%s</pre>\n" % self.get_logical()

    def __italic(self):
        return "<i>%s</i>" % self.get_logical()

    # Variable element number tags, dl, ul, ol.

    def isBeginCurly(self):
        """Consume one token and raise SyntaxError unless it is '{'."""
        token = self.get_token()
        if token != '{':
            raise SyntaxError("{ expected, got %r" % (token,))

    def __tl(self):
        """Render '{term definition term definition ...}' as a <dl>."""
        self.isBeginCurly()
        dl = ['\n<dl>']
        members = self._members()
        for item in members:
            definition = next(members, None)
            if definition is None:
                raise SyntaxError(
                    "tagged list item %r has no definition" % (item,))
            dl.append("<dt>%s</dt>" % item)
            dl.append("<dd>%s</dd>" % definition)
        dl.append('</dl>\n')
        return "\n".join(dl)

    def __ul(self):
        """Render '{item item ...}' as a <ul>."""
        self.isBeginCurly()
        ul = ['\n<ul>']
        ul.extend("<li>%s</li>" % item for item in self._members())
        ul.append('</ul>\n')
        return "\n".join(ul)

    def __ol(self):
        """Render '{item item ...}' as an <ol>."""
        self.isBeginCurly()
        ol = ['\n<ol>']
        ol.extend("<li>%s</li>" % item for item in self._members())
        ol.append('</ol>\n')
        return "\n".join(ol)

    def __table(self):
        """Render '{ {cell ...} {cell ...} ... }' as a <table>.

        Raises SyntaxError when a row is followed by anything other than
        '{' (next row) or '}' (end of table).
        """
        self.isBeginCurly()  # opens the table
        table = ["\n<table>"]
        self.isBeginCurly()  # opens the first row
        while True:
            table.append("<tr>")
            table.extend("<td>%s</td>" % cell for cell in self._members())
            table.append("</tr>")
            tmp = self.get_token()
            if tmp == '}':
                break
            if tmp != '{':
                raise SyntaxError(
                    "{ or } expected after table row, got %r" % (tmp,))
        table.append("</table>\n")
        return "\n".join(table)

    def __braceleft(self):
        return "{"

    def __braceright(self):
        return "}"

    def __at(self):
        return "@"

    def __join(self):
        """Concatenate the next two elements without a separating space."""
        return self.get_logical() + self.get_logical()

    def __include(self):
        """Return the raw contents of the file named by the next element."""
        with open(self.get_logical()) as included:
            return included.read()


if __name__ == '__main__':
    html = converter(sys.stdin.read(-1))
    sys.stdout.write(html.close())