#! /usr/bin/python

"""Quick-quick implementation of WikiWikiWeb in Python
"""

# PikiPiki copyright (C) 1999, 2000 Martin Pool <mbp@humbug.org.au>
# MornMorn fork by Steinar Knutsen, 2002, based on PikiPiki version 1.62

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.   See the GNU
# General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA

# TODO:
#  Remove all inline HTML
#  Sanitized HTML generator
#  HTML-generator generating better HTML

__version__ = '$Revision: 0.6 $'[11:-2];


import cgi, sys, string, os, re, errno, time, stat, md5, binascii, exceptions,\
   urllib
from cgi import log
from os import path, environ
from socket import gethostbyaddr
from time import localtime, strftime
from cStringIO import StringIO
from mornconfig import *

# From here on everything written to sys.stderr is appended to this file.
# NOTE(review): the path is hard-coded; if the file cannot be opened the
# script aborts here, before emit_header() has produced any response.
sys.stderr = open('/tmp/morn_wiki_err', 'a')

class PageChanged(Exception):
   """Raised by Page.save_text when the stored page no longer matches the
   checksum the editor was opened with.

   Attributes:
      new_raw_page -- raw text of the page as currently stored
      new_checksum -- checksum of that text
   """
   def __init__(self, new_raw_page, new_checksum):
      # Initialise the base class so the exception carries a readable
      # message when it shows up in a traceback or log.
      Exception.__init__(self, "page changed since it was opened for editing")
      self.new_raw_page = new_raw_page
      self.new_checksum = new_checksum

def emit_header():
   """Write the CGI response header (content type plus the blank line that
   ends the header section) to standard output."""
   header_lines = ("Content-type: text/html", "", "")
   sys.stdout.write("\n".join(header_lines))


# Regular expression defining a WikiWord (this definition is also
# assumed in other places: the scanner pattern in PageFormatter.print_html
# and the single-word pattern in _macro_WordIndex repeat the same
# character classes).
word_re_str = r"([A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ][a-zßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ0-9]+){2,}"
# Matches only when the whole string is a WikiWord.
word_anchored_re = re.compile('^' + word_re_str + '$')
# Pattern for "<command>=<argument>" strings.
# NOTE(review): not referenced anywhere else in the visible source.
command_re_str = "(search|edit|fullsearch|titlesearch)\=(.*)"

# Editlog -----------------------------------------------------------

# Functions to keep track of when people have changed pages, so we can
# do the recent changes page and so on.
# The editlog is stored with one record per line, as tab-separated
# words: page_name, host, time

# TODO: Check values written in are reasonable

def editlog_add(page_name, host):
   """Append one record to the edit log.

   A record is a single line of three tab-separated fields: page_name,
   host and the current time (seconds since the epoch, as written by
   repr()).

   page_name -- name of the page that was edited
   host      -- address of the client that made the edit

   Tabs and line breaks inside a field are replaced by spaces, because
   they would otherwise break the one-record-per-line format that
   _macro_RecentChanges splits on.
   """
   fields = []
   for value in (page_name, host, repr(time.time())):
      for separator in ('\t', '\r', '\n'):
         value = value.replace(separator, ' ')
      fields.append(value)

   # Append mode always writes at the end of the file; no seek is needed.
   editlog = open(editlog_name, 'a')
   try:
      editlog.write('\t'.join(fields) + '\n')
   finally:
      editlog.close()


def editlog_raw_lines():
   """Return the edit log as a list of raw lines, oldest first.

   Each line still carries its trailing newline.  When the log file does
   not exist yet (nothing has been logged so far) an empty list is
   returned; any other I/O error propagates to the caller.
   """
   try:
      editlog = open(editlog_name, 'r')
   except IOError:
      if sys.exc_info()[1].errno == errno.ENOENT:
         return []
      raise
   try:
      return editlog.readlines()
   finally:
      editlog.close()


   


# Formatting stuff --------------------------------------------------


def get_scriptname():
   """Return the CGI SCRIPT_NAME of this request, or '' when it is unset."""
   try:
      return environ['SCRIPT_NAME']
   except KeyError:
      return ''


def send_title(text, link=None, msg=None):
   """Write the document head and the top of the body to standard output.

   text -- plain-text title.  It is HTML-escaped here because several
           callers build it from request data (search terms, page names
           taken from form fields).
   link -- optional URL; when given, the heading becomes a link to it.
   msg  -- optional HTML fragment shown in a paragraph under the heading,
           followed by a horizontal rule.  It is written verbatim, so
           callers must escape any untrusted text in it themselves.

   The stylesheet link is emitted only when css_url is set.
   """
   out = sys.stdout.write
   safe_text = quote_html(text)

   out("<head><title>%s</title>\n" % safe_text)
   if css_url:
      out('<link rel="stylesheet" type="text/css" href="%s">\n'
         % css_url)
   out("</head>")

   out('<body><h1>')
   if link:
      out('<a href="%s">%s</a>' % (link, safe_text))
   else:
      out(safe_text)
   out('</h1>\n')

   if msg:
      out("<p>\n")
      out(msg)
      out("</p>\n")
      out('\n<hr>\n')



def link_tag(params, text=None, ss_class=None):
   """Return an <a> element pointing back at this script.

   params   -- query string placed after '?' in the link target
   text     -- link text; defaults to params
   ss_class -- optional stylesheet class for the element

   Neither params nor text is escaped here.
   """
   label = text
   if label is None:
      label = params
   class_attribute = ''
   if ss_class:
      class_attribute = 'class="%s" ' % ss_class
   target = '%s?%s' % (get_scriptname(), params)
   return '<a %s href="%s">%s</a>' % (class_attribute, target, label)

def quote_html(raw_text):
   """Return raw_text with the HTML special characters replaced by entities.

   '&', '<', '>' and '"' are escaped, so the result is safe both as
   element content and inside a double-quoted attribute value.
   """
   quoted = raw_text
   # '&' must come first, otherwise the ampersands introduced by the
   # other replacements would be escaped a second time.
   for char, entity in (('&', '&amp;'),
                        ('<', '&lt;'),
                        ('>', '&gt;'),
                        ('"', '&quot;')):
      quoted = quoted.replace(char, entity)
   return quoted

# Search ---------------------------------------------------

def do_fullsearch(needle):
   """Handle the 'fullsearch' command: search the text of every page.

   needle -- search expression from the request.  It is used as a
             case-insensitive regular expression; when it is not a valid
             expression it is searched for as literal text instead of
             aborting the request.

   Writes a complete result page: the pages containing at least one
   match, ordered by descending match count, then the search statistics
   and the footer.
   """
   send_title('Full text search for "%s"' % (needle))

   try:
      needle_re = re.compile(needle, re.IGNORECASE)
   except re.error:
      needle_re = re.compile(re.escape(needle), re.IGNORECASE)

   hits = []
   all_pages = page_list()
   for page_name in all_pages:
      body = Page(page_name).get_raw_body()
      count = len(needle_re.findall(body))
      if count:
         hits.append((count, page_name))

   # The default comparison for tuples compares elements in order,
   # so this sorts by number of hits
   hits.sort()
   hits.reverse()

   out = sys.stdout.write
   out("<UL>\n")
   for (count, page_name) in hits:
      out('<LI>' + Page(page_name).link_to())
      out(' . . . . ' + repr(count))
      if count != 1:
         out(' matches\n')
      else:
         out(' match\n')
   out("</UL>\n")

   print_search_stats(len(hits), len(all_pages))
   print_footer("", 0)


def do_titlesearch(needle):
   """Handle the 'titlesearch' command: search the page names.

   needle -- search expression from the request.  It is used as a
             case-insensitive regular expression; when it is not a valid
             expression it is searched for as literal text instead of
             aborting the request.

   Writes a complete result page: one link per matching page, then the
   search statistics and the footer.
   """
   send_title("Title search for \"" + needle + '"')

   try:
      needle_re = re.compile(needle, re.IGNORECASE)
   except re.error:
      needle_re = re.compile(re.escape(needle), re.IGNORECASE)

   all_pages = page_list()
   hits = [name for name in all_pages if needle_re.search(name)]

   sys.stdout.write("<UL>\n")
   for filename in hits:
      sys.stdout.write('<LI>%s\n' % Page(filename).link_to())
   sys.stdout.write("</UL>\n")

   print_search_stats(len(hits), len(all_pages))
   print_footer("", 0)


def print_search_stats(hits, searched):
   """Write the "N hits out of M pages searched." summary paragraph.

   hits     -- number of pages that matched
   searched -- number of pages examined
   """
   summary = ("<p>%d hits " % hits) + (" out of %d pages searched." % searched)
   sys.stdout.write(summary)


def do_edit(pagename):
   """Handle the 'edit' command: send the editor form for pagename."""
   page = Page(pagename)
   page.send_editor()

def do_preview(dummypagename):
   """Handle the 'preview' command: render the submitted text and show the
   editor form again underneath it.

   dummypagename -- value of the 'preview' field; not used.  The page
                    name, text and checksum are read from the 'savepage',
                    'savetext' and 'checksum' form fields.

   Nothing is stored; the checksum is passed through unchanged so a later
   save is still checked against the version the edit started from.
   """
   def attr_quote(value):
      # Form values go back into double-quoted attributes, so the quote
      # character has to be escaped as well as the markup characters.
      return quote_html(value).replace('"', '&quot;')

   out = sys.stdout.write
   pagename = form["savepage"].value
   send_title('Preview ' + urllib.unquote(pagename), None, None)
   # An empty textarea is not present in the form at all (blank values
   # are dropped), so fall back to an empty body.
   raw_body = form.getvalue('savetext', '')
   PageFormatter(raw_body).print_html()

   out('<hr><form method="post" action="%s">' % (get_scriptname()))
   out('<input type=hidden name="savepage" value="%s">' %
      attr_quote(pagename))
   md5hash = form['checksum'].value
   out('<textarea wrap="virtual" name="savetext" rows="24"\n'
       '          cols="80">%s</textarea>' % quote_html(raw_body))
   out('<input type=hidden name="checksum" value="%s">' %
      attr_quote(md5hash))
   out("<br><input type=checkbox name=silentedit> ")
   out(Page('SilentEdit').link_to())
   out('<br><input type=submit value="Save">\n'
       '      <input type=submit name=preview value="Preview">\n'
       '      <input type=reset value="Reset">\n'
       '   ')
   out("<p>" + Page('EditingTips').link_to())
   print_footer(pagename, 0)

def do_savepage(pagename):
   """Handle the 'savepage' command: store the submitted text.

   pagename -- URL-quoted page name from the 'savepage' form field.  The
               text, checksum and the optional 'silentedit' flag are
               read from the form.

   When the stored page no longer matches the submitted checksum, the
   editor is shown again with the user's text quoted in a message;
   otherwise the page is displayed with a confirmation.
   """
   pg = Page(urllib.unquote(pagename))
   # An empty textarea is not present in the form at all (blank values
   # are dropped), so fall back to an empty text.
   new_text = form.getvalue('savetext', '')
   try:
      checksum = binascii.unhexlify(form.getvalue('checksum', ''))
   except (TypeError, binascii.Error):
      # A malformed checksum can never match the stored page; let
      # save_text report it as a changed page.
      checksum = ''

   try:
      if form.has_key("silentedit"):
         pg.save_text(new_text, checksum, 1)
      else:
         pg.save_text(new_text, checksum)
   except PageChanged:
      msg = "This page has changed while you were working." \
         " Please merge your changes with the changes with the current" \
         " version. Your version is repeated below, the current version" \
         " is shown in the editor area.\n<p><pre>\n%s\n</pre><p>" % \
         quote_html(new_text)
      pg.send_editor(msg)
      return

   msg = "<b>Thankyou for your changes.   Your attention to" \
      " detail is appreciated.</b>"
   pg.send_page(msg)


def make_index_key():
   s = '<p><center>'
   links = map(lambda ch: '<a href="#%s">%s</a>' % (ch, ch),
            string.lowercase)
   s = s + string.join(links, ' | ')
   s = s + '</center><p>'
   return s


def page_list():
   """Return the names of all pages stored in text_dir.

   File names are URL-unquoted; only names that are a complete WikiWord
   are returned.
   """
   names = [urllib.unquote(entry) for entry in os.listdir(text_dir)]
   return [name for name in names if word_anchored_re.match(name)]

def print_footer(name, editable=1, mod_string=None, advisory_lock=None):
   """Write the page footer and close the body.

   name          -- page name used for the edit link
   editable      -- whether to offer the edit link (see note below)
   mod_string    -- optional last-modified text
   advisory_lock -- optional text describing an edit in progress

   The navigation links and the FindPage line are always written.
   """
   # NOTE(review): the caller's value is overridden unconditionally, so
   # the edit link, modification time and lock notice below are never
   # written.  Presumably a deliberate read-only switch -- confirm before
   # removing.
   editable = 0

   out = sys.stdout.write
   navigation = (link_tag('NeverWhere', "NeverWhere"),
                 link_tag('RecentChanges', "RecentChanges"),
                 link_tag('TitleIndex', "TitleIndex"))
   out('\n<hr>')
   out("%s %s %s\n" % navigation)

   if editable:
      edit_link = link_tag('edit='+urllib.quote(name), 'EditText')
      out("<br>%s of this page" % edit_link)
      if mod_string:
         out(" (last modified %s) " % mod_string)
      if advisory_lock:
         out('\n<br>\n')
         out(advisory_lock)

   find_link = link_tag('FindPage', 'FindPage')
   out("<br>%s by browsing, searching, or an index</body>" % find_link)


# ----------------------------------------------------------
# Macros
def _macro_TitleSearch():
   """[[TitleSearch]] macro: return a form whose input field is named
   'titlesearch'."""
   field_name = "titlesearch"
   return _macro_search(field_name)

def _macro_FullSearch():
   """[[FullSearch]] macro: return a form whose input field is named
   'fullsearch'."""
   field_name = "fullsearch"
   return _macro_search(field_name)

def _macro_search(type):
   """Return the HTML for a one-field search form.

   type -- name given to the text input; it becomes the query parameter
           that selects the search handler.

   The field is pre-filled with the request's 'value' parameter when
   there is one.
   """
   if form.has_key('value'):
      default = form["value"].value
   else:
      default = ''
   # The value comes straight from the request and is placed inside a
   # double-quoted attribute: escape the markup characters and the quote.
   default = quote_html(default).replace('"', '&quot;')
   return ('<form method=get>\n'
           '   <input name=%s size=30 value="%s"> \n'
           '   <input type=submit value="Go">\n'
           '   </form>\n') % (type, default)

def _macro_GoTo():
   return """<form method=get><input name=goto size=30>
   <input type=submit value="Go">
   </form>\n"""
   # isindex is deprecated, but it gives the right result here

def _macro_WordIndex():
   """[[WordIndex]] macro: return an index of the words that make up the
   page names.

   Every page name is split into its capitalised words.  The result
   lists the words in sorted order under a heading per initial letter,
   each followed by links to the pages whose name contains it.  A page is
   listed once per word even when the word occurs several times in its
   name.
   """
   word_re = re.compile('[A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ][a-zßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ0-9]+')

   # word -> names of the pages containing it, without duplicates
   pages_by_word = {}
   for name in page_list():
      for word in word_re.findall(name):
         names = pages_by_word.setdefault(word, [])
         if name not in names:
            names.append(name)

   all_words = list(pages_by_word.keys())
   all_words.sort()

   parts = [make_index_key()]
   last_letter = None
   for word in all_words:
      letter = word[0].lower()
      if letter != last_letter:
         parts.append('<a name="%s"><h3>%s</h3></a>\n' % (letter, letter))
         last_letter = letter

      parts.append('<b>%s</b><ul>\n' % word)
      links = pages_by_word[word]
      links.sort()
      for name in links:
         parts.append('<li>' + Page(name).link_to() + '\n')
      parts.append('\n</ul>\n')
   return ''.join(parts)


def _macro_TitleIndex():
   """[[TitleIndex]] macro: return all page names as links in sorted order.

   A heading with a named anchor is inserted whenever the (lower-cased)
   first character changes; pages under the same heading are separated
   by line breaks.
   """
   pieces = [make_index_key()]
   names = list(page_list())
   names.sort()
   previous_letter = None
   for name in names:
      initial = name[0].lower()
      if initial == previous_letter:
         pieces.append('<br>')
      else:
         pieces.append('<a name="%s"><h3>%s</h3></a>' % (initial, initial))
         previous_letter = initial
      pieces.append(Page(name).link_to())
   return ''.join(pieces)


def _macro_RecentChanges():
   """[[RecentChanges]] macro: return the edit log as HTML, newest first.

   A date heading is written whenever the day changes.  Each page is
   listed once, at its most recent edit, optionally followed by the
   editing host (show_hosts) and the edit time (changed_time_fmt).

   Log lines that do not consist of three tab-separated fields with a
   usable timestamp are skipped, so one damaged record does not take the
   whole page down.
   """
   lines = editlog_raw_lines()
   lines.reverse()

   ratchet_day = None
   done_words = {}
   buf = StringIO()
   for line in lines:
      fields = line.split('\t')
      if len(fields) != 3:
         continue
      page_name, addr, ed_time = fields
      try:
         time_tuple = localtime(float(ed_time))
      except ValueError:
         continue

      # year, month, day
      day = tuple(time_tuple[0:3])
      if day != ratchet_day:
         buf.write('<h3>%s</h3>' % strftime(date_fmt, time_tuple))
         ratchet_day = day

      if done_words.has_key(page_name):
         continue

      done_words[page_name] = 1
      buf.write(Page(page_name).link_to())
      if show_hosts:
         buf.write(' . . . . ')
         try:
            buf.write(gethostbyaddr(addr)[0])
         except Exception:
            # Best effort: fall back to the bare address when the reverse
            # lookup fails.
            buf.write(addr)
      if changed_time_fmt:
         buf.write(time.strftime(changed_time_fmt, time_tuple))
      buf.write('<br>\n')

   return buf.getvalue()



# ----------------------------------------------------------
class PageFormatter:
   """Object that turns Wiki markup into HTML.

   All formatting commands can be parsed one line at a time, though
   some state is carried over between lines.

   The raw text is scanned for markup tokens; each token is handed to the
   _<kind>_repl method named after the pattern group that matched, and
   that method returns the HTML to put in its place.
   """
   def __init__(self, raw):
      self.raw = raw
      # Toggle state for headings, emphasis and bold.
      self.is_h = self.is_em = self.is_b = 0
      # Indentation widths of the currently open <ul> elements.
      self.list_indents = []
      # True while inside a {{{ ... }}} block.
      self.in_pre = 0


   def _quote(self, text):
      """Escape text for use as element content or inside a double-quoted
      attribute value."""
      for char, entity in (('&', '&amp;'), ('<', '&lt;'),
                           ('>', '&gt;'), ('"', '&quot;')):
         text = text.replace(char, entity)
      return text

   def _emph_repl(self, word):
      # Three quotes toggle bold, two toggle emphasis.
      if len(word) == 3:
         self.is_b = not self.is_b
         return ['</b>', '<b>'][self.is_b]
      else:
         self.is_em = not self.is_em
         return ['</em>', '<em>'][self.is_em]

   def _head_repl(self, word):
      # The number of '=' signs is the heading level.
      self.is_h = not self.is_h
      lvl = len(word.strip())
      return ['</h%d>' % lvl, '<h%d>' % lvl][self.is_h]

   def _rule_repl(self, word):
      # Close any open lists; more than four dashes make a thicker rule.
      s = self._undent()
      if len(word) <= 4:
         s = s + "\n<hr>\n"
      else:
         s = s + "\n<hr size=%d>\n" % (len(word) - 2 )
      return s

   def _word_repl(self, word):
      return Page(word).link_to()

   def _img_repl(self, word):
      # The URL pattern admits '<', '>', '&' and a trailing '"', so the
      # match must be escaped before it goes into the page.
      safe = self._quote(word)
      return '<img alt="%s" src="%s">' % (safe, safe)

   def _url_repl(self, word):
      # See _img_repl: the matched text is not HTML-safe.
      safe = self._quote(word)
      return '<a href="%s">%s</a>' % (safe, safe)


   def _email_repl(self, word):
      return '<a href="mailto:%s">%s</a>' % (word, word)


   def _ent_repl(self, s):
      return {'&': '&amp;',
            '<': '&lt;',
            '>': '&gt;'}[s]
   

   def _li_repl(self, match):
      return '<li>'


   def _pre_repl(self, word):
      # NOTE(review): while in_pre is set, either marker ('{{{' as well
      # as '}}}') closes the block.
      if word == '{{{' and not self.in_pre:
         self.in_pre = 1
         return '<pre>'
      elif self.in_pre:
         self.in_pre = 0
         return '</pre>'
      else:
         return ''

   def _macro_repl(self, word):
      # word is '[[Name]]'; the scanner pattern only admits the macro
      # names that have a _macro_<Name> function in this module.
      macro_name = word[2:-2]
      # TODO: Somehow get the default value into the search field
      return globals()['_macro_' + macro_name]()


   def _indent_level(self):
      # 0 when no list is open, else the width of the innermost list.
      return len(self.list_indents) and self.list_indents[-1]

   def _indent_to(self, new_level):
      """Open or close <ul> elements until the innermost list has the
      indentation new_level; return the tags needed for that."""
      s = ''
      while self._indent_level() > new_level:
         del(self.list_indents[-1])
         s = s + '</ul>\n'
      while self._indent_level() < new_level:
         self.list_indents.append(new_level)
         s = s + '<ul>\n'
      return s

   def _undent(self):
      """Close every open list and return the closing tags."""
      res = '</ul>' * len(self.list_indents)
      self.list_indents = []
      return res


   def replace(self, match):
      """Return the replacement HTML for one scanner match.

      Dispatches on the name of the pattern group that matched.  Inside
      a preformatted block only 'pre', 'ent', 'word' and 'url' tokens are
      interpreted; everything else is passed through as (escaped) text.

      Raises ValueError when no named group matched.
      """
      for kind, hit in match.groupdict().items():
         if hit:
            if self.in_pre and kind not in ("pre", "ent", "word", "url"):
               # Pass-through text can still be an image URL containing
               # markup characters, so escape it.
               return self._quote(hit)
            return getattr(self, '_' + kind + '_repl')(hit)
      raise ValueError("Can't handle match " + repr(match))
      

   def print_html(self):
      """Write the HTML rendering of the raw text to standard output."""
      # For each line, we scan through looking for magic
      # strings, outputting verbatim any intervening text
      scan_re = re.compile(
         r"(?:(?P<emph>'{2,3})"
         + r"|(?P<head>^\s*={1,6}\s+|\s+={1,6}\s*$)"
         + r"|(?P<ent>[<>&])"
         + r"|(?P<word>(?:[A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ][a-zßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ0-9]+){2,})"
         + r"|(?P<rule>-{4,})"
         + r"|(?P<img>(http|ftp)\:[^\s'\"]+\S\.(jpeg|jpg|gif|png))"
         + r"|(?P<url>(http|ftp|nntp|news|mailto)\:[^\s'\"]+\S)"
         + r"|(?P<email>[-\w._+]+\@[\w.-]+)"
         + r"|(?P<li>^\s+\*)"
         + r"|(?P<pre>(\{\{\{|\}\}\}))"
         + r"|(?P<macro>\[\[(TitleSearch|FullSearch|WordIndex"
                     + r"|TitleIndex|RecentChanges|GoTo)\]\])"
         + r")")
      blank_re = re.compile(r"^\s*$")
      indent_re = re.compile(r"^\s*")
      eol_re = re.compile(r'\r?\n')
      raw = self.raw.expandtabs()
      for line in eol_re.split(raw):
         if not self.in_pre:
            # XXX: Should we check these conditions in this order?
            if blank_re.match(line):
               sys.stdout.write('<p>\n')
               continue
            # The width of the leading whitespace decides the list depth.
            indent = indent_re.match(line)
            sys.stdout.write(self._indent_to(len(indent.group(0))))
         sys.stdout.write(scan_re.sub(self.replace, line))
         sys.stdout.write("\n")
      # Close whatever is still open at the end of the text.
      if self.in_pre: sys.stdout.write('</pre>\n')
      sys.stdout.write(self._undent())
      

# ----------------------------------------------------------
class Page:
   """One wiki page, stored as a file in text_dir.

   The file is named after the URL-quoted page name.  Two companion
   files use the same name with a marker: '!<name>' is the advisory edit
   lock and '#<name>.<pid>#' is the temporary file used while writing.
   """
   def __init__(self, page_name):
      self.page_name = page_name

   def _quoted_name(self):
      """Return the page name encoded for use as a single file name."""
      # An empty 'safe' set makes quote() encode '/' too, so a page name
      # can never address a file outside text_dir.  WikiWords contain no
      # '/', so existing files keep their names.
      return urllib.quote(self.page_name, '')

   def _text_filename(self):
      return path.join(text_dir, self._quoted_name())

   def _lock_filename(self):
      return path.join(text_dir, "!" + self._quoted_name())

   def _tmp_filename(self):
      return path.join(text_dir, ('#' + self._quoted_name() + '.' + repr(os.getpid()) + '#'))


   def _file_exists(self, filename):
      """Return 1 if filename can be stat'ed, 0 if it does not exist.

      Any other OSError propagates.
      """
      try:
         os.stat(filename)
      except OSError:
         if sys.exc_info()[1].errno == errno.ENOENT:
            return 0
         raise
      return 1

   def exists(self):
      """Return 1 if the page has a text file, else 0."""
      return self._file_exists(self._text_filename())
   
   def lock_exists(self):
      """Return 1 if the page has an advisory lock file, else 0."""
      return self._file_exists(self._lock_filename())
      

   def link_to(self):
      """Return an HTML link to the page.

      Links to pages without a text file get the 'nonexistent' class;
      with nonexist_qm set, the link is a '?' placed before the name.
      """
      word = self.page_name
      if self.exists():
         return link_tag(urllib.quote(word), word)
      else:
         if nonexist_qm:
            return link_tag(urllib.quote(word), '?', 'nonexistent') + word
         else:
            return link_tag(urllib.quote(word), word, 'nonexistent')


   def get_raw_body(self):
      """Return the stored text of the page.

      A page without a text file yields a 'Describe ... here.'
      placeholder; any other I/O error propagates.
      """
      try:
         page_file = open(self._text_filename(), 'r')
      except IOError:
         if sys.exc_info()[1].errno == errno.ENOENT:
            # just doesn't exist, use default
            return 'Describe %s here.' % self.page_name
         raise
      try:
         return page_file.read()
      finally:
         page_file.close()
   

   def send_page(self, msg=None):
      """Write the complete page: title, formatted body and footer.

      msg -- optional HTML fragment shown under the title
      """
      link = get_scriptname() + '?fullsearch=' + urllib.quote(self.page_name)
      send_title(self.page_name, link, msg)
      PageFormatter(self.get_raw_body()).print_html()
      print_footer(self.page_name, 1, self._last_modified(),
         self._advisory_lock())


   def _last_modified(self):
      """Return the formatted modification time of the text file, or None
      when the page does not exist."""
      if not self.exists():
         return None
      modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME])
      return strftime(datetime_fmt, modtime)

   def _advisory_lock(self):
      """Return a note saying when and from where editing started, or
      None when there is no lock file."""
      if not self.lock_exists():
         return None
      lock_filename = self._lock_filename()
      checkout = localtime(os.stat(lock_filename)[stat.ST_MTIME])
      lock_file = open(lock_filename)
      try:
         who = lock_file.read()
      finally:
         lock_file.close()
      try:
         who = gethostbyaddr(who)[0]
      except Exception:
         # Best effort: keep the stored address when the reverse lookup
         # fails.
         pass
      return "(%s editing started from %s)" % (strftime(datetime_fmt, checkout),
         who)


   def activate_advisory_lock(self):
      """Create (or overwrite) the lock file, recording REMOTE_ADDR."""
      remote_name = environ.get('REMOTE_ADDR', '')
      tmp = open(self._lock_filename(), "wb")
      try:
         tmp.write(remote_name)
      finally:
         tmp.close()

   def send_editor(self, msg = None):
      """Write the edit form for the page and take the advisory lock.

      msg -- optional HTML fragment shown under the title

      The form carries the MD5 checksum of the text it was built from,
      which save_text uses to detect intervening changes.
      """
      self.activate_advisory_lock()
      send_title('Edit ' + self.page_name, None, msg)
      sys.stdout.write('<form method="post" action="%s">' % (get_scriptname()))
      sys.stdout.write('<input type=hidden name="savepage" value="%s">' %
         urllib.quote((self.page_name)))
      raw_body = self.get_raw_body()
      md5hash = binascii.hexlify(md5.new(raw_body).digest())
      sys.stdout.write('<textarea wrap="virtual" name="savetext" rows="24"\n'
                       '             cols="80">%s</textarea>'
                       % quote_html(raw_body))
      sys.stdout.write('<input type=hidden name="checksum" value="%s">' % md5hash)
      sys.stdout.write("<br><input type=checkbox name=silentedit> ")
      sys.stdout.write(Page('SilentEdit').link_to())
      sys.stdout.write('<br><input type=submit value="Save">\n'
                       '         <input type=submit name=preview value="Preview">\n'
                       '         <input type=reset value="Reset">\n'
                       '      ')
      sys.stdout.write("<p>" + Page('EditingTips').link_to())
      print_footer(self.page_name, 0)
             

   def _write_file(self, text):
      """Replace the page text with text and drop the advisory lock.

      The text goes to a temporary file that is then renamed over the
      page file.
      """
      tmp_filename = self._tmp_filename()
      tmp_file = open(tmp_filename, 'w')
      try:
         tmp_file.write(text)
      finally:
         # Make sure the data is flushed and the handle closed before the
         # file is renamed into place.
         tmp_file.close()
      os.rename(tmp_filename, self._text_filename())
      try:
         os.remove(self._lock_filename())
      except OSError:
         # The lock is advisory; failing to remove it is not an error.
         pass


   def save_text(self, newtext, checksum, silent_edit=None):
      """Accept an edit if the page is unchanged since the edit started.

      newtext     -- new page text
      checksum    -- MD5 digest of the text the editor was opened with
      silent_edit -- when true, the edit is not recorded in the edit log

      Raises PageChanged (carrying the current text and digest) when the
      stored text no longer matches checksum.
      """
      current_raw = self.get_raw_body()
      current_checksum = md5.new(current_raw).digest()
      if current_checksum == checksum:
         # NOTE(review): the write is disabled, so an accepted edit is
         # logged but never stored.  Presumably a deliberate read-only
         # switch -- confirm before re-enabling.
         # self._write_file(newtext)
         if not silent_edit:
            remote_name = environ.get('REMOTE_ADDR', '')
            editlog_add(self.page_name, remote_name)
      else:
         raise PageChanged(current_raw, current_checksum)


# Request dispatch ---------------------------------------------------
#
# The response header is sent first, so that anything written later --
# including a traceback -- is part of a valid response.
emit_header()

try:
   form = cgi.FieldStorage()

   handlers = {
      'fullsearch': do_fullsearch,
      'titlesearch': do_titlesearch,
      'edit': do_edit,
      'savepage': do_savepage,
      'preview': do_preview
   }
   # I know, this is plain sick, but hopefully nobody forced you into
   # reading this source. The point of the following is that I want to
   # control the sequence for the testing of commands. This was introduced
   # when I added preview. The only important thing right now is testing for
   # preview before savepage.
   handler_keys = (
      'preview',
      'savepage',
      'edit',
      'titlesearch',
      'fullsearch'
   )

   for cmd in handler_keys:
      if form.has_key(cmd):
         # The first command present in the form wins; its handler gets
         # the field's value.
         handlers[cmd](form[cmd].value)
         break
   else:
      # No command: show a page.  The name comes from the 'goto' field,
      # else from PATH_INFO, else from the query string; the front page
      # is used when none of them names one.
      path_info = environ.get('PATH_INFO', '')

      if form.has_key('goto'):
         query = form['goto'].value
      elif len(path_info) and path_info[0] == '/':
         query = path_info[1:] or 'NeverWhere'
      else:    
         query = environ.get('QUERY_STRING', '') or 'NeverWhere'

      query = urllib.unquote(query)
      # Only the leading WikiWord of the query is used as the page name.
      word_match = re.match(word_re_str, query)
      if word_match:
         word = word_match.group(0)
         Page(word).send_page()
      else:
         # The query is request data: escape it before echoing it back.
         sys.stdout.write("<p>Can't work out query \"<pre>"
            + quote_html(query) + "</pre>\"")

except:
   # Top-level boundary: report any failure in the response instead of
   # letting the CGI process die without output.
   cgi.print_exception()

sys.stdout.flush()
