
import sys, os, traceback
import re

def _stdout_is_tty():
    """ return True if sys.stdout is attached to a terminal.

        Replacement stdout objects (capture buffers, closed files, None)
        may lack fileno() or raise from it; every such case counts as
        "not a terminal" instead of breaking the import of this module.
    """
    try:
        return os.isatty(sys.stdout.fileno())
    except (AttributeError, ValueError, EnvironmentError):
        return False

if _stdout_is_tty():
    def log(msg):
        """ print msg to stdout (variant used when stdout is a terminal). """
        # a parenthesised single argument prints exactly like 'print msg'
        print(msg)
else:
    def log(msg):
        """ discard msg (variant used when stdout is not a terminal). """
        pass

def convert_rest_html(source, source_path, stylesheet=None, encoding='latin1'):
    """ render a ReST string to an html document via docutils.

        source       the ReST text to convert
        source_path  path of the source, handed to docutils as str()
                     (basically where to look for includes)
        stylesheet   stylesheet path passed to the html writer (if any)
        encoding     output encoding requested for the returned document

        returns the result of docutils' publish_string() for the
        'html' writer.
    """
    from docutils.core import publish_string
    # halt_level is the docutils threshold at which processing aborts
    # (2 is presumably "warning" -- confirm against the docutils docs)
    overrides = dict(stylesheet=stylesheet,
                     traceback=1,
                     output_encoding=encoding,
                     halt_level=2)
    location = str(source_path)
    return publish_string(source, location, writer_name='html',
                          settings_overrides=overrides)

def process(txtpath, encoding='latin1'):
    """ convert the ReST text file at txtpath to an html file.

        txtpath   path object of the source; must have the '.txt' extension
        encoding  used to decode the file content and requested as the
                  encoding of the generated html

        The result is written to txtpath.localpath with the extension
        replaced by '.html'.
    """
    log("processing %s" % txtpath)
    assert txtpath.check(ext='.txt')
    target = txtpath.localpath.new(ext='.html')

    # reference 'style.css' by its bare name, but only if such a file
    # exists in the directory of the source
    css = txtpath.dirpath('style.css')
    stylesheet = css.basename if css.check() else None

    text = unicode(txtpath.read(), encoding)
    html = convert_rest_html(text, txtpath, stylesheet=stylesheet,
                             encoding=encoding)
    target.write(html)
    # NOTE: dumping the svn info of versioned files into a '.svninfo'
    # file next to the html is currently disabled.

# No backslashes occur in these patterns, so plain unicode literals are
# equivalent to raw ones.  DOTALL lets '.' span newlines, so each pattern
# covers the whole text and captures what sits between the two tags.
rex1 = re.compile(u'.*<body>(.*)</body>.*', re.MULTILINE | re.DOTALL)
rex2 = re.compile(u'.*<div class="document">(.*)</div>.*', re.MULTILINE | re.DOTALL)

def strip_html_header(string, encoding='utf8'):
    """ return the content of the body-tag as a unicode string.

        string    the html document; either an encoded byte string or
                  an already decoded unicode string
        encoding  encoding used to decode a byte string (default 'utf8')

        If the body consists of a '<div class="document">' wrapper, the
        content of that div is returned instead.  Input in which no
        body-tag is found is returned decoded but otherwise unchanged.
    """
    text_type = type(u'')
    if isinstance(string, text_type):
        # already decoded; decoding it again would raise a TypeError
        uni = string
    else:
        uni = text_type(string, encoding)
    for rex in rex1, rex2:
        # Both patterns start with '.*' under DOTALL, so a match exists
        # anywhere only if one exists at position 0.  match() therefore
        # gives the same result as search() without retrying every
        # start offset when the tag is absent.
        match = rex.match(uni)
        if not match:
            break
        uni = match.group(1)
    return uni
