#!/usr/bin/env python
"""Generate HTML documentation from live Python objects."""

__version__ = 'Ka-Ping Yee <ping@lfw.org>, 29 May 2000'

import sys, os, re, types, inspect
from string import join, replace, expandtabs, rstrip

# ---------------------------------------------------- formatting utilities
try:
    _STRING_TYPES = (str, unicode)      # byte strings and unicode strings
except NameError:
    _STRING_TYPES = (str,)              # no separate unicode type here

def serialize(stuff):
    """Combine a list containing strings and nested lists into a single
    string.  This lets us manipulate lists until the last moment, since
    rearranging lists is faster than rearranging strings.

    A string argument is returned unchanged; any other argument is
    iterated and its items are serialized recursively and concatenated."""
    # Every kind of string must be treated as a leaf: iterating a string
    # yields one-character strings, so recursing into one would never end.
    if isinstance(stuff, _STRING_TYPES):
        return stuff
    return ''.join([serialize(item) for item in stuff])

def htmlescape(text):
    """Return text with the characters &, < and > replaced by the HTML
    entities &amp;, &lt; and &gt;.  Quote characters are left as they are."""
    # '&' has to be replaced first; otherwise the ampersands introduced by
    # the other two entities would be escaped a second time.
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

def htmlrepr(object):
    """Return the repr() of object, passed through htmlescape() so that it
    can be placed in HTML text."""
    text = repr(object)
    return htmlescape(text)

def preformat(text):
    """Convert plain text to HTML that keeps its layout.

    Tabs are expanded, the text is escaped with htmlescape(), every space
    becomes &nbsp; and every newline becomes <br> followed by a newline."""
    text = htmlescape(text.expandtabs())
    # Put a space on each empty line.  Replacements do not overlap, so one
    # pass only handles every other newline in a run of three or more;
    # the second pass picks up the ones that were skipped.
    text = text.replace('\n\n', '\n \n')
    text = text.replace('\n\n', '\n \n')
    return text.replace(' ', '&nbsp;').replace('\n', '<br>\n')

def multicolumn(list, format, cols=4):
    """Lay out the items of a sequence in a table of cols columns.

    The items fill the columns top to bottom, left to right; format is
    called on each item and must return the HTML string for it.  Returns
    a list of HTML strings."""
    # Floor division keeps both values integers regardless of which
    # division semantics are in effect; range() and slicing need that.
    rows = (len(list) + cols - 1) // cols
    width = 100 // cols

    results = ['<table width="100%"><tr>']
    for col in range(cols):
        results.append('<td width="%d%%" valign=top>' % width)
        first = rows * col
        # The slice simply comes up short (or empty) past the end of the
        # sequence, so trailing columns may hold fewer items.
        for item in list[first:first + rows]:
            results.append(format(item) + '<br>')
        results.append('</td>')
    results.append('</tr></table>')
    return results

def heading(title, fgcol, bgcol, extras=''):
    """Return a two-item list of HTML for a full-width banner.

    The first item opens a table and holds one row, on background colour
    bgcol, showing title on the left and extras right-aligned, both in
    colour fgcol; the second item is the closing table tag.  title and
    extras are inserted as given, without escaping."""
    values = (bgcol, fgcol, title, fgcol, extras)
    banner = """
<p><table width="100%%" cellspacing=0 cellpadding=0 border=0>
<tr bgcolor="%s"><td colspan=3 valign=bottom><small><small><br></small></small
><font color="%s" face="helvetica, arial">&nbsp;%s</font></td
><td align=right valign=bottom
><font color="%s" face="helvetica, arial">&nbsp;%s</font></td></tr>
""" % values
    return [banner, '</table>']

def section(title, fgcol, bgcol, contents, width=20,
            prelude='', marginalia=None, gap='&nbsp;&nbsp;&nbsp;'):
    """Return a list of HTML pieces for a titled section.

    The section is a full-width table: a title row in colour fgcol on
    background bgcol, an optional row holding prelude, and a body row
    made of a margin cell, a gap cell and the cell with the contents.
    contents is added to the list unchanged, so it may itself be a
    nested list.  When marginalia is None the margin cell holds width
    non-breaking spaces."""
    margin = marginalia
    if margin is None:
        margin = '&nbsp;' * width

    pieces = ["""
<p><table width="100%%" cellspacing=0 cellpadding=0 border=0>
<tr bgcolor="%s"><td colspan=3 valign=bottom><small><small><br></small></small
><font color="%s" face="helvetica, arial">&nbsp;%s</font></td></tr>
""" % (bgcol, fgcol, title)]

    if prelude:
        pieces.append("""
<tr><td bgcolor="%s">%s</td>
<td bgcolor="%s" colspan=2>%s</td></tr>
""" % (bgcol, margin, bgcol, prelude))

    pieces.append("""
<tr><td bgcolor="%s">%s</td><td>%s</td>
""" % (bgcol, margin, gap))

    # Alas, this horrible hack seems to be the only way to force Netscape
    # to expand the main cell consistently to the maximum available width.
    filler = '&nbsp; ' * 100
    pieces.append('<td><small><small>' + filler + '</small></small\n>')

    # Wrapped in a list so that contents stays a single (possibly nested) item.
    pieces.extend([contents, '</td></tr></table>'])
    return pieces

def footer():
    """Return the HTML for the right-aligned "generated with htmldoc"
    credit line placed at the bottom of a page."""
    # The credit text is not a link, so there must be no closing </a> tag.
    return """
<table width="100%"><tr><td align=right>
<font face="helvetica, arial"><small><small>generated with
<strong>htmldoc</strong> by Ka-Ping Yee</small></small></font>
</td></tr></table>
"""

# -------------------------------------------------------- automatic markup
def namelink(name, *dicts):
    """Return name as a hyperlink if one of the dictionaries knows it.

    Each dictionary maps names to link targets.  The dictionaries are
    searched in the order given and the first one containing name
    supplies the target; if none does, name is returned unchanged."""
    for mapping in dicts:
        if name in mapping:
            return '<a href="%s">%s</a>' % (mapping[name], name)
    return name

def classlink(object, modname, *dicts):
    """Return the display name of a class, hyperlinked when possible.

    The name is qualified with the class's module unless that module is
    modname.  Each dictionary maps class objects to link targets; the
    first one containing the class supplies the target, and without a
    match the plain (possibly qualified) name is returned."""
    name = object.__name__
    if object.__module__ != modname:
        name = object.__module__ + '.' + name
    for mapping in dicts:
        if object in mapping:
            return '<a href="%s">%s</a>' % (mapping[object], name)
    return name

def modpkglink(data):
    """Return the HTML for one entry in a list of modules and packages.

    data is a (name, path, ispackage, shadowed) tuple.  A shadowed entry
    is shown greyed out and without a link.  Otherwise the entry links
    to "<path>.<name>.html", or to "<name>.html" when path is empty, and
    a package is shown in bold and marked "(package)"."""
    # Unpacked here rather than in the signature: tuple parameters are
    # not accepted by every Python version.
    name, path, ispackage, shadowed = data
    if shadowed:
        return '<font color="#808080">%s</font>' % name
    if path:
        url = '%s.%s.html' % (path, name)
    else:
        url = '%s.html' % name
    if ispackage:
        text = '<strong>%s</strong>&nbsp;(package)' % name
    else:
        text = name
    return '<a href="%s">%s</a>' % (url, text)

def modulelink(object):
    """Return a hyperlink, labelled with the module's name, to the page
    "<name>.html" for the given module object."""
    modname = object.__name__
    return '<a href="%s.html">%s</a>' % (modname, modname)

# A word, optionally preceded by "self.".  Group 1 is the prefix (if any),
# group 2 the word itself.
_MARKUP_PATTERN = re.compile(r'(self\.)?(\w+)')

def markup(text, functions={}, classes={}, methods={}, escape=htmlescape):
    """Mark up some plain text, given a context of symbols to look for.
    Each context dictionary maps object names to named anchor identifiers.

    A word followed by "(" is looked up in methods, functions and
    classes, in that order; a word preceded by "self." is shown in bold;
    any other word is looked up in classes only.  Everything between the
    words, including a "self." prefix, is passed through escape.  The
    dictionaries are only read, never modified."""
    results = []
    here = 0
    while 1:
        match = _MARKUP_PATTERN.search(text, here)
        if not match: break
        # Span of the word alone, so that a "self." prefix is emitted as
        # ordinary escaped text in front of it.
        start, end = match.span(2)
        name = match.group(2)
        results.append(escape(text[here:start]))

        if text[end:end+1] == '(':
            results.append(namelink(name, methods, functions, classes))
        elif match.group(1):
            results.append('<strong>%s</strong>' % name)
        else:
            results.append(namelink(name, classes))
        here = end
    # The text after the last word needs escaping like everything else.
    results.append(escape(text[here:]))
    return ''.join(results)

def getdoc(object):
    """Return the documentation text for an object.

    The doc string is used if there is one; otherwise the comments that
    inspect.getcomments() finds for the object.  The result has trailing
    whitespace removed and ends with exactly one newline, or is the
    empty string when no documentation is available."""
    result = inspect.getdoc(object)
    if not result:
        # Looking for comments is best effort: if it fails for any
        # ordinary reason, treat the object as undocumented.
        try:
            result = inspect.getcomments(object)
        except Exception:
            result = None
    if not result:
        return ''
    return result.rstrip() + '\n'

# -------------------------------------------------- type-specific routines
def document_tree(tree, modname, classes={}, parent=None):
    """Produce HTML for a class tree as returned by inspect.getclasstree().

    A tuple entry (class, bases) becomes a <dt> item showing the class
    via classlink(); its bases are listed after it unless the only base
    is parent.  A list entry is the subtree of the class in the tuple
    entry before it and becomes a nested <dd> block.  Returns a nested
    list of HTML strings."""
    tuple_type, list_type = type(()), type([])
    pieces = ['<dl>\n']
    for entry in tree:
        kind = type(entry)
        if kind is tuple_type:
            cls, bases = entry
            pieces.append('<dt><font face="helvetica, arial"><small>')
            pieces.append(classlink(cls, modname, classes))
            if bases and bases != (parent,):
                links = [classlink(base, modname, classes) for base in bases]
                pieces.append('(' + ', '.join(links) + ')')
            pieces.append('\n</small></font></dt>')
        elif kind is list_type:
            # cls is still the class from the preceding tuple entry.
            subtree = document_tree(entry, modname, classes, cls)
            pieces.extend(['<dd>\n', subtree, '</dd>\n'])
    pieces.append('</dl>\n')
    return pieces

# Types whose instances are listed as module constants.
_CONSTANT_TYPES = [float, int, list, str, tuple, type]
try:
    _CONSTANT_TYPES.extend([long, unicode])     # only where these exist
except NameError:
    pass

def isconstant(object):
    """Check if an object is of a type that probably means it's a constant.

    True when the exact type of object is float, int, list, str, tuple
    or type (or long or unicode where those types exist).  Instances of
    subclasses of these types do not count."""
    return type(object) in _CONSTANT_TYPES

def _package_members(pkgdir, pkgname):
    """Return a sorted list of (name, path, ispackage, shadowed) tuples,
    the form modpkglink() takes, for the modules and subpackages in the
    directory pkgdir of the package named pkgname.  Entries whose names
    start with an underscore are left out."""
    modpkgs = []
    modnames = []
    for entry in os.listdir(pkgdir):
        if entry[:1] == '_':
            continue
        path = os.path.join(pkgdir, entry)
        # modnames keeps a module from being listed twice when both its
        # .py and its .pyc file are present.
        if entry[-3:] == '.py' and entry[:-3] not in modnames:
            modpkgs.append((entry[:-3], pkgname, 0, 0))
            modnames.append(entry[:-3])
        elif entry[-4:] == '.pyc' and entry[:-4] not in modnames:
            modpkgs.append((entry[:-4], pkgname, 0, 0))
            modnames.append(entry[:-4])
        elif os.path.isdir(path):
            init = os.path.join(path, '__init__.py')
            initc = os.path.join(path, '__init__.pyc')
            if os.path.isfile(init) or os.path.isfile(initc):
                modpkgs.append((entry, pkgname, 1, 0))
    modpkgs.sort()
    return modpkgs

def document_module(object):
    """Produce HTML documentation for a given module object.

    The result is a nested list of HTML strings: a heading with the
    module name, version and source file, the module's documentation,
    and then sections for the package contents (for a package) or the
    imported modules, the classes, the functions and the constants."""
    name = object.__name__
    results = []
    head = '<br><big><big><strong>&nbsp;%s</strong></big></big>' % name
    # getsourcefile() may raise TypeError or return nothing when there is
    # no source file; either way there is no file to link to.
    try:
        sourcefile = inspect.getsourcefile(object)
    except TypeError:
        sourcefile = None
    if sourcefile:
        filelink = '<a href="file:%s">%s</a>' % (sourcefile, sourcefile)
    else:
        filelink = '(built-in)'
    if hasattr(object, '__version__'):
        # Formatting with %s first lets a non-string version be shown too.
        version = '%s' % (object.__version__,)
        head = head + ' (version: %s)' % htmlescape(version)
    results.append(heading(head, '#ffffff', '#7799ee', filelink))

    modules = [value for key, value in
               inspect.getmembers(object, inspect.ismodule)]

    # cdict and fdict map both names and objects to link targets.
    classes, cdict = [], {}
    for key, value in inspect.getmembers(object, inspect.isclass):
        if (inspect.getmodule(value) or object) is object:
            classes.append(value)
            cdict[key] = cdict[value] = '#' + key
    functions, fdict = [], {}
    for key, value in inspect.getmembers(object, inspect.isroutine):
        if inspect.isbuiltin(value) or inspect.getmodule(value) is object:
            functions.append(value)
            fdict[key] = '#-' + key
            if inspect.isfunction(value): fdict[value] = fdict[key]
    constants = []
    for key, value in inspect.getmembers(object, isconstant):
        if key[:1] != '_':
            constants.append((key, value))

    # Base classes defined in other loaded modules link to those
    # modules' pages, provided the module still exposes the class under
    # its own name and the name is not already taken locally.
    for c in classes:
        for base in c.__bases__:
            key, modname = base.__name__, base.__module__
            if modname != name and modname in sys.modules:
                module = sys.modules[modname]
                if hasattr(module, key) and getattr(module, key) is base:
                    if key not in cdict:
                        cdict[key] = cdict[base] = modname + '.html#' + key

    doc = markup(getdoc(object), fdict, cdict, escape=preformat)
    if doc: doc = '<p><small><tt>' + doc + '</tt></small>\n\n'
    else: doc = '<p><small><em>no doc string</em></small>\n'
    results.append(doc)

    if hasattr(object, '__path__'):
        modpkgs = _package_members(object.__path__[0], name)
        contents = multicolumn(modpkgs, modpkglink)
        results.append(section('<big><strong>Package Contents</strong></big>',
                               '#ffffff', '#aa55cc', contents))

    elif modules:
        contents = multicolumn(modules, modulelink)
        results.append(section('<big><strong>Modules</strong></big>',
                               '#ffffff', '#aa55cc', contents))

    if classes:
        contents = document_tree(inspect.getclasstree(classes, 1), name, cdict)
        for item in classes:
            contents.append(document_class(item, fdict, cdict))
        results.append(section('<big><strong>Classes</strong></big>',
                               '#ffffff', '#ee77aa', contents))
    if functions:
        contents = []
        for item in functions:
            contents.append(document_function(item, fdict, cdict))
        results.append(section('<big><strong>Functions</strong></big>',
                               '#ffffff', '#eeaa77', contents))

    if constants:
        contents = []
        for key, value in constants:
            contents.append('<br><strong>%s</strong> = %s' %
                            (key, htmlrepr(value)))
        results.append(section('<big><strong>Constants</strong></big>',
                               '#ffffff', '#55aa55', contents))

    return results

def document_class(object, functions={}, classes={}):
    """Produce HTML documentation for a given class object.

    The result is a section() whose title names the class and links its
    base classes, whose prelude is the class's documentation and whose
    contents document each member reported by inspect.ismethod.
    functions and classes map names and objects to link targets and are
    only read."""
    name = object.__name__
    bases = object.__bases__

    # Gather all the methods first: the complete anchor table is needed
    # when marking up any one of them.
    methods, mdict = [], {}
    for key, value in inspect.getmembers(object, inspect.ismethod):
        anchor = '#' + name + '-' + key
        methods.append(value)
        mdict[key] = anchor
        mdict[value] = anchor

    results = [document_method(item, functions, classes, mdict, name)
               for item in methods]

    title = '<a name="%s">class <strong>%s</strong></a>' % (name, name)
    if bases:
        parents = [classlink(base, object.__module__, classes)
                   for base in bases]
        title = title + '(%s)' % ', '.join(parents)

    doc = markup(getdoc(object), functions, classes, mdict, escape=preformat)
    if doc:
        doc = '<small><tt>' + doc + '<br>&nbsp;</tt></small>'
    else:
        doc = '<small><em>no doc string</em></small>'
    return section(title, '#000000', '#ffc8d8', results, 10, doc)

def document_method(object, functions={}, classes={}, methods={}, clname=''):
    """Produce HTML documentation for a given method object.

    The function held in the method's im_func attribute is documented
    with document_function(); all other arguments are passed along."""
    func = object.im_func
    return document_function(func, functions, classes, methods, clname)

def defaultformat(object):
    """Return the HTML for an argument's default value: an equals sign
    and the value's htmlrepr(), in small grey type."""
    pieces = ['<small><font color="#a0a0a0">=',
              htmlrepr(object),
              '</font></small>']
    return ''.join(pieces)

def document_function(object, functions={}, classes={}, methods={}, clname=''):
    """Produce HTML documentation for a given function object.

    The result is a nested list of HTML strings forming a <dl> with the
    declaration (name and argument list, anchored as "<clname>-<name>")
    and the marked-up documentation, one <dd> per line.  A lambda is
    shown without a name or anchor.  functions, classes and methods map
    names to link targets and are only read."""
    # Only getargspec() is expected to raise TypeError (for objects that
    # are not Python functions); keeping the formatting call outside the
    # try stops a mistake there from quietly turning into "(...)".
    try:
        args, varargs, varkw, defaults = inspect.getargspec(object)
    except TypeError:
        argspec = '(<small>...</small>)'
    else:
        # inspect.formatargspec() takes the formatter for default values
        # through its formatvalue keyword.
        argspec = inspect.formatargspec(
            args, varargs, varkw, defaults, formatvalue=defaultformat)

    name = object.__name__
    if name == '<lambda>':
        # Drop the surrounding parentheses of the argument list.
        decl = ['<em>lambda</em> ', argspec[1:-1]]
    else:
        anchor = clname + '-' + name
        decl = ['<a name="%s"\n>' % anchor,
                '<strong>%s</strong>' % name, argspec, '</a>\n']
    doc = markup(getdoc(object), functions, classes, methods, escape=preformat)
    if doc:
        # A line break at the very end would only produce an empty <dd>.
        if doc[-5:] == '<br>\n':
            doc = doc[:-5]
        # Start a new <dd> at every line break.
        doc = doc.replace('<br>\n', '</tt></small\n><dd><small><tt>')
        doc = ['<dd><small><tt>', doc, '</tt></small>']
    else:
        doc = '<dd><small><em>no doc string</em></small>'
    return ['<dl><dt>', decl, doc, '</dl>']

def document_builtin(object):
    """Produce HTML documentation for a given built-in function: its
    name in bold followed by a small "..." in place of the arguments."""
    pieces = ['<strong>%s</strong>' % object.__name__,
              '(<small>...</small>)']
    return ''.join(pieces)

# --------------------------------------------------- main dispatch routine
def document(object):
    """Generate documentation for a given object.

    Dispatches on the kind of object (module, class, method, function or
    built-in) and returns the resulting HTML as a single string.  Raises
    TypeError for any other kind of object."""
    if inspect.ismodule(object): results = document_module(object)
    elif inspect.isclass(object): results = document_class(object)
    elif inspect.ismethod(object): results = document_method(object)
    elif inspect.isfunction(object): results = document_function(object)
    elif inspect.isbuiltin(object): results = document_builtin(object)
    else:
        raise TypeError("don't know how to document this kind of object")
    return serialize(results)

def index(dir, shadowed=None):
    """Produce the HTML for a section listing the modules and packages
    found in a directory.

    Every name listed is also recorded in the dictionary shadowed, and a
    name that was already in it on entry is marked as shadowed.  Passing
    the same dictionary to successive calls therefore marks names that
    were already listed for an earlier directory.  Returns a string."""
    if shadowed is None: shadowed = {}
    files = os.listdir(dir)

    # Candidates as (name, ispackage) pairs, in order of precedence.
    candidates = []

    # Package spam/__init__.py takes precedence over module spam.py.
    for file in files:
        path = os.path.join(dir, file)
        if os.path.isdir(path):
            init = os.path.join(path, '__init__.py')
            initc = os.path.join(path, '__init__.pyc')
            if os.path.isfile(init) or os.path.isfile(initc):
                candidates.append((file, 1))

    # Module files; the first matching suffix is stripped to get the name.
    suffixes = ('.py', '.pyc', 'module.so', 'module.so.1')
    for file in files:
        path = os.path.join(dir, file)
        if file[:1] != '_' and os.path.isfile(path):
            for suffix in suffixes:
                if file[-len(suffix):] == suffix:
                    candidates.append((file[:-len(suffix)], 0))
                    break

    modpkgs = []
    seen = {}
    for name, ispackage in candidates:
        # Only the first candidate for a name is listed.
        if name not in seen:
            modpkgs.append((name, '', ispackage, name in shadowed))
            seen[name] = 1
            shadowed[name] = 1

    modpkgs.sort()
    contents = multicolumn(modpkgs, modpkglink)
    results = section('<big><strong>%s</strong></big>' % dir,
                      '#ffffff', '#ee77aa', contents)
    return serialize(results)

if __name__ == '__main__':
    # Usage: each argument is a directory, a module file name or a module
    # name.  One "<module>.html" file is written to the current directory
    # for every module that can be imported.
    modnames = []
    for arg in sys.argv[1:]:
        if os.path.isdir(arg):
            # A directory stands for all the module files directly in it.
            found = []
            for entry in os.listdir(arg):
                if entry[-3:] == '.py':
                    found.append(entry[:-3])
                elif entry[-4:] == '.pyc':
                    found.append(entry[:-4])
                elif entry[-9:] == 'module.so':
                    found.append(entry[:-9])
        elif arg[-3:] == '.py':
            found = [arg[:-3]]
        elif arg[-4:] == '.pyc':
            found = [arg[:-4]]
        else:
            found = [arg]
        # Document each module once, however many times it is named.
        for modname in found:
            if modname not in modnames:
                modnames.append(modname)

    for modname in modnames:
        try:
            module = __import__(modname)
            # For a dotted name __import__ returns the top-level package;
            # the module actually asked for is the one in sys.modules.
            module = sys.modules.get(modname, module)
        except Exception:
            print('failed to import %s' % modname)
        else:
            # Build the whole page before opening the file, so that a
            # failure does not leave a truncated file behind.
            page = document(module)
            file = open(modname + '.html', 'w')
            try:
                file.write(
"""<!doctype html public "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><title>%s</title><body bgcolor="#ffffff">
""" % modname)
                file.write(page)
                file.write('</body></html>')
            finally:
                file.close()
            print('wrote %s.html' % modname)
