#!/bin/env python
# -*- coding: iso-8859-15 -*-
"""oodoctest - a utility to run python code samples from an OOo document

By Olivier Grisel <olivier.grisel@ensta.org>

oodoctest extracts and runs code samples found in an OpenOffice.org document to
test their validity using the standard python doctest machinery. The goal is
to make it easy to write runnable python documentation with OpenOffice.org.

This script can be used either as a command line tool (oodoctest.py --help) or
as the provider of the OODocFileTest class to integrate OO documents as part of
a unittest suite.

OpenOffice.org is not required to run oodoctest.
"""

__copyright__ = """
(C) Copyright 2005 Olivier Grisel <olivier.grisel@ensta.org>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 as published
by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
"""

__version__ = "0.2.0"
__author__ = "Olivier Grisel <olivier.grisel@ensta.org>"

__TODO__ = """\
- use iterparse instead of parse
- add support for the OpenDocument format (00 2.0 and 1.1.5)
- find a way to plug it in OOo as a macro to inline test the document
"""

import os
import sys
import zipfile
from cStringIO import StringIO
import doctest
import unittest
from copy import copy
from doctest import DocFileCase, DocTestParser

# Bind the name ElementTree to the first implementation that can be imported,
# trying the C accelerated ones first:
#   1. the standalone cElementTree package
#   2. the cElementTree bundled with the standard library
#   3. the pure python ElementTree bundled with the standard library
#   4. the standalone elementtree package
try:
    import cElementTree as ElementTree
except ImportError:
    try:
        from xml.etree import cElementTree as ElementTree
    except ImportError:
        try:
            from xml.etree import ElementTree
        except ImportError:
            from elementtree import ElementTree

#
# OpenOffice.org XML namespace
#

def _build_namespace(base=None):
    """Return the table of XML names used to read an OpenOffice document

    The OpenOffice 1.1 and OpenDocument namespaces are laid out the same way
    below a root URI, so only 'base' has to change to switch between them.
    When 'base' is None the OpenOffice.org 1.x root URI is used.

    The result is a dictionary holding:

    - 'office', 'text' and 'xlink': plain namespace URIs
    - 'body': the qualified "{uri}name" tag of the document body
    - 'p', 's', 'c', 'h' and 'level': qualified "{uri}name" names taken from
      the text namespace (paragraph, whitespace sequence, counter, header
      and header level)
    """
    root = base
    if root is None:
        root = "http://openoffice.org/2000"  # root URI of the OO namespaces

    office_uri = "%s/office" % root
    text_uri = "%s/text" % root

    def text_name(local):
        # Qualified name of a tag or attribute of the text namespace
        return "{%s}%s" % (text_uri, local)

    names = dict((local, text_name(local))
                 for local in ('p', 's', 'c', 'h', 'level'))
    names['office'] = office_uri
    names['body'] = "{%s}body" % office_uri
    names['text'] = text_uri
    names['xlink'] = "http://www.w3.org/1999/xlink"  # standard xlink URI
    return names

#
# Doctest extraction API
#

def _extract_text(element, text='', ns=None):
    r"""Recursively extract text from a tree structured document

    Arguments:

    - element: the ElementTree element to flatten
    - text: the text collected so far; the text of 'element' is appended to
      it
    - ns: names table as returned by _build_namespace (built with the
      default base when None)

    Return 'text' followed by the text content of 'element' and of its
    descendants, in document order, followed by the tail of 'element'.
    Spacer elements are expanded into spaces and a newline is appended at the
    end of each paragraph.

    >>> e = ElementTree.XML('''\
    ... <?xml version="1.0" encoding="UTF-8"?>
    ... <office:document-content
    ...   xmlns:office="%(office)s"
    ...   xmlns:text="%(text)s"
    ...   xmlns:xlink="%(xlink)s"
    ...   >\
    ... <office:body>\
    ... <text:p>This is a <text:a xlink:href="http://example.org/">\
    ... paragraph</text:a></text:p>\
    ... <text:p>3 spaces<text:s text:c="3"/>endSpaces</text:p>\
    ... </office:body>\
    ... </office:document-content>
    ... ''' % _build_namespace())
    >>> print _extract_text(e)
    This is a paragraph
    3 spaces   endSpaces
    <BLANKLINE>

    A spacer without a counter stands for a single space:

    >>> e = ElementTree.XML('<p xmlns:text="%(text)s">a<text:s/>b</p>'
    ...                     % _build_namespace())
    >>> print _extract_text(e)
    a b
    """
    if ns is None:
        ns = _build_namespace()
    if element.text:
        text += element.text
    if element.tag == ns['s']:
        # Expanding spacer markup into white spaces; the counter attribute is
        # optional and a spacer without it represents exactly one space
        counter = element.get(ns['c'])
        if counter:
            text += ' ' * int(counter)
        else:
            text += ' '
    # Iterating over the element yields its direct children (getchildren is
    # deprecated)
    for child in element:
        # Recursive descent to the children
        text = _extract_text(child, text=text, ns=ns)
    if element.tag == ns['p']:
        # Paragraphs have an implicit newline character
        text += '\n'
    if element.tail is not None:
        text += element.tail
    return text


def extract_doctests(path, module_relative=True,
        package=None, globs=None, parser=DocTestParser(), **options):
    """Extract the doctests from the OpenOffice document at 'path'

    Arguments:

    - path: path of the document; it is read as a zip archive holding a
      'content.xml' member
    - module_relative: when true, 'path' is resolved relatively to 'package'
      (or to the module of the caller when 'package' is None)
    - package: module, or module name, used to resolve a module relative
      path; giving it together with a false 'module_relative' raises a
      ValueError
    - globs: globals handed to the doctest (a new dictionary when None)
    - parser: the DocTestParser used to build the test
    - options: accepted so that callers can forward their keyword arguments
      unchanged; not used here

    Return a list of DocTest instances. The text of all the paragraphs of the
    document body is gathered into a single test, so the list holds at most
    one item; it is empty when no '>>>' prompt is found or when the document
    has no body.

    The text of the headers is never collected. At the end of each header the
    text collected so far is dropped if it holds no '>>>' prompt; once a
    prompt has been collected, everything that follows is kept.
    """
    if globs is None:
        globs = {}

    if package and not module_relative:
        raise ValueError("Package may only be specified for module-"
                         "relative paths.")

    # Relativize the path. This has to be called from this very function:
    # when package is None the module is guessed from the calling frame.
    if module_relative:
        package = doctest._normalize_module(package)
        path = doctest._module_relative_path(package, path)

    # Read the XML content, making sure the archive gets closed even if the
    # expected member is missing
    name = os.path.basename(path)
    oofile = zipfile.ZipFile(path)
    try:
        content = oofile.read('content.xml')
    finally:
        oofile.close()

    # build an iterator out of the content thanks to iterparse
    iterparser = ElementTree.iterparse(StringIO(content),
                                       events=("start", "end"))
    context = iter(iterparser)

    # build the namespace
    # TODO: do it according to the file content for ODF support
    ns = _build_namespace()
    body_tag = ns['body']
    header_tag = ns['h']
    paragraph_tag = ns['p']

    # go to the body of the document
    for _, element in context:
        if element.tag == body_tag:
            break
    else:
        # the whole document was parsed without meeting a body: no test
        return []

    collected_text = ''
    for event, element in context:
        if event != 'end':
            # only completely parsed elements are of interest
            continue
        if element.tag == body_tag:
            break
        if element.tag == header_tag:
            # new section: test if anything interesting collected so far
            if '>>>' not in collected_text:
                collected_text = ''
            element.clear()
        elif element.tag == paragraph_tag:
            # this is a content, extract a formatted text out of it and add it
            # to the pending collected text
            collected_text += _extract_text(element, ns=ns)
            # release the subtree that has just been processed
            element.clear()

    tests = []
    if '>>>' in collected_text:
        # collect tests
        # TODO: make name hold the name of the current section as well
        # eg filename (section: sectionname)
        tests.append(parser.get_doctest(collected_text, globs, name, path, 0))
    return tests

#
# Unittest API
#

def OODocFileTest(path, module_relative=True, package=None,
                  globs=None, parser=DocTestParser(), **options):
    """Build a DocFileCase out of an OpenOffice document.

    Arguments are similar to those of the doctest.DocFileTest function:
    'path', 'module_relative', 'package', 'globs' and 'parser' are used to
    extract the test from the document, and the remaining keyword arguments
    are handed to DocFileCase.

    Raise a ValueError if the document does not hold exactly one test, which
    is the case when no '>>>' prompt is found in it.
    """
    if module_relative:
        # Resolve the package here: when it is None, the module is guessed
        # from the calling frame, and deferring this to extract_doctests would
        # make this very module the caller instead of the user's one.
        package = doctest._normalize_module(package)
    tests = extract_doctests(path, module_relative=module_relative,
                             package=package, globs=globs, parser=parser)
    if len(tests) != 1:
        raise ValueError("Expected exactly one doctest in %r, found %d"
                         % (path, len(tests)))
    return DocFileCase(tests[0], **options)

def OODocFileSuite(*paths, **kw):
    suite = unittest.TestSuite()

    # We do this here so that _normalize_module is called at the right
    # level.  If it were called in DocFileTest, then this function
    # would be the caller and we might guess the package incorrectly.
    if kw.get('module_relative', True):
        kw['package'] = doctest._normalize_module(kw.get('package'))
    kw2 = kw.copy()
    for key in ('module_relative', 'package', 'globs', 'parser'):
        if kw2.has_key(key):
            del kw2[key]

    for path in paths:
        for test in extract_doctests(path, **kw):
            suite.addTest(DocFileCase(test, **kw2))
    return suite

OODocFileSuite.__doc__ = doctest.DocFileSuite.__doc__

def main():
    """Run oodoctest.py as a command line utility

    The command line arguments are the paths of the OpenOffice documents to
    test, taken relatively to the current working directory. The doctests
    extracted from all of them are run and a summary is printed.

    Options:

    - -v / --verbose: show the examples as they are run and a detailed
      summary
    - -e / --ellipsis on|off: enable the doctest ELLIPSIS option (on by
      default)

    Exit with status 1 after printing the usage if no document is given.
    """
    # Settings from commandline options
    from optparse import OptionParser
    parser = OptionParser()
    parser.set_usage("%prog [options] oofile [oofile2 ...]")
    parser.add_option("-v", "--verbose",
                  action="store_const", const=1, dest="verbosity",
                  help="show extracted doctests and their results",
                  default=0)
    parser.add_option("-e", "--ellipsis", dest="ellipsis", type="choice",
                      help="enable ellipsis (default is on)",
                      choices=['on','off'],
                      default='on')
    options, args = parser.parse_args()
    if not args:
        # a single parenthesized string prints the same with the print
        # statement and with the print function
        print("You should provide an OpenOffice file as argument:")
        parser.print_usage()
        sys.exit(1)
    cwd = os.getcwd()
    paths = [os.path.join(cwd, arg) for arg in args]
    kw = {
        'module_relative': False,
        }
    optionflags = 0
    # Compare by value: a string coming from the command line is not
    # guaranteed to be the same object as the 'on' literal
    if options.ellipsis == 'on':
        optionflags = optionflags | doctest.ELLIPSIS

    # tests extraction
    tests = []
    for path in paths:
        tests.extend(extract_doctests(path, **kw))

    # run away! run away!
    # The verbosity is given explicitly: left to itself the runner only looks
    # for a literal '-v' in sys.argv and would miss '--verbose'
    runner = doctest.DocTestRunner(verbose=bool(options.verbosity),
                                   optionflags=optionflags)
    for test in tests:
        runner.run(test)
    runner.summarize(verbose=options.verbosity)


if __name__ == '__main__':
    main()

