########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Xml/Xslt/StylesheetReader.py,v 1.44 2004/09/06 23:37:08 mbrown Exp $
"""
Classes for the creation of a stylesheet object

Copyright 2002 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import cStringIO, re
from xml.dom import Node

from Ft.Lib import Uri
from Ft.Xml import Xslt, XMLNS_NAMESPACE, InputSource, HAS_PYEXPAT
from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Xslt import XsltException, Error

from StylesheetHandler import NS_SPLIT_CHAR, StylesheetHandler

# Regular-expression fragments for recognizing an XML declaration.
# Borrowed from xmllib (it defines too many regexps to justify importing it).
_S = '[ \t\r\n]+'                       # white space
_opS = '[ \t\r\n]*'                     # optional white space
_QStr = "(?:'[^']*'|\"[^\"]*\")"        # quoted XML string
# Matches a whole XML declaration: the version pseudo-attribute is required,
# while encoding and standalone are optional.  The named group 'encoding'
# captures the encoding value *including* its surrounding quotes.
_xmldecl = re.compile('<\?xml' + _S + 'version' + _opS + '=' + _opS + _QStr +
                      '(?:' + _S + 'encoding' + _opS + '=' + _opS +
                      "(?P<encoding>'[A-Za-z][-A-Za-z0-9._]*'|"
                      '"[A-Za-z][-A-Za-z0-9._]*"))?'
                      '(?:' + _S + 'standalone' + _opS + '=' + _opS +
                      '(?:\'(?:yes|no)\'|"(?:yes|no)"))?' + _opS + '\?>')

def _change_node(xslt_node, attributes, importIndex=0):
    """
    Apply a set of attribute overrides to a whole stylesheet subtree.

    Every node reachable from xslt_node through its 'children' attribute
    (xslt_node included) has the entries of the attributes mapping merged
    into its instance dictionary and its importIndex increased by the
    given importIndex.  Nodes are visited parent first, children in order.
    """
    pending = [xslt_node]
    while pending:
        current = pending.pop()
        vars(current).update(attributes)
        current.importIndex += importIndex
        # push the children reversed so that they are popped in document order
        offspring = list(current.children or [])
        offspring.reverse()
        pending.extend(offspring)
    return


class StylesheetReader(StylesheetHandler):
    """
    Reads a stylesheet from a previously built instant, a DOM document or
    an InputSource, feeding the parse events to the StylesheetHandler
    callbacks inherited by this class.
    """

    def __init__(self, altBaseUris=None, ownerDocument=None,
                 importIndex=0, globalVars=None, extElements=None,
                 visitedStyUris=None):
        """
        altBaseUris - list of alternative base URIs (an empty list is
                      stored when omitted)
        ownerDocument - root of the stylesheet tree that the stylesheet
                        being read is included/imported into, or None
                        when reading a top-level stylesheet

        The remaining arguments are passed through unchanged to
        StylesheetHandler.__init__.
        """
        StylesheetHandler.__init__(self, importIndex, globalVars, extElements,
                                   visitedStyUris)
        self._alt_base_uris = altBaseUris or []
        self._ownerDoc = ownerDocument
        return

    def reset(self):
        """
        Reset the handler state and forget the owner document and the
        current input source.
        """
        StylesheetHandler.reset(self)
        self._ownerDoc = None
        self._input_source = None
        return

    def clone(self):
        """
        Return a new reader of the same class constructed from this
        reader's current settings.
        """
        # NOTE(review): _import_index, _global_vars, _extElements and
        # _visited_stylesheet_uris are presumably set up by
        # StylesheetHandler.__init__ - they are not assigned in this class.
        return self.__class__(self._alt_base_uris, self._ownerDoc,
                              self._import_index, self._global_vars,
                              self._extElements, self._visited_stylesheet_uris)

    def createParser(self, baseUri):
        """
        Create a parser object whose expat-style handlers are bound to
        this reader's callbacks and whose base is set to baseUri.

        A real expat parser is used when pyexpat is available, otherwise
        a DummyExpatParser stands in for it.
        """
        if HAS_PYEXPAT:
            from xml.parsers import expat
            parser = expat.ParserCreate(namespace_separator=NS_SPLIT_CHAR)
        else:
            parser = DummyExpatParser(self, baseUri)
        parser.StartElementHandler = self.startElement
        parser.EndElementHandler = self.endElement
        parser.StartNamespaceDeclHandler = self.startNamespaceDecl
        parser.EndNamespaceDeclHandler = self.endNamespaceDecl
        parser.CharacterDataHandler = self.characterData
        parser.ExternalEntityRefHandler = self.externalEntityRef
        parser.SetBase(baseUri)
        return parser

    def fromInstant(self, instant, baseUri='', is_import=0):
        """
        Reuse the stylesheet tree held by instant.root.

        baseUri - when given and different from the root's baseUri, it is
                  written onto every node of the tree
        is_import - true when the instant is being imported (as opposed
                    to included); bumps the import index by one more

        Returns the stylesheet object of the instant's root.
        """
        root = instant.root
        if baseUri and root.baseUri != baseUri:
            update_attrs = {'baseUri' : baseUri}
        else:
            update_attrs = {}

        if self._ownerDoc:
            # this is an inclusion; change the document and importIndex
            update_attrs['ownerDocument'] = self._ownerDoc
            _change_node(root, update_attrs, self._import_index + (is_import and 1 or 0))
            self._ownerDoc.primeInstructions.extend(root.primeInstructions)
            self._ownerDoc.idleInstructions.extend(root.idleInstructions)
        elif update_attrs:
            _change_node(root, update_attrs)

        # Update the state of the reader
        if not self._ownerDoc:
            self._ownerDoc = root
        self._import_index = root.stylesheet.importIndex

        return root.stylesheet

    def fromDocument(self, document, baseUri='', isf=None, processXIncludes=False):
        """
        Read in a stylesheet from an already parsed DOM document.

        document - the DOM document holding the stylesheet
        baseUri - base URI of the stylesheet; when empty it is taken from
                  the document's documentURI, baseURI or refUri attribute
        isf - input source factory used to fake an input source for the
              document (defaults to InputSource.DefaultFactory)
        processXIncludes - passed on to the faked input source

        Returns the stylesheet object.  When pyexpat is available, an
        expat.ExpatError raised while walking the document is re-raised
        as an XsltException with Error.STYLESHEET_PARSE_ERROR.
        """
        if not baseUri:
            # The original code read these attributes from an undefined
            # name ('node'), raising NameError; they belong to 'document'.
            # A baseURI attribute wins over documentURI; refUri is only
            # consulted when there is no baseURI attribute.
            if hasattr(document, 'documentURI'):
                baseUri = document.documentURI
            if hasattr(document, 'baseURI'):
                baseUri = document.baseURI
            elif hasattr(document, 'refUri'):
                baseUri = document.refUri

        self._baseUri = baseUri
        self._processXIncludes = processXIncludes

        #We need to fake out an input source on the document
        isf = isf or InputSource.DefaultFactory
        self._input_source = isf.fromStream(None, baseUri, processIncludes=processXIncludes)

        # The DomParser replays the DOM tree as expat callbacks
        expat_parser = self.createParser(baseUri)
        self.parser = DomParser(expat_parser)
        self.startDocument()

        if HAS_PYEXPAT:
            from xml.parsers import expat
            try:
                self.parser.parse(document)
            except expat.ExpatError, error:
                raise XsltException(Error.STYLESHEET_PARSE_ERROR, self._baseUri,
                                    error.lineno, error.offset,
                                    expat.ErrorString(error.code))
        else:
            self.parser.parse(document)

        self.endDocument()
        root = self._node_stack[0]
        root.stylesheet._input_source = self._input_source

        #Cache for document()
        self._ownerDoc.sourceNodes[self._baseUri] = document
        if root is self._ownerDoc:
            # the top-most stylesheet
            root.stylesheet.setup()

        return root.stylesheet

    def fromSrc(self, isrc, extElements=None):
        """
        Read in a stylesheet from an InputSource

        isrc - the InputSource; its stream is read completely
        extElements - optional mapping merged into the reader's
                      extension elements before parsing

        Returns the stylesheet object.  The raw stylesheet content is
        stored in the owner document's 'sources' mapping under isrc.uri.
        """
        #FIXME: This is very wasteful in the case that document("") is not called,
        # because we need to cache the original content of the stylesheet for document("")
        # when we read in the stream, then create a new InputSource with the content
        content = isrc.stream.read()
        new_source = isrc.clone(cStringIO.StringIO(content))

        #Set up the ext elements if given
        if extElements is not None:
            self._extElements.update(extElements)

        self._processXIncludes = isrc.processIncludes

        #Do the parsing
        if HAS_PYEXPAT:
            stylesheet =  self._parseSrc(new_source)
        else:
            # without pyexpat, build a DOM first and walk that instead
            doc = NonvalidatingReader.parse(new_source)
            stylesheet = self.fromDocument(doc, isrc.uri, new_source.factory)

        #Do the caching
        self._ownerDoc.sources[isrc.uri] = content

        return stylesheet

    def _parseSrc(self, isrc):
        """
        Parse the stream of the given InputSource with a parser from
        createParser and return the resulting stylesheet object.

        When pyexpat is available, an expat.ExpatError is re-raised as an
        XsltException with Error.STYLESHEET_PARSE_ERROR.
        """
        self._input_source = isrc

        self.parser = self.createParser(isrc.uri)
        self.startDocument()

        if HAS_PYEXPAT:
            from xml.parsers import expat
            try:
                self.parser.ParseFile(isrc.stream)
            except expat.ExpatError, error:
                raise XsltException(Error.STYLESHEET_PARSE_ERROR, isrc.uri,
                                    error.lineno, error.offset,
                                    expat.ErrorString(error.code))
        else:
            self.parser.ParseFile(isrc.stream)

        self.endDocument()
        root = self._node_stack[0]
        root.stylesheet._input_source = self._input_source

        if root is self._ownerDoc:
            # the top-most stylesheet
            root.stylesheet.setup()

        return root.stylesheet


class DummyExpatParser:
    """
    Minimal stand-in for an expat parser object.

    Each expat-style handler forwards to the like-named callback of the
    wrapped handler object; an AttributeError raised while doing so
    (e.g. because the callback does not exist) is silently ignored.
    """
    #Methods not defined will never be called
    #e.g. ExternalEntityParserCreate
    def __init__(self, handler, base=None):
        self._handler = handler
        self._base = base
        return

    def _forward(self, callback_name, *args):
        # Look up and invoke the named callback, swallowing AttributeError
        try:
            getattr(self._handler, callback_name)(*args)
        except AttributeError:
            pass
        return

    def GetBase(self):
        return self._base

    def SetBase(self, baseUri):
        self._base = baseUri

    def StartNamespaceDeclHandler(self, prefix, namespace):
        self._forward('startNamespaceDecl', prefix, namespace)
        return

    def EndNamespaceDeclHandler(self, prefix):
        self._forward('endNamespaceDecl', prefix)
        return

    def StartElementHandler(self, name, attrs):
        self._forward('startElement', name, attrs)
        return

    def EndElementHandler(self, name):
        self._forward('endElement', name)
        return

    def CharacterDataHandler(self, text):
        self._forward('characterData', text)
        return

    def parseFile(self, uri):
        # Build a DOM for the URI, then replay it through this parser
        document = NonvalidatingReader.parseUri(uri)
        DomParser(self).parse(document)
        return


class DomParser:
    """
    Adapter that replays a DOM tree as the equivalent sequence of expat
    parser callbacks, so that the stylesheet logic can live in a single
    handler class regardless of where the stylesheet came from.
    """

    def __init__(self, parser, nsSplit=NS_SPLIT_CHAR):
        self._parser = parser
        self._nsSplitChar = nsSplit

        # expat parser objects expose these; the DOM has no positions
        self.ErrorLineNumber = '??'
        self.ErrorColumnNumber = '??'
        return

    # --- pass-throughs mimicking the expat parser object interface ---

    def GetBase(self):
        return self._parser.GetBase()

    def SetBase(self, baseUri):
        return self._parser.SetBase(baseUri)

    def ExternalEntityParserCreate(self, context):
        return self._parser.ExternalEntityParserCreate(context)

    def parse(self, document):
        """
        Walk the document element (if any) of the given DOM document,
        firing the wrapped parser's handlers along the way.
        """
        root = document.documentElement
        if not root:
            return
        self._current_nss = {}
        self._walk_tree(root)
        return

    def _walk_tree(self, node):
        """
        Fire the callbacks for node and, for elements, its descendants.
        """
        kind = node.nodeType
        if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            # CDataSection is just another way of specifying text
            self._parser.CharacterDataHandler(node.data)
            return
        if kind != Node.ELEMENT_NODE:
            # XSLT ignores everything except Text and Element nodes
            return

        target = self._parser
        split = self._nsSplitChar

        # Separate namespace declarations from ordinary attributes and
        # give the latter expat-style names
        declared = {}
        expat_attrs = {}
        for attr in node.attributes.values():
            uri = attr.namespaceURI
            if uri == XMLNS_NAMESPACE:
                # an actual declaration: xmlns:prefix or the default xmlns
                if attr.prefix:
                    key = attr.localName
                else:
                    key = None
                declared[key] = attr.value or None
            elif uri:
                # DOM doesn't need separate namespace declarations
                declared[attr.prefix] = uri
                expat_attrs[uri + split + attr.localName] = attr.value
            else:
                expat_attrs[attr.localName] = attr.value

        # expat-style element name
        element_uri = node.namespaceURI
        if element_uri:
            # DOM doesn't need separate namespace declarations
            declared[node.prefix] = element_uri
            tag = element_uri + split + node.localName
        else:
            tag = node.localName

        # Announce only the bindings that differ from the ones in scope.
        # Work from a snapshot since redundant entries are dropped as we go.
        outer = self._current_nss.copy()
        for prefix, uri in list(declared.items()):
            if outer.get(prefix, -1) == uri:
                del declared[prefix]
            else:
                self._current_nss[prefix] = uri
                target.StartNamespaceDeclHandler(prefix, uri)

        # the element itself and its content
        target.StartElementHandler(tag, expat_attrs)
        for child in node.childNodes:
            self._walk_tree(child)
        target.EndElementHandler(tag)

        # close the bindings opened above and restore the outer scope
        for prefix in declared.keys():
            target.EndNamespaceDeclHandler(prefix)
        self._current_nss = outer
        return
