########################################################################
#
# File Name:            TreeCompare.py
#
# Documentation:        http://docs.4suite.org/Lib/TreeCompare.py.html
#
"""
Tools to compare two XML strings.  Mostly for test suites
WWW: http://4suite.org/4DOM         e-mail: support@4suite.org

Copyright (c) 2000-2001 Fourthought Inc, USA.   All Rights Reserved.
See  http://4suite.org/COPYRIGHT  for license and copyright information
"""
import re, string, types
from xml.dom import Node
from xml.dom.ext.reader import HtmlLib
from xml.dom.ext.reader import Sgmlop

# HtmlLib/Sgmlop is broken in PyXML 0.6.5 (fixed in CVS):
# handle_special should simply ignore DOCTYPE directives, and
# handle_proc should ignore PIs as well.  Both handlers are therefore
# replaced by a callable that accepts anything and does nothing.
def _ignore_markup(*args):
    return None

Sgmlop.HtmlParser.handle_special = _ignore_markup
Sgmlop.HtmlParser.handle_proc = _ignore_markup


# Setup the XML reader: use cDomlette's RawExpatReader when it can be
# imported, otherwise fall back to pDomlette's PyExpatReader.
from Ft.Lib import pDomlette
try:
    from Ft.Lib import cDomlette
    XmlReader = cDomlette.RawExpatReader
except (ImportError, AttributeError):
    # cDomlette could not be imported, or it has no RawExpatReader.
    # Anything else (e.g. KeyboardInterrupt) is allowed to propagate
    # instead of being silently turned into the fallback.
    XmlReader = pDomlette.PyExpatReader

# Sniffers used to decide how a document should be parsed.
# g_xmlTest recognizes the start of an XML declaration ("<?xml").
g_xmlTest = re.compile(r'<\?xml')
# g_htmlTest recognizes an HTML doctype or an <html start tag, in any
# letter case.
g_htmlTest = re.compile("(<!doctype html)|(<html)", re.I)

# Create a text comparision func
# cDomlette uses UTF8 buffers, while pDomlette can be Unicode
try:
    unicode('')
    def utf8_compare(s1, s2):
        if type(s1) != type(s2):
            if type(s1) == types.UnicodeType:
                s1 = s1.encode('utf8')
            if type(s2) == types.UnicodeType:
                s2 = s2.encode('utf8')
        return cmp(s1, s2)
except:
    utf8_compare = cmp

def TreeCompare(xml1, xml2, ignoreWhitespace=0):
    """
    Parse two markup strings and compare the resulting node trees.

    xml1 - the expected text.  It alone decides how both strings are
           read: as HTML when it contains an HTML doctype or <html tag
           and does not start with an XML declaration, otherwise as XML.
    xml2 - the actual text
    ignoreWhitespace - handed on to NodeCompare; forced on for HTML

    Returns a false value when the trees are the same and a true value
    when they differ (the negation of NodeCompare's result, so the
    result reads like that of cmp()).

    If either string cannot be parsed, the offending text is printed
    and the reader's exception is re-raised.
    """
    # See if we need to use XML or HTML
    if g_htmlTest.search(xml1) and not g_xmlTest.match(xml1):
        reader1 = reader2 = HtmlLib.Reader()
        ignoreWhitespace = html = 1
    else:
        reader1 = pDomlette.PyExpatReader()
        reader2 = XmlReader()
        html = 0

    try:
        doc1 = reader1.fromString(xml1)
    except:
        # Show what failed to parse, then let the error propagate
        print('--- Expected ---')
        print(xml1)
        raise

    # From here on doc1 exists; make sure it (and later doc2) is released
    # no matter how the parse of xml2 or the comparison ends.
    try:
        try:
            doc2 = reader2.fromString(xml2)
        except:
            print('--- Actual ---')
            print(xml2)
            raise
        try:
            result = NodeCompare(doc1, doc2, ignoreWhitespace, html)
        finally:
            reader2.releaseNode(doc2)
    finally:
        reader1.releaseNode(doc1)
    return not result


def NodeCompare(node1, node2, ignoreWhitespace=0, isHtml=0):
    """
    Recursively compare two DOM nodes.

    node1, node2 - the nodes to compare
    ignoreWhitespace - when true, a text node consisting only of
        whitespace is treated as having no data, so two such nodes
        match regardless of the exact whitespace they hold
    isHtml - when true, elements are matched on tagName and attributes
        on name; otherwise both are matched on localName and
        namespaceURI.  HTML comparison also ignores whitespace-only
        text, except inside SCRIPT, STYLE, PRE and TEXTAREA elements.

    What is compared:
      - the nodeType of the two nodes must be equal
      - for documents, only the two documentElements
      - for elements, the element name, the number and names of the
        attributes, the number of child nodes, and each pair of children
      - for text nodes, the data (through utf8_compare)
      - any other node type matches once the nodeTypes agree

    Returns 1 when the nodes are considered the same.  Otherwise the
    first difference found is printed through ReportError and 0 is
    returned.
    """
    # HTML comparison has always ignored whitespace-only text, even when
    # the caller left ignoreWhitespace off.  Fold that in once, here, so
    # the recursive worker is able to switch it off again inside
    # whitespace-sensitive elements.
    return _NodeCompare(node1, node2, ignoreWhitespace or isHtml, isHtml)


# HTML elements where whitespace is significant
_g_whitespaceTags = ['SCRIPT', 'STYLE', 'PRE', 'TEXTAREA']


def _CmpAttrByName(attr1, attr2):
    # Sort order for HTML attributes: by attribute name
    return cmp(attr1.name, attr2.name)


def _CmpAttrByExpandedName(attr1, attr2):
    # Sort order for XML attributes: by (namespaceURI, localName), the
    # same properties the attributes are matched on, so that a differing
    # prefix cannot change how the two attribute lists are paired up.
    return cmp((attr1.namespaceURI, attr1.localName),
               (attr2.namespaceURI, attr2.localName))


def _NodeCompare(node1, node2, ignoreWhitespace, isHtml):
    # Recursive worker for NodeCompare: returns 1 on a match, else 0
    if node1.nodeType != node2.nodeType:
        return ReportError(node1, node2, 'nodeType')

    if node1.nodeType == Node.DOCUMENT_NODE:
        # Only the root elements take part in the comparison
        return _NodeCompare(node1.documentElement, node2.documentElement,
                            ignoreWhitespace, isHtml)

    if node1.nodeType == Node.ELEMENT_NODE:
        if isHtml:
            # HTML DOM should already capitalize all tagNames
            if node1.tagName != node2.tagName:
                return ReportError(node1, node2, 'tagName')
            if node1.tagName in _g_whitespaceTags:
                # Applies to this element's whole subtree
                ignoreWhitespace = 0
            attrOrder = _CmpAttrByName
        else:
            if node1.localName != node2.localName:
                return ReportError(node1, node2, 'localName')
            if node1.namespaceURI != node2.namespaceURI:
                return ReportError(node1, node2, 'namespaceURI')
            attrOrder = _CmpAttrByExpandedName

        attrs1 = node1.attributes.values()
        attrs2 = node2.attributes.values()
        if len(attrs1) != len(attrs2):
            return ReportError(node1, node2, 'attributes')
        attrs1.sort(attrOrder)
        attrs2.sort(attrOrder)
        # NOTE(review): only the names of the attributes are matched;
        # attribute values are never compared.  Confirm this is intended.
        for attr1, attr2 in map(None, attrs1, attrs2):
            if isHtml:
                # HTML DOMs should force upper case already
                if attr1.name != attr2.name:
                    return ReportError(attr1, attr2, 'name')
            else:
                if attr1.localName != attr2.localName:
                    print(node1.attributes.keys())
                    print(node2.attributes.keys())
                    return ReportError(attr1, attr2, 'localName')
                if attr1.namespaceURI != attr2.namespaceURI:
                    return ReportError(attr1, attr2, 'namespaceURI')

        # Children are paired by position, so the counts must agree
        # (whitespace-only text nodes are counted like any other child)
        if len(node1.childNodes) != len(node2.childNodes):
            return ReportError(node1, node2, 'childNodes')
        for child1, child2 in map(None, node1.childNodes, node2.childNodes):
            if not _NodeCompare(child1, child2, ignoreWhitespace, isHtml):
                return 0
    elif node1.nodeType == Node.TEXT_NODE:
        text1 = node1.data
        text2 = node2.data
        if ignoreWhitespace:
            # Whitespace-only text counts as "no text" on either side
            if not string.strip(text1):
                text1 = None
            if not string.strip(text2):
                text2 = None
        if utf8_compare(text1, text2):
            return ReportError(node1, node2, 'data')

    # All tests pass, they are the same
    return 1
        

def ReportError(node1, node2, attribute):
    """
    Print a description of a mismatch between two nodes.

    For each node in turn this prints a banner line, the node's
    ancestry (through PrintParentage), and finally a line holding the
    name of the differing attribute, the repr of that attribute's value
    on the node, and the node itself.

    Always returns 0, so a caller can write "return ReportError(...)".
    """
    sides = (('--- Node 1 ---', node1), ('--- Node 2 ---', node2))
    for banner, node in sides:
        print(banner)
        PrintParentage(node)
        print('%s %s %s' % (attribute, repr(getattr(node, attribute)), node))
    return 0

def PrintParentage(node):
    """
    Print the ancestry of a node, outermost ancestor first.

    One nodeName is printed per line, ending with the node itself; each
    level is indented two spaces further than the one above it.
    """
    # An attribute is reached from its owner element, not a parent node
    if node.nodeType == Node.ATTRIBUTE_NODE:
        ancestor = node.ownerElement
    else:
        ancestor = node.parentNode

    # Collect from the node upwards, then flip to get top-down order
    lineage = [node]
    while ancestor:
        lineage.append(ancestor)
        ancestor = ancestor.parentNode
    lineage.reverse()

    depth = 0
    for entry in lineage:
        print('%s%s' % ('  ' * depth, entry.nodeName))
        depth = depth + 1
