########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Xml/__init__.py,v 1.31 2004/09/22 19:49:35 mbrown Exp $
"""
Things commonly needed by many modules in Ft.Xml

Copyright 2002 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

# Both "no namespace" and "no prefix" are represented by None.
EMPTY_NAMESPACE = None
EMPTY_PREFIX = None
# Well-known namespace URIs, kept as unicode objects:
# the namespace of the reserved "xml" prefix, the namespace of xmlns
# declaration attributes, and the XHTML namespace.
XML_NAMESPACE = u"http://www.w3.org/XML/1998/namespace"
XMLNS_NAMESPACE = u"http://www.w3.org/2000/xmlns/"
XHTML_NAMESPACE = u"http://www.w3.org/1999/xhtml"

# Defined here to ensure that cDomlette and FtMiniDom readers operate
# the same.
# NOTE(review): presumably a boolean flag (1 = read the external DTD
# subset); the readers that consume it are not visible in this module.
READ_EXTERNAL_DTD = 1

from Ft import FtException, __version__

class ReaderException(FtException):
    """
    FtException subclass whose error codes are the class constants
    below and whose message source is MessageSource.READER.
    """
    # 1xx: parse errors
    XML_PARSE_ERROR = 100
    XML_SAX_PARSE_ERROR = 101

    # 2xx: namespace (xmlns) errors
    XMLNS_UNKNOWN_PREFIX = 200
    INVALID_XMLNS = 201
    XMLNS_DUPLICATE_ATTRS = 202

    def __init__(self, errorCode, *args):
        """
        errorCode is presumably one of the class constants above (this
        is not enforced here).  Any further positional arguments are
        handed to FtException as a single tuple.
        """
        # MessageSource is imported further down in this module, so the
        # name is only looked up when an exception is instantiated.
        messages = MessageSource.READER
        FtException.__init__(self, errorCode, messages, args)

class XIncludeException(FtException):
    """
    FtException subclass whose error codes are the class constants
    below and whose message source is MessageSource.XINCLUDE.
    """
    XINCLUDE_MISSING_HREF = 10
    CIRCULAR_INCLUDE_ERROR = 20

    def __init__(self, errorCode, *args):
        """
        errorCode is presumably one of the class constants above (this
        is not enforced here).  Any further positional arguments are
        handed to FtException as a single tuple.
        """
        # MessageSource is imported further down in this module, so the
        # name is only looked up when an exception is instantiated.
        messages = MessageSource.XINCLUDE
        FtException.__init__(self, errorCode, messages, args)


import MessageSource


def PySplitQName(qname):
    """
    Pure Python counterpart of the C SplitQName(); it is only bound to
    the name SplitQName when the C version cannot be imported.

    Splits a QName (see Namespaces in XML,
    <http://www.w3.org/TR/REC-xml-names>) at its last colon and returns
    the tuple (prefix, local part).  When the name contains no colon
    the prefix is None and the local part is the whole name.
    """
    colon = qname.rfind(':')
    if colon < 0:
        # No colon at all: the name is unprefixed.
        return (None, qname)
    return (qname[:colon], qname[colon + 1:])

# Use the C implementation when its extension module can be imported and
# fall back to the pure Python one otherwise.  PySplitQName stays
# available under its own name either way, so both can be tested.
try:
    import SplitQNamec
except ImportError:
    SplitQName = PySplitQName
else:
    SplitQName = SplitQNamec.SplitQName


#Wrapped so that Ft.Xml.XUpdate is only imported when this is first called
def ApplyXUpdate(*args, **kw_args):
    """
    Call Ft.Xml.XUpdate.ApplyXUpdate with the given positional and
    keyword arguments and return its result.

    The XUpdate module is imported inside the function, so it is only
    loaded when this wrapper is actually called.
    """
    # Bind the submodule explicitly instead of relying on the import
    # having set "XUpdate" as an attribute of this package.
    from Ft.Xml import XUpdate
    # Extended call syntax replaces the deprecated apply() built-in.
    return XUpdate.ApplyXUpdate(*args, **kw_args)

#Good ol' backward compatibility for creative spellings
def ApplyXupdate(*args, **kw_args):
    """
    Call Ft.Xml.XUpdate.ApplyXupdate (note the lower-case "u") with the
    given positional and keyword arguments and return its result.

    The XUpdate module is imported inside the function, so it is only
    loaded when this wrapper is actually called.
    """
    # Bind the submodule explicitly instead of relying on the import
    # having set "XUpdate" as an attribute of this package.
    from Ft.Xml import XUpdate
    # Extended call syntax replaces the deprecated apply() built-in.
    return XUpdate.ApplyXupdate(*args, **kw_args)


from distutils import version
# Minimum PyXML version; CheckVersion() compares the installed version
# against it and VerifyPyexpat() quotes it in its error messages.
pyxml_required = version.StrictVersion('0.8.0')
def CheckVersion(feature=None):
    """
    PyXML is required by some features of 4Suite.
    This is a common function to test whether a correct version of
    PyXML is installed (at least pyxml_required). If the test fails,
    an explanatory message is printed to standard output and
    SystemExit is raised with exit status 1; otherwise None is
    returned.

    The feature argument is a string indicating which feature in
    4Suite requires PyXML. When it is omitted or empty, the message
    refers to "this feature".
    """
    try:
        import _xmlplus
        xml_version = version.StrictVersion(_xmlplus.__version__)
    except (ImportError, AttributeError, TypeError, ValueError):
        # PyXML is not installed, does not expose __version__, or its
        # version string cannot be parsed: treat it as too old.  Other
        # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        xml_version = version.StrictVersion('0.0.0')

    if xml_version < pyxml_required:
        import sys
        if feature:
            feature_string = "%s in " % feature
        else:
            feature_string = "this feature in "
        message = """
        PyXML v%s is required for %s4Suite.
        It is available at http://sourceforge.net/projects/pyxml.
        """ % (str(pyxml_required),feature_string)
        # Same output as the former print statement (text plus newline).
        sys.stdout.write(message + "\n")
        sys.exit(1)

# HAS_PYEXPAT is 1 when xml.parsers.expat can be imported and provides
# ParserCreate, and 0 otherwise.  The probe does not leave the name
# "expat" bound in this module.
try:
    from xml.parsers import expat
    expat.ParserCreate
    del expat
except (ImportError, AttributeError):
    HAS_PYEXPAT = 0
else:
    HAS_PYEXPAT = 1

def VerifyPyexpat(dtdSupport=0, featureName=None):
    """
    Some features of 4Suite need both expat (the XML parser written in
    C) and its Python wrapper, pyexpat. This function checks that they
    are installed and usable. Where the HAS_PYEXPAT constant merely
    records the result, this function raises SystemExit on failure
    and returns HAS_PYEXPAT (i.e. 1) on success.

    If dtdSupport is true, the check additionally requires an expat
    with DTD support (expat 1.95.0 and up), detected by the presence
    of the 'model' attribute on xml.parsers.expat.

    featureName names the 4Suite feature that needs pyexpat and expat;
    it appears in the SystemExit message and defaults to
    'this 4Suite feature'.
    """
    featureName = featureName or 'this 4Suite feature'

    if not HAS_PYEXPAT:
        raise SystemExit("You must either install PyXML v%s, or "
                         "recompile your python with pyexpat "
                         "support in order to use '%s'" %
                         (pyxml_required, featureName))

    from xml.parsers import expat
    lacks_dtd_support = not hasattr(expat, 'model')
    if dtdSupport and lacks_dtd_support:
        raise SystemExit("The expat library used by pyexpat must "
                         "be at least version 1.95.0 in order to "
                         "use '%s'. You must either install PyXML "
                         "v%s, or install an updated expat "
                         "library and then recompile python." %
                         (featureName, pyxml_required))
    return HAS_PYEXPAT


##4XML string processing design considerations

##1) 4XSLT and 4XPath (indeed 4XPointer, 4XLink and 4RDF as well) will use
##unicode objects for all internal string processing

##* Note that this principle isn't yet all in place.  For instance,
##Jeremy's parsers, 4RDF and some other parts will need extensive work to
##conform to this.  Most of my effort has been in normalizing the
##boundaries between code that is compliant with and non-compliant with
##this principle.

##2) The public programmer's API will deal only with Unicode objects and
##ASCII-encoded strings

##* I actually would like to accept only Unicode objects to avoid any
##English bias, but Jeremy argues for accepting ASCII strings
##
##Also note the following from Fredrik Lundh:
##"Python's Unicode system allows you to mix Unicode strings with
##standard strings, as long as the latter contain only ASCII characters.
##Good practice is to make sure your code is as tolerant as Python.
##
##(or to put it another way, write code that does the right thing if
##an operation returns a Unicode string instead of the corresponding
##ASCII string, and likewise, if a function that usually returns a
##Unicode string returns an ordinary string instead).
##
##Standard strings containing non-ASCII data is a different thing;
##they're encoded, and should be seen as binary buffers."

##3) Input from file, URI, etc. will accept properly encoded character
##buffers

##* Note: this is implemented by the codec's streamreader

##4) Output APIs to Python strings will output Unicode objects

##5) Output APIs to file objects will put out character buffers with the
##appropriate encoding based on the processing (e.g. xsl:output element)

##* Note: this is implemented by the codec's streamwriter

##6) In XML and HTML output, named character entities will be preferred
##for output, followed by the natural repr from the encoding, with a
##fall-back to numerical character entities
