#include "expat_module.h"
#include "parse_event_handler.h"
#include "util.h"

/** EXPAT XML_Char to PyUnicode helpers *********************************/

#ifdef XML_UNICODE
/* Expat uses UTF-16 */
/* Build a Python unicode object from `size` UTF-16 code units at `str`.
   Returns whatever PyUnicode_DecodeUTF16 returns (NULL on failure). */
static PyObject *Unicode_FromXMLCharAndSize(const XML_Char *str, int size)
{
  /* Expat's BYTEORDER macro describes the native layout of XML_Char; the
     codec takes -1 for little-endian and 1 for big-endian input. */
  int byteorder;

  if (BYTEORDER == 1234) {
    byteorder = -1;
  } else {
    byteorder = 1;
  }

  /* The codec wants a byte count: a UTF-16 code unit is always two bytes,
     even when sizeof(Py_UNICODE) is 4. */
  return PyUnicode_DecodeUTF16((const char *)str, size << 1, NULL,
                               &byteorder);
}

/* Build a Python unicode object from a zero-terminated UTF-16 XML_Char
   string.  Returns whatever PyUnicode_DecodeUTF16 returns (NULL on
   failure). */
static PyObject *Unicode_FromXMLChar(const XML_Char *str)
{
  const XML_Char *end;
  int byteorder = (BYTEORDER == 1234) ? -1 : 1;

  /* Walk to the terminating zero code unit to learn the length. */
  for (end = str; *end != 0; end++)
    ;

  /* Byte count is twice the code-unit count: UTF-16 units are two bytes
     wide no matter what sizeof(Py_UNICODE) is. */
  return PyUnicode_DecodeUTF16((const char *)str, (end - str) << 1, NULL,
                               &byteorder);
}

/* Convert a Python unicode object into a freshly malloc()ed, zero-terminated
   UTF-16 XML_Char string for Expat.

   On UCS-4 builds, code points at or above 0x10000 are split into a
   surrogate pair.  The caller owns the result and releases it with free().
   Returns NULL with MemoryError set if the buffer cannot be allocated. */
static XML_Char *XMLChar_FromUnicode(PyObject *u)
{
  Py_UNICODE *s = PyUnicode_AS_UNICODE(u);
  int size = PyUnicode_GET_SIZE(u);
  XML_Char *result, *p;
#ifdef Py_UNICODE_WIDE /* UCS-4 */
  int i, pairs;

  /* Every supplementary-plane character needs one extra code unit. */
  for (i = pairs = 0; i < size; i++) {
    if (s[i] >= 0x10000) pairs++;
  }
  result = p = (XML_Char *) malloc(((size_t)size + (size_t)pairs + 1)
                                   * sizeof(XML_Char));
  if (result == NULL) {
    PyErr_NoMemory();
    return NULL;
  }

  while (size-- > 0) {
    Py_UNICODE ch = *s++;
    if (ch >= 0x10000) {
      /* UTF-16 stores the high (lead) surrogate first, then the low
         (trail) surrogate. */
      Py_UNICODE offset = ch - 0x10000;
      *p++ = (XML_Char)(0xD800 | (offset >> 10));
      *p++ = (XML_Char)(0xDC00 | (offset & 0x3FF));
    } else {
      *p++ = (XML_Char) ch;
    }
  }

#else /* UCS-2 */
  /* Simple one-to-one copy of the Py_UNICODE data */
  result = p = (XML_Char *) malloc(((size_t)size + 1) * sizeof(XML_Char));
  if (result == NULL) {
    PyErr_NoMemory();
    return NULL;
  }

  while (size-- > 0) {
    *p++ = *s++;
  }
#endif

  /* Zero-terminate the XML_Char string for Expat */
  *p = 0;
  return result;
}

#else
/* Expat uses UTF-8 */
/* Decode `n` XML_Char units (UTF-8 bytes in this build) starting at `s`
   into a Python unicode object by way of PyUnicode_DecodeUTF8. */
#define Unicode_FromXMLCharAndSize(s, n) PyUnicode_DecodeUTF8(s, n, NULL)
/* Same, for a zero-terminated string.  `s` is expanded twice (once for the
   data pointer, once for strlen), so the argument must be free of side
   effects and must not be NULL. */
#define Unicode_FromXMLChar(s) PyUnicode_DecodeUTF8(s, strlen(s), NULL)

/* Convert a Python unicode object into a freshly malloc()ed, zero-terminated
   UTF-8 XML_Char string for Expat.

   Surrogate pairs found in the Py_UNICODE data are combined into a single
   four-byte sequence; isolated surrogates are encoded as three bytes.
   The caller owns the result and releases it with free().
   Returns NULL with MemoryError set if the buffer cannot be allocated. */
static XML_Char *XMLChar_FromUnicode(PyObject *u)
{
  Py_UNICODE *s = PyUnicode_AS_UNICODE(u);
  int size = PyUnicode_GET_SIZE(u);
  XML_Char *result, *p, *shrunk;
  int i;

  /* Overallocate and give the excess back at the end.  No input unit can
     produce more than four bytes, and one more is needed for the
     terminator (which also keeps the empty string from writing into a
     zero-sized allocation). */
  result = p = (XML_Char *) malloc(((size_t)size * 4 + 1) * sizeof(XML_Char));
  if (result == NULL) {
    PyErr_NoMemory();
    return NULL;
  }

  for (i = 0; i < size;) {
    Py_UNICODE ch = s[i++];

    if (ch < 0x80) {
      /* Encode ASCII */
      *p++ = (XML_Char) ch;
    }
    else if (ch < 0x0800) {
      /* Encode Latin-1 */
      *p++ = (XML_Char)(0xC0 | (ch >> 6));
      *p++ = (XML_Char)(0x80 | (ch & 0x3F));
    }
#ifdef Py_UNICODE_WIDE
    else if (ch >= 0x10000) {
      /* Encode UCS-4 Unicode ordinals */
      *p++ = (XML_Char)(0xF0 | (ch >> 18));
      *p++ = (XML_Char)(0x80 | ((ch >> 12) & 0x3F));
      *p++ = (XML_Char)(0x80 | ((ch >> 6) & 0x3F));
      *p++ = (XML_Char)(0x80 | (ch & 0x3F));
    }
#endif
    else {
      /* Encode UCS-2 Unicode ordinals */
      /* Check for high surrogate */
      if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
        Py_UNICODE ch2 = s[i];
        /* Check for low surrogate and combine the two to
           form a UCS4 value */
        if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
          Py_UCS4 ucs4 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
          *p++ = (XML_Char)(0xF0 | (ucs4 >> 18));
          *p++ = (XML_Char)(0x80 | ((ucs4 >> 12) & 0x3F));
          *p++ = (XML_Char)(0x80 | ((ucs4 >> 6) & 0x3F));
          *p++ = (XML_Char)(0x80 | (ucs4 & 0x3F));
          i++;
          continue;
        }
        /* Fall through: handles isolated high surrogates */
      }
      *p++ = (XML_Char)(0xE0 | (ch >> 12));
      *p++ = (XML_Char)(0x80 | ((ch >> 6) & 0x3F));
      *p++ = (XML_Char)(0x80 | (ch & 0x3F));
    }
  }

  /* Zero-terminate the XML_Char string for Expat */
  *p++ = 0;

  /* Cut back to the size actually needed.  If the shrink fails the
     original (larger) buffer is still valid, so hand that back rather
     than leaking it and reporting a failure. */
  shrunk = (XML_Char *) realloc(result, (size_t)(p - result) * sizeof(XML_Char));
  return (shrunk != NULL) ? shrunk : result;
}

#endif

/* Coerce an arbitrary Python object to unicode and convert it to a
   malloc()ed, zero-terminated XML_Char string.

   The caller releases the result with free().  Returns NULL with a Python
   exception set on failure. */
static XML_Char *XMLChar_FromObject(PyObject *obj)
{
  XML_Char *result;
  PyObject *unistr = PyUnicode_FromObject(obj);
  if (unistr == NULL) return NULL;

  result = XMLChar_FromUnicode(unistr);
  Py_DECREF(unistr);

  /* The only way the conversion fails is by running out of memory; make
     sure callers that propagate the NULL also propagate an exception. */
  if (result == NULL && !PyErr_Occurred()) {
    PyErr_NoMemory();
  }
  return result;
}

/** EXPAT callbacks *****************************************************/

/* Forward declarations for the Expat handlers that initParser() installs;
   their definitions follow the parsing routines further down the file. */

/* Installed with XML_SetUnknownEncodingHandler. */
static int unknownEncodingHandler(void *userData, const XML_Char *name,
                                  XML_Encoding *info);

/* Installed with XML_SetUnparsedEntityDeclHandler. */
static void unparsedEntityDeclHandler(void *userData,
                                      const XML_Char *entityName,
                                      const XML_Char *base,
                                      const XML_Char *systemId,
                                      const XML_Char *publicId,
                                      const XML_Char *notationName);

/* Installed with XML_SetExternalEntityRefHandler. */
static int entityRef(XML_Parser parser, const XML_Char *context,
                     const XML_Char *base, const XML_Char *systemId,
                     const XML_Char *publicId);

/* Installed with XML_SetCommentHandler. */
static void comment(void *userData, const XML_Char *data);

/* Installed as a pair with XML_SetElementHandler. */
static void startElement(void *userData, const XML_Char *name,
                         const XML_Char **atts);
static void endElement(void *userData, const XML_Char *name);

/* Installed with XML_SetStartNamespaceDeclHandler. */
static void startNsScope(void *userData, const XML_Char *prefix,
                         const XML_Char *uri);

/* Installed with XML_SetCharacterDataHandler. */
static void characterData(void *userData, const XML_Char *s, int len);

/* Installed with XML_SetProcessingInstructionHandler. */
static void processingInstruction(void *userData, const XML_Char *target,
                                  const XML_Char *data);

/* Installed with XML_SetStartDoctypeDeclHandler and
   XML_SetEndDoctypeDeclHandler respectively. */
static void startDoctypeDecl(void *userData, const XML_Char *name,
                             const XML_Char *sysid, const XML_Char *pubid,
                             int has_internal_subset);
static void endDoctypeDecl(void *userData);

/* Not defined in this part of the file; called at the end of
   setupParserState(), which treats a zero return as failure. */
int initializeStateTableForParseEvents(ParserState *state);


/** Parsing routines **************************************************/

/* free function for PyObjects */
static void pyobject_free(void *ptr) { Py_DECREF((PyObject *)ptr); }

/* Tear down a ParserState built by setupParserState() and free it.

   Finalizes the state table, destroys every stack and list the state owns,
   drops the references held in new_nss and curr_text, and finally frees the
   state itself.  Passing NULL is a no-op.  Always returns 1. */
int cleanupParserState(ParserState *s)
{
  void *junk = NULL;

  if (s == NULL) {
    return 1;
  }

  StateTable_Finalize(s);

  /* The node stack is created with a NULL destructor, so the popped
     pointer is simply discarded. */
  _stack_pop(s->node_stack, &junk);

  /* Reset before the next pop so that a failed pop can never make us
     free() the node pointer fetched above. */
  junk = NULL;
  _stack_pop(s->preserve_state, &junk);
  free(junk);

  _stack_destroy(s->preserve_state);
  _stack_destroy(s->node_stack);
  _stack_destroy(s->xml_base_stack);
  _stack_destroy(s->elem_depth_event_stack);
  list_destroy(s->namespaces);
  /* Release the list's own bookkeeping as well, matching what the error
     paths in setupParserState() do before freeing the List struct. */
  list_destroy(s->visited_hrefs);

  free(s->visited_hrefs);
  free(s->preserve_state);
  free(s->node_stack);
  free(s->xml_base_stack);
  free(s->namespaces);
  free(s->elem_depth_event_stack);

  Py_DECREF(s->new_nss);
  Py_XDECREF(s->curr_text);
  free(s);
  return 1;
}


/* Allocate and initialize the ParserState used while building `doc` from
   `inputSource`.

   On success stores the new state in *sptr and returns 1; the caller later
   releases it with cleanupParserState().  On failure returns 0, leaves
   *sptr set to NULL and has already released everything it allocated, so
   the caller must not call cleanupParserState() for it.

   `doc` and `inputSource` are stored in the state without taking a new
   reference. */
int setupParserState(ParserState **sptr, PyDocumentObject *doc,
                     PyObject *inputSource)
{
  ParserState *state;
  Stack *stack;
  List *list;
  int *new_preserve_state;
  PyObject *uri;

  *sptr = NULL;

  if ((state = (ParserState *)calloc(1, sizeof(ParserState))) == NULL) {
    PyErr_NoMemory();
    return 0;
  }

  /* elem_depth_event_stack */
  if ((stack = (Stack *)malloc(sizeof(Stack))) == NULL) {
    PyErr_NoMemory();
    goto fail_state;
  }
  _stack_init(stack, free);
  state->elem_depth_event_stack = stack;

  /* preserve_state: starts with a single heap-allocated 1 on it */
  if ((stack = (Stack *)malloc(sizeof(Stack))) == NULL) {
    PyErr_NoMemory();
    goto fail_elem_depth_event_stack;
  }
  if ((new_preserve_state = (int *)malloc(sizeof(int))) == NULL) {
    PyErr_NoMemory();
    free(stack);   /* not initialized yet, so a plain free is enough */
    goto fail_elem_depth_event_stack;
  }
  *new_preserve_state = 1;
  _stack_init(stack, free);
  _stack_push(stack, ((void *)new_preserve_state));
  state->preserve_state = stack;

  /* node_stack: entries are not owned by the stack (NULL destructor) */
  if ((stack = (Stack *)malloc(sizeof(Stack))) == NULL) {
    PyErr_NoMemory();
    goto fail_preserve_state;
  }
  _stack_init(stack, NULL);
  state->node_stack = stack;

  /* new namespaces */
  if ((state->new_nss = PyDict_New()) == NULL) {
    goto fail_node_stack;
  }

  /* in-scope namespaces */
  if ((list = (List *)malloc(sizeof(List))) == NULL) {
    PyErr_NoMemory();
    goto fail_new_nss;
  }
  list_init(list, free_nsmapping);
  state->namespaces = list;

  /* XML Base stack: seeded with the input source's URI.  The stack owns
     the reference (pyobject_free releases it), so the lookup has to
     succeed before anything is pushed. */
  if ((stack = (Stack *)malloc(sizeof(Stack))) == NULL) {
    PyErr_NoMemory();
    goto fail_namespaces;
  }
  uri = PyObject_GetAttrString(inputSource, "uri");
  if (uri == NULL) {
    free(stack);   /* not initialized yet, so a plain free is enough */
    goto fail_namespaces;
  }
  _stack_init(stack, pyobject_free);
  _stack_push(stack, (void *) uri);
  state->xml_base_stack = stack;

  state->ownerDoc = doc;

  _stack_push(state->node_stack, doc);

  state->curr_text = NULL;

  state->xinclude_content_depth = 0;
  state->input_source = inputSource;

  /* visited hrefs */
  if ((list = (List *)malloc(sizeof(List))) == NULL) {
    PyErr_NoMemory();
    goto fail_xml_base_stack;
  }
  list_init(list, NULL);
  state->visited_hrefs = list;

  /* set up state table */
  if (StateTable_Initialize(state) == 0) {
    PyErr_NoMemory();
    goto fail_visited_hrefs;
  }

  /* From here on the state is complete enough for cleanupParserState()
     to take it apart, state table included. */
  if (StateTable_AddState(state, PARSE_STREAM_STATE) == 0
      || !StateTable_AddTransition(state, START_STATE, PARSE_RESUME_EVENT,
                                   PARSE_STREAM_STATE)
      || !StateTable_AddTransition(state, PARSE_STREAM_STATE,
                                   PARSE_RESUME_EVENT, PARSE_STREAM_STATE)
      || !initializeStateTableForParseEvents(state)) {
    cleanupParserState(state);
    return 0;
  }

  *sptr = state;
  return 1;

  /* Unwind in reverse order of construction; each label releases the
     resource it is named after and falls through to the earlier ones. */
fail_visited_hrefs:
  list_destroy(state->visited_hrefs);
  free(state->visited_hrefs);
fail_xml_base_stack:
  _stack_destroy(state->xml_base_stack);
  free(state->xml_base_stack);
fail_namespaces:
  list_destroy(state->namespaces);
  free(state->namespaces);
fail_new_nss:
  Py_DECREF(state->new_nss);
fail_node_stack:
  _stack_destroy(state->node_stack);
  free(state->node_stack);
fail_preserve_state:
  _stack_destroy(state->preserve_state);
  free(state->preserve_state);
fail_elem_depth_event_stack:
  _stack_destroy(state->elem_depth_event_stack);
  free(state->elem_depth_event_stack);
fail_state:
  free(state);
  return 0;
}

/*
int setupEeParserState(ParserState **sptr, PyDocumentObject *doc,
                     PyObject *inputSource)
{
  _stack_push(state->node_stack, elem);
  return 1;
}
*/

/* Attach `state` to the Expat parser behind `parser` as its user data and
   install all of this module's callbacks on it.  Also clears the in-DTD
   flag on `state`.  Always returns 1. */
int initParser(XML_Parser *parser, ParserState *state)
{
  XML_Parser expat = *parser;

  XML_SetUserData(expat, state);

  /* Let Python supply any encoding Expat does not know natively. */
  XML_SetUnknownEncodingHandler(expat, unknownEncodingHandler, NULL);

  /* Unparsed entity declarations. */
  XML_SetUnparsedEntityDeclHandler(expat, unparsedEntityDeclHandler);

  /* Ask for names as URI + sep + local + sep + prefix. */
  XML_SetReturnNSTriplet(expat, 1);
  XML_SetStartNamespaceDeclHandler(expat, startNsScope);

  /* Document content. */
  XML_SetElementHandler(expat, startElement, endElement);
  XML_SetCharacterDataHandler(expat, characterData);
  XML_SetProcessingInstructionHandler(expat, processingInstruction);
  XML_SetCommentHandler(expat, comment);

  /* Doctype boundaries are tracked so that comments and processing
     instructions can be ignored while the DTD is being read. */
  XML_SetStartDoctypeDeclHandler(expat, startDoctypeDecl);
  XML_SetEndDoctypeDeclHandler(expat, endDoctypeDecl);

  XML_SetExternalEntityRefHandler(expat, entityRef);

  state->in_dtd = 0;
  return 1;
}


/* Read up to `max` bytes from a Python file-like object into `buf`.

   Calls object.read(max) and copies the returned string into `buf`, which
   must have room for `max` bytes.  Returns the number of bytes copied
   (0 at end of stream).  On failure returns (size_t)-1 with a Python
   exception set: whatever read() raised, TypeError if it did not return a
   string, or ValueError if it returned more than `max` bytes. */
size_t readFromObject(PyObject *object, char *buf, int max)
{
  size_t len;
  size_t capacity = (max > 0) ? (size_t)max : 0;
  PyObject *str;

  if ((str = PyObject_CallMethod(object, "read", "i", max)) == NULL)
    return (size_t)-1;

  if (!PyString_Check(str)) {
    PyErr_Format(PyExc_TypeError,
                 "read() did not return a string object (type=%.400s)",
                 str->ob_type->tp_name);
    Py_DECREF(str);
    return (size_t)-1;
  }

  len = PyString_GET_SIZE(str);

  /* A misbehaving read() may hand back more than was asked for; copying
     that would run past the end of the caller's buffer. */
  if (len > capacity) {
    PyErr_Format(PyExc_ValueError,
                 "read() returned too much data: "
                 "%d bytes requested, %ld returned",
                 max, (long)len);
    Py_DECREF(str);
    return (size_t)-1;
  }

  memcpy(buf, PyString_AS_STRING(str), len);
  Py_DECREF(str);

  return len;
}


/* Feed the stream of `inputSource` to `parser` until it is exhausted.

   Before parsing, applies the input source's "encoding" attribute (unless
   it is None) and "uri" attribute (as the Expat base).  The stream is then
   read in BUFSIZ-sized pieces; a PARSE_RESUME_EVENT is sent to the state
   table before each piece is handed to Expat.

   Returns 1 on success and 0 on failure.  On a parse failure with no
   Python exception pending, one is raised through
   ReaderException_XmlParseError using Expat's current line, column and
   error string. */
int doParse(XML_Parser parser, ParserState *state, PyObject *inputSource)
{
  PyObject *encoding, *uri, *stream;
  enum XML_Status status;
  size_t bytes_read;

  /* Set externally defined encoding, if defined */
  encoding = PyObject_GetAttrString(inputSource, "encoding");
  if (encoding == NULL) {
    return 0;
  } else if (encoding == Py_None) {
    Py_DECREF(encoding);
  } else {
    XML_Char *encstr = XMLChar_FromObject(encoding);
    Py_DECREF(encoding);
    if (encstr == NULL) {
      return 0;
    }
    status = XML_SetEncoding(parser, encstr);
    free(encstr);
    if (status != XML_STATUS_OK) {
      /* NOTE(review): Expat gives no reason here; for a parser that has
         not started parsing the remaining cause should be memory
         exhaustion -- confirm.  Either way, never fail silently. */
      if (!PyErr_Occurred()) PyErr_NoMemory();
      return 0;
    }
  }

  /* Set the base URI for the stream */
  uri = PyObject_GetAttrString(inputSource, "uri");
  if (uri == NULL) {
    return 0;
  } else {
    XML_Char *base = XMLChar_FromObject(uri);
    Py_DECREF(uri);
    if (base == NULL) {
      return 0;
    }
    status = XML_SetBase(parser, base);
    free(base);
    if (status != XML_STATUS_OK) {
      /* XML_SetBase copies the string and fails only when out of memory */
      if (!PyErr_Occurred()) PyErr_NoMemory();
      return 0;
    }
  }

  /* Get the stream and feed it to Expat */
  stream = PyObject_GetAttrString(inputSource, "stream");
  if (stream == NULL) return 0;

  do {
    void *buffer = XML_GetBuffer(parser, BUFSIZ);
    if (buffer == NULL) {
      PyErr_NoMemory();
      Py_DECREF(stream);
      return 0;
    }

    bytes_read = readFromObject(stream, (char *)buffer, BUFSIZ);
    /* readFromObject reports failure as -1 converted to size_t; being
       unsigned, the value can never compare below zero. */
    if (bytes_read == (size_t)-1) {
      Py_DECREF(stream);
      return 0;
    }

    StateTable_Transit(state, PARSE_RESUME_EVENT);

    /* A zero-length read marks the end of the stream, which is passed to
       Expat as the "final" flag. */
    if (XML_ParseBuffer(parser, (int)bytes_read, bytes_read == 0)
          != XML_STATUS_OK
        || state->core.current == ERROR_STATE) {
#ifdef DEBUG_PARSER
      fprintf(stderr, "-- Parsing error ------------ \n");
      fprintf(stderr, "Expat error code: %d\n", XML_GetErrorCode(parser));
      fprintf(stderr, "Current state: %d\n", state->core.current);
#endif
      if (!PyErr_Occurred()) {
        PyObject *error_uri = PyObject_GetAttrString(inputSource, "uri");
        /* If the lookup itself failed, its exception is already set and
           is reported instead. */
        if (error_uri != NULL) {
          const char *reason = XML_ErrorString(XML_GetErrorCode(parser));
          /* NOTE(review): the reference to error_uri is handed over as
             before; confirm whether ReaderException_XmlParseError takes
             ownership or whether it should be released here. */
          ReaderException_XmlParseError(error_uri,
                                        XML_GetCurrentLineNumber(parser),
                                        XML_GetCurrentColumnNumber(parser),
                                        reason);
        }
      }
      Py_DECREF(stream);
      return 0;
    }
  } while (bytes_read);

  /*All done with the stream*/
  Py_DECREF(stream);

  return 1;
}


/* Parse `inputSource` into a new document.

   readDtd       - when non-zero, parameter entity parsing is enabled.
   parseAsEntity - when non-zero, the input is parsed with an external
                   entity parser and collected under a document fragment.

   Returns a new reference to the document, or to the document fragment
   when parseAsEntity is set.  Returns NULL on failure; every resource
   acquired along the way is released by the single error exit. */
PyObject *beginParse(PyObject *inputSource, int readDtd, int parseAsEntity)
{
  XML_Parser parser = XML_ParserCreateNS(NULL, EXPAT_NSSEP);
  XML_Parser parent_parser = NULL;
  ParserState *state = NULL;
  unsigned long *docIx = NULL;
  PyDocumentObject *doc = NULL;
  PyDocumentFragmentObject *docfrag = NULL;
  PyObject *base, *baseUnicode;
  XML_Char *epe_context = NULL;

  if (parser == NULL) {
    PyErr_NoMemory();
    return NULL;
  }

  if (readDtd) {
    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  }

  if ((docIx = (unsigned long *)malloc(sizeof(unsigned long))) == NULL) {
    PyErr_NoMemory();
    goto error;
  }
  *docIx = 0;

  base = PyObject_GetAttrString(inputSource, "uri");
  if (base == NULL) {
    goto error;
  }

  baseUnicode = PyUnicode_FromObject(base);
  Py_DECREF(base);
  if (baseUnicode == NULL) {
    goto error;
  }

  doc = Document_New(docIx, baseUnicode);
  Py_DECREF(baseUnicode);
  if (doc == NULL) {
    goto error;
  }

  if (!setupParserState(&state, doc, inputSource)) {
    /* setupParserState releases everything it allocated when it fails,
       so there is no state left for us to clean up. */
    state = NULL;
    goto error;
  }

  state->docIx = docIx;
  StateTable_SetState(state, START_STATE);

  if (!initParser(&parser, state)) {
    goto error;
  }

  if (parseAsEntity) {
    XML_Parser entity_parser;

    if ((docfrag = Document_CreateDocumentFragment(doc)) == NULL) {
      goto error;
    }

    /* An empty context string for the external entity parser. */
    epe_context = (XML_Char *) calloc(1, sizeof(XML_Char));
    if (epe_context == NULL) {
      PyErr_NoMemory();
      goto error;
    }

    entity_parser = XML_ExternalEntityParserCreate(parser, epe_context, NULL);
    if (entity_parser == NULL) {
      PyErr_NoMemory();
      goto error;
    }
    parent_parser = parser;
    parser = entity_parser;

    /* New nodes are now collected under the fragment. */
    _stack_push(state->node_stack, docfrag);
  }

  if (doParse(parser, state, inputSource) == 0) {
    goto error;
  }

  doc = state->ownerDoc;
  free(state->docIx);
  cleanupParserState(state);
  XML_ParserFree(parser);

  if (parseAsEntity) {
    free(epe_context);
    /* The entity parser was freed above; its parent goes last. */
    XML_ParserFree(parent_parser);
    /* NOTE(review): the reference to `doc` obtained from Document_New is
       not released on this path, as before; confirm whether the fragment
       holds its own reference to its owner document. */
    return (PyObject *)docfrag;
  }
  return (PyObject *)doc;

error:
  Py_XDECREF(docfrag);
  Py_XDECREF(doc);
  free(docIx);
  if (state != NULL) {
    cleanupParserState(state);
  }
  XML_ParserFree(parser);
  free(epe_context);
  if (parent_parser != NULL) {
    /* implies parseAsEntity */
    XML_ParserFree(parent_parser);
  }
  return NULL;
}

/** EXPAT callbacks *****************************************************/

/* Every byte value from 0x00 to 0xFF in ascending order, followed by a
   zero terminator.  unknownEncodingHandler() decodes the first 256 bytes
   with a Python codec to find out which character each byte stands for in
   that encoding. */
static const unsigned char template_buffer[] = {
  0x00,  0x01,  0x02,  0x03,  0x04,  0x05,  0x06,  0x07,
  0x08,  0x09,  0x0A,  0x0B,  0x0C,  0x0D,  0x0E,  0x0F,
  0x10,  0x11,  0x12,  0x13,  0x14,  0x15,  0x16,  0x17,
  0x18,  0x19,  0x1A,  0x1B,  0x1C,  0x1D,  0x1E,  0x1F,
  0x20,  0x21,  0x22,  0x23,  0x24,  0x25,  0x26,  0x27,
  0x28,  0x29,  0x2A,  0x2B,  0x2C,  0x2D,  0x2E,  0x2F,
  0x30,  0x31,  0x32,  0x33,  0x34,  0x35,  0x36,  0x37,
  0x38,  0x39,  0x3A,  0x3B,  0x3C,  0x3D,  0x3E,  0x3F,
  0x40,  0x41,  0x42,  0x43,  0x44,  0x45,  0x46,  0x47,
  0x48,  0x49,  0x4A,  0x4B,  0x4C,  0x4D,  0x4E,  0x4F,
  0x50,  0x51,  0x52,  0x53,  0x54,  0x55,  0x56,  0x57,
  0x58,  0x59,  0x5A,  0x5B,  0x5C,  0x5D,  0x5E,  0x5F,
  0x60,  0x61,  0x62,  0x63,  0x64,  0x65,  0x66,  0x67,
  0x68,  0x69,  0x6A,  0x6B,  0x6C,  0x6D,  0x6E,  0x6F,
  0x70,  0x71,  0x72,  0x73,  0x74,  0x75,  0x76,  0x77,
  0x78,  0x79,  0x7A,  0x7B,  0x7C,  0x7D,  0x7E,  0x7F,
  0x80,  0x81,  0x82,  0x83,  0x84,  0x85,  0x86,  0x87,
  0x88,  0x89,  0x8A,  0x8B,  0x8C,  0x8D,  0x8E,  0x8F,
  0x90,  0x91,  0x92,  0x93,  0x94,  0x95,  0x96,  0x97,
  0x98,  0x99,  0x9A,  0x9B,  0x9C,  0x9D,  0x9E,  0x9F,
  0xA0,  0xA1,  0xA2,  0xA3,  0xA4,  0xA5,  0xA6,  0xA7,
  0xA8,  0xA9,  0xAA,  0xAB,  0xAC,  0xAD,  0xAE,  0xAF,
  0xB0,  0xB1,  0xB2,  0xB3,  0xB4,  0xB5,  0xB6,  0xB7,
  0xB8,  0xB9,  0xBA,  0xBB,  0xBC,  0xBD,  0xBE,  0xBF,
  0xC0,  0xC1,  0xC2,  0xC3,  0xC4,  0xC5,  0xC6,  0xC7,
  0xC8,  0xC9,  0xCA,  0xCB,  0xCC,  0xCD,  0xCE,  0xCF,
  0xD0,  0xD1,  0xD2,  0xD3,  0xD4,  0xD5,  0xD6,  0xD7,
  0xD8,  0xD9,  0xDA,  0xDB,  0xDC,  0xDD,  0xDE,  0xDF,
  0xE0,  0xE1,  0xE2,  0xE3,  0xE4,  0xE5,  0xE6,  0xE7,
  0xE8,  0xE9,  0xEA,  0xEB,  0xEC,  0xED,  0xEE,  0xEF,
  0xF0,  0xF1,  0xF2,  0xF3,  0xF4,  0xF5,  0xF6,  0xF7,
  0xF8,  0xF9,  0xFA,  0xFB,  0xFC,  0xFD,  0xFE,  0xFF,
  /* terminator */
  0x00
};

/* Expat callback for encodings it does not support natively.

   Looks the encoding up among Python's codecs and, if it is a single-byte
   encoding, fills info->map with the character each byte decodes to (-1
   for bytes the codec cannot decode).  Returns 1 when `info` has been
   filled in and 0 otherwise.  An unknown codec name is not treated as a
   Python error; other failures leave their exception set. */
static int unknownEncodingHandler(void *userData, const XML_Char *name,
                                  XML_Encoding *info)
{
    PyObject *_u_name, *_s_name, *_u_string;
    int i;

    _u_name = Unicode_FromXMLChar(name);
    if (_u_name == NULL)
        return 0;

    /* Codec names are passed to Python as a plain C string. */
    _s_name = PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(_u_name),
                                    PyUnicode_GET_SIZE(_u_name),
                                    NULL);
    Py_DECREF(_u_name);
    if (_s_name == NULL)
        return 0;

    /* Yes, supports only 8bit encodings */
    _u_string = PyUnicode_Decode((const char *)template_buffer, 256,
                                 PyString_AS_STRING(_s_name),
                                 "replace");
    Py_DECREF(_s_name);

    if (_u_string == NULL) {
        if (PyErr_ExceptionMatches(PyExc_LookupError))
            PyErr_Clear();
        return 0;
    }

    /* A codec that is not one-byte-per-character yields a string of some
       other length; indexing it 0..255 below would then read out of
       bounds, so report the encoding as unsupported instead. */
    if (PyUnicode_GET_SIZE(_u_string) != 256) {
        Py_DECREF(_u_string);
        return 0;
    }

    for (i = 0; i < 256; i++) {
        Py_UNICODE c = PyUnicode_AS_UNICODE(_u_string)[i];
        if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
            info->map[i] = -1;
        else
            info->map[i] = c;
    }
    info->data = NULL;
    info->convert = NULL;
    info->release = NULL;
    Py_DECREF(_u_string);
    return 1;
}

/* Expat callback for an unparsed entity declaration.

   Asks the input source's URI resolver to normalize `systemId` against
   `base` and records the result in the owner document's unparsedEntities
   dictionary under `entityName`.  Any failure puts the state table into
   its error state. */
static void unparsedEntityDeclHandler(void *userData,
                                      const XML_Char *entityName,
                                      const XML_Char *base,
                                      const XML_Char *systemId,
                                      const XML_Char *publicId,
                                      const XML_Char *notationName)
{
  ParserState *state = (ParserState *)userData;
  PyObject *py_base, *py_system_id;
  PyObject *resolver, *entity_uri, *entity_name;
  int status;

  /* Every declaration is recorded, whether or not a notation is named. */
  (void)notationName;

  py_base = Unicode_FromXMLChar(base);
  if (py_base == NULL)
    goto error;

  py_system_id = Unicode_FromXMLChar(systemId);
  if (py_system_id == NULL) {
    Py_DECREF(py_base);
    goto error;
  }

  resolver = PyObject_CallMethod(state->input_source, "getUriResolver",
                                 NULL);
  if (resolver == NULL) {
    Py_DECREF(py_base);
    Py_DECREF(py_system_id);
    goto error;
  }

  entity_uri = PyObject_CallMethod(resolver, "normalize", "OO",
                                   py_system_id, py_base);
  Py_DECREF(py_base);
  Py_DECREF(py_system_id);
  Py_DECREF(resolver);
  if (entity_uri == NULL)
    goto error;

  entity_name = Unicode_FromXMLChar(entityName);
  if (entity_name == NULL) {
    Py_DECREF(entity_uri);
    goto error;
  }

  /* The dictionary takes its own references to key and value. */
  status = PyDict_SetItem(state->ownerDoc->unparsedEntities, entity_name,
                          entity_uri);
  Py_DECREF(entity_name);
  Py_DECREF(entity_uri);
  if (status == -1)
    goto error;

  return;

error:
  StateTable_SignalError(state);
}

void characterData(void *userData, const XML_Char *s, int len)
{
  ParserState *state = (ParserState *)userData;

  state->expat_charbuf = Unicode_FromXMLCharAndSize(s, len);

  StateTable_Transit(state, CHARACTER_DATA_EVENT);
  StateTable_Transit(state, PARSE_RESUME_EVENT);

  /*Clear up the state that we set*/
  Py_DECREF(state->expat_charbuf);
}


/*Called for the start of every new element.  Push information onto the state
  And then set the state to START_ELEMENT */
/* Expat callback for an element start tag.

   Publishes the element name as state->expat_name, the attributes as a
   name -> value dictionary in state->expat_atts, and a freshly allocated
   zeroed int in state->curr_elem_depth_event, then sends
   START_ELEMENT_EVENT followed by PARSE_RESUME_EVENT.  The name and
   attribute references are dropped again before returning.  Any failure
   while preparing these values releases what was built and puts the state
   table into its error state. */
void startElement(void *userData, const XML_Char *name, const XML_Char **atts)
{
  ParserState *state = (ParserState *)userData;
  const XML_Char **ppattr;

  state->expat_name = Unicode_FromXMLChar(name);
  if (!state->expat_name) {
    StateTable_SignalError(state);
    return;
  }

  state->expat_atts = PyDict_New();
  if (!state->expat_atts) {
    Py_DECREF(state->expat_name);
    StateTable_SignalError(state);
    return;
  }

  /* atts is a NULL-terminated array of alternating names and values. */
  for (ppattr = atts; *ppattr; ppattr += 2) {
    PyObject *att_name, *att_value;
    int rc;

    att_name = Unicode_FromXMLChar(ppattr[0]);
    if (att_name == NULL) {
      goto error;
    }
    att_value = Unicode_FromXMLChar(ppattr[1]);
    if (att_value == NULL) {
      Py_DECREF(att_name);
      goto error;
    }

    rc = PyDict_SetItem(state->expat_atts, att_name, att_value);

    /* let the dictionary own these */
    Py_DECREF(att_value);
    Py_DECREF(att_name);
    if (rc == -1) {
      goto error;
    }
  }

  if ((state->curr_elem_depth_event = (int *)calloc(1, sizeof(int))) == NULL) {
    PyErr_NoMemory();
    goto error;
  }
  StateTable_Transit(state, START_ELEMENT_EVENT);
  StateTable_Transit(state, PARSE_RESUME_EVENT);

  /*Clear up the state that we set*/
  Py_DECREF(state->expat_name);
  Py_DECREF(state->expat_atts);
  return;

error:
  Py_DECREF(state->expat_name);
  Py_DECREF(state->expat_atts);
  StateTable_SignalError(state);
}


/* Expat callback for an element end tag.  Publishes the element name as
   state->expat_name while END_ELEMENT_EVENT and PARSE_RESUME_EVENT are
   processed.  If the name cannot be converted the state table is put into
   its error state instead. */
void endElement(void *userData, const XML_Char *name)
{
  ParserState *state = (ParserState *)userData;

  state->expat_name = Unicode_FromXMLChar(name);
  if (state->expat_name == NULL) {
    StateTable_SignalError(state);
    return;
  }

  StateTable_Transit(state, END_ELEMENT_EVENT);
  StateTable_Transit(state, PARSE_RESUME_EVENT);

  /*Clear up the state that we set*/
  Py_DECREF(state->expat_name);
}


/* Expat callback for the start of a namespace declaration's scope.

   Publishes the prefix (None when `prefix` is NULL) and the namespace URI
   (an empty unicode string when `uri` is NULL) as state->expat_prefix and
   state->expat_uri while START_NS_SCOPE_EVENT and PARSE_RESUME_EVENT are
   processed.  If either value cannot be created the state table is put
   into its error state instead. */
void startNsScope(void *userData, const XML_Char *prefix, const XML_Char *uri)
{
  ParserState *state = (ParserState *)userData;

  if (prefix) {
    state->expat_prefix = Unicode_FromXMLChar(prefix);
  } else {
    state->expat_prefix = Py_None;
    Py_INCREF(Py_None);
  }
  if (state->expat_prefix == NULL) {
    StateTable_SignalError(state);
    return;
  }

  if (uri) {
    state->expat_uri = Unicode_FromXMLChar(uri);
  } else {
    /* use empty string, fixes SF#834917 */
    state->expat_uri = PyUnicode_FromUnicode(NULL, 0);
  }
  if (state->expat_uri == NULL) {
    Py_DECREF(state->expat_prefix);
    StateTable_SignalError(state);
    return;
  }

  StateTable_Transit(state, START_NS_SCOPE_EVENT);
  StateTable_Transit(state, PARSE_RESUME_EVENT);

  /*Clear up the state that we set*/
  Py_DECREF(state->expat_prefix);
  Py_DECREF(state->expat_uri);
}

/* Expat callback for a processing instruction.  Outside the DTD, publishes
   the target and data as state->expat_target and state->expat_data while
   PI_EVENT and PARSE_RESUME_EVENT are processed; inside the DTD the
   instruction is ignored.  If either string cannot be converted the state
   table is put into its error state instead. */
void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data)
{
  ParserState *state = (ParserState *)userData;

  /* ignore processing instructions within the DTD */
  if (!state->in_dtd) {
    state->expat_target = Unicode_FromXMLChar(target);
    if (state->expat_target == NULL) {
      StateTable_SignalError(state);
      return;
    }

    state->expat_data = Unicode_FromXMLChar(data);
    if (state->expat_data == NULL) {
      Py_DECREF(state->expat_target);
      StateTable_SignalError(state);
      return;
    }

    StateTable_Transit(state, PI_EVENT);
    StateTable_Transit(state, PARSE_RESUME_EVENT);

    /*Clear up the state that we set*/
    Py_DECREF(state->expat_target);
    Py_DECREF(state->expat_data);
  }
}


/* Expat callback for a comment.  Outside the DTD, publishes the comment
   text as state->expat_data while COMMENT_EVENT and PARSE_RESUME_EVENT are
   processed; inside the DTD the comment is ignored.  If the text cannot be
   converted the state table is put into its error state instead. */
void comment(void *userData, const XML_Char *data)
{
  ParserState *state = (ParserState *)userData;

  /* ignore comments within the DTD */
  if (!state->in_dtd) {
    state->expat_data = Unicode_FromXMLChar(data);
    if (state->expat_data == NULL) {
      StateTable_SignalError(state);
      return;
    }

    StateTable_Transit(state, COMMENT_EVENT);
    StateTable_Transit(state, PARSE_RESUME_EVENT);

    /*Clear up the state that we set*/
    Py_DECREF(state->expat_data);
  }
}


static void startDoctypeDecl(void *userData, const XML_Char *name,
                             const XML_Char *sysid, const XML_Char *pubid,
                             int has_internal_subset)
{
  ParserState *state = (ParserState *)userData;

  if (sysid) {
    Py_DECREF(state->ownerDoc->systemId);
    state->ownerDoc->systemId = Unicode_FromXMLChar(sysid);
  }

  if (pubid) {
    Py_DECREF(state->ownerDoc->publicId);
    state->ownerDoc->publicId = Unicode_FromXMLChar(pubid);
  }

  state->in_dtd = 1;
}


/* Expat callback for the end of the doctype declaration: clears the
   in-DTD flag so that comments and processing instructions are reported
   again. */
static void endDoctypeDecl(void *userData)
{
  ((ParserState *)userData)->in_dtd = 0;
}


/* Expat callback for a reference to an external parsed entity.

   Asks the current input source to resolve the system/public identifiers
   into a new input source, then parses that source with an external entity
   parser sharing this parser's ParserState.  While the entity is being
   parsed, state->input_source points at the new source; it is restored
   afterwards.  Returns doParse()'s result (non-zero on success), or 0 if
   the entity could not be resolved or a parser could not be created. */
static int entityRef(XML_Parser parser, const XML_Char *context,
                     const XML_Char *base, const XML_Char *systemId,
                     const XML_Char *publicId)
{
  ParserState *state = (ParserState *)XML_GetUserData(parser);
  XML_Parser new_parser;
  PyObject *sysid, *pubid;
  PyObject *new_input_source, *old_input_source;
  int rc;

  /* Invoke the entity resolver to retrieve a file-like object */
  sysid = Unicode_FromXMLChar(systemId);
  if (sysid == NULL) {
    return 0;
  }
  if (publicId) {
    pubid = Unicode_FromXMLChar(publicId);
    if (pubid == NULL) {
      Py_DECREF(sysid);
      return 0;
    }
  } else {
    pubid = Py_None;
    Py_INCREF(Py_None);
  }

  /* The arguments are passed as borrowed references ("O") and released
     here, so they are freed on every path, including a failed method
     lookup. */
  new_input_source = PyObject_CallMethod(state->input_source, "resolve", "OOs",
                                         sysid, pubid, "EXTERNAL ENTITY");
  Py_DECREF(sysid);
  Py_DECREF(pubid);
  if (new_input_source == NULL) {
    return 0;
  }

  new_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
  if (new_parser == NULL) {
    PyErr_NoMemory();
    Py_DECREF(new_input_source);
    return 0;
  }

  /* The entity parser reports into the same state as its parent. */
  XML_SetUserData(new_parser, state);

  old_input_source = state->input_source;
  state->input_source = new_input_source;

#ifdef DEBUG_PARSER
  fprintf(stderr, "Begin external entity parsing.\n");
#endif

  rc = doParse(new_parser, state, new_input_source);

#ifdef DEBUG_PARSER
  if (rc == 0)
    fprintf(stderr, "Entity parsing failed.\n");
  else
    fprintf(stderr, "End external entity parsing.\n");
#endif

  state->input_source = old_input_source;
  Py_DECREF(new_input_source);

  XML_ParserFree(new_parser);

  return rc;
}


