| <?php
/*
 * To-do list for this library:
 * - Add a feature to prevent XML explosion
 * - Remember comments and line feeds inside comments count
 */
namespace Soothsilver\DtdParser;
/**
 * Represents all information extracted from a Document Type Declaration file, possibly combined with an internal subset.
 */
class DTD
{
    /**
     * @var Element[]
     */
    public $elements = [];
    /**
     * @var ParameterEntity[]
     */
    public $parameterEntities = [];
    /**
     * @var GeneralEntity[]
     */
    public $generalEntities = [];
    /**
     * @var Notation[]
     */
    public $notations = [];
    /**
     * @var Error[]
     */
    public $errors = [];
    /**
     * @var Error[]
     */
    public $warnings = [];
    /**
     * @var ProcessingInstruction[]
     */
    public $processingInstructions = [];
    /**
     * Reports whether parsing finished without recording any error.
     *
     * Only the $errors list is consulted; entries in $warnings do not affect the result.
     *
     * @return bool True when the $errors list is empty; false otherwise.
     */
    public function isWellFormedAndValid()
    {
        $errorCount = count($this->errors);

        return $errorCount === 0;
    }
    private $shouldLoadExternalEntities = false;
    private $currentOffset = 0;
    private $line = 1;
    private $xmlRegexes;
    /**
     * Appends a warning to the $warnings list, suffixing the message with the line number.
     *
     * @param string $message Description of the problem.
     * @param int $line Line number that is appended to the message text.
     */
    private function addWarning($message, $line)
    {
        $text = $message . " (line " . $line . ")";
        $this->warnings[] = new Error($text);
    }
    /**
     * Appends an error to the $errors list, suffixing the message with the line number.
     *
     * @param string $message Description of the problem.
     * @param int $line Line number that is appended to the message text.
     */
    private function addFatalError($message, $line)
    {
        $text = $message . " (line " . $line . ")";
        $this->errors[] = new Error($text);
    }
    /**
     * Tests a string against the Name pattern held in $this->xmlRegexes.
     *
     * @param string $name Candidate name.
     * @return bool True when preg_match() reports exactly one match; false otherwise (including on regex failure).
     */
    private function isNameValid($name)
    {
        // The pattern fragment is wrapped in '#' delimiters and matched in UTF-8 mode.
        $pattern = "#" . $this->xmlRegexes->Name . "#u";
        $matchCount = preg_match($pattern, $name);

        return $matchCount === 1;
    }
    /**
     * Tests a string against the NmToken pattern held in $this->xmlRegexes.
     *
     * @param string $nmToken Candidate name token.
     * @return bool True when preg_match() reports exactly one match; false otherwise (including on regex failure).
     */
    private function isNmTokenValid($nmToken)
    {
        // The pattern fragment is wrapped in '#' delimiters and matched in UTF-8 mode.
        $pattern = "#" . $this->xmlRegexes->NmToken . "#u";
        $matchCount = preg_match($pattern, $nmToken);

        return $matchCount === 1;
    }
    /**
     * Finds the first byte at or after $startAt that is not a space, tab or line feed.
     *
     * Every line feed that is skipped increments the $line counter of this object.
     *
     * @param string $text Text to scan.
     * @param int $startAt Byte offset at which scanning begins.
     * @param int $length Exclusive upper bound for the scan (byte offset).
     * @return int|bool Offset of the first non-whitespace byte, or false if none is found before $length.
     */
    private function findNonspace($text, $startAt, $length)
    {
        for ($position = $startAt; $position < $length; $position++)
        {
            $current = substr($text, $position, 1);
            if ($current === "\n")
            {
                // Keep the line counter in step with the text being skipped.
                $this->line++;
                continue;
            }
            if ($current !== ' ' && $current !== "\t")
            {
                return $position;
            }
        }
        return false;
    }
    /**
     * Splits the inside of a markup declaration into tokens.
     *
     * The string is split on whitespace (space, tab, line feed) into individual words, with these exceptions:
     * 1. A quote (") that follows whitespace opens a quoted string. Everything up to the next quote (")
     *    becomes a single token, even if it includes whitespace, apostrophes, parentheses or '|'.
     * 2. The same goes for an apostrophe ('), except that the next apostrophe ends the token and quotes
     *    inside are not recognized.
     *    In both cases the opening and closing quote characters are emitted as separate one-character
     *    tokens around the content token; the content token is emitted even when it is empty.
     * 3. An opening parenthesis ('(') outside a quoted string switches to a mode where tokens are separated
     *    by whitespace and by the '|' character, as in the enumeration or notation attribute type definition
     *    in a DTD. '(', '|' and ')' are emitted as separate tokens. If a word that was ended by whitespace is
     *    followed by another word that is also ended by whitespace or by '|', tokenization fails.
     *
     * Other behaviors visible in the code:
     * - A quote or apostrophe outside any quoted string or parentheses that does not directly follow
     *   whitespace makes tokenization fail.
     * - A ')' outside any quoted string or parentheses makes tokenization fail.
     * - Inside parentheses, a nested '(' and any quote characters are appended to the current word.
     * - An unterminated quoted string or parenthesis is not reported; the word collected so far, if
     *   non-empty, is emitted as the final token.
     *
     * @param string $string The string to split into tokens.
     * @param string $tokenizationErrorMessage Out-parameter. If tokenization fails, this is filled with the reason.
     * @return string[]|bool An array of string tokens if tokenization is successful; false otherwise.
     */
    private function tokenize($string, &$tokenizationErrorMessage)
    {
        $length = strlen($string);
        $tokens = [];
        // false when outside any quoting construct; otherwise the character that opened it: '"', "'" or "(".
        $outerQuote = false;
        // The word (or quoted content) collected so far but not yet emitted.
        $constructingWord = "";
        // True only when the previously processed character was whitespace.
        $afterWhitespace = false;
        // Inside parentheses: true after a word was ended by whitespace, until the next '|' is seen.
        $prohibitNonTerminalInsideParentheses = false;
        for ($i = 0; $i < $length; $i++)
        {
            $char = $string[$i];
            switch($char)
            {
                case "\t":
                case "\n":
                case " ":
                    if ($constructingWord !== "" && $outerQuote === false)
                    {
                        // Plain whitespace outside any quoting construct ends the current word.
                        $tokens[] = $constructingWord;
                        $constructingWord = "";
                    }
                    else if ($outerQuote !== false)
                    {
                        if ($outerQuote === "(")
                        {
                            if ($constructingWord !== "")
                            {
                                if ($prohibitNonTerminalInsideParentheses)
                                {
                                    // Inside an enum, this was done: "( A B | C)" which is prohibited
                                    $tokenizationErrorMessage = "Inside an enumeration, values must be separated by the '|' character, not by whitespace.";
                                    return false;
                                }
                                $tokens[] = $constructingWord;
                                $constructingWord = "";
                                // From now on, another word before the next '|' is an error.
                                $prohibitNonTerminalInsideParentheses = true;
                            }
                        }
                        else
                        {
                            // Inside a quoted string, whitespace is part of the content.
                            $constructingWord .= $char;
                        }
                    }
                    $afterWhitespace = true;
                    break;
                case "|":
                    $afterWhitespace = false;
                    if ($outerQuote === "(")
                    {
                        if ($constructingWord !== "")
                        {
                            if ($prohibitNonTerminalInsideParentheses)
                            {
                                // Inside an enum, this was done: "( A B | C)" which is prohibited
                                $tokenizationErrorMessage = "Inside an enumeration, values must be separated by the '|' character, not by whitespace.";
                                return false;
                            }
                            $tokens[] = $constructingWord;
                            $constructingWord = "";
                        }
                        $tokens[] = "|";
                        // A separator was seen, so the next word is allowed again.
                        $prohibitNonTerminalInsideParentheses = false;
                    }
                    else
                    {
                        // Outside parentheses '|' is an ordinary character.
                        $constructingWord .= "|";
                    }
                    break;
                case "(":
                    $afterWhitespace = false;
                    if ($outerQuote === false)
                    {
                        // Note: a word under construction is not flushed here.
                        $tokens[] = "(";
                        $outerQuote = "(";
                        $prohibitNonTerminalInsideParentheses = false;
                    }
                    else
                    {
                        // Inside a quoted string or already inside parentheses: ordinary character.
                        $constructingWord .= "(";
                    }
                    break;
                case ")":
                    $afterWhitespace = false;
                    if ($outerQuote === false)
                    {
                        // This character should not be anywhere on its own.
                        $tokenizationErrorMessage = "The ')' character is illegal here.";
                        return false;
                    }
                    else if ($outerQuote === '(')
                    {
                        // Closes the parenthesized group; the pending word (if any) is emitted first.
                        if ($constructingWord !== "")
                        {
                            $tokens[] = $constructingWord;
                            $constructingWord = "";
                        }
                        $tokens[] = ")";
                        $outerQuote = false;
                    }
                    else
                    {
                        // Inside a quoted string: ordinary character.
                        $constructingWord .= ")";
                    }
                    break;
                case "'":
                case '"':
                    if ($outerQuote === false && $afterWhitespace === true)
                    {
                        // Opening quote: emitted as its own token.
                        $tokens[] = $char;
                        $outerQuote = $char;
                    }
                    else if ($outerQuote !== false)
                    {
                        if ($outerQuote === $char)
                        {
                             // Closing quote: emit the content (possibly empty) and then the quote itself.
                             $tokens[] = $constructingWord;
                             $tokens[] = $char;
                             $constructingWord = "";
                             $outerQuote = false;
                        }
                        else
                        {
                            // The other kind of quote, or any quote inside parentheses: ordinary character.
                            $constructingWord .= $char;
                        }
                    }
                    else
                    {
                        $tokenizationErrorMessage = "Quotes must only appear after whitespace in this context.";
                        return false;
                    }
                    $afterWhitespace = false;
                    break;
                default:
                    $constructingWord .= $char;
                    $afterWhitespace = false;
                    break;
            }
        }
        // Emit whatever is left; an unterminated quote or parenthesis is not reported here.
        if ($constructingWord !== "")
        {
            $tokens[] = $constructingWord;
        }
        return $tokens;
    }
    /**
     * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
     *
     * @param string $haystack Text to inspect.
     * @param string $needle Expected prefix.
     * @return bool True when the leading bytes of $haystack are identical to $needle.
     */
    private function startsWith($haystack, $needle)
    {
        $prefix = substr($haystack, 0, strlen($needle));

        return $prefix === $needle;
    }
    /**
     * Replaces parameter entity references (%name;) in the text with the replacement text of the
     * corresponding entries in $this->parameterEntities.
     *
     * Substitution is repeated until the pattern no longer matches, so references contained in a
     * replacement text are expanded as well. If a referenced entity is not defined, an error is recorded
     * and the text is returned as it stands at that moment.
     *
     * NOTE(review): the pattern is not anchored at the start of the text, so a reference that sits inside
     * a quoted literal can apparently still be matched - confirm whether that is intended for
     * PEStyle::IgnoreQuotedText.
     * NOTE(review): a parameter entity whose replacement text refers to itself would never terminate.
     *
     * @param string $text Text that may contain parameter entity references.
     * @param mixed $peStyle One of the Internal\PEStyle constants; selects how the replacement is inserted.
     * @return string The text with the references substituted.
     */
    private function evaluatePEReferencesIn($text, $peStyle)
    {
        $matches = [];
        while (preg_match('#(("[^"]*")|(\'[^\']*\')|[^\'"])*%([^;]*);#', $text, $matches, PREG_OFFSET_CAPTURE) === 1)
        {
            // Group 4 is the entity name; the '%' sits one byte before it and the ';' right after it.
            $entityBeginsAt = $matches[4][1] - 1;
            $entityEndsBefore = $matches[4][1] + strlen($matches[4][0])+1;
            $entityName = $matches[4][0];
            if (array_key_exists($entityName, $this->parameterEntities))
            {
                $replacementText = $this->parameterEntities[$entityName]->replacementText;
                switch($peStyle)
                {
                    case Internal\PEStyle::IgnoreQuotedText:
                    case Internal\PEStyle::MatchingParentheses: // TODO matching parentheses do not work
                        // The two spaces are mandated by specification to disallow funny stuff
                        $text = substr($text, 0, $entityBeginsAt) . " " . $replacementText . " " . substr($text, $entityEndsBefore);
                        break;
                    case Internal\PEStyle::InEntityDeclaration:
                        // Included in literal.
                        $text = substr($text, 0, $entityBeginsAt) . $replacementText . substr($text, $entityEndsBefore);
                        break;
                    default:
                        // trigger_error() accepts only the E_USER_* levels; E_ERROR is rejected.
                        trigger_error("Bad peStyle argument.", E_USER_ERROR);
                        // If an error handler lets execution continue, stop here: the text was not
                        // changed, so looping again would never terminate.
                        return $text;
                }
            }
            else
            {
                $this->addFatalError("Parameter entity '" . $entityName . "' is used, but not defined.", $this->line);
                return $text;
            }
        }
        return $text;
    }
    /**
     * Records an error for a parameter entity reference that appears between markup declarations.
     *
     * No lookup is performed here; the reference is always reported as not yet declared.
     *
     * @param string $referenceText The reference text, embedded verbatim in the error message.
     */
    private function parseGlobalPEReference($referenceText)
    {
        $message = "The parameter entity '" . $referenceText . "' is not yet declared.";
        $this->addFatalError($message, $this->line);
    }
    /**
     * Reads a quoted string that occupies three consecutive tokens: opening quote, content, closing quote.
     *
     * @param string[] $tokens Token list, as produced by tokenize().
     * @param int $index Index of the token expected to be the opening quote.
     * @return string|bool The content token on success; false if the tokens run out, the first token is
     *                     not a quote character, or the two quote tokens differ. An error is recorded in
     *                     every failure case.
     */
    private function parseQuotedString($tokens, $index)
    {
        if ($index + 2 >= count($tokens))
        {
            $this->addFatalError("End of declaration reached while trying to parse a quoted string.", $this->line);
            return false;
        }
        $firstQuote = $tokens[$index];
        $middle = $tokens[$index+1];
        $lastQuote = $tokens[$index+2];
        if ($firstQuote !== "'" && $firstQuote !== '"')
        {
            $this->addFatalError("A quotation mark or apostrophe was expected but '" . $firstQuote . "' is present instead.", $this->line);
            // Without this return, three arbitrary tokens whose first and last happen to be equal
            // would be accepted as a quoted string despite the error just recorded.
            return false;
        }
        if ($firstQuote !== $lastQuote)
        {
            $this->addFatalError("Quotes must match at the ends of each quoted string.", $this->line);
            return false;
        }
        return $middle;
    }
    /**
     * Reads an external identifier literal; this simply delegates to parseQuotedString().
     *
     * @param string[] $tokens Token list.
     * @param int $index Index of the token expected to be the opening quote.
     * @return string|bool Whatever parseQuotedString() returns for the same arguments.
     */
    private function parseExternalIdentifier($tokens, $index)
    {
        return $this->parseQuotedString($tokens, $index);
    }
    /**
     * Parses the inside of an <!ELEMENT ...> declaration and stores the result in $this->elements.
     *
     * Parameter entity references are expanded first. The first whitespace-separated token is the element
     * name. A single following token of ANY or EMPTY is stored as such; otherwise all remaining tokens are
     * concatenated into the content specification, which counts as mixed when it starts with "(#PCDATA".
     *
     * If the element already exists without a content specification (it is created that way when an
     * ATTLIST is seen first), the specification is filled in; if it already has one, an error is recorded.
     *
     * @param string $declaration Declaration text without the leading keyword and the closing '>'.
     */
    private function parseElement($declaration)
    {
        $declaration = $this->evaluatePEReferencesIn($declaration, Internal\PEStyle::MatchingParentheses);
        // PREG_SPLIT_NO_EMPTY removes only empty strings. A callback-less array_filter() would also
        // remove the token "0", silently shifting every following token.
        $tokens = preg_split("/\s+/", $declaration, -1, PREG_SPLIT_NO_EMPTY);
        if ($tokens === false)
        {
            // preg_split() failed; treat the declaration as empty so the error below is reported.
            $tokens = [];
        }
        if (count($tokens) === 0)
        {
            $this->addFatalError("An <!ELEMENT> declaration must have a type name.", $this->line);
            return;
        }
        $name = $tokens[0];
        if (!$this->isNameValid($name))
        {
            $this->addFatalError("'{$name}' is not a valid element name.'", $this->line);
        }
        $contentspec = false;
        $isMixed = false;
        if (count($tokens) === 1)
        {
            $this->addFatalError("'{$name}' does not have content type specified.", $this->line);
        }
        else if (count($tokens) === 2)
        {
            if ($tokens[1] === "ANY") { $contentspec = "ANY"; }
            else if ($tokens[1] === "EMPTY") { $contentspec = "EMPTY"; }
        }
        if ($contentspec === false)
        {
            // Everything after the name forms the content model. The tokens were split on \s+,
            // so they contain no whitespace and can be glued together directly.
            array_shift($tokens);
            $contentspec = implode("", $tokens);
            $isMixed = $this->startsWith($contentspec, "(#PCDATA");
            // TODO verify legality of children regex
        }
        if (array_key_exists($name, $this->elements))
        {
            if ($this->elements[$name]->contentSpecification === Element::CONTENT_SPECIFICATION_NOT_GIVEN)
            {
                // The element was created as a placeholder; complete it now.
                $this->elements[$name]->contentSpecification = $contentspec;
                $this->elements[$name]->mixed = $isMixed;
            }
            else
            {
                $this->addFatalError("This element ('{$name}') was already declared.", $this->line);
            }
            return;
        }
        else
        {
            $this->elements[$name] = new Element($name, $contentspec, $isMixed);
        }
    }
    /**
     * Parses the inside of an <!ATTLIST ...> declaration and attaches the attributes to the element.
     *
     * After parameter entity expansion and tokenization, the first token is the element name. The
     * remaining tokens are consumed by a small state machine (Internal\AttlistMode): attribute name,
     * attribute type (a keyword, an enumeration in parentheses, or NOTATION followed by an enumeration),
     * and default declaration (#REQUIRED, #IMPLIED, #FIXED with a quoted value, or a quoted value alone).
     *
     * If the element is not known yet, a placeholder element without content specification is created.
     * If the element already has an attribute of the same name, the earlier definition is kept.
     * Problems are recorded through addFatalError(); most of them do not stop parsing.
     *
     * @param string $markupDeclaration Declaration text without the leading keyword and the closing '>'.
     */
    private function parseAttlist($markupDeclaration)
    {
        $tokenizationError = "";
        $markupDeclaration = $this->evaluatePEReferencesIn($markupDeclaration, Internal\PEStyle::IgnoreQuotedText);
        $tokens = $this->tokenize($markupDeclaration, $tokenizationError);
        if ($tokens === false)
        {
            $this->addFatalError("ATTLIST declaration could not be tokenized: " . $tokenizationError, $this->line);
            return;
        }
        if (count($tokens) === 0)
        {
            $this->addFatalError("An <!ATTLIST> declaration must have a type name.", $this->line);
            return;
        }
        $elementType = $tokens[0];
        if (!$this->isNameValid($elementType))
        {
            $this->addFatalError("'{$elementType}' is not a valid element name.'", $this->line);
        }
        $tokenId = 1;
        // Working data of the attribute definition currently being read.
        $attributeName = false;
        $attributeType = false;
        $attributeEnumeration = [];
        $attributeDefaultValue = false;
        $attributeDefaultType = false;
        $state = Internal\AttlistMode::NeedName;
        while ($tokenId < count($tokens))
        {
            $token = $tokens[$tokenId];
            if ($state === Internal\AttlistMode::NeedName)
            {
                if (!$this->isNameValid($token)) { $this->addFatalError("'{$token}' is not a valid attribute name.'", $this->line); }
                $attributeName = $token;
                $state = Internal\AttlistMode::NeedAttType;
            }
            else if ($state === Internal\AttlistMode::NeedAttType)
            {
                // Default follow-up state; the enumeration and NOTATION cases override it below.
                $state = Internal\AttlistMode::NeedDefaultDecl;
                switch($token)
                {
                    case "CDATA":
                    case "ID":
                    case "IDREF":
                    case "IDREFS":
                    case "ENTITY":
                    case "ENTITIES":
                    case "NMTOKEN":
                    case "NMTOKENS":
                        $attributeType = $token;
                        break;
                    case "(":
                        $attributeType = Attribute::ATTTYPE_ENUMERATION;
                        $state = Internal\AttlistMode::InsideEnumeration_NeedValue;
                        break;
                    case "NOTATION":
                        $attributeType = Attribute::ATTTYPE_NOTATION; // TODO validity checks
                        $state = Internal\AttlistMode::AfterNOTATION;
                        break;
                    default:
                        $this->addFatalError("The attribute '" . $attributeName . "' has a declared type that does not exist.", $this->line);
                        break;
                }
            }
            else if ($state === Internal\AttlistMode::InsideEnumeration_NeedValue)
            {
                if (!$this->isNmTokenValid($token))
                {
                    $this->addFatalError("An enumerated type must only have NMTOKENs as possible values.", $this->line);
                    return;
                }
                $attributeEnumeration[] = $token;
                $state = Internal\AttlistMode::InsideEnumeration_NeedSeparator;
            }
            else if ($state === Internal\AttlistMode::InsideEnumeration_NeedSeparator)
            {
                if ($token === "|")
                {
                    $state = Internal\AttlistMode::InsideEnumeration_NeedValue;
                }
                else if ($token === ")")
                {
                    $state = Internal\AttlistMode::NeedDefaultDecl;
                }
                else
                {
                    $this->addFatalError("In the attribute '{$attributeName}' enumeration, the token '|' or ')' was expected.", $this->line);
                }
            }
            else if ($state === Internal\AttlistMode::AfterNOTATION)
            {
                if ($token === "(")
                {
                    $state = Internal\AttlistMode::InsideEnumeration_NeedValue;
                }
                else
                {
                    $this->addFatalError("The attribute '" . $attributeName . "' is declared NOTATION but misses a notations enumeration.", $this->line);
                }
            }
            else if ($state === Internal\AttlistMode::NeedDefaultDecl)
            {
                switch($token)
                {
                    case "#REQUIRED":
                    case "#IMPLIED":
                        $attributeDefaultValue = "";
                        $attributeDefaultType = $token;
                        break;
                    case "#FIXED":
                        $attributeDefaultType = "#FIXED";
                        // "#FIXED" must be followed by three tokens: quote, value, quote.
                        if ($tokenId + 3 < count($tokens))
                        {
                            if  (($tokens[$tokenId+1] === "'" && $tokens[$tokenId+3] === "'") ||
                                 ($tokens[$tokenId+1] === '"' && $tokens[$tokenId+3] === '"'))
                            {
                                // Parameter entities should not be expanded here.
                                $attributeDefaultValue = $tokens[$tokenId+2];
                            }
                            else
                            {
                                $this->addFatalError("The attribute '" . $attributeName . "' has an #FIXED declaration.", $this->line);
                            }
                            // Skip the three tokens that were inspected above.
                            $tokenId+=3;
                        }
                        else
                        {
                            $this->addFatalError("The attribute '" . $attributeName . "' has a #FIXED declaration, but its default value is not provided.", $this->line);
                        }
                        break;
                    case "'":
                    case '"':
                        $attributeDefaultType = Attribute::DEFAULT_IMPLICIT_DEFAULT;
                        // The opening quote must be followed by two tokens: value, quote.
                        if ($tokenId + 2 < count($tokens))
                        {
                            if  ($tokens[$tokenId+2] === $token)
                            {
                                // Parameter entities should not be expanded here.
                                $attributeDefaultValue = $tokens[$tokenId+1];
                            }
                            else
                            {
                                $this->addFatalError("The attribute '" . $attributeName . "' starts quoting a default value, but does not finish this quotation.", $this->line);
                            }
                            // Skip the two tokens that were inspected above.
                            $tokenId += 2;
                        }
                        else
                        {
                            $this->addFatalError("The attribute '" . $attributeName . "' starts a default value declaration, but does not finish it.", $this->line);
                        }
                        break;
                    default:
                        $this->addFatalError("The attribute '" . $attributeName . "' has an invalid DefaultDeclaration.", $this->line);
                        break;
                }
                $attributeCreated = new Attribute($attributeName, $attributeType, $attributeDefaultType, $attributeDefaultValue, $attributeEnumeration);
                if (!array_key_exists($elementType, $this->elements))
                {
                    // The element itself has not been declared yet; create a placeholder for it.
                    $this->elements[$elementType] = new Element($elementType, Element::CONTENT_SPECIFICATION_NOT_GIVEN, false);
                }
                if (array_key_exists($attributeName, $this->elements[$elementType]->attributes))
                {
                    // At user option, for interopability, the XML processor may issue a warning.
                    // This processor chooses not to issue it. At any rate, we must keep the previous definition.
                }
                else
                {
                    $this->elements[$elementType]->attributes[$attributeName] = $attributeCreated;
                }
                // Reset the working data for the next attribute definition. The enumeration is reset
                // to an empty array (not false) so that later attributes receive the same value type
                // as the first one and appending to it stays valid.
                $attributeName = false;
                $attributeDefaultType = false;
                $attributeDefaultValue = false;
                $attributeEnumeration = [];
                $attributeType = false;
                $state = Internal\AttlistMode::NeedName;
            }
            $tokenId++;
        }
        if ($attributeName !== false)
        {
            // The tokens ran out in the middle of an attribute definition.
            $this->addFatalError("An attribute definition inside the ATTLIST was not completed.", $this->line);
        }
    }
    /**
     * Parses the inside of a <!NOTATION ...> declaration and stores the result in $this->notations.
     *
     * Accepted token layouts after tokenization:
     *  - 5 tokens: name, PUBLIC|SYSTEM, quote, identifier, quote
     *  - 8 tokens: name, PUBLIC, quote, public identifier, quote, quote, system identifier, quote
     * Any problem is recorded through addFatalError() and the notation is not stored.
     *
     * @param string $markupDeclaration Declaration text without the leading keyword and the closing '>'.
     */
    private function parseNotation($markupDeclaration)
    {
        $markupDeclaration = $this->evaluatePEReferencesIn($markupDeclaration, Internal\PEStyle::IgnoreQuotedText);
        $tokens = $this->tokenize($markupDeclaration, $tokenizationError);
        if ($tokens === false)
        {
            $this->addFatalError("Notation declaration could not be tokenized: " . $tokenizationError, $this->line);
            return;
        }

        $tokenCount = count($tokens);
        if ($tokenCount !== 5 && $tokenCount !== 8)
        {
            $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed NOTATION declaration.", $this->line);
            return;
        }

        $name = $tokens[0];
        if (!$this->isNameValid($name))
        {
            $this->addFatalError("'" . $name . "' is not a valid NOTATION name.", $this->line);
            return;
        }

        // The first literal must be enclosed in two identical quote characters.
        $firstLiteralQuoted = $tokens[2] === $tokens[4] && ($tokens[2] === "'" || $tokens[2] === '"');
        $badQuoting = !$firstLiteralQuoted;

        $externalIDType = $tokens[1];
        $isPublic = $externalIDType === "PUBLIC";
        if (!$isPublic && $externalIDType !== "SYSTEM")
        {
            $this->addFatalError("Notations must be either PUBLIC or SYSTEM.", $this->line);
            return;
        }

        // The single literal is the public identifier for PUBLIC and the system identifier for SYSTEM.
        $publicId = $isPublic ? $tokens[3] : "";
        $systemId = $isPublic ? "" : $tokens[3];

        if ($tokenCount === 8)
        {
            $secondLiteralQuoted = $tokens[5] === $tokens[7] && ($tokens[5] === "'" || $tokens[5] === '"');
            if (!$secondLiteralQuoted)
            {
                $badQuoting = true;
            }
            $systemId = $tokens[6];
            if (!$isPublic)
            {
                // Two literals are only allowed in the PUBLIC form.
                $this->addFatalError("A public identifier was provided even thought the notation is not declared PUBLIC.", $this->line);
                return;
            }
        }

        if ($badQuoting)
        {
            $this->addFatalError("External ID's in '" . $markupDeclaration . "' are not properly quoted.", $this->line);
            return;
        }

        $notation = new Notation($name, $systemId, $publicId);
        if (array_key_exists($name, $this->notations))
        {
            $this->addFatalError("Notation '" . $name . "' is already declared.", $this->line);
            return;
        }
        $this->notations[$name] = $notation;
    }
    /**
     * Parses the inside of an <!ENTITY ...> declaration and stores the result in
     * $this->parameterEntities or $this->generalEntities.
     *
     * Recognized forms after tokenization (an optional leading "%" marks a parameter entity):
     *  - name SYSTEM quote system-id quote [NDATA notation]
     *  - name PUBLIC quote public-id quote quote system-id quote [NDATA notation]
     *  - name quote replacement-text quote
     *
     * For the internal form, parameter entity references inside the literal are expanded; a '%'
     * remaining afterwards is an error. External entities get an empty replacement text. When
     * $this->shouldLoadExternalEntities is set, the system identifier is probed as a local file, but
     * only warnings are issued; the content is not parsed.
     *
     * If an entity of the same kind and name already exists, the earlier declaration is kept and the new
     * one is silently ignored. Any problem is recorded through addFatalError() and ends parsing of this
     * declaration.
     *
     * @param string $markupDeclaration Declaration text without the leading keyword and the closing '>'.
     */
    private function parseEntityDeclaration($markupDeclaration)
    {
        $tokenizationError = "";
        $markupDeclaration = $this->evaluatePEReferencesIn($markupDeclaration, Internal\PEStyle::IgnoreQuotedText);
        $tokens = $this->tokenize($markupDeclaration, $tokenizationError);
        if ($tokens === false)
        {
            $this->addFatalError("Entity declaration could not be tokenized: " . $tokenizationError, $this->line);
            return;
        }
        // The shortest valid declaration is: name, quote, text, quote.
        if (count($tokens) < 4)
        {
            $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed ENTITY declaration.", $this->line);
            return;
        }
        $tokenId = 0;
        $isParametric = false;
        $isExternal = false;
        $publicIdentifier = false;
        $systemIdentifier = false;
        $notation = false;
        if ($tokens[$tokenId] === "%")
        {
            $isParametric = true;
            $tokenId++;
        }
        $name = $tokens[$tokenId];
        $tokenId++;
        if (!$this->isNameValid($name))
        {
            $this->addFatalError("'" . $name . "' is not a valid ENTITY name.", $this->line);
            return;
        }
        if ($tokens[$tokenId] === "SYSTEM" || $tokens[$tokenId] === "PUBLIC")
        {
            if ($tokens[$tokenId] === "SYSTEM")
            {
                // "SYSTEM" must be followed by three tokens: quote, system identifier, quote.
                if ($tokenId + 3 <= count($tokens) - 1)
                {
                    if ($tokens[$tokenId + 1] === $tokens[$tokenId + 3])
                    {
                        if ($tokens[$tokenId + 1 ] === "'" || $tokens[$tokenId + 1] === '"')
                        {
                            $systemIdentifier = $tokens[$tokenId + 2];
                        }
                        else
                        {
                            $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed SYSTEM external ENTITY because its SystemId was not properly quoted.", $this->line);
                            return;
                        }
                    }
                    else
                    {
                        $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed SYSTEM external ENTITY because its SystemId quotes do not match.", $this->line);
                        return;
                    }
                }
                else
                {
                    $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed SYSTEM external ENTITY because it could not be properly tokenized.", $this->line);
                    return;
                }
                $tokenId += 4;
            }
            else // Public identifier
            {
                $tokenId++;
                $publicIdentifier = $this->parseExternalIdentifier($tokens, $tokenId);
                if ($publicIdentifier === false) {
                    $this->addFatalError("Parsing the public identifier of '" . $markupDeclaration . "' failed.", $this->line);
                    return;
                }
                $tokenId += 3;
                $systemIdentifier = $this->parseExternalIdentifier($tokens, $tokenId);
                // Check the value that was just parsed (the system identifier, not the public one).
                if ($systemIdentifier === false) {
                    $this->addFatalError("Parsing the system identifier of '" . $markupDeclaration . "' failed.", $this->line);
                    return;
                }
                $tokenId += 3;
            }
            $replacementText = "";
            $isExternal = true;
            if ($tokenId < count($tokens))
            {
                // The only thing allowed after the external identifier is "NDATA name".
                if ($tokens[$tokenId] === "NDATA")
                {
                    $tokenId++;
                    if ($tokenId === count($tokens)-1)
                    {
                        $notation = $tokens[$tokenId];
                        $tokenId++;
                        if (!$this->isNameValid($notation))
                        {
                            $this->addFatalError("In a general entity declaration, NDATA was followed by '" . $notation . "' which is not a Name.", $this->line);
                            return;
                        }
                        if (!array_key_exists($notation, $this->notations))
                        {
                            $this->addFatalError("An ENTITY declaration refers to the notation '" . $notation . "' which is not yet declared.", $this->line);
                            return;
                        }
                        if ($isParametric)
                        {
                            $this->addFatalError("Parametric entities may not have an NDATA specifier.", $this->line);
                            return;
                        }
                    }
                    else
                    {
                        $this->addFatalError("In a general entity declaration, the keyword NDATA must be followed by a Name only. It is followed by something else, however.", $this->line);
                        return;
                    }
                }
                else
                {
                    $this->addFatalError("NDATA or end of entity declaration expected", $this->line);
                    return;
                }
            }
            if ($this->shouldLoadExternalEntities)
            {
                if (file_exists($systemIdentifier))
                {
                    $externalContent = file_get_contents($systemIdentifier);
                    if ($externalContent !== false)
                    {
                        $this->addWarning("This DTD parser is not programmed to parse additional external entities.", $this->line);
                    }
                    else
                    {
                        // Report the current line; this class has no $file property.
                        $this->addWarning("An external parameter entity is declared but reading from the file given by system identifier failed.", $this->line);
                    }
                }
                else
                {
                    $this->addWarning("An external parameter entity is declared but its system identifier does not point to a file.", $this->line);
                }
            }
        }
        else if ($tokens[$tokenId] === "'" || $tokens[$tokenId] === '"')
        {
            // The token count is checked first: it guarantees that index $tokenId + 2 exists
            // before the closing quote is read.
            if (count($tokens) === $tokenId+3 && $tokens[$tokenId] === $tokens[$tokenId+2])
            {
                $replacementText = $tokens[$tokenId+1];
                $replacementText = $this->evaluatePEReferencesIn($replacementText, Internal\PEStyle::InEntityDeclaration);
                if (strpos($replacementText, "%") !== false)
                {
                    $this->addFatalError("Entities cannot contain the character '%' unless as part of a parameter entity reference.", $this->line);
                    return;
                }
                $tokenId += 3;
            }
            else
            {
                $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed ENTITY because it contains additional illegal markup.", $this->line);
                return;
            }
        }
        else
        {
            $this->addFatalError("'" . $markupDeclaration . "' is not a well-formed ENTITY.", $this->line);
            return;
        }
        if ($tokenId !== count($tokens))
        {
            $this->addFatalError("'" . $markupDeclaration . "' contains additional illegal tokens near the end.", $this->line);
            return;
        }
        if ($isParametric)
        {
            if (!array_key_exists($name, $this->parameterEntities))
            {
                 // We could issue a warning (at user option), but we must not issue an error.
                 $this->parameterEntities[$name] = new ParameterEntity($name, $replacementText, $isExternal, $systemIdentifier, $publicIdentifier);
            }
        }
        else
        {
            // As for parameter entities, the first declaration of a name wins.
            if (!array_key_exists($name, $this->generalEntities))
            {
                $this->generalEntities[$name] = new GeneralEntity($name, $replacementText, $isExternal, $systemIdentifier, $publicIdentifier, $notation);
            }
        }
    }
    /**
     * Routes one complete markup declaration to the parser responsible for its kind.
     *
     * The declaration keyword must be followed by a space, a line feed or a tab. The matching handler receives
     * the text after that keyword-plus-separator prefix with the final character of the declaration cut off.
     * A declaration that starts with none of the four known keywords is recorded as a fatal error.
     *
     * @param string $markupDeclaration Declaration text beginning with "<!"
     */
    private function parseMarkupDeclaration($markupDeclaration)
    {
        // Keyword => handler method; the order is the order in which the keywords are tried.
        $handlers = [
            "<!ELEMENT"  => 'parseElement',
            "<!ATTLIST"  => 'parseAttlist',
            "<!NOTATION" => 'parseNotation',
            "<!ENTITY"   => 'parseEntityDeclaration',
        ];
        $separators = [" ", "\n", "\t"];
        foreach ($handlers as $keyword => $handler)
        {
            foreach ($separators as $separator)
            {
                if (!$this->startsWith($markupDeclaration, $keyword . $separator))
                {
                    continue;
                }
                // Skip the keyword and its one-character separator; drop the last character.
                $this->$handler(substr($markupDeclaration, strlen($keyword) + 1, -1));
                return;
            }
        }
        $this->addFatalError("This declaration type does not exist (only ELEMENT, ATTLIST, NOTATION and ENTITY are possible.", $this->line);
    }
    /**
     * Parses one processing instruction and appends it to $this->processingInstructions.
     *
     * The text may be given with or without its "<?" and "?>" delimiters; they are removed before the
     * instruction is split into its target and its data. The target ends at the first run of whitespace.
     * The data part is optional, so an instruction consisting of a target only is stored with empty data.
     *
     * Fatal errors are recorded when the target is missing or is not accepted by isNameValid().
     *
     * @param string $processingInstruction Text of the processing instruction
     */
    private function parseProcessingInstruction($processingInstruction)
    {
        $content = (string)$processingInstruction;
        // The caller hands over the instruction including its delimiters; they belong to neither target nor data.
        if (substr($content, 0, 2) === "<?")
        {
            $content = (string)substr($content, 2);
        }
        if (substr($content, -2) === "?>")
        {
            $content = (string)substr($content, 0, -2);
        }
        // Any whitespace may separate the target from the data, not only a single space.
        $split = preg_split('/[ \t\r\n]+/', $content, 2);
        $target = $split[0];
        $data = isset($split[1]) ? $split[1] : "";
        if ($target === "")
        {
            $this->addFatalError("This processing instruction does not have a target.", $this->line);
            return;
        }
        if (!$this->isNameValid($target))
        {
            $this->addFatalError("The target of a processing instruction must be a Name.", $this->line);
            return;
        }
        // Append: the property is a list, a plain assignment would discard all earlier instructions.
        $this->processingInstructions[] = new ProcessingInstruction($target, $data);
    }
    /**
     * Scans one DTD text at the top level and dispatches every construct found there.
     *
     * The text is first normalized (line endings become "\n", comments are blanked out). The scanner then moves
     * from one non-space character to the next using findNonspace() and handles conditional section delimiters,
     * parameter entity references, markup declarations and processing instructions. Problems are recorded with
     * addFatalError(); for problems after which the rest of the text cannot be interpreted, scanning stops.
     *
     * @param string $text             DTD text to scan
     * @param bool   $isInternalSubset True if the text is an internal subset; conditional sections are then an error
     */
    private function parseGlobalSpace($text, $isInternalSubset)
    {
        $this->line = 1;
        $this->currentOffset = 0;
        $includeSectionsOpened = 0;
        $ignoreSectionsOpened = 0;
        // 1. Normalize end-of-lines as per unicode spec
        // Double quotes are required: PHP interprets "\r" and "\n" as escape sequences only in double-quoted strings.
        $text = str_replace("\r\n", "\n", $text);
        $text = str_replace("\r", "\n", $text);
        // 2. Remove comments
        // TODO save comments
        // Every comment is replaced by the line feeds it contained, so the text after a multi-line comment
        // stays on its original line.
        // NOTE(review): correct line numbers in messages presume findNonspace() counts the line feeds it skips - confirm.
        $text = preg_replace_callback(
            '/<!--(([^-])|(-[^-]))*-->/',
            function ($comment)
            {
                return str_repeat("\n", substr_count($comment[0], "\n"));
            },
            $text
        );
        if ($text === null)
        {
            // The regular expression engine failed; without this check the DTD would be treated as empty and valid.
            $this->addFatalError("The DTD could not be scanned for comments because the regular expression engine reported an error.", $this->line);
            return;
        }
        $length = strlen($text);
        // 3. Go through the text, searching for
        //  a) %ref; Parameter entity reference.
        //  b) <!ELEMENT Name TextNoGt>
        //  c) <!ATTLIST Name TextNoGt>
        //  d) <!ENTITY (%) Name SYSTEMLITERALCONTAINSGT>
        //  e) <!NOTATION Name SYSTEMLITERALCONTAINSGT>
        //  f) <![ INCLUDE [ ]]>
        //  g) <![ IGNORE [ ]]>
        //  h) <!-- causes error, it should have been removed
        $this->currentOffset = $this->findNonspace($text, $this->currentOffset, $length);
        while ($this->currentOffset !== false)
        {
            if (substr($text, $this->currentOffset, 3) === "]]>")
            {
                // Closes the innermost open section; open IGNORE sections are closed before INCLUDE sections.
                if ($ignoreSectionsOpened > 0)
                {
                    $ignoreSectionsOpened--;
                }
                else if ($includeSectionsOpened > 0)
                {
                    $includeSectionsOpened--;
                }
                else
                {
                    $this->addFatalError("The token ']]>' does not close any conditional section at this position.", $this->line);
                }
                $this->currentOffset += 3;
            }
            else if (substr($text, $this->currentOffset, 3) === "<![")
            {
                if ($isInternalSubset)
                {
                    $this->addFatalError("Internal subsets cannot contain conditional sections.", $this->line);
                }
                if ($ignoreSectionsOpened > 0)
                {
                    // Inside an ignored section a nested opener is only counted, its keyword is not evaluated.
                    $ignoreSectionsOpened++;
                    $this->currentOffset += 3;
                }
                else
                {
                    // This is a conditional section.
                    $nextOpeningBrace = strpos($text, "[", $this->currentOffset + 3);
                    if ($nextOpeningBrace === false)
                    {
                        $this->addFatalError("The conditional section is missing its second opening bracket.", $this->line);
                        break;
                    }
                    // The keyword between "<![" and "[" may itself be given through a parameter entity reference.
                    $includeOrIgnore = substr($text, $this->currentOffset + 3, $nextOpeningBrace - $this->currentOffset - 3);
                    $includeOrIgnore = trim($this->evaluatePEReferencesIn($includeOrIgnore, Internal\PEStyle::IgnoreQuotedText));
                    if ($includeOrIgnore === "INCLUDE")
                    {
                        $includeSectionsOpened++;
                        $this->currentOffset = $nextOpeningBrace+1;
                    }
                    else if ($includeOrIgnore === "IGNORE")
                    {
                        $ignoreSectionsOpened++;
                        $this->currentOffset = $nextOpeningBrace+1;
                    }
                    else
                    {
                        $this->addFatalError("The marked section was neither INCLUDE nor IGNORE. No other marked sections are allowed in a DTD.", $this->line);
                        $this->currentOffset = $nextOpeningBrace + 1;
                    }
                }
            }
            elseif ($ignoreSectionsOpened == 0)
            {
                if (substr($text, $this->currentOffset, 1) === "%")
                {
                    // This is a parameter-entity reference.
                    $endingColon = strpos($text, ";", $this->currentOffset+1);
                    if ($endingColon === false)
                    {
                        $this->addFatalError("The parameter entity reference is not finished.", $this->line);
                        break;
                    }
                    else
                    {
                        $PEReferenceText = substr($text, $this->currentOffset+1, $endingColon - $this->currentOffset -1);
                        $this->parseGlobalPEReference($PEReferenceText);
                        $this->line += substr_count($PEReferenceText, "\n");
                        $this->currentOffset = $endingColon+1;
                    }
                }
                else if (substr($text, $this->currentOffset, 4) === "<!--")
                {
                    // A comment opener that survived step 2 was not matched by the comment pattern.
                    $this->addFatalError("The comment contained two consecutive dashes '--' which is not permitted. Perhaps your file contained nested comments?", $this->line);
                    break;
                }
                else if (substr($text, $this->currentOffset, 2) === "<!")
                {
                    // This is a declaration.
                    $tagBeginsAt = $this->currentOffset;
                    $inQuotes = false; $inApostrophes = false;
                    $this->currentOffset += 2;
                    $tagEndsAt = false;
                    // Find the closing '>' that is not inside a quoted or apostrophized literal.
                    while ($this->currentOffset < $length)
                    {
                        $character = substr($text, $this->currentOffset, 1);
                        if ($character === "'")
                        {
                            if (!$inQuotes) { $inApostrophes = !$inApostrophes;}
                        }
                        else if ($character === '"')
                        {
                            if (!$inApostrophes) { $inQuotes = !$inQuotes; }
                        }
                        else if ($character === '>')
                        {
                            if (!$inApostrophes && !$inQuotes)
                            {
                                $tagEndsAt = $this->currentOffset;
                                $this->currentOffset++;
                                break;
                            }
                        }
                        $this->currentOffset++;
                    }
                    if ($tagEndsAt === false)
                    {
                        $this->addFatalError("The markup declaration is not finished.", $this->line);
                        break;
                    }
                    else
                    {
                        $markupDeclaration = substr($text, $tagBeginsAt, $tagEndsAt - $tagBeginsAt+1);
                        $this->parseMarkupDeclaration($markupDeclaration);
                        // Lines are added after parsing, so messages from the declaration carry its first line.
                        $this->line += substr_count($markupDeclaration, "\n");
                    }
                }
                else if (substr($text, $this->currentOffset, 2) === "<?")
                {
                    $endAt = strpos($text, "?>", $this->currentOffset + 2);
                    if ($endAt === false)
                    {
                        $this->addFatalError("The processing instruction is not finished.", $this->line);
                        break;
                    }
                    // The extracted text includes the "<?" and "?>" delimiters.
                    $processing_instruction = substr($text, $this->currentOffset, $endAt - $this->currentOffset + 2);
                    $this->parseProcessingInstruction($processing_instruction);
                    $this->line += substr_count($processing_instruction, "\n");
                    $this->currentOffset = $endAt+2;
                }
                else if (substr($text, $this->currentOffset, 1) === "<")
                {
                    // A '<' that opens neither a declaration nor a processing instruction.
                    $this->addFatalError("The character '<' here must be immediately followed by '!' or '?'." , $this->line);
                    break;
                }
                else
                {
                    $character = substr($text, $this->currentOffset, 1);
                    $this->addFatalError("The character '" . $character . "' is not permitted here (only '%', '< !' and '< ?' and possibly ']]>' are permitted)." , $this->line);
                    break;
                }
            }
            else
            {
                // Inside an ignored section: skip the character without interpreting it.
                $this->currentOffset++;
            }
            // Find next character.
            $this->currentOffset = $this->findNonspace($text, $this->currentOffset, $length);
        }
        if ($includeSectionsOpened > 0 || $ignoreSectionsOpened > 0)
        {
            $this->addFatalError("A conditional section was not closed by the end of the DTD.", $this->line);
        }
    }
    /**
     * Builds the DTD by parsing the internal subset first and the main DTD text second.
     *
     * The constructor is private; instances are obtained through parseText().
     *
     * @param string $text           Main DTD text
     * @param string $internalSubset Internal subset text
     */
    private function __construct($text, $internalSubset)
    {
        $this->xmlRegexes = new Internal\XmlRegexes();
        // Each pass: [text to scan, whether it is an internal subset]. The order of the passes matters.
        $passes = [
            [$internalSubset, true],
            [$text, false],
        ];
        foreach ($passes as $pass)
        {
            $this->parseGlobalSpace($pass[0], $pass[1]);
        }
    }
    /**
     * Parses the given text as the content of a .dtd file and returns the resulting DTD instance.
     * An instance is returned even if parsing fails; problems found are collected in the instance.
     *
     * @param string $text UTF-8 text to parse
     * @param string $internalSubset optional XML internal subset, parsed in addition to (and before) the main text
     * @return DTD Object representing the parsed DTD document.
     */
    public static function parseText($text, $internalSubset = "")
    {
        return new self($text, $internalSubset);
    }
}
/**
 * Value object for one XML notation declaration.
 * @link http://www.w3.org/TR/REC-xml/#Notations
 */
class Notation
{
    /**
     * @var string Name of the notation
     */
    public $name = "";
    /**
     * @var string Public ID; an empty string when the notation has none
     */
    public $publicID = "";
    /**
     * @var string System ID (mandatory)
     */
    public $systemID = "";

    /**
     * Note the argument order: the system ID precedes the public ID.
     *
     * @param string $name     Notation name
     * @param string $systemID System ID
     * @param string $publicID Public ID
     */
    public function __construct($name, $systemID, $publicID)
    {
        $this->publicID = $publicID;
        $this->systemID = $systemID;
        $this->name = $name;
    }
}
/**
 * Value object for one attribute as kept in an element's attribute list.
 *
 * The ATTTYPE_* constants are the values used for $type, the DEFAULT_* constants the values used for
 * $defaultType. Constants wrapped in "##" are internal markers rather than DTD keywords.
 */
class Attribute
{
    // Attribute types.
    const ATTTYPE_CDATA = "CDATA";
    const ATTTYPE_ID = "ID";
    const ATTTYPE_IDREF = "IDREF";
    const ATTTYPE_IDREFS = "IDREFS";
    const ATTTYPE_ENTITY = "ENTITY";
    const ATTTYPE_ENTITIES = "ENTITIES";
    const ATTTYPE_NMTOKEN = "NMTOKEN";
    const ATTTYPE_NMTOKENS = "NMTOKENS";
    const ATTTYPE_ENUMERATION = "##ENUMERATION_INTERNAL_IDENTIFIER##";
    const ATTTYPE_NOTATION = "NOTATION";

    // Default declaration kinds.
    const DEFAULT_REQUIRED = "#REQUIRED";
    const DEFAULT_IMPLIED = "#IMPLIED";
    const DEFAULT_FIXED = "#FIXED";
    const DEFAULT_IMPLICIT_DEFAULT = "##DEFAULT_VALUE_IF_EMPTY_INTERNAL_IDENTIFIER##";

    /** Attribute name. */
    public $name;
    /** Attribute type. */
    public $type;
    /** Enumeration values; replaced by the constructor argument, which is false unless given. */
    public $enumeration = [];
    /** Kind of default declaration. */
    public $defaultType;
    /** Default value. */
    public $defaultValue;

    /**
     * @param            $name         Attribute name
     * @param            $type         Attribute type
     * @param            $defaultType  Kind of default declaration
     * @param            $defaultValue Default value
     * @param array|bool $enumeration  Enumeration values; false when omitted
     */
    public function __construct($name, $type, $defaultType, $defaultValue, $enumeration = false)
    {
        $this->name = $name;
        $this->type = $type;
        $this->enumeration = $enumeration;
        $this->defaultValue = $defaultValue;
        $this->defaultType = $defaultType;
    }
}
/**
 * Value object pairing the target of a processing instruction with its data.
 */
class ProcessingInstruction
{
    /**
     * @var string Target of the processing instruction
     */
    public $target;
    /**
     * @var string Data of the processing instruction
     */
    public $data;

    /**
     * @param string $target Target of the processing instruction
     * @param string $data   Data of the processing instruction
     */
    public function __construct($target, $data)
    {
        $this->data = $data;
        $this->target = $target;
    }
}
/**
 * Value object for one element type: its name, its content specification and its attributes.
 */
class Element
{
    const CONTENT_SPECIFICATION_ANY = "ANY";
    const CONTENT_SPECIFICATION_EMPTY = "EMPTY";
    /** Initial value of $contentSpecification. */
    const CONTENT_SPECIFICATION_NOT_GIVEN = false;

    /**
     * @var boolean Mixed-content flag as passed to the constructor
     */
    public $mixed;
    /**
     * @var string Element type (name)
     */
    public $type = "";
    /**
     * @var string Content specification text
     */
    public $contentSpecification = self::CONTENT_SPECIFICATION_NOT_GIVEN;
    /**
     * @var Attribute[]
     */
    public $attributes = [];

    /**
     * @param string  $type         Element type (name)
     * @param string  $contentModel Content specification; stored in $contentSpecification
     * @param boolean $mixed        Mixed-content flag
     */
    public function __construct($type, $contentModel, $mixed)
    {
        $this->type = $type;
        $this->contentSpecification = $contentModel;
        $this->mixed = $mixed;
    }

    /**
     * @return boolean The mixed-content flag given at construction
     */
    public function isMixed()
    {
        return $this->mixed;
    }

    /**
     * @return boolean True only if the content specification is exactly "(#PCDATA)"
     */
    public function isPureText()
    {
        return "(#PCDATA)" === $this->contentSpecification;
    }
}
/**
 * Value object for one general entity declaration.
 */
class GeneralEntity
{
    /**
     * @var string Entity name
     */
    public $name = "";
    /**
     * @var string Replacement text
     */
    public $replacementText = "";
    /**
     * @var string|false Notation; false until set by the constructor
     */
    public $notation = false;
    /**
     * @var bool External flag as passed to the constructor
     */
    public $external = false;
    /**
     * @var string|false System identifier; false until set by the constructor
     */
    public $systemId = false;
    /**
     * @var string|false Public identifier; false until set by the constructor
     */
    public $publicId = false;

    /**
     * Stores every argument in the property of the same name.
     *
     * @param string $name
     * @param string $replacementText
     * @param bool   $external
     * @param string $systemId
     * @param string $publicId
     * @param string $notation
     */
    public function __construct($name, $replacementText, $external, $systemId, $publicId, $notation)
    {
        $this->name = $name;
        $this->replacementText = $replacementText;
        $this->external = $external;
        $this->systemId = $systemId;
        $this->publicId = $publicId;
        $this->notation = $notation;
    }
}
/**
 * Value object for one parameter entity declaration.
 */
class ParameterEntity
{
    /**
     * @var string Entity name
     */
    public $name = "";
    /**
     * @var string Replacement text
     */
    public $replacementText = "";
    /**
     * @var bool External flag as passed to the constructor
     */
    public $external = false;
    /**
     * @var string|false System identifier; false until set by the constructor
     */
    public $systemId = false;
    /**
     * @var string|false Public identifier; false until set by the constructor
     */
    public $publicId = false;

    /**
     * Stores every argument in the property of the same name.
     *
     * @param string $name
     * @param string $replacementText
     * @param bool   $external
     * @param string $systemId
     * @param string $publicId
     */
    public function __construct($name, $replacementText, $external, $systemId, $publicId)
    {
        $this->publicId = $publicId;
        $this->systemId = $systemId;
        $this->external = $external;
        $this->replacementText = $replacementText;
        $this->name = $name;
    }
}
/**
 * A single message recorded while parsing a DTD.
 */
class Error
{
    /**
     * @var string Message text
     */
    private $message;

    /**
     * @param string $message Message text
     */
    public function __construct($message)
    {
        $this->message = $message;
    }

    /**
     * @return string The message given at construction
     */
    public function getMessage()
    {
        return $this->message;
    }
}
namespace Soothsilver\DtdParser\Internal;
/**
 * Contains regular expressions for various productions in the XML specification.
 *
 * Every property holds a pattern fragment without delimiters, modifiers or anchors. The fragments use
 * \x{...} code point escapes and are therefore meant to be used with the "u" modifier.
 *
 * @package Soothsilver\DtdParser\Internal
 */
class XmlRegexes {
    /**
     * @var string Character class for production [4a] NameChar
     */
    public $NameChar;
    /**
     * @var string Character class for production [4] NameStartChar
     */
    public $NameStartChar;
    /**
     * @var string Pattern for production [5] Name: one NameStartChar followed by any number of NameChar
     */
    public $Name;
    /**
     * @var string Pattern for production [7] Nmtoken: one or more NameChar
     */
    public $NmToken;
    public function __construct()
    {
        // Characters that may begin a Name (production [4]); digits, '-' and '.' are not among them.
        $nameStartRanges = ":A-Z_a-z"
            . "\\xC0-\\xD6\\xD8-\\xF6\\xF8-\\x{2FF}"
            . "\\x{370}-\\x{37D}\\x{37F}-\\x{1FFF}"
            . "\\x{200C}-\\x{200D}\\x{2070}-\\x{218F}\\x{2C00}-\\x{2FEF}"
            . "\\x{3001}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFFD}"
            . "\\x{10000}-\\x{EFFFF}";
        // Characters permitted only after the first position (production [4a]).
        $nameOnlyRanges = "\\-.0-9\\xB7\\x{300}-\\x{36F}\\x{203F}-\\x{2040}";
        $this->NameStartChar = "[" . $nameStartRanges . "]";
        $this->NameChar = "[" . $nameStartRanges . $nameOnlyRanges . "]";
        $this->Name = "{$this->NameStartChar}{$this->NameChar}*";
        $this->NmToken = "{$this->NameChar}+";
    }
}
/**
 * States of the parser while it works through an ATTLIST declaration.
 * @package Soothsilver\DtdParser\Internal
 */
abstract class AttlistMode
{
    /** An attribute name is needed next. */
    const NeedName = 0;
    /** An attribute type is needed next. */
    const NeedAttType = 1;
    /** The NOTATION keyword has just been read. */
    const AfterNOTATION = 2;
    /** Inside an enumeration, a value is needed next. */
    const InsideEnumeration_NeedValue = 3;
    /** A default declaration is needed next. */
    const NeedDefaultDecl = 4;
    /** Inside an enumeration, a separator is needed next. */
    const InsideEnumeration_NeedSeparator = 5;
}
/**
 * Modes that tell the parser how to treat the parameter entities it finds.
 * @package Soothsilver\DtdParser\Internal
 */
abstract class PEStyle
{
    const IgnoreQuotedText = 0;
    const MatchingParentheses = 1;
    /** Mode used for the replacement text of an entity declaration. */
    const InEntityDeclaration = 2;
}
/**
 * Parser states, one per kind of declaration named by the constants.
 * @package Soothsilver\DtdParser\Internal
 */
abstract class TokenizeMode
{
    /** ATTLIST declaration. */
    const Attlist = 0;
    /** ENTITY declaration. */
    const EntityDeclaration = 1;
    /** NOTATION declaration. */
    const NotationDeclaration = 2;
}
 |