attributeFilters = new AttributeFilterCollection;
        $this->bundleGenerator    = new BundleGenerator($this);
        $this->plugins            = new PluginCollection($this);
        $this->registeredVars     = ['urlConfig' => new UrlConfig];
        $this->rendering          = new Rendering($this);
        $this->rootRules          = new Ruleset;
        $this->rulesGenerator     = new RulesGenerator;
        $this->tags               = new TagCollection;
        $this->templateChecker    = new TemplateChecker;
        $this->templateNormalizer = new TemplateNormalizer;
    }

    /**
     * Magic getter.
     *
     * Names that start with an uppercase letter are treated as plugin names: the plugin is
     * returned if it is already in the collection, otherwise it is loaded on the fly.
     * Any other name is looked up in the registered vars.
     *
     * @param  string $k Plugin name or registered var name
     * @return mixed
     *
     * @throws RuntimeException if the name is neither a plugin name nor a registered var
     */
    public function __get($k)
    {
        if (\preg_match('#^[A-Z][A-Za-z_0-9]+$#D', $k))
        {
            return (isset($this->plugins[$k]))
                 ? $this->plugins[$k]
                 : $this->plugins->load($k);
        }

        if (isset($this->registeredVars[$k]))
        {
            return $this->registeredVars[$k];
        }

        throw new RuntimeException("Undefined property '" . __CLASS__ . '::$' . $k . "'");
    }

    /**
     * Magic isset: tests the plugin collection for plugin-like names, the registered vars otherwise.
     *
     * @param  string $k
     * @return bool
     */
    public function __isset($k)
    {
        if (\preg_match('#^[A-Z][A-Za-z_0-9]+$#D', $k))
        {
            return isset($this->plugins[$k]);
        }

        return isset($this->registeredVars[$k]);
    }

    /**
     * Magic setter: stores plugin-like names in the plugin collection, anything else as a registered var.
     *
     * @param  string $k
     * @param  mixed  $v
     * @return void
     */
    public function __set($k, $v)
    {
        if (\preg_match('#^[A-Z][A-Za-z_0-9]+$#D', $k))
        {
            $this->plugins[$k] = $v;
        }
        else
        {
            $this->registeredVars[$k] = $v;
        }
    }

    /**
     * Magic unset: removes a plugin (plugin-like names) or a registered var.
     *
     * @param  string $k
     * @return void
     */
    public function __unset($k)
    {
        if (\preg_match('#^[A-Z][A-Za-z_0-9]+$#D', $k))
        {
            unset($this->plugins[$k]);
        }
        else
        {
            unset($this->registeredVars[$k]);
        }
    }

    /**
     * Create the JavaScript helper object, unless one is already set.
     *
     * @return void
     */
    public function enableJavaScript()
    {
        if (!isset($this->javascript))
        {
            $this->javascript = new JavaScript($this);
        }
    }

    /**
     * Finalize this configuration and build the runtime objects.
     *
     * @return array Has a "parser" and a "renderer" element, plus a "js" element
     *               if the JavaScript helper has been enabled
     */
    public function finalize()
    {
        $return = [];

        $this->plugins->finalize();

        foreach ($this->tags as $tag)
        {
            $this->templateNormalizer->normalizeTag($tag);
        }

        $return['renderer'] = $this->rendering->getRenderer();

        // The generated rules are merged before the config is extracted
        $this->addTagRules();
        $config = $this->asConfig();

        if (isset($this->javascript))
        {
            $return['js'] = $this->javascript->getParser(ConfigHelper::filterConfig($config, 'JS'));
        }

        $config = ConfigHelper::filterConfig($config, 'PHP');
        ConfigHelper::optimizeArray($config);

        $return['parser'] = new Parser($config);

        return $return;
    }

    /**
     * Load a bundle from the Bundles sub-namespace of this class and let it configure this object.
     *
     * @param  string $bundleName Bundle name: one uppercase letter followed by letters and digits
     * @return void
     *
     * @throws InvalidArgumentException if the bundle name is not valid
     */
    public function loadBundle($bundleName)
    {
        if (!\preg_match('#^[A-Z][A-Za-z0-9]+$#D', $bundleName))
        {
            throw new InvalidArgumentException("Invalid bundle name '" . $bundleName . "'");
        }

        $className = __CLASS__ . '\\Bundles\\' . $bundleName;

        $bundle = new $className;
        $bundle->configure($this);
    }

    /**
     * Generate a bundle class and write it to a file.
     *
     * The bundle generator returns the class source without an opening PHP tag,
     * so the tag is prepended here before the file is written.
     *
     * @param  string $className Name of the bundle class
     * @param  string $filepath  Where to save the file
     * @param  array  $options   Options passed to the bundle generator
     * @return bool              Whether the write succeeded
     */
    public function saveBundle($className, $filepath, array $options = [])
    {
        $file = "<?php\n\n" . $this->bundleGenerator->generate($className, $options);

        return (\file_put_contents($filepath, $file) !== \false);
    }

    /**
     * Build the configuration array that describes this object.
     *
     * @return array
     */
    public function asConfig()
    {
        $this->plugins->finalize();

        $properties = \get_object_vars($this);

        // These properties are removed before the remaining ones are converted
        unset($properties['attributeFilters']);
        unset($properties['bundleGenerator']);
        unset($properties['javascript']);
        unset($properties['rendering']);
        unset($properties['rulesGenerator']);
        unset($properties['registeredVars']);
        unset($properties['templateChecker']);
        unset($properties['templateNormalizer']);
        unset($properties['stylesheet']);

        $config    = ConfigHelper::toArray($properties);
        $bitfields = RulesHelper::getBitfields($this->tags, $this->rootRules);

        // The root context is made of the root bitfields plus the root rules' flags
        $config['rootContext']          = $bitfields['root'];
        $config['rootContext']['flags'] = $config['rootRules']['flags'];

        // Registered vars are converted with $keepEmpty set
        $config['registeredVars'] = ConfigHelper::toArray($this->registeredVars, \true);

        // Make sure both keys exist even if toArray() dropped them for being empty
        $config += [
            'plugins' => [],
            'tags'    => []
        ];

        // Keep only the tags that have bitfields, then add each tag's bitfields to its config
        $config['tags'] = \array_intersect_key($config['tags'], $bitfields['tags']);
        foreach ($bitfields['tags'] as $tagName => $tagBitfields)
        {
            $config['tags'][$tagName] += $tagBitfields;
        }

        unset($config['rootRules']);

        return $config;
    }

    /**
     * Merge the rules produced by the rules generator into the root rules and each tag's rules.
     *
     * @return void
     */
    protected function addTagRules()
    {
        $rules = $this->rulesGenerator->getRules($this->tags);

        $this->rootRules->merge($rules['root'], \false);

        foreach ($rules['tags'] as $tagName => $tagRules)
        {
            $this->tags[$tagName]->rules->merge($tagRules, \false);
        }
    }
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;

use s9e\TextFormatter\Configurator;
use s9e\TextFormatter\Configurator\RendererGenerators\PHP;

class BundleGenerator
{
    /**
     * @var Configurator Configurator this generator reads from
     */
    protected $configurator;

    /**
     * @var callable Callback used to serialize objects
     */
    public $serializer = 'serialize';

    public
$unserializer = 'unserialize'; // name of the function written into the generated code to restore objects

    /**
     * Store the configurator whose finalized objects get bundled.
     *
     * @param Configurator $configurator
     */
    public function __construct(Configurator $configurator)
    {
        $this->configurator = $configurator;
    }

    /**
     * Generate the PHP source of a bundle class.
     *
     * The returned source does not include an opening PHP tag. Options read by this method:
     *  - autoInclude (defaults to true): when the rendering engine is the PHP renderer generator
     *    and it recorded a file path, getRenderer() conditionally includes that file
     *  - parserSetup / rendererSetup: callbacks invoked on the new parser/renderer
     *  - beforeParse, afterParse, beforeRender, afterRender, beforeUnparse, afterUnparse:
     *    values exported as public static properties of the bundle
     *
     * @param  string $className Name of the bundle class, optionally with a namespace
     * @param  array  $options
     * @return string            Lines of source joined with "\n"
     */
    public function generate($className, array $options = [])
    {
        // += only adds the default if the caller did not set the option
        $options += ['autoInclude' => \true];
        $objects = $this->configurator->finalize();
        $parser = $objects['parser'];
        $renderer = $objects['renderer'];
        $namespace = '';
        // Split the name at its last backslash into a namespace and a short class name
        if (\preg_match('#(.*)\\\\([^\\\\]+)$#', $className, $m)) {
            $namespace = $m[1];
            $className = $m[2];
        }
        $php = [];
        $php[] = '/**';
        $php[] = '* @package s9e\TextFormatter';
        $php[] = '* @copyright Copyright (c) 2010-2017 The s9e Authors';
        $php[] = '* @license http://www.opensource.org/licenses/mit-license.php The MIT License';
        $php[] = '*/';
        if ($namespace) {
            $php[] = 'namespace ' . $namespace . ';';
            $php[] = '';
        }
        $php[] = 'abstract class ' . $className . ' extends \\s9e\\TextFormatter\\Bundle';
        $php[] = '{';
        $php[] = ' /**';
        $php[] = ' * @var s9e\\TextFormatter\\Parser Singleton instance used by parse()';
        $php[] = ' */';
        $php[] = ' protected static $parser;';
        $php[] = '';
        $php[] = ' /**';
        $php[] = ' * @var s9e\\TextFormatter\\Renderer Singleton instance used by render()';
        $php[] = ' */';
        $php[] = ' protected static $renderer;';
        $php[] = '';
        // Event name => description used in the generated property's docblock
        $events = [
            'beforeParse' => 'Callback executed before parse(), receives the original text as argument',
            'afterParse' => 'Callback executed after parse(), receives the parsed text as argument',
            'beforeRender' => 'Callback executed before render(), receives the parsed text as argument',
            'afterRender' => 'Callback executed after render(), receives the output as argument',
            'beforeUnparse' => 'Callback executed before unparse(), receives the parsed text as argument',
            'afterUnparse' => 'Callback executed after unparse(), receives the original text as argument'
        ];
        // Only the events present in $options produce a property
        foreach ($events as $eventName => $eventDesc)
            if (isset($options[$eventName])) {
                $php[] = ' /**';
                $php[] = ' * @var ' . $eventDesc;
                $php[] = ' */';
                $php[] = ' public static $' . $eventName . ' = ' .
                    \var_export($options[$eventName], \true) . ';';
                $php[] = '';
            }
        $php[] = ' /**';
        $php[] = ' * Return a new instance of s9e\\TextFormatter\\Parser';
        $php[] = ' *';
        $php[] = ' * @return s9e\\TextFormatter\\Parser';
        $php[] = ' */';
        $php[] = ' public static function getParser()';
        $php[] = ' {';
        if (isset($options['parserSetup'])) {
            // With a setup callback the parser goes through a local variable first
            $php[] = ' $parser = ' . $this->exportObject($parser) . ';';
            $php[] = ' ' . $this->exportCallback($namespace, $options['parserSetup'], '$parser') . ';';
            $php[] = '';
            $php[] = ' return $parser;';
        }
        else
            $php[] = ' return ' . $this->exportObject($parser) . ';';
        $php[] = ' }';
        $php[] = '';
        $php[] = ' /**';
        $php[] = ' * Return a new instance of s9e\\TextFormatter\\Renderer';
        $php[] = ' *';
        $php[] = ' * @return s9e\\TextFormatter\\Renderer';
        $php[] = ' */';
        $php[] = ' public static function getRenderer()';
        $php[] = ' {';
        if (!empty($options['autoInclude']) && $this->configurator->rendering->engine instanceof PHP && isset($this->configurator->rendering->engine->lastFilepath)) {
            // Note: $className is reused here for the renderer's class name
            $className = \get_class($renderer);
            $filepath = \realpath($this->configurator->rendering->engine->lastFilepath);
            $php[] = ' if (!class_exists(' . \var_export($className, \true) . ', false)';
            $php[] = ' && file_exists(' . \var_export($filepath, \true) . '))';
            $php[] = ' {';
            $php[] = ' include ' . \var_export($filepath, \true) . ';';
            $php[] = ' }';
            $php[] = '';
        }
        if (isset($options['rendererSetup'])) {
            $php[] = ' $renderer = ' . $this->exportObject($renderer) . ';';
            $php[] = ' ' . $this->exportCallback($namespace, $options['rendererSetup'], '$renderer') . ';';
            $php[] = '';
            $php[] = ' return $renderer;';
        }
        else
            $php[] = ' return ' . $this->exportObject($renderer) . ';';
        $php[] = ' }';
        $php[] = '}';
        return \implode("\n", $php);
    }

    /**
     * Export a callback as the PHP source of a call to it.
     *
     * Array callbacks whose first element is a string become "Class::method". Callbacks that
     * are still not strings are wrapped in a call_user_func() call around their var_export()
     * representation. String callbacks are made fully qualified, then the prefix is removed
     * if it matches the bundle's namespace.
     *
     * @param  string   $namespace Namespace of the generated bundle, without a leading backslash
     * @param  callable $callback
     * @param  string   $argument  Source of the single argument, e.g. a variable name
     * @return string
     */
    protected function exportCallback($namespace, callable $callback, $argument)
    {
        if (\is_array($callback) && \is_string($callback[0]))
            $callback = $callback[0] . '::' . $callback[1];
        if (!\is_string($callback))
            return 'call_user_func(' .
                \var_export($callback, \true) . ', ' . $argument . ')';
        if ($callback[0] !== '\\')
            $callback = '\\' . $callback;
        // 2 accounts for the backslashes on either side of the namespace
        if (\substr($callback, 0, 2 + \strlen($namespace)) === '\\' . $namespace . '\\')
            $callback = \substr($callback, 2 + \strlen($namespace));
        return $callback . '(' . $argument . ')';
    }

    /**
     * Export an object as the PHP source of a call to the unserializer.
     *
     * @param  object $obj
     * @return string      Unserializer name followed by the exported serialized string in parentheses
     */
    protected function exportObject($obj)
    {
        $str = \call_user_func($this->serializer, $obj);
        $str = \var_export($str, \true);
        return $this->unserializer . '(' . $str . ')';
    }
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;

/**
 * Implemented by objects that can describe themselves as a config value
 */
interface ConfigProvider
{
    /**
     * Return this object's configuration.
     * ConfigHelper::toArray() stores the returned value in place of the object.
     */
    public function asConfig();
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator;

/**
 * Implemented by config values whose content depends on the target
 */
interface FilterableConfigValue
{
    /**
     * Return the value to use for given target.
     * ConfigHelper::filterConfig() calls it with 'PHP' or 'JS' and drops the entry if the result is null.
     *
     * @param string $target
     */
    public function filterConfig($target);
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use DOMAttr;
use RuntimeException;

/**
 * Helpers for attribute value templates, in which expressions are enclosed in curly brackets
 */
abstract class AVTHelper
{
    /**
     * Parse an attribute value template into a list of tokens.
     *
     * Each token is a pair: ['literal', text] or ['expression', expr]. A doubled opening
     * bracket produces a literal bracket, and doubled closing brackets in literal text are
     * collapsed into one.
     *
     * @param  string $attrValue
     * @return array
     *
     * @throws RuntimeException if an expression or a quoted string inside it is not terminated
     */
    public static function parse($attrValue)
    {
        $tokens = [];
        $attrLen = \strlen($attrValue);
        $pos = 0;
        while ($pos < $attrLen) {
            if ($attrValue[$pos] === '{') {
                if (\substr($attrValue, $pos, 2) === '{{') {
                    $tokens[] = ['literal', '{'];
                    $pos += 2;
                    continue;
                }
                // Skip the opening bracket
                ++$pos;
                $expr = '';
                while ($pos < $attrLen) {
                    // Copy everything up to the next quote or closing bracket
                    $spn = \strcspn($attrValue, '\'"}', $pos);
                    if ($spn) {
                        $expr .= \substr($attrValue, $pos, $spn);
                        $pos += $spn;
                    }
                    if ($pos >= $attrLen)
                        throw new RuntimeException('Unterminated XPath expression');
                    $c = $attrValue[$pos];
                    ++$pos;
                    if ($c === '}')
                        break;
                    // $c is a quote: copy the whole quoted string so a bracket inside it is not a terminator
                    $quotePos = \strpos($attrValue, $c, $pos);
                    if ($quotePos === \false)
                        throw new RuntimeException('Unterminated XPath expression');
                    $expr .= $c .
\substr($attrValue, $pos, $quotePos + 1 - $pos);
                    // Resume right after the closing quote
                    $pos = 1 + $quotePos;
                }

                $tokens[] = ['expression', $expr];
            }

            // Everything up to the next opening bracket is literal text
            $spn = \strcspn($attrValue, '{', $pos);
            if ($spn)
            {
                $str = \substr($attrValue, $pos, $spn);

                // Unescape doubled closing brackets
                $str = \str_replace('}}', '}', $str);

                $tokens[] = ['literal', $str];
                $pos += $spn;
            }
        }

        return $tokens;
    }

    /**
     * Replace the value of an attribute by passing each of its AVT tokens through a callback.
     *
     * @param  DOMAttr  $attribute
     * @param  callable $callback  Receives a token (a [type, content] pair) and returns the new token
     * @return void
     */
    public static function replace(DOMAttr $attribute, callable $callback)
    {
        $tokens = self::parse($attribute->value);
        foreach ($tokens as $k => $token)
        {
            $tokens[$k] = $callback($token);
        }

        $attribute->value = \htmlspecialchars(self::serialize($tokens), \ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Serialize a list of AVT tokens back into an attribute value template.
     *
     * Curly brackets in literals are doubled; expressions are wrapped in single brackets.
     *
     * @param  array  $tokens List of [type, content] pairs
     * @return string
     *
     * @throws RuntimeException if a token is neither a literal nor an expression
     */
    public static function serialize(array $tokens)
    {
        $attrValue = '';
        foreach ($tokens as $token)
        {
            if ($token[0] === 'literal')
            {
                $attrValue .= \preg_replace('([{}])', '$0$0', $token[1]);
            }
            elseif ($token[0] === 'expression')
            {
                $attrValue .= '{' . $token[1] . '}';
            }
            else
            {
                throw new RuntimeException('Unknown token type');
            }
        }

        return $attrValue;
    }

    /**
     * Transform an attribute value template into XSL markup.
     *
     * Expressions become xsl:value-of elements. Literals with leading or trailing whitespace
     * are wrapped in xsl:text so that the whitespace is kept; other literals are output as
     * escaped text.
     *
     * @param  string $attrValue Attribute value template
     * @return string            XSL markup
     */
    public static function toXSL($attrValue)
    {
        $xsl = '';
        foreach (self::parse($attrValue) as $token)
        {
            list($type, $content) = $token;

            if ($type === 'expression')
            {
                // The expression sits inside a double-quoted attribute, so double quotes are escaped too
                $xsl .= '<xsl:value-of select="' . \htmlspecialchars($content, \ENT_COMPAT, 'UTF-8') . '"/>';
            }
            elseif (\trim($content) !== $content)
            {
                $xsl .= '<xsl:text>' . \htmlspecialchars($content, \ENT_NOQUOTES, 'UTF-8') . '</xsl:text>';
            }
            else
            {
                $xsl .= \htmlspecialchars($content, \ENT_NOQUOTES, 'UTF-8');
            }
        }

        return $xsl;
    }
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

/**
 * Builds a regexp character class out of a list of characters
 */
class CharacterClassBuilder
{
    /**
     * @var string[] Characters being worked on, sorted by fromList()
     */
    protected $chars;

    /**
     * @var string Regexp delimiter, escaped along with the other special characters
     */
    public $delimiter = '/';

    /**
     * @var array[] List of [start, end] pairs of indexes into $chars
     */
    protected $ranges;

    /**
     * Create a character class that matches the given list of characters.
     *
     * @param  string[] $chars Characters, either literal or escaped with a backslash
     * @return string          Character class, including its square brackets
     */
    public function fromList(array $chars)
    {
        $this->chars = $chars;

        // The order of these steps matters: ranges are indexes into the sorted list
        $this->unescapeLiterals();
        \sort($this->chars);
        $this->storeRanges();
        $this->reorderDash();
        $this->fixCaret();
        $this->escapeSpecialChars();

        return $this->buildCharacterClass();
    }

    /**
     * Build the character class from the stored ranges.
     *
     * A range is written as "first-last" only if it covers at least four characters;
     * shorter ranges are written out character by character.
     *
     * @return string
     */
    protected function buildCharacterClass()
    {
        $str = '[';
        foreach ($this->ranges as $range)
        {
            list($start, $end) = $range;

            if ($end > $start + 2)
            {
                $str .= $this->chars[$start] . '-' . $this->chars[$end];
            }
            else
            {
                $str .= \implode('', \array_slice($this->chars, $start, $end + 1 - $start));
            }
        }
        $str .= ']';

        return $str;
    }

    /**
     * Prefix the backslash, the closing square bracket and the delimiter with a backslash.
     *
     * @return void
     */
    protected function escapeSpecialChars()
    {
        $specialChars = ['\\', ']', $this->delimiter];
        foreach (\array_intersect($this->chars, $specialChars) as $k => $v)
            $this->chars[$k] = '\\' .
$v;
    }

    /**
     * Make sure the character class does not start with an unescaped caret.
     *
     * Does nothing unless the first range starts at the caret's index. Otherwise the first
     * two ranges are swapped if there is a second range, else the caret is escaped.
     */
    protected function fixCaret()
    {
        // $k is false if there is no caret, in which case the strict comparison below returns early
        $k = \array_search('^', $this->chars, \true);
        if ($this->ranges[0][0] !== $k)
            return;
        if (isset($this->ranges[1])) {
            $range = $this->ranges[0];
            $this->ranges[0] = $this->ranges[1];
            $this->ranges[1] = $range;
        }
        else
            $this->chars[$k] = '\\^';
    }

    /**
     * Move the dash to the front of the ranges.
     *
     * Handles two layouts: the dash stored as a range of its own anywhere but first, and the
     * dash stored as the end of a two-character range that starts with the comma.
     */
    protected function reorderDash()
    {
        $dashIndex = \array_search('-', $this->chars, \true);
        if ($dashIndex === \false)
            return;
        // Dash in a range of its own: move that range to the front (not needed if already first or not found)
        $k = \array_search([$dashIndex, $dashIndex], $this->ranges, \true);
        if ($k > 0) {
            unset($this->ranges[$k]);
            \array_unshift($this->ranges, [$dashIndex, $dashIndex]);
        }
        // Comma-to-dash range: shrink it to the comma alone and put the dash at the front
        $commaIndex = \array_search(',', $this->chars);
        $range = [$commaIndex, $dashIndex];
        $k = \array_search($range, $this->ranges, \true);
        if ($k !== \false) {
            $this->ranges[$k] = [$commaIndex, $commaIndex];
            \array_unshift($this->ranges, [$dashIndex, $dashIndex]);
        }
    }

    /**
     * Compute the ranges of consecutive characters and store them in $this->ranges.
     *
     * Each range is a [start, end] pair of indexes into $this->chars. Entries that are not
     * exactly one byte long get the value false and therefore never extend a range.
     */
    protected function storeRanges()
    {
        $values = [];
        foreach ($this->chars as $char)
            if (\strlen($char) === 1)
                $values[] = \ord($char);
            else
                $values[] = \false;
        // Walk the list backwards, extending each range towards the start of the list
        $i = \count($values) - 1;
        $ranges = [];
        while ($i >= 0) {
            $start = $i;
            $end = $i;
            // The previous value must be exactly one less than the value at $start
            while ($start > 0 && $values[$start - 1] === $values[$end] - ($end + 1 - $start))
                --$start;
            $ranges[] = [$start, $end];
            $i = $start - 1;
        }
        // Ranges were collected last to first
        $this->ranges = \array_reverse($ranges);
    }

    /**
     * Remove the backslash from two-character sequences made of a backslash and a non-letter.
     */
    protected function unescapeLiterals()
    {
        foreach ($this->chars as $k => $char)
            if ($char[0] === '\\' && \preg_match('(^\\\\[^a-z]$)Di', $char))
                $this->chars[$k] = \substr($char, 1);
    }
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use RuntimeException;
use Traversable;
use s9e\TextFormatter\Configurator\ConfigProvider;
use s9e\TextFormatter\Configurator\FilterableConfigValue;
use s9e\TextFormatter\Configurator\JavaScript\Dictionary;

/**
 * Static helpers that convert, filter and optimize config arrays
 */
abstract class ConfigHelper
{
    /**
     * Recursively filter a config array for given target.
     *
     * FilterableConfigValue instances are replaced with the value they return for the target,
     * and the entry is dropped if that value is null. Arrays are filtered recursively.
     *
     * @param  array  $config
     * @param  string $target Target passed to each FilterableConfigValue, 'PHP' by default
     * @return array
     */
    public static function filterConfig(array $config, $target = 'PHP')
    {
        $filteredConfig = [];
        foreach
($config as $name => $value) {
            if ($value instanceof FilterableConfigValue) {
                $value = $value->filterConfig($target);
                // A null result removes the entry from the filtered config
                if (!isset($value))
                    continue;
            }
            if (\is_array($value))
                $value = self::filterConfig($value, $target);
            $filteredConfig[$name] = $value;
        }
        return $filteredConfig;
    }

    /**
     * Find a substring that is common to every string in the list.
     *
     * Substrings of each string are collected longest first and intersected with the
     * candidates that survived the previous strings, so the first surviving candidate
     * is one of the longest common substrings.
     *
     * @param  string[]    $strings
     * @return string|bool          The common substring, or FALSE if there is none
     */
    public static function generateQuickMatchFromList(array $strings)
    {
        foreach ($strings as $string) {
            $stringLen = \strlen($string);
            $substrings = [];
            // Longest substrings first, so that they come first in the array's key order
            for ($len = $stringLen; $len; --$len) {
                $pos = $stringLen - $len;
                do {
                    $substrings[\substr($string, $pos, $len)] = 1;
                } while (--$pos >= 0);
            }
            if (isset($goodStrings)) {
                $goodStrings = \array_intersect_key($goodStrings, $substrings);
                // Nothing in common: no need to look at the remaining strings
                if (empty($goodStrings))
                    break;
            }
            else
                $goodStrings = $substrings;
        }
        if (empty($goodStrings))
            return \false;
        // strval() because numeric-looking keys are stored as integers
        return \strval(\key($goodStrings));
    }

    /**
     * Replace identical sub-arrays with references to a single copy.
     *
     * Works depth first. Sub-arrays are identified by their serialized form.
     *
     * @param  array &$config Config array, modified in place
     * @param  array &$cache  Copies seen so far, keyed by serialized form
     * @return void
     */
    public static function optimizeArray(array &$config, array &$cache = [])
    {
        foreach ($config as $k => &$v) {
            if (!\is_array($v))
                continue;
            self::optimizeArray($v, $cache);
            $cacheKey = \serialize($v);
            if (!isset($cache[$cacheKey]))
                $cache[$cacheKey] = $v;
            // Point this entry at the cached copy
            $config[$k] =& $cache[$cacheKey];
        }
        // Break the reference left over by the loop
        unset($v);
    }

    /**
     * Recursively convert a structure to a plain array.
     *
     * ConfigProvider instances are replaced with their config, arrays and Traversable
     * objects are converted recursively, scalars and NULL are kept as-is. Values that came
     * from a Dictionary instance are wrapped in a new Dictionary after conversion.
     *
     * @param  array|\Traversable $value
     * @param  bool               $keepEmpty Whether to keep empty arrays
     * @param  bool               $keepNull  Whether to keep NULL values
     * @return array
     *
     * @throws RuntimeException if a value is none of the supported types
     */
    public static function toArray($value, $keepEmpty = \false, $keepNull = \false)
    {
        $array = [];
        foreach ($value as $k => $v) {
            // Must be tested before $v is overwritten with its converted form
            $isDictionary = $v instanceof Dictionary;
            if ($v instanceof ConfigProvider)
                $v = $v->asConfig();
            elseif ($v instanceof Traversable || \is_array($v))
                $v = self::toArray($v, $keepEmpty, $keepNull);
            elseif (\is_scalar($v) || \is_null($v))
                // Scalars and NULL need no conversion
                ;
            else {
                $type = (\is_object($v)) ? 'an instance of ' . \get_class($v) : 'a ' . \gettype($v);
                throw new RuntimeException('Cannot convert ' . $type . ' to array');
            }
            if (!isset($v) && !$keepNull)
                continue;
            if (!$keepEmpty && $v === [])
                continue;
            $array[$k] = ($isDictionary) ?
new Dictionary($v) : $v;
        }
        return $array;
    }
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use DOMElement;
use DOMXPath;

/**
 * Answers questions about HTML elements using a static table of per-element properties
 */
class ElementInspector
{
    /**
     * @var array Properties of HTML elements, keyed by element name. Keys read by this class:
     *
     *  - 'c', 'ac', 'dd': raw bitfields (binary strings) returned by getCategoryBitfield(),
     *    getAllowChildBitfield() and getDenyDescendantBitfield() respectively
     *  - a bitfield key followed by a bit number (e.g. 'c3', 'ac26'): XPath condition;
     *    getBitfield() clears that bit if the condition is false for the element
     *  - 'cp': list of element names used by closesParent()
     *  - 'nt', 'b', 'e', 'fe', 'to', 't', 'v', 'pre': flags tested by disallowsText(),
     *    isBlock(), isEmpty(), isFormattingElement(), isTextOnly(), isTransparent(),
     *    isVoid() and preservesWhitespace() respectively
     *  - a flag key followed by a question mark (e.g. 'e?'): XPath condition that must
     *    hold for the flag to apply
     *
     * Elements that are not in the table are treated like 'span' (see getProperties()).
     */
    protected static $htmlElements = [
        'a'=>['c'=>"\17\0\0\0\0\1",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\1",'t'=>1,'fe'=>1],
        'abbr'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'address'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\45",'b'=>1,'cp'=>['p']],
        'article'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
        'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
        'b'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'base'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'bdi'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'bdo'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'blockquote'=>['c'=>"\203",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'body'=>['c'=>"\200\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1],
        'br'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'button'=>['c'=>"\117",'ac'=>"\4",'dd'=>"\10"],
        'canvas'=>['c'=>"\47",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\200",'b'=>1],
        'cite'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'code'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'col'=>['c'=>"\0\0\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\20",'ac20'=>'not(@span)','dd'=>"\0",'nt'=>1,'e'=>1,'e?'=>'@span','b'=>1],
        'data'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\10",'dd'=>"\0"],
        'dd'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['dd','dt']],
        'del'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'details'=>['c'=>"\213",'ac'=>"\1\0\0\2",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'dfn'=>['c'=>"\7\0\0\0\40",'ac'=>"\4",'dd'=>"\0\0\0\0\40"],
        'div'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'dt'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['dd','dt']],
        'em'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'embed'=>['c'=>"\57",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'fieldset'=>['c'=>"\303",'ac'=>"\1\0\0\20",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'figcaption'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'figure'=>['c'=>"\203",'ac'=>"\1\0\0\0\0\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'footer'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
        'form'=>['c'=>"\3\0\0\0\20",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
        'h1'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h2'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h3'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h4'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h5'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'h6'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'head'=>['c'=>"\0\0\4",'ac'=>"\20",'dd'=>"\0",'nt'=>1,'b'=>1],
        'header'=>['c'=>"\3\40\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
        'hr'=>['c'=>"\1\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
        'html'=>['c'=>"\0",'ac'=>"\0\0\4",'dd'=>"\0",'nt'=>1,'b'=>1],
        'i'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'iframe'=>['c'=>"\57",'ac'=>"\4",'dd'=>"\0"],
        'img'=>['c'=>"\57\20\10",'c3'=>'@usemap','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'input'=>['c'=>"\17\20",'c3'=>'@type!="hidden"','c12'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'ins'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
        'kbd'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'keygen'=>['c'=>"\117",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
        'label'=>['c'=>"\17\20\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
        'legend'=>['c'=>"\0\0\0\20",'ac'=>"\4",'dd'=>"\0",'b'=>1],
        'li'=>['c'=>"\0\0\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['li']],
        'link'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'main'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'mark'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'media element'=>['c'=>"\0\0\0\0\0\2",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
        'menu'=>['c'=>"\1\100",'ac'=>"\0\300",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'menuitem'=>['c'=>"\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'meta'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
        'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
        'noscript'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
        'object'=>['c'=>"\147",'ac'=>"\0\0\0\0\1",'dd'=>"\0",'t'=>1],
        'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\10",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
        'option'=>['c'=>"\0\0\2\10",'ac'=>"\0",'dd'=>"\0",'b'=>1,'cp'=>['option']],
        'output'=>['c'=>"\107",'ac'=>"\4",'dd'=>"\0"],
        'p'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'param'=>['c'=>"\0\0\0\0\1",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'picture'=>['c'=>"\45",'ac'=>"\0\200\10",'dd'=>"\0",'nt'=>1],
        'pre'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'pre'=>1,'b'=>1,'cp'=>['p']],
        'progress'=>['c'=>"\7\0\1\1",'ac'=>"\4",'dd'=>"\0\0\0\1"],
        'q'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'rb'=>['c'=>"\0\10",'ac'=>"\4",'dd'=>"\0",'b'=>1],
        'rp'=>['c'=>"\0\10\100",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
        'rt'=>['c'=>"\0\10\100",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
        'rtc'=>['c'=>"\0\10",'ac'=>"\4\0\100",'dd'=>"\0",'b'=>1],
        'ruby'=>['c'=>"\7",'ac'=>"\4\10",'dd'=>"\0"],
        's'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'samp'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'script'=>['c'=>"\25\200",'ac'=>"\0",'dd'=>"\0",'to'=>1],
        'section'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
        'select'=>['c'=>"\117",'ac'=>"\0\200\2",'dd'=>"\0",'nt'=>1],
        'small'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'source'=>['c'=>"\0\0\10\4",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'span'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'strong'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'style'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
        'sub'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'summary'=>['c'=>"\0\0\0\2",'ac'=>"\4\1",'dd'=>"\0",'b'=>1],
        'sup'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'table'=>['c'=>"\3\0\0\200",'ac'=>"\0\202",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','tfoot','th','thead','tr']],
        'td'=>['c'=>"\200\0\40",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['td','th']],
        'template'=>['c'=>"\25\200\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
        'textarea'=>['c'=>"\117",'ac'=>"\0",'dd'=>"\0",'pre'=>1,'to'=>1],
        'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
        'th'=>['c'=>"\0\0\40",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['td','th']],
        'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1],
        'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime','dd'=>"\0"],
        'title'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
        'tr'=>['c'=>"\0\2\0\0\100",'ac'=>"\0\200\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
        'track'=>['c'=>"\0\0\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
        'u'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
        'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
        'var'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
        'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
        'wbr'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1]
    ];
/**
     * Test whether given child element closes given parent element.
     *
     * The "cp" list is stored on the element that does the closing and names the open
     * elements it closes (e.g. "tr" lists "td", "th" and "tr"). The lookup is therefore
     * keyed by the child's name and searched for the parent's name.
     *
     * @param  DOMElement $child
     * @param  DOMElement $parent
     * @return bool
     */
    public static function closesParent(DOMElement $child, DOMElement $parent)
    {
        $parentName = $parent->nodeName;
        $childName  = $child->nodeName;

        return !empty(self::$htmlElements[$childName]['cp'])
            && \in_array($parentName, self::$htmlElements[$childName]['cp'], \true);
    }

    /**
     * Test whether given element has the "nt" (no text) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function disallowsText(DOMElement $element)
    {
        return self::hasProperty($element, 'nt');
    }

    /**
     * Return the "ac" (allow child) bitfield for given element.
     *
     * @param  DOMElement $element
     * @return string              Raw bitfield
     */
    public static function getAllowChildBitfield(DOMElement $element)
    {
        return self::getBitfield($element, 'ac');
    }

    /**
     * Return the "c" (category) bitfield for given element.
     *
     * @param  DOMElement $element
     * @return string              Raw bitfield
     */
    public static function getCategoryBitfield(DOMElement $element)
    {
        return self::getBitfield($element, 'c');
    }

    /**
     * Return the "dd" (deny descendant) bitfield for given element.
     *
     * @param  DOMElement $element
     * @return string              Raw bitfield
     */
    public static function getDenyDescendantBitfield(DOMElement $element)
    {
        return self::getBitfield($element, 'dd');
    }

    /**
     * Test whether given element has the "b" (block) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isBlock(DOMElement $element)
    {
        return self::hasProperty($element, 'b');
    }

    /**
     * Test whether given element has the "e" (empty) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isEmpty(DOMElement $element)
    {
        return self::hasProperty($element, 'e');
    }

    /**
     * Test whether given element has the "fe" (formatting element) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isFormattingElement(DOMElement $element)
    {
        return self::hasProperty($element, 'fe');
    }

    /**
     * Test whether given element has the "to" (text only) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isTextOnly(DOMElement $element)
    {
        return self::hasProperty($element, 'to');
    }

    /**
     * Test whether given element has the "t" (transparent) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isTransparent(DOMElement $element)
    {
        return self::hasProperty($element, 't');
    }

    /**
     * Test whether given element has the "v" (void) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function isVoid(DOMElement $element)
    {
        return self::hasProperty($element, 'v');
    }

    /**
     * Test whether given element has the "pre" (preserves whitespace) property.
     *
     * @param  DOMElement $element
     * @return bool
     */
    public static function preservesWhitespace(DOMElement $element)
    {
        return self::hasProperty($element, 'pre');
    }

    /**
     * Evaluate an XPath query as a boolean, using given element as context node.
     *
     * @param  string     $query
     * @param  DOMElement $element
     * @return bool
     */
    protected static function evaluate($query, DOMElement $element)
    {
        $xpath = new DOMXPath($element->ownerDocument);

        return $xpath->evaluate('boolean(' . $query . ')', $element);
    }

    /**
     * Return one of an element's bitfields, with conditional bits resolved.
     *
     * For every bit that is set, a property named after the bitfield and the bit number
     * (e.g. "c3") may hold an XPath condition. The bit is cleared if that condition is
     * false for the element.
     *
     * @param  DOMElement $element
     * @param  string     $name    Name of the bitfield: "c", "ac" or "dd"
     * @return string              Raw bitfield
     */
    protected static function getBitfield(DOMElement $element, $name)
    {
        $props    = self::getProperties($element);
        $bitfield = self::toBin($props[$name]);

        // array_filter() drops the "0" characters, leaving the positions of the bits that are set
        foreach (\array_keys(\array_filter(\str_split($bitfield, 1))) as $bitNumber)
        {
            $conditionName = $name . $bitNumber;
            if (isset($props[$conditionName]) && !self::evaluate($props[$conditionName], $element))
            {
                $bitfield[$bitNumber] = '0';
            }
        }

        return self::toRaw($bitfield);
    }

    /**
     * Return the properties of given element, or those of "span" if the element is unknown.
     *
     * @param  DOMElement $element
     * @return array
     */
    protected static function getProperties(DOMElement $element)
    {
        return (isset(self::$htmlElements[$element->nodeName]))
             ? self::$htmlElements[$element->nodeName]
             : self::$htmlElements['span'];
    }

    /**
     * Test whether given element has given property.
     *
     * If the table holds a condition for the property (property name followed by "?"),
     * the condition must hold for the element too.
     *
     * @param  DOMElement $element
     * @param  string     $propName
     * @return bool
     */
    protected static function hasProperty(DOMElement $element, $propName)
    {
        $props = self::getProperties($element);

        return !empty($props[$propName])
            && (!isset($props[$propName . '?']) || self::evaluate($props[$propName . '?'], $element));
    }

    /**
     * Convert a raw string to a string of "0" and "1" characters.
     *
     * Each byte yields eight characters, least significant bit first.
     *
     * @param  string $raw
     * @return string
     */
    protected static function toBin($raw)
    {
        $bin = '';
        foreach (\str_split($raw, 1) as $char)
        {
            $bin .= \strrev(\substr('0000000' . \decbin(\ord($char)), -8));
        }

        return $bin;
    }

    /**
     * Convert a string of "0" and "1" characters back to a raw string (reverse of toBin()).
     *
     * @param  string $bin
     * @return string
     */
    protected static function toRaw($bin)
    {
        return \implode('', \array_map('chr', \array_map('bindec', \array_map('strrev', \str_split($bin, 8)))));
    }
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use RuntimeException;

/**
 * Builds a regexp that matches any word from a list
 */
abstract class RegexpBuilder
{
    /**
     * @var CharacterClassBuilder Instance created by fromList() and used by generateCharacterClass()
     */
    protected static $characterClassBuilder;

    /**
     * Create a regexp pattern that matches a list of words.
     *
     * Options:
     *  - delimiter (default "/"): regexp delimiter, escaped in the pattern
     *  - caseInsensitive (default false): whether to lowercase ASCII letters before processing
     *  - specialChars (default none): map of characters to the string that represents them
     *  - unicode (default true): whether to split the words into UTF-8 characters rather than bytes
     *  - useLookahead (default false): whether to prepend a lookahead made of the words' initials
     *
     * @param  string[] $words
     * @param  array    $options
     * @return string            Pattern without delimiters, or an empty string if there are no words
     *
     * @throws RuntimeException if a word cannot be split into characters
     */
    public static function fromList(array $words, array $options = [])
    {
        if (empty($words))
            return '';

        $options += [
            'delimiter'       => '/',
            'caseInsensitive' => \false,
            'specialChars'    => [],
            'unicode'         => \true,
            'useLookahead'    => \false
        ];

        if ($options['caseInsensitive'])
        {
            // Only ASCII letters are folded
            foreach ($words as &$word)
                $word = \strtr(
                    $word,
                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                    'abcdefghijklmnopqrstuvwxyz'
                );
            unset($word);
        }

        $words = \array_unique($words);
        \sort($words);

        $initials = [];

        // Map of characters to their representation. += keeps existing keys, so caller-supplied
        // special chars take precedence over the delimiter, which takes precedence over the rest
        $esc  = $options['specialChars'];
        $esc += [$options['delimiter'] => '\\' . $options['delimiter']];

        // These characters are kept as-is instead of going through preg_quote()
        $esc += [
            '!' => '!',
            '-' => '-',
            ':' => ':',
            '<' => '<',
            '=' => '=',
            '>' => '>',
            '}' => '}'
        ];

        $splitWords = [];
        foreach ($words as $word)
        {
            $regexp = ($options['unicode']) ?
'(.)us' : '(.)s';
            if (\preg_match_all($regexp, $word, $matches) === \false)
                throw new RuntimeException("Invalid UTF-8 string '" . $word . "'");
            $splitWord = [];
            foreach ($matches[0] as $pos => $c) {
                // Characters without a known representation are escaped once and cached
                if (!isset($esc[$c]))
                    $esc[$c] = \preg_quote($c);
                if ($pos === 0)
                    $initials[] = $esc[$c];
                $splitWord[] = $esc[$c];
            }
            $splitWords[] = $splitWord;
        }
        self::$characterClassBuilder = new CharacterClassBuilder;
        self::$characterClassBuilder->delimiter = $options['delimiter'];
        $regexp = self::assemble([self::mergeChains($splitWords)]);
        // The lookahead is skipped if the regexp already starts with a character class
        if ($options['useLookahead'] && \count($initials) > 1 && $regexp[0] !== '[') {
            $useLookahead = \true;
            foreach ($initials as $initial)
                if (!self::canBeUsedInCharacterClass($initial)) {
                    $useLookahead = \false;
                    break;
                }
            if ($useLookahead)
                $regexp = '(?=' . self::generateCharacterClass($initials) . ')' . $regexp;
        }
        return $regexp;
    }

    /**
     * Merge a list of chains into a single chain.
     *
     * A chain is a list of atoms, each atom being a string. The common prefix and suffix are
     * extracted first; whatever remains in the middle is grouped by first atom and either
     * merged recursively or assembled into one atom.
     *
     * @param  array[] $chains
     * @param  bool    $preventRemerge Whether to skip the recursive merge of groups
     * @return string[]                The merged chain
     */
    protected static function mergeChains(array $chains, $preventRemerge = \false)
    {
        // A single chain needs no merging
        if (!isset($chains[1]))
            return $chains[0];
        $mergedChain = self::removeLongestCommonPrefix($chains);
        // If every chain is now empty, the prefix is the whole result
        if (!isset($chains[0][0]) && !\array_filter($chains))
            return $mergedChain;
        $suffix = self::removeLongestCommonSuffix($chains);
        if (isset($chains[1])) {
            self::optimizeDotChains($chains);
            self::optimizeCatchallChains($chains);
        }
        $endOfChain = \false;
        $remerge = \false;
        // Group the chains by their first atom; an empty chain marks the remainder as optional
        $groups = [];
        foreach ($chains as $chain) {
            if (!isset($chain[0])) {
                $endOfChain = \true;
                continue;
            }
            $head = $chain[0];
            if (isset($groups[$head]))
                $remerge = \true;
            $groups[$head][] = $chain;
        }
        // Collect the groups made of one single-atom chain that fits in a character class
        $characterClass = [];
        foreach ($groups as $head => $groupChains) {
            // Array keys that look like integers are cast by PHP, cast them back
            $head = (string) $head;
            if ($groupChains === [[$head]] && self::canBeUsedInCharacterClass($head))
                $characterClass[$head] = $head;
        }
        \sort($characterClass);
        // With two or more such atoms, replace their groups with one character class group placed first
        if (isset($characterClass[1])) {
            foreach ($characterClass as $char)
                unset($groups[$char]);
            $head = self::generateCharacterClass($characterClass);
            $groups[$head][] = [$head];
            $groups = [$head => $groups[$head]] + $groups;
        }
        if ($remerge && !$preventRemerge) {
            $mergedChains
                = [];
            foreach ($groups as $head => $groupChains)
                $mergedChains[] = self::mergeChains($groupChains);
            self::mergeTails($mergedChains);
            // The second pass is told not to remerge
            $regexp = \implode('', self::mergeChains($mergedChains, \true));
            if ($endOfChain)
                $regexp = self::makeRegexpOptional($regexp);
            $mergedChain[] = $regexp;
        }
        else {
            self::mergeTails($chains);
            $mergedChain[] = self::assemble($chains);
        }
        foreach ($suffix as $atom)
            $mergedChain[] = $atom;
        return $mergedChain;
    }

    /**
     * Merge the chains that share the same tail, then reindex the list.
     *
     * @param array[] &$chains
     */
    protected static function mergeTails(array &$chains)
    {
        self::mergeTailsCC($chains);
        self::mergeTailsAltern($chains);
        $chains = \array_values($chains);
    }

    /**
     * Merge the two-atom chains that share their second atom and whose first atom can be
     * used in a character class.
     *
     * @param array[] &$chains
     */
    protected static function mergeTailsCC(array &$chains)
    {
        $groups = [];
        foreach ($chains as $k => $chain)
            // Exactly two atoms: index 1 exists, index 2 does not
            if (isset($chain[1]) && !isset($chain[2]) && self::canBeUsedInCharacterClass($chain[0]))
                $groups[$chain[1]][$k] = $chain;
        foreach ($groups as $groupChains) {
            if (\count($groupChains) < 2)
                continue;
            // Replace the grouped chains with their merged form
            $chains = \array_diff_key($chains, $groupChains);
            $chains[] = self::mergeChains(\array_values($groupChains));
        }
    }

    /**
     * Merge the chains that share their last atom, if the result is shorter.
     *
     * @param array[] &$chains
     */
    protected static function mergeTailsAltern(array &$chains)
    {
        $groups = [];
        foreach ($chains as $k => $chain)
            if (!empty($chain)) {
                $tail = \array_slice($chain, -1);
                $groups[$tail[0]][$k] = $chain;
            }
        foreach ($groups as $tail => $groupChains) {
            if (\count($groupChains) < 2)
                continue;
            $mergedChain = self::mergeChains(\array_values($groupChains));
            // Compare the combined length of the atoms before and after the merge
            $oldLen = 0;
            foreach ($groupChains as $groupChain)
                $oldLen += \array_sum(\array_map('strlen', $groupChain));
            if ($oldLen <= \array_sum(\array_map('strlen', $mergedChain)))
                continue;
            $chains = \array_diff_key($chains, $groupChains);
            $chains[] = $mergedChain;
        }
    }

    /**
     * Remove the longest common prefix from a list of chains.
     *
     * @param  array[] &$chains Chains, modified in place
     * @return string[]         The removed prefix
     */
    protected static function removeLongestCommonPrefix(array &$chains)
    {
        $pLen = 0;
        while (1) {
            $c = \null;
            foreach ($chains as $chain) {
                // Stop at the end of the shortest chain
                if (!isset($chain[$pLen]))
                    break 2;
                if (!isset($c)) {
                    $c = $chain[$pLen];
                    continue;
                }
                if ($chain[$pLen] !== $c)
                    break 2;
            }
            ++$pLen;
        }
        if (!$pLen)
            return [];
        $prefix = \array_slice($chains[0], 0, $pLen);
        foreach ($chains as &$chain)
            $chain =
                \array_slice($chain, $pLen);
        unset($chain);
        return $prefix;
    }

    /**
     * Remove the longest common suffix from a list of chains.
     *
     * The suffix is never longer than the shortest chain, and one atom shorter than that
     * if all the chains have the same length.
     *
     * @param  array[] &$chains Chains, modified in place
     * @return string[]         The removed suffix
     */
    protected static function removeLongestCommonSuffix(array &$chains)
    {
        $chainsLen = \array_map('count', $chains);
        $maxLen = \min($chainsLen);
        if (\max($chainsLen) === $maxLen)
            --$maxLen;
        $sLen = 0;
        while ($sLen < $maxLen) {
            $c = \null;
            foreach ($chains as $k => $chain) {
                // Position of the atom that is $sLen atoms away from the end of this chain
                $pos = $chainsLen[$k] - ($sLen + 1);
                if (!isset($c)) {
                    $c = $chain[$pos];
                    continue;
                }
                if ($chain[$pos] !== $c)
                    break 2;
            }
            ++$sLen;
        }
        if (!$sLen)
            return [];
        $suffix = \array_slice($chains[0], -$sLen);
        foreach ($chains as &$chain)
            $chain = \array_slice($chain, 0, -$sLen);
        unset($chain);
        return $suffix;
    }

    /**
     * Assemble a list of chains into one regexp.
     *
     * Single-atom chains that fit in a character class are combined into one class placed
     * first. Multiple alternatives are wrapped in a group. An empty chain makes the whole
     * expression optional.
     *
     * @param  array[] $chains
     * @return string
     */
    protected static function assemble(array $chains)
    {
        $endOfChain = \false;
        $regexps = [];
        $characterClass = [];
        foreach ($chains as $chain) {
            if (empty($chain)) {
                $endOfChain = \true;
                continue;
            }
            if (!isset($chain[1]) && self::canBeUsedInCharacterClass($chain[0]))
                $characterClass[$chain[0]] = $chain[0];
            else
                $regexps[] = \implode('', $chain);
        }
        if (!empty($characterClass)) {
            \sort($characterClass);
            // A lone character is used as-is rather than as a one-character class
            $regexp = (isset($characterClass[1])) ? self::generateCharacterClass($characterClass) : $characterClass[0];
            \array_unshift($regexps, $regexp);
        }
        if (empty($regexps))
            return '';
        if (isset($regexps[1])) {
            $regexp = \implode('|', $regexps);
            // Atomic group when possible, non-capturing group otherwise
            $regexp = ((self::canUseAtomicGrouping($regexp)) ? '(?>' : '(?:') . $regexp . ')';
        }
        else
            $regexp = $regexps[0];
        if ($endOfChain)
            $regexp = self::makeRegexpOptional($regexp);
        return $regexp;
    }

    /**
     * Make an expression optional.
     *
     * Shortcuts are tried first: a dot followed by a plus sign (lazy or not) has its plus
     * replaced with an asterisk, an atom followed by optional repetitions of itself gets a
     * question mark after the first occurrence, and a lone assertion becomes an empty string.
     * Otherwise a question mark is appended, after wrapping the expression in a group
     * unless it is found to be a single unit.
     *
     * @param  string $regexp
     * @return string
     */
    protected static function makeRegexpOptional($regexp)
    {
        if (\preg_match('#^\\.\\+\\??$#', $regexp))
            return \str_replace('+', '*', $regexp);
        if (\preg_match('#^(\\\\?.)((?:\\1\\?)+)$#Du', $regexp, $m))
            return $m[1] . '?' . $m[2];
        if (\preg_match('#^(?:[$^]|\\\\[bBAZzGQEK])$#', $regexp))
            return '';
        // A single character, optionally escaped, needs no group
        if (\preg_match('#^\\\\?.$#Dus', $regexp))
            $isAtomic = \true;
        // Anything else that does not start with a bracket or a parenthesis needs one
        elseif (\preg_match('#^[^[(].#s', $regexp))
            $isAtomic = \false;
        else {
            $def = RegexpParser::parse('#' . $regexp .
                '#');
            $tokens = $def['tokens'];
            switch (\count($tokens)) {
                case 1:
                    // One token that spans the whole expression
                    $startPos = $tokens[0]['pos'];
                    $len = $tokens[0]['len'];
                    $isAtomic = (bool) ($startPos === 0 && $len === \strlen($regexp));
                    if ($isAtomic && $tokens[0]['type'] === 'characterClass') {
                        // Drop the class's quantifier; any quantifier other than a lone question mark becomes an asterisk
                        $regexp = \rtrim($regexp, '+*?');
                        if (!empty($tokens[0]['quantifiers']) && $tokens[0]['quantifiers'] !== '?')
                            $regexp .= '*';
                    }
                    break;
                case 2:
                    // A non-capturing group that spans the whole expression
                    if ($tokens[0]['type'] === 'nonCapturingSubpatternStart' && $tokens[1]['type'] === 'nonCapturingSubpatternEnd') {
                        $startPos = $tokens[0]['pos'];
                        $len = $tokens[1]['pos'] + $tokens[1]['len'];
                        $isAtomic = (bool) ($startPos === 0 && $len === \strlen($regexp));
                        break;
                    }
                    // no break: two tokens of any other type fall through to the default
                default:
                    $isAtomic = \false;
            }
        }
        if (!$isAtomic)
            $regexp = ((self::canUseAtomicGrouping($regexp)) ? '(?>' : '(?:') . $regexp . ')';
        $regexp .= '?';
        return $regexp;
    }

    /**
     * Generate a character class from a list of characters, using the builder set by fromList().
     *
     * @param  string[] $chars
     * @return string
     */
    protected static function generateCharacterClass(array $chars)
    {
        return self::$characterClassBuilder->fromList($chars);
    }

    /**
     * Test whether a string can be used inside a character class.
     *
     * Accepted: a listed escape sequence, a backslash followed by a non-alphanumeric
     * character, or a single character that is either left untouched by preg_quote() or
     * is one of the characters in the last pattern below.
     *
     * @param  string $char
     * @return bool
     */
    protected static function canBeUsedInCharacterClass($char)
    {
        if (\preg_match('#^\\\\[aefnrtdDhHsSvVwW]$#D', $char))
            return \true;
        if (\preg_match('#^\\\\[^A-Za-z0-9]$#Dus', $char))
            return \true;
        // More than one character
        if (\preg_match('#..#Dus', $char))
            return \false;
        if (\preg_quote($char) !== $char && !\preg_match('#^[-!:<=>}]$#D', $char))
            return \false;
        return \true;
    }

    /**
     * Remove the chains that are already covered by a chain that contains a dot.
     *
     * Chains that contain an optional dot are first split into two chains: one with a
     * plain dot and one without the atom.
     *
     * @param array[] &$chains
     */
    protected static function optimizeDotChains(array &$chains)
    {
        // Multi-character atoms that a dot can stand for
        $validAtoms = [
            '\\d' => 1, '\\D' => 1, '\\h' => 1, '\\H' => 1, '\\s' => 1, '\\S' => 1, '\\v' => 1, '\\V' => 1, '\\w' => 1, '\\W' => 1, '\\^' => 1, '\\$' => 1, '\\.' => 1, '\\?'
            => 1, '\\[' => 1, '\\]' => 1, '\\(' => 1, '\\)' => 1, '\\+' => 1, '\\*' => 1, '\\\\' => 1
        ];
        // Each pass expands the first optional dot of every chain; repeat while some chain had more than one
        do {
            $hasMoreDots = \false;
            foreach ($chains as $k1 => $dotChain) {
                $dotKeys = \array_keys($dotChain, '.?', \true);
                if (!empty($dotKeys)) {
                    // Store the variant with a plain dot in place of the original
                    $dotChain[$dotKeys[0]] = '.';
                    $chains[$k1] = $dotChain;
                    // Append the variant without the atom
                    \array_splice($dotChain, $dotKeys[0], 1);
                    $chains[] = $dotChain;
                    if (isset($dotKeys[1]))
                        $hasMoreDots = \true;
                }
            }
        } while ($hasMoreDots);
        foreach ($chains as $k1 => $dotChain) {
            $dotKeys = \array_keys($dotChain, '.', \true);
            if (empty($dotKeys))
                continue;
            foreach ($chains as $k2 => $tmpChain) {
                if ($k2 === $k1)
                    continue;
                // Replace the atoms at the dot positions with dots, skipping the chain if one cannot be replaced
                foreach ($dotKeys as $dotKey) {
                    if (!isset($tmpChain[$dotKey]))
                        continue 2;
                    if (!\preg_match('#^.$#Du', \preg_quote($tmpChain[$dotKey])) && !isset($validAtoms[$tmpChain[$dotKey]]))
                        continue 2;
                    $tmpChain[$dotKey] = '.';
                }
                // Identical once dotted: the dot chain already matches whatever this chain matches
                if ($tmpChain === $dotChain)
                    unset($chains[$k2]);
            }
        }
    }

    protected static function optimizeCatchallChains(array &$chains)
    {
        $precedence = [ '.*' => 3, '.*?' => 2, '.+' => 1, '.+?'
=> 0 ]; $tails = []; foreach ($chains as $k => $chain) { if (!isset($chain[0])) continue; $head = $chain[0]; if (!isset($precedence[$head])) continue; $tail = \implode('', \array_slice($chain, 1)); if (!isset($tails[$tail]) || $precedence[$head] > $tails[$tail]['precedence']) $tails[$tail] = [ 'key' => $k, 'precedence' => $precedence[$head] ]; } $catchallChains = []; foreach ($tails as $tail => $info) $catchallChains[$info['key']] = $chains[$info['key']]; foreach ($catchallChains as $k1 => $catchallChain) { $headExpr = $catchallChain[0]; $tailExpr = \false; $match = \array_slice($catchallChain, 1); if (isset($catchallChain[1]) && isset($precedence[\end($catchallChain)])) $tailExpr = \array_pop($match); $matchCnt = \count($match); foreach ($chains as $k2 => $chain) { if ($k2 === $k1) continue; $start = 0; $end = \count($chain); if ($headExpr[1] === '+') { $found = \false; foreach ($chain as $start => $atom) if (self::matchesAtLeastOneCharacter($atom)) { $found = \true; break; } if (!$found) continue; } if ($tailExpr === \false) $end = $start; else { if ($tailExpr[1] === '+') { $found = \false; while (--$end > $start) if (self::matchesAtLeastOneCharacter($chain[$end])) { $found = \true; break; } if (!$found) continue; } $end -= $matchCnt; } while ($start <= $end) { if (\array_slice($chain, $start, $matchCnt) === $match) { unset($chains[$k2]); break; } ++$start; } } } } protected static function matchesAtLeastOneCharacter($expr) { if (\preg_match('#^[$*?^]$#', $expr)) return \false; if (\preg_match('#^.$#u', $expr)) return \true; if (\preg_match('#^.\\+#u', $expr)) return \true; if (\preg_match('#^\\\\[^bBAZzGQEK1-9](?![*?])#', $expr)) return \true; return \false; } protected static function canUseAtomicGrouping($expr) { if (\preg_match('#(?\\\\\\\\)*\\.#', $expr)) return \false; if (\preg_match('#(?\\\\\\\\)*[+*]#', $expr)) return \false; if (\preg_match('#(?\\\\\\\\)*\\(?(?\\\\\\\\)*\\\\[a-z0-9]#', $expr)) return \false; return \true; } } /* * @package 
s9e\TextFormatter * @copyright Copyright (c) 2010-2017 The s9e Authors * @license http://www.opensource.org/licenses/mit-license.php The MIT License */
namespace s9e\TextFormatter\Configurator\Helpers;

use s9e\TextFormatter\Configurator\Collections\Ruleset;
use s9e\TextFormatter\Configurator\Collections\TagCollection;

abstract class RulesHelper
{
	/**
	* Compute the bit number and the "allowed" bitfields of every usable tag
	*
	* Tags that every context treats the same way share one bit number.
	*
	* @param  TagCollection $tags      Tags, whose rules are read
	* @param  Ruleset       $rootRules Rules that apply at the root of the text
	* @return array                    Array with a "root" element and a "tags" element
	*/
	public static function getBitfields(TagCollection $tags, Ruleset $rootRules)
	{
		// Gather every ruleset as a plain array, the root first
		$ruleSets = ['*root*' => \iterator_to_array($rootRules)];
		foreach ($tags as $tagName => $tag)
		{
			$ruleSets[$tagName] = \iterator_to_array($tag->rules);
		}

		$matrix = self::unrollRules($ruleSets);
		self::pruneMatrix($matrix);

		// Group tags by the string of 0s and 1s describing how every context treats them
		$groups = [];
		foreach (\array_keys($matrix) as $tagName)
		{
			if ($tagName === '*root*')
			{
				continue;
			}

			$signature = '';
			foreach ($matrix as $row)
			{
				$signature .= $row['allowedChildren'][$tagName];
				$signature .= $row['allowedDescendants'][$tagName];
			}

			$groups[$signature][] = $tagName;
		}

		// Give each group one bit number and remember one representative tag per bit
		$bitTag     = [];
		$tagsConfig = [];
		foreach (\array_values($groups) as $bitNumber => $tagNames)
		{
			foreach ($tagNames as $tagName)
			{
				$tagsConfig[$tagName]['bitNumber'] = $bitNumber;
			}

			// The last tag of the group represents it
			$bitTag[$bitNumber] = \end($tagNames);
		}

		// Build the packed bitfields of each context, the root included
		foreach ($matrix as $tagName => $row)
		{
			$children    = '';
			$descendants = '';
			foreach ($bitTag as $targetName)
			{
				$children    .= $row['allowedChildren'][$targetName];
				$descendants .= $row['allowedDescendants'][$targetName];
			}

			$tagsConfig[$tagName]['allowed'] = self::pack($children, $descendants);
		}

		// The root's config is returned separately from the tags'
		$rootConfig = $tagsConfig['*root*'];
		unset($tagsConfig['*root*']);

		return [
			'root' => $rootConfig,
			'tags' => $tagsConfig
		];
	}

	/**
	* Create a matrix in which nothing is allowed
	*
	* @param  array $rules Rules, keyed by tag name
	* @return array        Matrix with one zero-filled row of each kind per tag
	*/
	protected static function initMatrix(array $rules)
	{
		$emptyRow = \array_fill_keys(\array_keys($rules), 0);

		$matrix = [];
		foreach ($rules as $tagName => $unused)
		{
			$matrix[$tagName]['allowedChildren']    = $emptyRow;
			$matrix[$tagName]['allowedDescendants'] = $emptyRow;
		}

		return $matrix;
	}

	/**
	* Write one value into the matrix for every target of a given rule
	*
	* @param  array  &$matrix   Matrix, modified in place
	* @param  array  $rules     Rules, keyed by tag name
	* @param  string $ruleName  Name of the rule to apply
	* @param  string $key       Matrix row to modify: "allowedChildren" or "allowedDescendants"
	* @param  int    $value     Value written for each target
	* @return void
	*/
	protected static function applyTargetedRule(array &$matrix, $rules, $ruleName, $key, $value)
	{
		foreach ($rules as $tagName => $tagRules)
		{
			if (isset($tagRules[$ruleName]))
			{
				foreach ($tagRules[$ruleName] as $targetName)
				{
					$matrix[$tagName][$key][$targetName] = $value;
				}
			}
		}
	}

	/**
	* Turn the rules into a matrix of allowed children and descendants
	*
	* @param  array $rules Rules, keyed by tag name
	* @return array        Matrix of 0s and 1s
	*/
	protected static function unrollRules(array $rules)
	{
		$matrix   = self::initMatrix($rules);
		$allNames = \array_keys($rules);

		foreach ($rules as $tagName => $tagRules)
		{
			// ignoreTags is expressed as denying every tag as child and as descendant
			if (!empty($tagRules['ignoreTags']))
			{
				$rules[$tagName]['denyChild']      = $allNames;
				$rules[$tagName]['denyDescendant'] = $allNames;
			}

			// requireParent is expressed as a denyChild rule on every other tag
			if (!empty($tagRules['requireParent']))
			{
				foreach (\array_diff($allNames, $tagRules['requireParent']) as $parentName)
				{
					$rules[$parentName]['denyChild'][] = $tagName;
				}
			}
		}

		// Allow rules are written first so that deny rules overwrite them
		$passes = [
			['allowChild',      'allowedChildren',    1],
			['allowDescendant', 'allowedDescendants', 1],
			['denyChild',       'allowedChildren',    0],
			['denyDescendant',  'allowedDescendants', 0]
		];
		foreach ($passes as $pass)
		{
			self::applyTargetedRule($matrix, $rules, $pass[0], $pass[1], $pass[2]);
		}

		return $matrix;
	}

	/**
	* Remove from the matrix the tags that cannot be reached from the root
	*
	* @param  array &$matrix Matrix, modified in place
	* @return void
	*/
	protected static function pruneMatrix(array &$matrix)
	{
		$usableTags = ['*root*' => 1];

		// Walk the tree of allowed children, one level per iteration
		$frontier = $usableTags;
		do
		{
			$children = [];
			foreach ($frontier as $tagName => $unused)
			{
				$children += \array_filter($matrix[$tagName]['allowedChildren']);
			}

			// Keep only the tags that are new and that exist in the matrix
			$frontier = \array_intersect_key(
				\array_diff_key($children, $usableTags),
				$matrix
			);
			$usableTags += $frontier;
		}
		while (!empty($frontier));

		// Drop unusable tags as contexts
		$matrix = \array_intersect_key($matrix, $usableTags);
		unset($usableTags['*root*']);

		// Drop unusable tags, and the root, as targets
		foreach ($matrix as $tagName => &$row)
		{
			$row['allowedChildren']    = \array_intersect_key($row['allowedChildren'], $usableTags);
			$row['allowedDescendants'] = \array_intersect_key($row['allowedDescendants'], $usableTags);
		}
		unset($row);
	}

	protected static function pack($allowedChildren, $allowedDescendants) { $allowedChildren = \str_split($allowedChildren, 8); $allowedDescendants = \str_split($allowedDescendants, 8); 
$allowed = []; foreach (\array_keys($allowedChildren) as $k) $allowed[] = \bindec(\sprintf( '%1$08s%2$08s', \strrev($allowedDescendants[$k]), \strrev($allowedChildren[$k]) )); return $allowed; } }

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use DOMAttr;
use DOMCharacterData;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMProcessingInstruction;
use DOMXPath;
use RuntimeException;
use s9e\TextFormatter\Configurator\Exceptions\InvalidXslException;
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder;

abstract class TemplateHelper
{
	/**
	* XSL namespace
	*/
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	/**
	* Return the attributes, and the XSL nodes creating attributes, whose name matches a regexp
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp the attribute name is matched against
	* @return DOMNode[]           Attributes, xsl:attribute elements and xsl:copy-of elements
	*/
	public static function getAttributesByRegexp(DOMDocument $dom, $regexp)
	{
		$xpath = new DOMXPath($dom);
		$nodes = [];

		// Literal attributes
		foreach ($xpath->query('//@*') as $attribute)
		{
			if (\preg_match($regexp, $attribute->name))
			{
				$nodes[] = $attribute;
			}
		}

		// <xsl:attribute name="..."/>
		foreach ($xpath->query('//xsl:attribute') as $attribute)
		{
			if (\preg_match($regexp, $attribute->getAttribute('name')))
			{
				$nodes[] = $attribute;
			}
		}

		// <xsl:copy-of select="@foo"/>
		foreach ($xpath->query('//xsl:copy-of') as $node)
		{
			$expr = $node->getAttribute('select');

			if (\preg_match('/^@(\\w+)$/', $expr, $m)
			 && \preg_match($regexp, $m[1]))
			{
				$nodes[] = $node;
			}
		}

		return $nodes;
	}

	/**
	* Return the nodes named "style": attributes first, then elements
	*
	* @param  DOMDocument $dom Document
	* @return DOMNode[]
	*/
	public static function getCSSNodes(DOMDocument $dom)
	{
		$regexp = '/^style$/i';
		$nodes  = \array_merge(
			self::getAttributesByRegexp($dom, $regexp),
			self::getElementsByRegexp($dom, '/^style$/i')
		);

		return $nodes;
	}

	/**
	* Return the elements, and the XSL nodes creating elements, whose name matches a regexp
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp the element name is matched against
	* @return DOMNode[]           Elements, xsl:element elements and xsl:copy-of elements
	*/
	public static function getElementsByRegexp(DOMDocument $dom, $regexp)
	{
		$xpath = new DOMXPath($dom);
		$nodes = [];

		// Literal elements
		foreach ($xpath->query('//*') as $element)
		{
			if (\preg_match($regexp, $element->localName))
			{
				$nodes[] = $element;
			}
		}

		// <xsl:element name="..."/>
		foreach ($xpath->query('//xsl:element') as $element)
		{
			if (\preg_match($regexp, $element->getAttribute('name')))
			{
				$nodes[] = $element;
			}
		}

		// <xsl:copy-of select="foo"/>
		foreach ($xpath->query('//xsl:copy-of') as $node)
		{
			$expr = $node->getAttribute('select');

			if (\preg_match('/^\\w+$/', $expr)
			 && \preg_match($regexp, $expr))
			{
				$nodes[] = $node;
			}
		}

		return $nodes;
	}

	/**
	* Return script elements, plus attributes named data-s9e-livepreview-postprocess
	* or starting with "on"
	*
	* @param  DOMDocument $dom Document
	* @return DOMNode[]
	*/
	public static function getJSNodes(DOMDocument $dom)
	{
		$regexp = '/^(?>data-s9e-livepreview-postprocess$|on)/i';
		$nodes  = \array_merge(
			self::getAttributesByRegexp($dom, $regexp),
			self::getElementsByRegexp($dom, '/^script$/i')
		);

		return $nodes;
	}

	/**
	* Return embed attributes and object params whose name matches a regexp
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp the attribute or param name is matched against
	* @return DOMNode[]
	*/
	public static function getObjectParamsByRegexp(DOMDocument $dom, $regexp)
	{
		$xpath = new DOMXPath($dom);
		$nodes = [];

		foreach (self::getAttributesByRegexp($dom, $regexp) as $attribute)
		{
			if ($attribute->nodeType === \XML_ATTRIBUTE_NODE)
			{
				if (\strtolower($attribute->parentNode->localName) === 'embed')
				{
					$nodes[] = $attribute;
				}
			}
			// count() is required here: a bare node-set expression evaluates to a
			// DOMNodeList object, which is truthy even when it is empty
			elseif ($xpath->evaluate('count(ancestor::embed)', $attribute))
			{
				$nodes[] = $attribute;
			}
		}

		// <param name="..."/> descendants of <object/>
		foreach ($dom->getElementsByTagName('object') as $object)
		{
			foreach ($object->getElementsByTagName('param') as $param)
			{
				if (\preg_match($regexp, $param->getAttribute('name')))
				{
					$nodes[] = $param;
				}
			}
		}

		return $nodes;
	}

	/**
	* Return the names of the variables used in an XSL fragment that are not
	* declared by a preceding xsl:variable
	*
	* @param  string   $xsl XSL source
	* @return string[]      Sorted list of unique names
	*/
	public static function getParametersFromXSL($xsl)
	{
		$paramNames = [];

		// Wrap the fragment so it has a root element and the xsl prefix is bound
		$xsl = '<xsl:stylesheet xmlns:xsl="' . self::XMLNS_XSL . '"><xsl:template>'
		     . $xsl
		     . '</xsl:template></xsl:stylesheet>';

		$dom = new DOMDocument;
		$dom->loadXML($xsl);

		$xpath = new DOMXPath($dom);

		// Variables used in XPath expressions of XSL elements
		$query = '//xsl:*/@match | //xsl:*/@select | //xsl:*/@test';
		foreach ($xpath->query($query) as $attribute)
		{
			foreach (XPathHelper::getVariables($attribute->value) as $varName)
			{
				// Skip names declared by an xsl:variable that precedes the attribute's element
				$varQuery = 'ancestor-or-self::*/preceding-sibling::xsl:variable[@name="' . $varName . '"]';

				if (!$xpath->query($varQuery, $attribute)->length)
				{
					$paramNames[] = $varName;
				}
			}
		}

		// Variables used in attribute value templates of non-XSL elements
		$query = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]/@*[contains(., "{")]';
		foreach ($xpath->query($query) as $attribute)
		{
			$tokens = AVTHelper::parse($attribute->value);

			foreach ($tokens as $token)
			{
				if ($token[0] !== 'expression')
				{
					continue;
				}

				foreach (XPathHelper::getVariables($token[1]) as $varName)
				{
					$varQuery = 'ancestor-or-self::*/preceding-sibling::xsl:variable[@name="' . $varName . '"]';

					if (!$xpath->query($varQuery, $attribute)->length)
					{
						$paramNames[] = $varName;
					}
				}
			}
		}

		$paramNames = \array_unique($paramNames);
		\sort($paramNames);

		return $paramNames;
	}

	/**
	* Return the attributes whose name is in a fixed list of URL attributes,
	* plus the value of object params named dataurl or movie
	*
	* @param  DOMDocument $dom Document
	* @return DOMNode[]
	*/
	public static function getURLNodes(DOMDocument $dom)
	{
		$regexp = '/(?>^(?>action|background|c(?>ite|lassid|odebase)|data|formaction|href|icon|longdesc|manifest|p(?>luginspage|oster|rofile)|usemap)|src)$/i';
		$nodes  = self::getAttributesByRegexp($dom, $regexp);

		foreach (self::getObjectParamsByRegexp($dom, '/^(?:dataurl|movie)$/i') as $param)
		{
			$node = $param->getAttributeNode('value');
			if ($node)
			{
				$nodes[] = $node;
			}
		}

		return $nodes;
	}

	/**
	* Return the document's source as HTML-escaped text, with a given node
	* surrounded by the given markup
	*
	* The node is temporarily tagged with a unique marker so that its
	* serialization can be found in the document's.
	*
	* @param  DOMNode $node    Node to highlight
	* @param  string  $prepend Text inserted before the node
	* @param  string  $append  Text inserted after the node
	* @return string           Escaped source with the node highlighted
	*/
	public static function highlightNode(DOMNode $node, $prepend, $append)
	{
		// Tag the node with a unique marker
		$uniqid = \uniqid('_');
		if ($node instanceof DOMAttr)
		{
			$node->value .= $uniqid;
		}
		elseif ($node instanceof DOMElement)
		{
			$node->setAttribute($uniqid, '');
		}
		elseif ($node instanceof DOMCharacterData
		     || $node instanceof DOMProcessingInstruction)
		{
			$node->data .= $uniqid;
		}

		$dom = $node->ownerDocument;
		$dom->formatOutput = \true;

		$docXml = self::innerXML($dom->documentElement);
		$docXml = \trim(\str_replace("\n ", "\n", $docXml));

		$nodeHtml = \htmlspecialchars(\trim($dom->saveXML($node)));
		$docHtml  = \htmlspecialchars($docXml);

		// Surround the node's serialization with the highlighting markup
		$html = \str_replace($nodeHtml, $prepend . $nodeHtml . $append, $docHtml);

		// Remove the marker from both the node and the output
		if ($node instanceof DOMAttr)
		{
			$node->value = \substr($node->value, 0, -\strlen($uniqid));
			$html = \str_replace($uniqid, '', $html);
		}
		elseif ($node instanceof DOMElement)
		{
			$node->removeAttribute($uniqid);
			$html = \str_replace(' ' . $uniqid . '=""', '', $html);
		}
		elseif ($node instanceof DOMCharacterData
		     || $node instanceof DOMProcessingInstruction)
		{
			// Strip the marker; appending it again here would leave it in the node
			$node->data = \substr($node->data, 0, -\strlen($uniqid));
			$html = \str_replace($uniqid, '', $html);
		}

		return $html;
	}

	/**
	* Load a template as an xsl:template node
	*
	* Tries the template as XML, then as XML with its entities fixed, then
	* falls back to HTML unless the template contains XSL elements.
	*
	* @param  string      $template Template source
	* @return DOMDocument           Document whose root node is an xsl:template
	*
	* @throws InvalidXslException if the template contains XSL elements and cannot be loaded as XML
	*/
	public static function loadTemplate($template)
	{
		$dom = self::loadTemplateAsXML($template);
		if ($dom)
		{
			return $dom;
		}

		$dom = self::loadTemplateAsXML(self::fixEntities($template));
		if ($dom)
		{
			return $dom;
		}

		// A template that contains XSL cannot be reparsed as HTML
		if (\strpos($template, '<xsl:') !== \false)
		{
			$error = \libxml_get_last_error();

			// libxml_get_last_error() returns false if no error is buffered
			throw new InvalidXslException(($error) ? $error->message : 'Invalid XSL');
		}

		return self::loadTemplateAsHTML($template);
	}

	/**
	* Replace templates made of one passthrough element named after their tag
	* with a single dynamic template
	*
	* Nothing is replaced if fewer than $minCount templates qualify.
	*
	* @param  array &$templates Templates, keyed by tag name; modified in place
	* @param  int   $minCount   Minimum number of matching templates
	* @return void
	*/
	public static function replaceHomogeneousTemplates(array &$templates, $minCount = 3)
	{
		$tagNames = [];

		// XPath expression that produces the element's name
		$expr = 'name()';

		foreach ($templates as $tagName => $template)
		{
			// The element name is the tag's name without its prefix, lowercased
			$elName = \strtolower(\preg_replace('/^[^:]+:/', '', $tagName));

			if ($template === '<' . $elName . '><xsl:apply-templates/></' . $elName . '>')
			{
				$tagNames[] = $tagName;

				// Use local-name() if any of the tags has a prefix
				if (\strpos($tagName, ':') !== \false)
				{
					$expr = 'local-name()';
				}
			}
		}

		if (\count($tagNames) < $minCount)
		{
			return;
		}

		// List the uppercase letters used in tag names so they can be lowercased in XPath
		$chars = \preg_replace('/[^A-Z]+/', '', \count_chars(\implode('', $tagNames), 3));

		if (\is_string($chars) && $chars !== '')
		{
			$expr = 'translate(' . $expr . ",'" . $chars . "','" . \strtolower($chars) . "')";
		}

		// The shared template
		$template = '<xsl:element name="{' . $expr . '}"><xsl:apply-templates/></xsl:element>';

		foreach ($tagNames as $tagName)
		{
			$templates[$tagName] = $template;
		}
	}

	/**
	* Replace tokens found in attribute values and text nodes of a template
	*
	* The callback receives the regexp's captures and the node, and returns an
	* array: ['expression', $xpath], ['passthrough'], or any other type with the
	* literal replacement as second element.
	*
	* @param  string   $template Template source
	* @param  string   $regexp   Regexp matching the tokens
	* @param  callable $fn       Callback that produces the replacement
	* @return string             Updated template
	*/
	public static function replaceTokens($template, $regexp, $fn)
	{
		if ($template === '')
		{
			return $template;
		}

		$dom   = self::loadTemplate($template);
		$xpath = new DOMXPath($dom);

		// Tokens in attribute values are replaced within the attribute value template
		foreach ($xpath->query('//@*') as $attribute)
		{
			$attrValue = \preg_replace_callback(
				$regexp,
				function ($m) use ($fn, $attribute)
				{
					$replacement = $fn($m, $attribute);

					if ($replacement[0] === 'expression')
					{
						return '{' . $replacement[1] . '}';
					}
					elseif ($replacement[0] === 'passthrough')
					{
						return '{.}';
					}
					else
					{
						return $replacement[1];
					}
				},
				$attribute->value
			);

			$attribute->value = \htmlspecialchars($attrValue, \ENT_COMPAT, 'UTF-8');
		}

		// Tokens in text nodes are replaced with new sibling nodes
		foreach ($xpath->query('//text()') as $node)
		{
			\preg_match_all(
				$regexp,
				$node->textContent,
				$matches,
				\PREG_SET_ORDER | \PREG_OFFSET_CAPTURE
			);

			if (empty($matches))
			{
				continue;
			}

			$parentNode = $node->parentNode;

			$lastPos = 0;
			foreach ($matches as $m)
			{
				$pos = $m[0][1];

				// Text between the previous match and this one
				if ($pos > $lastPos)
				{
					$parentNode->insertBefore(
						$dom->createTextNode(
							\substr($node->textContent, $lastPos, $pos - $lastPos)
						),
						$node
					);
				}
				$lastPos = $pos + \strlen($m[0][0]);

				// Drop the offsets before passing the captures to the callback
				$_m = [];
				foreach ($m as $capture)
				{
					$_m[] = $capture[0];
				}

				$replacement = $fn($_m, $node);

				if ($replacement[0] === 'expression')
				{
					$parentNode
						->insertBefore(
							$dom->createElementNS(self::XMLNS_XSL, 'xsl:value-of'),
							$node
						)
						->setAttribute('select', $replacement[1]);
				}
				elseif ($replacement[0] === 'passthrough')
				{
					$parentNode->insertBefore(
						$dom->createElementNS(self::XMLNS_XSL, 'xsl:apply-templates'),
						$node
					);
				}
				else
				{
					$parentNode->insertBefore($dom->createTextNode($replacement[1]), $node);
				}
			}

			// Text after the last match
			$text = \substr($node->textContent, $lastPos);
			if ($text > '')
			{
				$parentNode->insertBefore($dom->createTextNode($text), $node);
			}

			// The original text node has been fully replaced
			$parentNode->removeChild($node);
		}

		return self::saveTemplate($dom);
	}

	/**
	* Serialize a loaded template back to a string
	*
	* @param  DOMDocument $dom Document
	* @return string           Content of the document's root element
	*/
	public static function saveTemplate(DOMDocument $dom)
	{
		return self::innerXML($dom->documentElement);
	}

	/**
	* Escape stray ampersands and decode named entities other than XML's own
	*
	* @param  string $template Template source
	* @return string
	*/
	protected static function fixEntities($template)
	{
		return \preg_replace_callback(
			// Named entities, except for the five defined by XML
			'(&(?!quot;|amp;|apos;|lt;|gt;)\\w+;)',
			function ($m)
			{
				return \html_entity_decode($m[0], \ENT_NOQUOTES, 'UTF-8');
			},
			// Escape ampersands that do not start an entity
			\preg_replace('(&(?![A-Za-z0-9]+;|#\\d+;|#x[A-Fa-f0-9]+;))', '&amp;', $template)
		);
	}

	/**
	* Return the XML content of an element, without its own start and end tags
	*
	* @param  DOMElement $element
	* @return string
	*/
	protected static function innerXML(DOMElement $element)
	{
		$xml = $element->ownerDocument->saveXML($element);

		// Position right after the first >, and length up to the last <
		$pos = 1 + \strpos($xml, '>');
		$len = \strrpos($xml, '<') - $pos;

		// Covers empty and self-closed elements
		if ($len < 1)
		{
			return '';
		}

		$xml = \substr($xml, $pos, $len);

		return $xml;
	}

	/**
	* Load a template as HTML, then reload the result as an xsl:template
	*
	* @param  string      $template Template source
	* @return DOMDocument
	*/
	protected static function loadTemplateAsHTML($template)
	{
		$dom  = new DOMDocument;
		$html = '<?xml version="1.0" encoding="utf-8" ?><html><body><div>' . $template . '</div></body></html>';

		$useErrors = \libxml_use_internal_errors(\true);
		$dom->loadHTML($html);
		self::removeInvalidAttributes($dom);
		\libxml_use_internal_errors($useErrors);

		// Serialize the wrapper div's content as XML and reload it under the proper root
		$xml = '<?xml version="1.0" encoding="utf-8" ?><xsl:template xmlns:xsl="' . self::XMLNS_XSL . '">'
		     . self::innerXML($dom->documentElement->firstChild->firstChild)
		     . '</xsl:template>';

		$useErrors = \libxml_use_internal_errors(\true);
		$dom->loadXML($xml);
		\libxml_use_internal_errors($useErrors);

		return $dom;
	}

	/**
	* Load a template as XML, wrapped in an xsl:template element
	*
	* @param  string           $template Template source
	* @return bool|DOMDocument           The document, or FALSE if it could not be parsed
	*/
	protected static function loadTemplateAsXML($template)
	{
		$xml = '<?xml version="1.0" encoding="utf-8" ?><xsl:template xmlns:xsl="' . self::XMLNS_XSL . '">'
		     . $template
		     . '</xsl:template>';

		$useErrors = \libxml_use_internal_errors(\true);
		$dom       = new DOMDocument;
		$success   = $dom->loadXML($xml);
		self::removeInvalidAttributes($dom);
		\libxml_use_internal_errors($useErrors);

		return ($success) ? $dom : \false;
	}

	/**
	* Remove the attributes whose name does not match the expected pattern
	*
	* @param  DOMDocument $dom
	* @return void
	*/
	protected static function removeInvalidAttributes(DOMDocument $dom)
	{
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//@*') as $attribute)
		{
			if (!\preg_match('(^(?:[-\\w]+:)?(?!\\d)[-\\w]+$)D', $attribute->nodeName))
			{
				$attribute->parentNode->removeAttributeNode($attribute);
			}
		}
	}
}

/*
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2017 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\TextFormatter\Configurator\Helpers;

use DOMElement;
use DOMXPath;

class TemplateInspector
{
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	protected $allowChildBitfields = [];
	protected $allowsChildElements;
	protected $allowsText;
	protected $branches;
	protected $contentBitfield = "\0";
	protected $defaultBranchBitfield;
	protected $denyDescendantBitfield = "\0";
	protected $dom;
	protected $hasElements = \false;
	protected $hasRootText;
	protected $isBlock = \false;
	protected $isEmpty;
	protected $isFormattingElement;
	protected $isPassthrough = \false;
	protected $isTransparent = \false;
	protected $isVoid;
	protected $leafNodes = [];
	protected $preservesNewLines = \false;
	protected $rootBitfields = [];
	protected $rootNodes = [];
	protected $xpath;

	public function __construct($template) { $this->dom = TemplateHelper::loadTemplate($template); $this->xpath = new 
DOMXPath($this->dom); $this->defaultBranchBitfield = ElementInspector::getAllowChildBitfield($this->dom->createElement('div')); $this->analyseRootNodes(); $this->analyseBranches(); $this->analyseContent(); }

	/**
	* Test whether this template allows a given template as a child
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// Whatever is denied as a descendant is denied as a child
		if (!$this->allowsDescendant($child))
		{
			return \false;
		}

		// Every root node of the child must be allowed in every branch
		foreach ($child->rootBitfields as $rootBitfield)
		{
			foreach ($this->allowChildBitfields as $allowChildBitfield)
			{
				if (!self::match($rootBitfield, $allowChildBitfield))
				{
					return \false;
				}
			}
		}

		// Root text in the child requires that this template allows text
		return ($this->allowsText || !$child->hasRootText);
	}

	/**
	* Test whether this template allows a given template as a descendant
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// Any overlap between the descendant's content and the denied categories is a refusal
		if (self::match($descendant->contentBitfield, $this->denyDescendantBitfield))
		{
			return \false;
		}

		return ($this->allowsChildElements || !$descendant->hasElements);
	}

	/**
	* Return the value computed by computeAllowsChildElements()
	*
	* @return bool
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}

	/**
	* Return the value computed by computeAllowsText()
	*
	* @return bool
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}

	/**
	* Test whether any root node of this template closes any leaf node of a given parent
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		foreach ($this->rootNodes as $rootNode)
		{
			foreach ($parent->leafNodes as $leafNode)
			{
				if (ElementInspector::closesParent($leafNode, $rootNode))
				{
					return \true;
				}
			}
		}

		return \false;
	}

	/**
	* Evaluate an XPath expression against this template's document
	*
	* @param  string     $expr XPath expression
	* @param  DOMElement $node Context node
	* @return mixed
	*/
	public function evaluate($expr, DOMElement $node = \null)
	{
		return $this->xpath->evaluate($expr, $node);
	}

	/**
	* Return whether any root node was found to be a block-level element
	*
	* @return bool
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}

	/**
	* Return the value computed by computeFormattingElement()
	*
	* @return bool
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}

	/**
	* Return the value computed by computeIsEmpty()
	*
	* @return bool
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}

	/**
	* Return whether the template contains an xsl:apply-templates element
	*
	* @return bool
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}

	/**
	* Return the value computed by computeIsTransparent()
	*
	* @return bool
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}

	/**
	* Return the value computed by computeIsVoid()
	*
	* @return bool
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}

	/**
	* Return the value computed by computePreservesNewLines()
	*
	* @return bool
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}

	/**
	* Record the categories of the template's non-XSL elements and whether it is a passthrough
	*
	* @return void
	*/
	protected function analyseContent()
	{
		// Every element that is not in the XSL namespace
		$query = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]';
		foreach ($this->xpath->query($query) as $element)
		{
			$this->contentBitfield |= ElementInspector::getCategoryBitfield($element);
			$this->hasElements = \true;
		}

		$this->isPassthrough = (bool) $this->evaluate('count(//xsl:apply-templates)');
	}

	/**
	* Record the template's root nodes, their categories, and whether it has text at its root
	*
	* @return void
	*/
	protected function analyseRootNodes()
	{
		// Non-XSL elements that have no non-XSL ancestor
		$query = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]'
		       . '[not(ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"])]';
		foreach ($this->xpath->query($query) as $rootNode)
		{
			$this->rootNodes[] = $rootNode;

			if ($this->elementIsBlock($rootNode))
			{
				$this->isBlock = \true;
			}

			$this->rootBitfields[] = ElementInspector::getCategoryBitfield($rootNode);
		}

		// Text counts as root text if it has no non-XSL ancestor and does not
		// sit inside an xsl:attribute, xsl:comment or xsl:variable
		$predicate  = '[not(ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"])]';
		$predicate .= '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		$query = '//text()[normalize-space() != ""]' . $predicate
		       . '|'
		       . '//xsl:text[normalize-space() != ""]' . $predicate
		       . '|'
		       . '//xsl:value-of' . $predicate;

		$this->hasRootText = (bool) $this->evaluate('count(' . $query . ')');
	}

	/**
	* Collect the branches of the template and compute the properties derived from them
	*
	* A branch is the list of non-XSL ancestors of one xsl:apply-templates element.
	*
	* @return void
	*/
	protected function analyseBranches()
	{
		$this->branches = [];

		$ancestorsQuery = 'ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"]';
		foreach ($this->xpath->query('//xsl:apply-templates') as $applyTemplates)
		{
			$ancestors        = $this->xpath->query($ancestorsQuery, $applyTemplates);
			$this->branches[] = \iterator_to_array($ancestors);
		}

		$this->computeAllowsChildElements();
		$this->computeAllowsText();
		$this->computeBitfields();
		$this->computeFormattingElement();
		$this->computeIsEmpty();
		$this->computeIsTransparent();
		$this->computeIsVoid();
		$this->computePreservesNewLines();
		$this->storeLeafNodes();
	}

	/**
	* Test whether a given ElementInspector test is true for any element of any branch
	*
	* @param  string $methodName Name of the static ElementInspector method
	* @return bool
	*/
	protected function anyBranchHasProperty($methodName)
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (ElementInspector::$methodName($element))
				{
					return \true;
				}
			}
		}

		return \false;
	}

	/**
	* Compute the allowChild bitfield of each branch and the denyDescendant bitfield
	*
	* @return void
	*/
	protected function computeBitfields()
	{
		// Without any branch, a single empty bitfield is stored
		if (empty($this->branches))
		{
			$this->allowChildBitfields = ["\0"];

			return;
		}

		foreach ($this->branches as $branch)
		{
			// Every branch starts from the default bitfield
			$bitfield = $this->defaultBranchBitfield;

			foreach ($branch as $element)
			{
				// A non-transparent element resets the bitfield inherited so far
				if (!ElementInspector::isTransparent($element))
				{
					$bitfield = "\0";
				}

				$bitfield |= ElementInspector::getAllowChildBitfield($element);

				// denyDescendant accumulates across all elements of all branches
				$this->denyDescendantBitfield |= ElementInspector::getDenyDescendantBitfield($element);
			}

			$this->allowChildBitfields[] = $bitfield;
		}
	}

	/**
	* Compute whether the template allows child elements
	*
	* @return void
	*/
	protected function computeAllowsChildElements()
	{
		if ($this->anyBranchHasProperty('isTextOnly'))
		{
			$this->allowsChildElements = \false;
		}
		else
		{
			$this->allowsChildElements = !empty($this->branches);
		}
	}

	/**
	* Compute whether the template allows text: false if the last element of
	* any non-empty branch disallows it
	*
	* @return void
	*/
	protected function computeAllowsText()
	{
		foreach (\array_filter($this->branches) as $branch)
		{
			if (ElementInspector::disallowsText(\end($branch)))
			{
				$this->allowsText = \false;

				return;
			}
		}

		$this->allowsText = \true;
	}

	/**
	* Compute whether every element of every branch is a formatting element or
	* a formatting span, with at least one non-empty branch
	*
	* @return void
	*/
	protected function computeFormattingElement()
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				$isFormatting = ElementInspector::isFormattingElement($element)
				             || $this->isFormattingSpan($element);

				if (!$isFormatting)
				{
					$this->isFormattingElement = \false;

					return;
				}
			}
		}

		$this->isFormattingElement = (bool) \count(\array_filter($this->branches));
	}

	/**
	* Compute whether the template is empty: true if any branch element is
	* empty, or if there is no branch
	*
	* @return void
	*/
	protected function computeIsEmpty()
	{
		$this->isEmpty = ($this->anyBranchHasProperty('isEmpty')) || empty($this->branches);
	}

	/**
	* Compute whether every element of every branch is transparent, with at
	* least one branch
	*
	* @return void
	*/
	protected function computeIsTransparent()
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (!ElementInspector::isTransparent($element))
				{
					$this->isTransparent = \false;

					return;
				}
			}
		}

		$this->isTransparent = !empty($this->branches);
	}

	/**
	* Compute whether the template is void: true if any branch element is
	* void, or if there is no branch
	*
	* @return void
	*/
	protected function computeIsVoid()
	{
		$this->isVoid = ($this->anyBranchHasProperty('isVoid')) || empty($this->branches);
	}

	/**
	* Compute whether any branch preserves new lines, based on the last
	* white-space declaration found in the branch's accumulated style
	*
	* @return void
	*/
	protected function computePreservesNewLines()
	{
		foreach ($this->branches as $branch)
		{
			// Accumulate the style of every element of the branch, in order
			$style = '';
			foreach ($branch as $element)
			{
				$style .= $this->getStyle($element, \true);
			}

			// The greedy .* makes the regexp capture the last white-space declaration
			$hasWhiteSpace = \preg_match('(.*white-space\\s*:\\s*(no|pre))is', $style, $m);
			if ($hasWhiteSpace && \strtolower($m[1]) === 'pre')
			{
				$this->preservesNewLines = \true;

				return;
			}
		}

		$this->preservesNewLines = \false;
	}

	/**
	* Test whether an element is a block, letting a display declaration in
	* its style take precedence over the element's type
	*
	* @param  DOMElement $element
	* @return bool
	*/
	protected function elementIsBlock(DOMElement $element)
	{
		$style = $this->getStyle($element);

		if (\preg_match('(\\bdisplay\\s*:\\s*block)i', $style))
		{
			return \true;
		}

		// display:inline and display:none
		if (\preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $style))
		{
			return \false;
		}

		return ElementInspector::isBlock($element);
	}

	/**
	* Return the style of an element: its style attribute plus the content of
	* its xsl:attribute elements named "style"
	*
	* @param  DOMElement $node
	* @param  bool       $deep Whether to include xsl:attribute descendants rather than children only
	* @return string
	*/
	protected function getStyle(DOMElement $node, $deep = \false)
	{
		$style = '';

		// Elements that preserve whitespace get an implied declaration first
		if (ElementInspector::preservesWhitespace($node))
		{
			$style .= 'white-space:pre;';
		}

		$style .= $node->getAttribute('style');

		$query = (($deep) ? './/' : './') . 'xsl:attribute[@name="style"]';
		foreach ($this->xpath->query($query, $node) as $attribute)
		{
			$style .= ';' . $attribute->textContent;
		}

		return $style;
	}

	/**
	* Test whether an element is a span that has a class and/or style
	* attribute and no other attribute
	*
	* @param  DOMElement $node
	* @return bool
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		if ($node->nodeName !== 'span')
		{
			return \false;
		}

		// At least one of the two attributes must have a value
		if ($node->getAttribute('class') === ''
		 && $node->getAttribute('style') === '')
		{
			return \false;
		}

		foreach ($node->attributes as $attrName => $attribute)
		{
			if ($attrName !== 'class' && $attrName !== 'style')
			{
				return \false;
			}
		}

		return \true;
	}

	/**
	* Store the last element of each non-empty branch as a leaf node
	*
	* @return void
	*/
	protected function storeLeafNodes()
	{
		foreach (\array_filter($this->branches) as $branch)
		{
			$this->leafNodes[] = \end($branch);
		}
	}

	/**
	* Test whether two bitfields have any bit in common
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		// The bitwise AND of two strings is a string; it is all NUL bytes if no bit matches
		return (\trim($bitfield1 & $bitfield2, "\0") !== '');
	}
}

/* * @package s9e\TextFormatter * @copyright Copyright (c) 2010-2017 The s9e Authors * @license http://www.opensource.org/licenses/mit-license.php The MIT License */ namespace s9e\TextFormatter\Configurator\Helpers; use DOMDocument; use DOMElement; use DOMNode; use DOMXPath; use RuntimeException; class TemplateParser { const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform'; public static $voidRegexp = '/^(?:area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$/Di'; public static function parse($template) { $xsl = '' . $template . ''; $dom = new DOMDocument; $dom->loadXML($xsl); $ir = new DOMDocument; $ir->loadXML('