Нет описания

rebuildParser.php 6.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. <?php
  // Input and output locations, all resolved relative to this script's directory.
  $grammarFile      = __DIR__ . '/zend_language_parser.phpy';
  $skeletonFile     = __DIR__ . '/kmyacc.php.parser';
  $tmpGrammarFile   = __DIR__ . '/tmp_parser.phpy';
  $tmpResultFile    = __DIR__ . '/tmp_parser.php';
  $parserResultFile = __DIR__ . '/../lib/PhpParser/Parser.php';

  // Use a kmyacc.exe binary sitting next to this script when there is one;
  // otherwise rely on a "kmyacc" command being resolvable by the shell.
  $kmyacc = __DIR__ . '/kmyacc.exe';
  $kmyacc = file_exists($kmyacc) ? $kmyacc : 'kmyacc';

  // Flip $argv so that every command line argument can be looked up by key.
  $options              = array_flip($argv);
  $optionDebug          = array_key_exists('--debug', $options);
  $optionKeepTmpGrammar = array_key_exists('--keep-tmp-grammar', $options);
  15. ///////////////////////////////
  16. /// Utility regex constants ///
  17. ///////////////////////////////
  // Shared PCRE subpattern library. The (?(DEFINE)...) group only declares the
  // named subpatterns below so that other patterns can call them via (?&name);
  // regex() prepends this constant to every pattern it builds.
  //   singleQuotedString / doubleQuotedString - a quoted literal, backslash escapes allowed
  //   string  - either kind of quoted literal
  //   comment - a /* ... */ block comment
  //   code    - a brace-delimited block; nested blocks, strings and comments are
  //             consumed as units so braces inside them do not end the block
  const LIB = '(?(DEFINE)
  (?<singleQuotedString>\'[^\\\\\']*+(?:\\\\.[^\\\\\']*+)*+\')
  (?<doubleQuotedString>"[^\\\\"]*+(?:\\\\.[^\\\\"]*+)*+")
  (?<string>(?&singleQuotedString)|(?&doubleQuotedString))
  (?<comment>/\*[^*]*+(?:\*(?!/)[^*]*+)*+\*/)
  (?<code>\{[^\'"/{}]*+(?:(?:(?&string)|(?&comment)|(?&code)|/)[^\'"/{}]*+)*+})
  )';
  // Matches a balanced [...] group (nesting handled by recursing into the named
  // group) and captures everything between the outermost brackets as "params".
  const PARAMS = '\[(?<params>[^[\]]*+(?:\[(?&params)\][^[\]]*+)*+)\]';
  // Same as PARAMS but for a balanced (...) group; the content is captured as "args".
  const ARGS = '\((?<args>[^()]*+(?:\((?&args)\)[^()]*+)*+)\)';
  27. ///////////////////
  28. /// Main script ///
  29. ///////////////////
  echo 'Building temporary preproprocessed grammar file.', "\n";

  // Read the grammar; stop early with a clear message instead of feeding
  // `false` into the preprocessing passes when the file cannot be read.
  $grammarCode = file_get_contents($grammarFile);
  if (false === $grammarCode) {
      fwrite(STDERR, "Could not read grammar file \"$grammarFile\".\n");
      exit(1);
  }

  // Apply the preprocessing passes in their original order and store the
  // result as the temporary grammar that kmyacc will consume.
  $grammarCode = resolveConstants($grammarCode);
  $grammarCode = resolveNodes($grammarCode);
  $grammarCode = resolveMacros($grammarCode);
  $grammarCode = resolveArrays($grammarCode);

  file_put_contents($tmpGrammarFile, $grammarCode);

  // --debug adds kmyacc's -t and -v switches.
  $additionalArgs = $optionDebug ? '-t -v' : '';

  echo "Building parser.\n";
  // Quote the binary and every path: they are derived from __DIR__, which may
  // contain spaces or other characters that are special to the shell.
  $command = escapeshellarg($kmyacc) . " $additionalArgs -l -m "
      . escapeshellarg($skeletonFile) . ' ' . escapeshellarg($tmpGrammarFile) . ' 2>&1';
  // shell_exec() yields null/false when the command prints nothing or cannot be
  // started; cast to string so trim() always receives a string.
  $output = trim((string) shell_exec($command));
  echo "Output: \"$output\"\n";

  // Only move the generated parser into place if kmyacc actually produced it.
  $parserBuilt = file_exists($tmpResultFile);
  if ($parserBuilt) {
      moveFileWithDirCheck($tmpResultFile, $parserResultFile);
  }

  // Clean up the temporary grammar unless --keep-tmp-grammar was passed.
  if (!$optionKeepTmpGrammar) {
      unlink($tmpGrammarFile);
  }

  if (!$parserBuilt) {
      fwrite(STDERR, "kmyacc did not produce \"$tmpResultFile\"; the parser was not updated.\n");
      exit(1);
  }
  45. ///////////////////////////////
  46. /// Preprocessing functions ///
  47. ///////////////////////////////
  /**
   * Prefixes static references in the grammar code with the Node namespace.
   *
   * Every name that starts with an uppercase letter, continues with letters,
   * underscores or backslashes and is directly followed by "::" gets "Node\"
   * put in front of it.
   *
   * @param string $code Grammar code
   *
   * @return string Grammar code with the prefixed references
   */
  function resolveConstants($code) {
      $pattern     = '~[A-Z][a-zA-Z_\\\\]++::~';
      $replacement = 'Node\\\\$0';

      return preg_replace($pattern, $replacement, $code);
  }
  /**
   * Expands the node shorthand `Name[a, b]` into `new Node\Name(a, b, $attributes)`.
   *
   * Shorthands nested inside the bracketed parameter list are expanded first.
   *
   * @param string $code Grammar code
   *
   * @return string Grammar code with the node shorthands replaced by constructor calls
   */
  function resolveNodes($code) {
      $nodePattern = '~(?<name>[A-Z][a-zA-Z_\\\\]++)\s*' . PARAMS . '~';

      $buildConstructorCall = function ($match) {
          // Recurse into the parameter list before splitting it on commas.
          $innerCode = resolveNodes($match['params']);

          // Commas inside nested [...] or (...) groups do not separate parameters.
          $arguments = magicSplit(
              '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
              $innerCode
          );

          // Each parameter is followed by ", " so that $attributes always comes last.
          $paramCode = $arguments ? implode(', ', $arguments) . ', ' : '';

          return 'new Node\\' . $match['name'] . '(' . $paramCode . '$attributes)';
      };

      return preg_replace_callback($nodePattern, $buildConstructorCall, $code);
  }
  /**
   * Replaces macro calls in the grammar code with the PHP code they stand for.
   *
   * A macro call is a lowercase-initial name followed by a parenthesised argument
   * list, not preceded by "::" or "->" and not being "array(". Supported macros:
   * error, init, push, pushNormalizing, toArray, parseVar, parseEncapsed and
   * parseEncapsedDoc. Macros nested inside the arguments are expanded first.
   *
   * @param string $code Grammar code
   *
   * @return string Grammar code with all macro calls expanded
   *
   * @throws Exception If a call matches the macro syntax but its name is unknown
   */
  function resolveMacros($code) {
      return preg_replace_callback(
          '~\b(?<!::|->)(?!array\()(?<name>[a-z][A-Za-z]++)' . ARGS . '~',
          function ($macro) {
              $name = $macro['name'];

              // Expand inner macros, then split the arguments on top-level commas.
              $args = magicSplit(
                  '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
                  resolveMacros($macro['args'])
              );

              switch ($name) {
                  case 'error':
                      assertArgs(1, $args, $name);
                      return 'throw new Error(' . $args[0] . ')';

                  case 'init':
                      // Accepts any number of arguments.
                      return '$$ = array(' . implode(', ', $args) . ')';

                  case 'push':
                      assertArgs(2, $args, $name);
                      return $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0];

                  case 'pushNormalizing':
                      assertArgs(2, $args, $name);
                      return 'if (is_array(' . $args[1] . ')) { $$ = array_merge(' . $args[0] . ', ' . $args[1] . '); } else { ' . $args[0] . '[] = ' . $args[1] . '; $$ = ' . $args[0] . '; }';

                  case 'toArray':
                      assertArgs(1, $args, $name);
                      return 'is_array(' . $args[0] . ') ? ' . $args[0] . ' : array(' . $args[0] . ')';

                  case 'parseVar':
                      assertArgs(1, $args, $name);
                      return 'substr(' . $args[0] . ', 1)';

                  case 'parseEncapsed':
                      assertArgs(2, $args, $name);
                      return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = Node\Scalar\String::parseEscapeSequences($s, ' . $args[1] . '); } }';

                  case 'parseEncapsedDoc':
                      assertArgs(1, $args, $name);
                      return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = Node\Scalar\String::parseEscapeSequences($s, null); } } $s = preg_replace(\'~(\r\n|\n|\r)$~\', \'\', $s); if (\'\' === $s) array_pop(' . $args[0] . ');';
              }

              throw new Exception(sprintf('Unknown macro "%s"', $name));
          },
          $code
      );
  }
  /**
   * Terminates the script when a macro was called with the wrong number of arguments.
   *
   * @param int    $num  Expected number of arguments
   * @param array  $args Arguments the macro was actually given
   * @param string $name Macro name, used in the error message
   */
  function assertArgs($num, $args, $name) {
      if (count($args) == $num) {
          return;
      }

      die('Wrong argument count for ' . $name . '().');
  }
  /**
   * Converts bracketed `[key: value, ...]` lists into `array('key' => value, ...)`.
   *
   * A bracket group is left untouched when it has no elements or when at least
   * one of its elements does not contain a colon.
   *
   * @param string $code Grammar code
   *
   * @return string Grammar code with the keyed bracket lists converted
   */
  function resolveArrays($code) {
      $convertLiteral = function ($match) {
          $original = $match[0];

          $entries = magicSplit(
              '(?:' . PARAMS . '|' . ARGS . ')(*SKIP)(*FAIL)|,',
              $match['params']
          );

          // don't convert [] to array, it might have different meaning
          if (!$entries) {
              return $original;
          }

          $pairs = array();
          foreach ($entries as $entry) {
              // convert only arrays where all elements have keys
              if (false === strpos($entry, ':')) {
                  return $original;
              }

              // Split on the first colon only; the value may contain further colons.
              $parts = explode(':', $entry, 2);
              $pairs[] = "'" . $parts[0] . "' =>" . $parts[1];
          }

          return 'array(' . implode(', ', $pairs) . ')';
      };

      return preg_replace_callback('~' . PARAMS . '~', $convertLiteral, $code);
  }
  /**
   * Moves a file, creating the destination directory first if it is missing.
   *
   * @param string $fromPath Path of the file to move
   * @param string $toPath   Path the file should end up at
   */
  function moveFileWithDirCheck($fromPath, $toPath) {
      $targetDir = dirname($toPath);

      // Create the whole directory chain when the destination directory is absent.
      is_dir($targetDir) || mkdir($targetDir, 0777, true);

      rename($fromPath, $toPath);
  }
  155. //////////////////////////////
  156. /// Regex helper functions ///
  157. //////////////////////////////
  /**
   * Wraps a pattern fragment into a complete "~"-delimited regex that starts
   * with the shared LIB subpattern definitions.
   *
   * @param string $regex Pattern fragment without delimiters
   *
   * @return string Delimited pattern, with "~" in the fragment backslash-escaped
   */
  function regex($regex) {
      // The fragment must not contain a bare delimiter character.
      $escaped = strtr($regex, array('~' => '\~'));

      return '~' . LIB . '(?:' . $escaped . ')~';
  }
  /**
   * Splits a string on a regex while ignoring delimiters that occur inside
   * strings, comments or brace-delimited code blocks.
   *
   * Each piece is trimmed and empty pieces are dropped. The result is a
   * sequentially indexed list, so callers can safely address pieces by position.
   *
   * @param string $regex  Delimiter pattern fragment (without regex delimiters)
   * @param string $string String to split
   *
   * @return string[] Non-empty trimmed pieces, indexed from 0
   *
   * @throws Exception If the split pattern cannot be applied
   */
  function magicSplit($regex, $string) {
      $pieces = preg_split(regex('(?:(?&string)|(?&comment)|(?&code))(*SKIP)(*FAIL)|' . $regex), $string);
      if (false === $pieces) {
          throw new Exception(sprintf('preg_split() failed with error code %d', preg_last_error()));
      }

      $result = array();
      foreach ($pieces as $piece) {
          $piece = trim($piece);

          // Compare against '' explicitly: a plain truthiness filter would also
          // throw away the legitimate piece '0'.
          if ('' !== $piece) {
              $result[] = $piece;
          }
      }

      return $result;
  }