菜谱项目

build-manual 8.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. #!/usr/bin/env php
  2. <?php
  3. /*
  4. * This file is part of Psy Shell.
  5. *
  6. * (c) 2012-2017 Justin Hileman
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  // Maximum line width used when wrapping generated documentation text.
  define('WRAP_WIDTH', 100);

  // NOTE(review): $count is initialised here but not read anywhere in the visible script.
  $count = 0;

  // The script needs exactly two arguments: an existing manual directory and
  // the name of the database file to write.
  if (!(count($argv) === 3 && is_dir($argv[1]))) {
      echo "usage: build_manual path/to/manual output_filename.db\n";
      exit(1);
  }
  /**
   * Word-wrap text that may contain inline markup tags.
   *
   * Characters from "<" up to (but not including) the closing ">" are not
   * counted towards the visible line width. Existing newlines are kept as
   * line breaks, and every emitted line is trimmed.
   *
   * @param string   $text  Text to wrap; may contain tags and newlines.
   * @param int|null $width Maximum visible width; WRAP_WIDTH is used when null.
   *
   * @return string The wrapped lines joined with "\n".
   */
  function htmlwrap($text, $width = null)
  {
      if ($width === null) {
          $width = WRAP_WIDTH;
      }

      $len = strlen($text);
      $return = array();

      $lastSpace = null;
      $inTag = false;
      $i = $tagWidth = 0;

      do {
          switch (substr($text, $i, 1)) {
              case "\n":
                  // Flush everything before the newline as its own line. The
                  // newline stays at the head of $text and is removed by trim()
                  // when the following line is emitted.
                  $return[] = trim(substr($text, 0, $i));
                  $text = substr($text, $i);
                  $len = strlen($text);

                  // A new line starts here, so the tag width gathered on the
                  // previous line must not be subtracted from this one.
                  $i = $lastSpace = $tagWidth = 0;

                  // "break", not "continue": inside a switch PHP treats
                  // "continue" as "break" and warns about it since PHP 7.3.
                  break;

              case ' ':
                  // Only spaces outside of a tag are valid break points.
                  if (!$inTag) {
                      $lastSpace = $i;
                  }
                  break;

              case '<':
                  $inTag = true;
                  break;

              case '>':
                  $inTag = false;
                  break;
          }

          if ($inTag) {
              $tagWidth++;
          }

          $i++;

          if (!$inTag && ($i - $tagWidth > $width)) {
              // Break at the last space seen, or hard-break at $width when the
              // current line has no usable space.
              $lastSpace = $lastSpace ?: $width;

              $return[] = trim(substr($text, 0, $lastSpace));
              $text = substr($text, $lastSpace);
              $len = strlen($text);

              $i = $tagWidth = 0;
          }
      } while ($i < $len);

      $return[] = trim($text);

      return implode("\n", $return);
  }
  /**
   * Collect the serialized contents of every <para> element below a DOM node.
   *
   * Table children and children whose text is nothing but an entity
   * reference (e.g. "&foo.bar;") are left out. Inside a paragraph a newline
   * followed by indentation is collapsed into a single space.
   *
   * @param DOMNode $element Node exposing getElementsByTagName().
   *
   * @return string Non-empty paragraphs joined by a blank line.
   */
  function extract_paragraphs($element)
  {
      $collected = array();

      foreach ($element->getElementsByTagName('para') as $para) {
          $markup = '';

          foreach ($para->childNodes as $node) {
              // @todo figure out if there's something we can do with tables.
              $isTable = $node instanceof DOMElement && $node->tagName === 'table';

              // Lone entity references are skipped as well.
              if ($isTable || preg_match('{^\s*&[a-z][a-z\.]+;\s*$}', $node->textContent)) {
                  continue;
              }

              $markup .= $node->ownerDocument->saveXML($node);
          }

          $markup = trim(preg_replace('{\n[ \t]+}', ' ', $markup));

          // Truthiness check: empty paragraphs (and the literal "0") are dropped.
          if ($markup) {
              $collected[] = $markup;
          }
      }

      return implode("\n\n", $collected);
  }
  /**
   * Render a parsed doc array as formatted, tagged text.
   *
   * Recognised keys of $doc, all optional:
   *  - 'description': string
   *  - 'params':      list of arrays with 'type', 'name' and 'description'
   *  - 'return':      string description of the return value
   *  - 'return_type': string
   *
   * Every section uses two-space gutters so that wrapped continuation lines
   * (indented by typeMax + max + 6, or strlen(type) + 4, columns) line up
   * with the first line of the description they belong to.
   *
   * @param array $doc Parsed documentation, see above.
   *
   * @return string Formatted documentation without a trailing blank line.
   */
  function format_doc($doc)
  {
      $chunks = array();

      if (!empty($doc['description'])) {
          $chunks[] = '<comment>Description:</comment>';
          // Wrapped to WRAP_WIDTH - 2, so indent by exactly those two columns.
          $chunks[] = indent_text(htmlwrap(thunk_tags($doc['description']), WRAP_WIDTH - 2), '  ');
          $chunks[] = '';
      }

      if (!empty($doc['params'])) {
          $chunks[] = '<comment>Param:</comment>';

          // A parameter's type may be null when no type was found for it; cast
          // so strlen() is never handed null.
          $typeMax = max(array_map(function ($param) {
              return strlen((string) $param['type']);
          }, $doc['params']));

          $max = max(array_map(function ($param) {
              return strlen($param['name']);
          }, $doc['params']));

          // Columns before the description: 2 + type + 2 + name + 2.
          $template = '  <info>%-' . $typeMax . 's</info>  <strong>%-' . $max . 's</strong>  %s';
          $indent = str_repeat(' ', $typeMax + $max + 6);
          $wrapWidth = WRAP_WIDTH - strlen($indent);

          foreach ($doc['params'] as $param) {
              // The first line is positioned by the template, so only the
              // continuation lines receive the indent.
              $desc = indent_text(htmlwrap(thunk_tags($param['description']), $wrapWidth), $indent, false);
              $chunks[] = sprintf($template, $param['type'], $param['name'], $desc);
          }

          $chunks[] = '';
      }

      if (isset($doc['return']) || isset($doc['return_type'])) {
          $chunks[] = '<comment>Return:</comment>';

          $type = isset($doc['return_type']) ? $doc['return_type'] : 'unknown';
          $desc = isset($doc['return']) ? $doc['return'] : '';

          // Columns before the description: 2 + type + 2.
          $indent = str_repeat(' ', strlen($type) + 4);
          $wrapWidth = WRAP_WIDTH - strlen($indent);

          if (!empty($desc)) {
              $desc = indent_text(htmlwrap(thunk_tags($desc), $wrapWidth), $indent, false);
          }

          $chunks[] = sprintf('  <info>%s</info>  %s', $type, $desc);
          $chunks[] = '';
      }

      // Each section ends with an empty chunk; drop the last one so the output
      // has no trailing newline.
      array_pop($chunks);

      return implode("\n", $chunks);
  }
  /**
   * Reduce manual markup to the tags used in the generated output.
   *
   * Known manual tags are renamed to <strong>, <return> or <info>, every
   * other tag is stripped, and escaped ampersands / boolean and null entity
   * references are turned back into their display form.
   *
   * @param string $text Markup to convert.
   *
   * @return string Text containing only <strong>, <return> and <info> tags.
   */
  function thunk_tags($text)
  {
      // Keys are "name>" suffixes, so opening and closing tags both match.
      $renames = array(
          'parameter>' => 'strong>',
          'function>'  => 'strong>',
          'literal>'   => 'return>',
          'type>'      => 'info>',
          'constant>'  => 'info>',
      );

      // strtr() prefers the longest key, so the specific entities win over
      // the bare "&amp;" replacement.
      $entities = array(
          '&amp;'       => '&',
          '&amp;true;'  => '<return>true</return>',
          '&amp;false;' => '<return>false</return>',
          '&amp;null;'  => '<return>null</return>',
      );

      $renamed = strtr($text, $renames);
      $stripped = strip_tags($renamed, '<strong><return><info>');

      return strtr($stripped, $entities);
  }
  /**
   * Prefix the lines of a text block with an indent string.
   *
   * @param string $text    Text whose lines are separated by "\n".
   * @param string $indent  String placed in front of each line.
   * @param bool   $leading Whether the first line is indented as well.
   *
   * @return string The indented text.
   */
  function indent_text($text, $indent = ' ', $leading = true)
  {
      $firstLinePrefix = $leading ? $indent : '';
      $lines = explode("\n", $text);

      return $firstLinePrefix . implode("\n" . $indent, $lines);
  }
  /**
   * Look up the declared type of a parameter in the method synopsis.
   *
   * Scans every <methodparam> element that has both a <type> and a
   * <parameter> child and returns the type of the first one whose parameter
   * text equals $paramName.
   *
   * @param DOMDocument $xml       Document (or node) to search.
   * @param string      $paramName Parameter name without the leading "$".
   *
   * @return string|null The type text, or null when nothing matches.
   */
  function find_type($xml, $paramName)
  {
      foreach ($xml->getElementsByTagName('methodparam') as $methodParam) {
          $typeNode = $methodParam->getElementsByTagName('type')->item(0);
          if (!$typeNode) {
              continue;
          }

          $nameNode = $methodParam->getElementsByTagName('parameter')->item(0);
          if (!$nameNode) {
              continue;
          }

          if ($nameNode->textContent === $paramName) {
              return $typeNode->textContent;
          }
      }

      return null;
  }
  /**
   * Build the formatted documentation for a function reference page.
   *
   * Reads the <refsect1> sections by their "role" attribute:
   *  - "description":  paragraphs, plus the first <type> child of the method
   *                    synopsis as the return type
   *  - "returnvalues": paragraphs describing the return value
   *  - "parameters":   one entry per <varlistentry> that names a parameter
   * The <refpurpose> text, when present, is put in front of the description.
   *
   * @param DOMDocument $xml Parsed manual page.
   *
   * @return array Two elements: the list of <refname> texts, and the
   *               formatted documentation produced by format_doc().
   */
  function format_function_doc($xml)
  {
      $doc = array();

      foreach ($xml->getElementsByTagName('refsect1') as $section) {
          $role = $section->getAttribute('role');

          if ($role === 'description') {
              $doc['description'] = extract_paragraphs($section);

              $synopsis = $section->getElementsByTagName('methodsynopsis')->item(0);
              if ($synopsis) {
                  // Only a direct <type> child of the synopsis is the return
                  // type; the first one found wins.
                  foreach ($synopsis->childNodes as $node) {
                      if ($node instanceof DOMElement && $node->tagName === 'type') {
                          $doc['return_type'] = $node->textContent;
                          break;
                      }
                  }
              }
          } elseif ($role === 'returnvalues') {
              $doc['return'] = extract_paragraphs($section);
          } elseif ($role === 'parameters') {
              $params = array();

              foreach ($section->getElementsByTagName('varlistentry') as $entry) {
                  $name = $entry->getElementsByTagName('parameter')->item(0);
                  if (!$name) {
                      continue;
                  }

                  $params[] = array(
                      'name'        => '$' . $name->textContent,
                      'type'        => find_type($xml, $name->textContent),
                      'description' => extract_paragraphs($entry),
                  );
              }

              $doc['params'] = $params;
          }
      }

      // and the purpose
      $purpose = $xml->getElementsByTagName('refpurpose')->item(0);
      if ($purpose) {
          $desc = htmlwrap($purpose->textContent);
          if (isset($doc['description'])) {
              $desc .= "\n\n" . $doc['description'];
          }

          $doc['description'] = trim($desc);
      }

      // Every <refname> becomes an id that maps to the same documentation.
      $ids = array();
      foreach ($xml->getElementsByTagName('refname') as $ref) {
          $ids[] = $ref->textContent;
      }

      return array($ids, format_doc($doc));
  }
  /**
   * Placeholder for class reference pages.
   *
   * @param DOMDocument $xml Parsed manual page (currently unused).
   *
   * @return array Two elements: an empty id list and a null document.
   */
  function format_class_doc($xml)
  {
      // @todo implement this
      $ids = array();
      $doc = null;

      return array($ids, $doc);
  }
  // Walk the manual directory given as the first argument.
  $dir = new RecursiveDirectoryIterator($argv[1]);

  // Keep directories and *.xml files; skip dot-files, "entities.*" files and
  // the pdo_4d entry.
  $filter = new RecursiveCallbackFilterIterator($dir, function ($current, $key, $iterator) {
      $name = $current->getFilename();

      if ($name[0] === '.') {
          return false;
      }

      if (!$current->isDir() && $current->getExtension() !== 'xml') {
          return false;
      }

      if (strpos($name, 'entities.') === 0) {
          return false;
      }

      // Temporarily blacklist this one, the docs are weird.
      return $name !== 'pdo_4d';
  });

  $iterator = new RecursiveIteratorIterator($filter);

  // Maps each documented id to its formatted documentation.
  $docs = array();

  foreach ($iterator as $file) {
      // Every "&" is escaped up front, so entity references in the manual
      // sources reach the parser as plain text.
      $xmlstr = str_replace('&', '&amp;', file_get_contents($file));

      $xml = new DOMDocument();
      $xml->preserveWhiteSpace = false;

      if (!@$xml->loadXML($xmlstr)) {
          echo "XML Parse Error: $file\n";
          continue;
      }

      // Pages that are neither a function reference nor a class reference
      // contribute nothing.
      $ids = array();
      $doc = null;

      if ($xml->getElementsByTagName('refentry')->length !== 0) {
          list($ids, $doc) = format_function_doc($xml);
      } elseif ($xml->getElementsByTagName('classref')->length !== 0) {
          list($ids, $doc) = format_class_doc($xml);
      }

      foreach ($ids as $id) {
          $docs[$id] = $doc;
      }
  }
  // Always start from a fresh database file.
  if (is_file($argv[2])) {
      unlink($argv[2]);
  }

  $db = new PDO('sqlite:' . $argv[2]);

  // Report database failures as exceptions instead of silently continuing
  // (the default error mode is silent before PHP 8).
  $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

  // exec() is the call for statements that return no result set.
  $db->exec('CREATE TABLE php_manual (id char(256) PRIMARY KEY, doc TEXT)');

  $cmd = $db->prepare('INSERT INTO php_manual (id, doc) VALUES (?, ?)');

  // Insert everything in one transaction; otherwise SQLite commits (and
  // syncs to disk) after every single row.
  $db->beginTransaction();
  try {
      foreach ($docs as $id => $doc) {
          $cmd->execute(array($id, $doc));
      }
      $db->commit();
  } catch (Exception $e) {
      $db->rollBack();
      throw $e;
  }