123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- #!/usr/bin/env php
- <?php
- /*
- * This file is part of Psy Shell.
- *
- * (c) 2012-2017 Justin Hileman
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
// Default wrap width (in characters) used when formatting documentation text.
define('WRAP_WIDTH', 100);

// NOTE(review): $count is not referenced anywhere else in the visible script.
$count = 0;

// The script needs exactly two arguments after its own name: an existing
// manual directory and the path of the database file to produce.
$hasValidArguments = count($argv) === 3 && is_dir($argv[1]);
if (!$hasValidArguments) {
    echo "usage: build_manual path/to/manual output_filename.db\n";
    exit(1);
}
/**
 * Word-wrap text that contains inline markup tags.
 *
 * Characters between '<' and '>' are counted as tag width and excluded from
 * the measured line width, so markup does not cause early wrapping. Existing
 * newlines in the text always end the current line. When a line grows past
 * $width it is broken at the last space seen outside a tag; if no such space
 * exists, the line is cut after $width raw characters.
 *
 * @param string   $text  Text to wrap; may contain tags and newlines
 * @param int|null $width Maximum visible width; defaults to WRAP_WIDTH
 *
 * @return string The wrapped text, with each line trimmed and joined by "\n"
 */
function htmlwrap($text, $width = null)
{
    if ($width === null) {
        $width = WRAP_WIDTH;
    }

    $len = strlen($text);
    $return = array();
    $lastSpace = null;
    $inTag = false;
    $i = $tagWidth = 0;

    do {
        switch (substr($text, $i, 1)) {
            case "\n":
                // Flush everything before the newline as a finished line. The
                // remaining text still starts with the newline; the $i++ below
                // steps over it.
                $return[] = trim(substr($text, 0, $i));
                $text = substr($text, $i);
                $len = strlen($text);

                // A new line starts here, so the tag width accumulated on the
                // previous line must not be subtracted from this one.
                $i = $lastSpace = $tagWidth = 0;

                // `break`, not `continue`: inside a switch they behave the same,
                // but `continue` raises a warning on PHP >= 7.3.
                break;

            case ' ':
                // Only spaces outside of tags are valid break points.
                if (!$inTag) {
                    $lastSpace = $i;
                }
                break;

            case '<':
                $inTag = true;
                break;

            case '>':
                $inTag = false;
                break;
        }

        if ($inTag) {
            $tagWidth++;
        }

        $i++;

        if (!$inTag && ($i - $tagWidth > $width)) {
            // No usable space on this line (null or 0): fall back to a hard cut.
            $lastSpace = $lastSpace ?: $width;

            $return[] = trim(substr($text, 0, $lastSpace));
            $text = substr($text, $lastSpace);
            $len = strlen($text);

            $i = $tagWidth = 0;
        }
    } while ($i < $len);

    $return[] = trim($text);

    return implode("\n", $return);
}
/**
 * Collect the serialized contents of every <para> element under a node.
 *
 * Table children and children whose text is nothing but a single entity
 * reference (e.g. "&reftitle.x;") are left out. Newlines followed by
 * indentation are collapsed into one space, and paragraphs that end up
 * empty are dropped.
 *
 * @param DOMElement $element Node whose descendant <para> elements are read
 *
 * @return string The paragraphs' XML markup, separated by blank lines
 */
function extract_paragraphs($element)
{
    $collected = [];

    foreach ($element->getElementsByTagName('para') as $para) {
        $markup = '';

        foreach ($para->childNodes as $node) {
            // @todo figure out if there's something we can do with tables.
            $isTable = $node instanceof DOMElement && $node->tagName === 'table';
            if ($isTable) {
                continue;
            }

            // skip references, because ugh.
            $isBareReference = preg_match('{^\s*&[a-z][a-z\.]+;\s*$}', $node->textContent);
            if ($isBareReference) {
                continue;
            }

            $markup .= $node->ownerDocument->saveXML($node);
        }

        $markup = trim(preg_replace('{\n[ \t]+}', ' ', $markup));
        if ($markup) {
            $collected[] = $markup;
        }
    }

    return implode("\n\n", $collected);
}
/**
 * Render a documentation array as console-formatted text.
 *
 * Produces up to three sections -- "Description:", "Param:" and "Return:" --
 * separated by blank lines. Section bodies are indented by two spaces and
 * wrapped so that continuation lines line up under the text they continue.
 *
 * @param array $doc May contain 'description' (string), 'params' (list of
 *                   arrays with 'name', 'type' and 'description'), 'return'
 *                   (string) and 'return_type' (string)
 *
 * @return string The formatted documentation ('' when $doc has no sections)
 */
function format_doc($doc)
{
    $chunks = array();

    if (!empty($doc['description'])) {
        $chunks[] = '<comment>Description:</comment>';
        // Two-space indent, matching the two columns reserved by WRAP_WIDTH - 2.
        $chunks[] = indent_text(htmlwrap(thunk_tags($doc['description']), WRAP_WIDTH - 2), '  ');
        $chunks[] = '';
    }

    if (!empty($doc['params'])) {
        $chunks[] = '<comment>Param:</comment>';

        // The type may be null when it could not be resolved, hence the cast.
        $typeMax = max(array_map(function ($param) {
            return strlen((string) $param['type']);
        }, $doc['params']));

        $max = max(array_map(function ($param) {
            return strlen($param['name']);
        }, $doc['params']));

        // Visible prefix: 2 + type column + 2 + name column + 2 characters,
        // i.e. exactly the $typeMax + $max + 6 used for continuation lines.
        $template = '  <info>%-' . $typeMax . 's</info>  <strong>%-' . $max . 's</strong>  %s';
        $indent = str_repeat(' ', $typeMax + $max + 6);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        foreach ($doc['params'] as $param) {
            $desc = indent_text(htmlwrap(thunk_tags($param['description']), $wrapWidth), $indent, false);
            $chunks[] = sprintf($template, $param['type'], $param['name'], $desc);
        }

        $chunks[] = '';
    }

    if (isset($doc['return']) || isset($doc['return_type'])) {
        $chunks[] = '<comment>Return:</comment>';

        $type = isset($doc['return_type']) ? $doc['return_type'] : 'unknown';
        $desc = isset($doc['return']) ? $doc['return'] : '';

        // Visible prefix: 2 + type + 2 characters, matching strlen($type) + 4.
        $indent = str_repeat(' ', strlen($type) + 4);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        if (!empty($desc)) {
            $desc = indent_text(htmlwrap(thunk_tags($doc['return']), $wrapWidth), $indent, false);
        }

        $chunks[] = sprintf('  <info>%s</info>  %s', $type, $desc);
        $chunks[] = '';
    }

    array_pop($chunks); // get rid of the trailing newline

    return implode("\n", $chunks);
}
/**
 * Convert manual markup into the console formatter's tag vocabulary.
 *
 * Known manual tags are renamed to <strong>, <return> or <info>, every other
 * tag is stripped, and escaped entities are translated back: "&amp;" becomes
 * "&", and the true/false/null entity references become <return> literals.
 *
 * @param string $text Serialized manual markup
 *
 * @return string Text containing only <strong>, <return> and <info> tags
 */
function thunk_tags($text)
{
    // Keys omit the leading "<" / "</" so opening and closing tags both match.
    $tagMap = array(
        'parameter>' => 'strong>',
        'function>'  => 'strong>',
        'literal>'   => 'return>',
        'type>'      => 'info>',
        'constant>'  => 'info>',
    );

    // Serialized XML text carries ampersands as "&amp;", so an entity reference
    // that was kept as literal text shows up here as e.g. "&amp;true;". strtr()
    // tries the longest key first, so the specific entities win over the bare
    // "&amp;". The unescaped forms are kept for text that was never escaped.
    $andBack = array(
        '&amp;'       => '&',
        '&amp;true;'  => '<return>true</return>',
        '&amp;false;' => '<return>false</return>',
        '&amp;null;'  => '<return>null</return>',
        '&true;'      => '<return>true</return>',
        '&false;'     => '<return>false</return>',
        '&null;'      => '<return>null</return>',
    );

    return strtr(strip_tags(strtr($text, $tagMap), '<strong><return><info>'), $andBack);
}
/**
 * Prefix the lines of a text block with an indent string.
 *
 * @param string $text    Text to indent
 * @param string $indent  String inserted after every newline
 * @param bool   $leading Whether the first line is indented as well
 *
 * @return string The indented text
 */
function indent_text($text, $indent = ' ', $leading = true)
{
    $indented = implode("\n" . $indent, explode("\n", $text));

    if ($leading) {
        return $indent . $indented;
    }

    return $indented;
}
/**
 * Look up the declared type of a named parameter.
 *
 * Scans every <methodparam> in the document and returns the text of its first
 * <type> child when its first <parameter> child matches $paramName exactly.
 *
 * @param DOMDocument $xml       Document to search
 * @param string      $paramName Parameter name, without a leading "$"
 *
 * @return string|null The type text, or null when no entry matches
 */
function find_type($xml, $paramName)
{
    foreach ($xml->getElementsByTagName('methodparam') as $methodParam) {
        $typeNode = $methodParam->getElementsByTagName('type')->item(0);
        if (!$typeNode) {
            continue;
        }

        $nameNode = $methodParam->getElementsByTagName('parameter')->item(0);
        if (!$nameNode) {
            continue;
        }

        if ($nameNode->textContent === $paramName) {
            return $typeNode->textContent;
        }
    }

    return null;
}
/**
 * Build the documentation entry for a function reference page.
 *
 * Reads each <refsect1> by its "role" attribute (description, returnvalues,
 * parameters), prepends the <refpurpose> text to the description, and hands
 * the collected data to format_doc().
 *
 * @param DOMDocument $xml Parsed reference page
 *
 * @return array Two elements: the list of <refname> texts, and the formatted
 *               documentation string
 */
function format_function_doc($xml)
{
    $doc = [];

    foreach ($xml->getElementsByTagName('refsect1') as $section) {
        $role = $section->getAttribute('role');

        if ($role === 'description') {
            $doc['description'] = extract_paragraphs($section);

            // The return type is the first <type> that is a direct child of
            // the method synopsis.
            $synopsis = $section->getElementsByTagName('methodsynopsis')->item(0);
            if ($synopsis) {
                foreach ($synopsis->childNodes as $node) {
                    if ($node instanceof DOMElement && $node->tagName === 'type') {
                        $doc['return_type'] = $node->textContent;
                        break;
                    }
                }
            }
        } elseif ($role === 'returnvalues') {
            $doc['return'] = extract_paragraphs($section);
        } elseif ($role === 'parameters') {
            $params = [];

            foreach ($section->getElementsByTagName('varlistentry') as $entry) {
                $name = $entry->getElementsByTagName('parameter')->item(0);
                if (!$name) {
                    continue;
                }

                $params[] = [
                    'name'        => '$' . $name->textContent,
                    'type'        => find_type($xml, $name->textContent),
                    'description' => extract_paragraphs($entry),
                ];
            }

            $doc['params'] = $params;
        }
    }

    // and the purpose
    $purpose = $xml->getElementsByTagName('refpurpose')->item(0);
    if ($purpose) {
        $summary = htmlwrap($purpose->textContent);
        if (isset($doc['description'])) {
            $summary .= "\n\n" . $doc['description'];
        }

        $doc['description'] = trim($summary);
    }

    $ids = [];
    foreach ($xml->getElementsByTagName('refname') as $refName) {
        $ids[] = $refName->textContent;
    }

    return [$ids, format_doc($doc)];
}
/**
 * Placeholder for class reference pages.
 *
 * Not implemented yet: always yields no ids and no documentation, so class
 * pages contribute nothing to the generated manual.
 *
 * @param DOMDocument $xml Parsed class reference page (currently unused)
 *
 * @return array Two elements: an empty id list and null
 */
function format_class_doc($xml)
{
    // @todo implement this
    $ids = [];
    $doc = null;

    return [$ids, $doc];
}
// Walk the manual directory, keeping XML files and the directories that hold
// them. Dotfiles, "entities.*" files and the pdo_4d entry are skipped.
$dir = new RecursiveDirectoryIterator($argv[1]);
$filter = new RecursiveCallbackFilterIterator($dir, function ($current, $key, $iterator) {
    return $current->getFilename()[0] !== '.' &&
        ($current->isDir() || $current->getExtension() === 'xml') &&
        strpos($current->getFilename(), 'entities.') !== 0 &&
        $current->getFilename() !== 'pdo_4d'; // Temporarily blacklist this one, the docs are weird.
});
$iterator = new RecursiveIteratorIterator($filter);

// Collect parse errors internally instead of emitting a warning per problem.
libxml_use_internal_errors(true);

// Formatted documentation keyed by function name.
$docs = array();

foreach ($iterator as $file) {
    $contents = file_get_contents($file);
    if ($contents === false || $contents === '') {
        // Unreadable or empty input can never parse; report it the same way
        // as any other unparsable file.
        echo "XML Parse Error: $file\n";
        continue;
    }

    // Escape every ampersand so entity references the parser has no
    // definition for (&true;, &reftitle.x;, ...) survive as literal text
    // instead of aborting the parse.
    $xmlstr = str_replace('&', '&amp;', $contents);

    $xml = new DOMDocument();
    $xml->preserveWhiteSpace = false;

    $loaded = $xml->loadXml($xmlstr);
    libxml_clear_errors();

    if (!$loaded) {
        echo "XML Parse Error: $file\n";
        continue;
    }

    if ($xml->getElementsByTagName('refentry')->length !== 0) {
        list($ids, $doc) = format_function_doc($xml);
    } elseif ($xml->getElementsByTagName('classref')->length !== 0) {
        list($ids, $doc) = format_class_doc($xml);
    } else {
        $ids = array();
        $doc = null;
    }

    // A page may document several names; each one gets the same text.
    foreach ($ids as $id) {
        $docs[$id] = $doc;
    }
}
// Always start from a fresh database file.
if (is_file($argv[2])) {
    unlink($argv[2]);
}

$db = new PDO('sqlite:' . $argv[2]);

// Fail loudly rather than silently producing an incomplete database.
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$db->exec('CREATE TABLE php_manual (id char(256) PRIMARY KEY, doc TEXT)');
$cmd = $db->prepare('INSERT INTO php_manual (id, doc) VALUES (?, ?)');

// One transaction for all rows: otherwise every INSERT is committed to disk
// on its own.
$db->beginTransaction();
foreach ($docs as $id => $doc) {
    $cmd->execute(array($id, $doc));
}
$db->commit();
|