#!/usr/bin/env php ': $inTag = false; default: } if ($inTag) { $tagWidth++; } $i++; if (!$inTag && ($i - $tagWidth > $width)) { $lastSpace = $lastSpace ?: $width; $return[] = trim(substr($text, 0, $lastSpace)); $text = substr($text, $lastSpace); $len = strlen($text); $i = $tagWidth = 0; } } while ($i < $len); $return[] = trim($text); return implode("\n", $return); }

// NOTE(review): the line above is the tail of htmlwrap(). Its beginning, the
// opening PHP tag and the WRAP_WIDTH definition are not visible in this copy
// of the file, so that fragment is left exactly as found and must be restored
// from the original before the script can run.

/**
 * Collect the text of every "para" element below the given element.
 *
 * Each paragraph is serialised child by child, so inline markup is kept.
 * Tables and children that consist solely of an entity reference are left
 * out, and a line break followed by indentation is folded into one space.
 *
 * @param DOMElement $element Element whose "para" descendants are read.
 *
 * @return string The non-empty paragraphs, separated by blank lines.
 */
function extract_paragraphs($element)
{
    $paragraphs = array();

    foreach ($element->getElementsByTagName('para') as $p) {
        $text = '';

        foreach ($p->childNodes as $child) {
            // @todo figure out if there's something we can do with tables.
            if ($child instanceof DOMElement && $child->tagName === 'table') {
                continue;
            }

            // Skip children that are nothing but an entity reference
            // (e.g. "&some.entity;"); they carry no text of their own.
            if (preg_match('{^\s*&[a-z][a-z\.]+;\s*$}', $child->textContent)) {
                continue;
            }

            $text .= $child->ownerDocument->saveXML($child);
        }

        $text = trim(preg_replace('{\n[ \t]+}', ' ', $text));

        // Compare against '' explicitly: a paragraph whose whole text is "0"
        // is falsy in PHP and would otherwise be dropped.
        if ($text !== '') {
            $paragraphs[] = $text;
        }
    }

    return implode("\n\n", $paragraphs);
}

/**
 * Render a doc array as the plain-text block stored in the manual database.
 *
 * Recognised keys, all optional: 'description' (string), 'params' (list of
 * arrays with 'type', 'name' and 'description'), 'return' (string) and
 * 'return_type' (string). Sections are emitted in that order, separated by
 * blank lines.
 *
 * @param array $doc Doc array as built by format_function_doc().
 *
 * @return string The formatted text, without a trailing newline.
 */
function format_doc($doc)
{
    $chunks = array();

    if (!empty($doc['description'])) {
        $chunks[] = 'Description:';
        // The wrap width reserves two columns, so indent by exactly two.
        $chunks[] = indent_text(htmlwrap(thunk_tags($doc['description']), WRAP_WIDTH - 2), '  ');
        $chunks[] = '';
    }

    if (!empty($doc['params'])) {
        $chunks[] = 'Param:';

        // find_type() yields null for parameters it cannot resolve, hence the cast.
        $typeMax = max(array_map(function ($param) {
            return strlen((string) $param['type']);
        }, $doc['params']));

        $max = max(array_map(function ($param) {
            return strlen($param['name']);
        }, $doc['params']));

        // Three two-space gutters plus both padded columns: the hanging
        // indent below (+ 6) has to equal the width of this prefix so that
        // wrapped description lines align under the first one.
        $template  = '  %-' . $typeMax . 's  %-' . $max . 's  %s';
        $indent    = str_repeat(' ', $typeMax + $max + 6);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        foreach ($doc['params'] as $param) {
            $desc     = indent_text(htmlwrap(thunk_tags($param['description']), $wrapWidth), $indent, false);
            $chunks[] = sprintf($template, $param['type'], $param['name'], $desc);
        }

        $chunks[] = '';
    }

    if (isset($doc['return']) || isset($doc['return_type'])) {
        $chunks[] = 'Return:';

        $type = isset($doc['return_type']) ? $doc['return_type'] : 'unknown';
        $desc = isset($doc['return']) ? $doc['return'] : '';

        // Two gutters of two spaces around the type, matching the format below.
        $indent    = str_repeat(' ', strlen($type) + 4);
        $wrapWidth = WRAP_WIDTH - strlen($indent);

        if (!empty($desc)) {
            $desc = indent_text(htmlwrap(thunk_tags($desc), $wrapWidth), $indent, false);
        }

        $chunks[] = sprintf('  %s  %s', $type, $desc);
        $chunks[] = '';
    }

    array_pop($chunks); // get rid of the trailing blank separator

    return implode("\n", $chunks);
}

/**
 * Convert serialised manual markup into the small tag set used in the output.
 *
 * Tag names are rewritten by their "name>" suffix, which covers opening and
 * closing tags alike. Every tag outside the resulting set is stripped, and
 * the ampersand escaping applied before XML parsing is undone.
 *
 * @param string $text Markup as produced by extract_paragraphs().
 *
 * @return string Text containing only strong, return and info tags.
 */
function thunk_tags($text)
{
    $tagMap = array(
        'parameter>' => 'strong>',
        'function>'  => 'strong>',
        'literal>'   => 'return>',
        'type>'      => 'info>',
        'constant>'  => 'info>',
    );

    // The loader escapes every "&" before parsing, so serialised text carries
    // "&amp;" where the manual source had "&". strtr() tries the longest key
    // first, so the specific entities win over the bare "&amp;".
    $andBack = array(
        '&amp;'       => '&',
        '&amp;true;'  => 'true',
        '&amp;false;' => 'false',
        '&amp;null;'  => 'null',
    );

    // Keep exactly the tags $tagMap maps to; an empty allow-list would strip
    // them again and turn the renaming above into a no-op.
    return strtr(strip_tags(strtr($text, $tagMap), '<strong><return><info>'), $andBack);
}

/**
 * Indent every line of a block of text.
 *
 * @param string $text    Text to indent.
 * @param string $indent  Prefix inserted after each newline.
 * @param bool   $leading Whether the first line is prefixed as well.
 *
 * @return string The indented text.
 */
function indent_text($text, $indent = ' ', $leading = true)
{
    return ($leading ? $indent : '') . str_replace("\n", "\n" . $indent, $text);
}

/**
 * Look up the declared type of a parameter in a page's method synopsis.
 *
 * @param DOMDocument $xml       Parsed manual page.
 * @param string      $paramName Parameter name as written in the synopsis.
 *
 * @return string|null Text of the matching "type" element, or null when no
 *                     "methodparam" declares that parameter.
 */
function find_type($xml, $paramName)
{
    foreach ($xml->getElementsByTagName('methodparam') as $param) {
        $type      = $param->getElementsByTagName('type')->item(0);
        $parameter = $param->getElementsByTagName('parameter')->item(0);

        if ($type && $parameter && $paramName === $parameter->textContent) {
            return $type->textContent;
        }
    }

    return null;
}

/**
 * Build the database entry for a function reference page.
 *
 * Reads the "refsect1" sections with role description, returnvalues and
 * parameters, takes the return type from the first "type" child of the
 * method synopsis, and puts the wrapped "refpurpose" text in front of the
 * description. Every "refname" on the page becomes an id for the entry.
 *
 * @param DOMDocument $xml Parsed manual page.
 *
 * @return array Two elements: the list of ids and the formatted doc string.
 */
function format_function_doc($xml)
{
    $doc = array();

    foreach ($xml->getElementsByTagName('refsect1') as $refsect1) {
        switch ($refsect1->getAttribute('role')) {
            case 'description':
                $doc['description'] = extract_paragraphs($refsect1);

                if ($synopsis = $refsect1->getElementsByTagName('methodsynopsis')->item(0)) {
                    // The first direct "type" child is the return type; the
                    // parameter types live deeper, inside "methodparam".
                    foreach ($synopsis->childNodes as $node) {
                        if ($node instanceof DOMElement && $node->tagName === 'type') {
                            $doc['return_type'] = $node->textContent;
                            break;
                        }
                    }
                }
                break;

            case 'returnvalues':
                $doc['return'] = extract_paragraphs($refsect1);
                break;

            case 'parameters':
                $params = array();

                foreach ($refsect1->getElementsByTagName('varlistentry') as $var) {
                    if ($name = $var->getElementsByTagName('parameter')->item(0)) {
                        $params[] = array(
                            'name'        => '$' . $name->textContent,
                            'type'        => find_type($xml, $name->textContent),
                            'description' => extract_paragraphs($var),
                        );
                    }
                }

                $doc['params'] = $params;
                break;
        }
    }

    // and the purpose
    if ($purpose = $xml->getElementsByTagName('refpurpose')->item(0)) {
        $desc = htmlwrap($purpose->textContent);
        if (isset($doc['description'])) {
            $desc .= "\n\n" . $doc['description'];
        }

        $doc['description'] = trim($desc);
    }

    $ids = array();
    foreach ($xml->getElementsByTagName('refname') as $ref) {
        $ids[] = $ref->textContent;
    }

    return array($ids, format_doc($doc));
}

/**
 * Placeholder for class reference pages.
 *
 * @param DOMDocument $xml Parsed manual page (currently unused).
 *
 * @return array An empty id list and a null doc, so nothing is stored.
 */
function format_class_doc($xml)
{
    // @todo implement this
    return array(array(), null);
}

// Walk the manual tree given as the first argument: skip dot entries, keep
// directories and .xml files, and leave out entity definition files.
$dir    = new RecursiveDirectoryIterator($argv[1]);
$filter = new RecursiveCallbackFilterIterator($dir, function ($current, $key, $iterator) {
    return $current->getFilename()[0] !== '.'
        && ($current->isDir() || $current->getExtension() === 'xml')
        && strpos($current->getFilename(), 'entities.') !== 0
        && $current->getFilename() !== 'pdo_4d'; // Temporarily blacklist this one, the docs are weird.
});
$iterator = new RecursiveIteratorIterator($filter);

// Collect libxml diagnostics internally instead of silencing them with "@".
$previousLibxmlSetting = libxml_use_internal_errors(true);

$docs = array();
foreach ($iterator as $file) {
    $contents = file_get_contents($file);

    // Escape every "&" so that manual entities which are not defined in the
    // file itself reach the DOM as literal text instead of failing the parse.
    $xmlstr = ($contents === false) ? '' : str_replace('&', '&amp;', $contents);

    $xml = new DOMDocument();
    $xml->preserveWhiteSpace = false;

    // loadXML() rejects an empty source outright, so treat an unreadable or
    // empty file as a parse error without calling it.
    $loaded = $xmlstr !== '' && $xml->loadXML($xmlstr);
    libxml_clear_errors();

    if (!$loaded) {
        echo "XML Parse Error: $file\n";
        continue;
    }

    if ($xml->getElementsByTagName('refentry')->length !== 0) {
        list($ids, $doc) = format_function_doc($xml);
    } elseif ($xml->getElementsByTagName('classref')->length !== 0) {
        list($ids, $doc) = format_class_doc($xml);
    } else {
        $ids = array();
        $doc = null;
    }

    foreach ($ids as $id) {
        $docs[$id] = $doc;
    }
}

libxml_use_internal_errors($previousLibxmlSetting);

// Rebuild the output database (second argument) from scratch.
if (is_file($argv[2])) {
    unlink($argv[2]);
}

$db = new PDO('sqlite:' . $argv[2]);
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$db->exec('CREATE TABLE php_manual (id char(256) PRIMARY KEY, doc TEXT)');

$cmd = $db->prepare('INSERT INTO php_manual (id, doc) VALUES (?, ?)');

// One transaction for all rows: without it every insert is committed on its own.
$db->beginTransaction();
try {
    foreach ($docs as $id => $doc) {
        $cmd->execute(array($id, $doc));
    }

    $db->commit();
} catch (Exception $e) {
    $db->rollBack();
    throw $e;
}