1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- <?php
- namespace App\Service;
- use QL\QueryList;
- use App\Log;
/**
 * Fetches article details (title, body HTML, ...) for a given link.
 *
 * WeChat official-account links are delegated to WxCrawler; every other host
 * goes through a generic HTML scrape based on QueryList.
 */
class ArticleService
{
    /**
     * Get the article detail for a link.
     *
     * The result written to $data always contains the keys `title`, `digest`,
     * `content_url`, `cover` and `content`; values supplied by the crawler
     * override the empty defaults.
     *
     * @param string $link Article URL.
     * @param mixed  $data Output parameter; receives the detail array on success.
     *
     * @return int 0 on success, 1001 when the link has no usable host.
     */
    public static function getDetailByLink($link, &$data)
    {
        $host = parse_url($link, PHP_URL_HOST);
        if (empty($host)) {
            return 1001;
        }

        // Defaults guarantee a stable set of keys whatever the crawler returns.
        $basicInfo = [
            'title' => '',
            'digest' => '',
            'content_url' => $link,
            'cover' => '',
            'content' => '',
        ];

        switch ($host) {
            case 'mp.weixin.qq.com':
                $detail = self::weChatLink($link);
                break;
            default:
                $detail = self::newsDetail($link);
                break;
        }

        // A crawler that fails may hand back a non-array; array_merge() would
        // raise a TypeError on PHP 8 in that case, so fall back to the defaults.
        if (!is_array($detail)) {
            $detail = [];
        }

        $data = array_merge($basicInfo, $detail);

        return 0;
    }

    /**
     * Crawl a WeChat official-account article.
     *
     * @param string $link Article URL on mp.weixin.qq.com.
     *
     * @return mixed Whatever WxCrawler::crawByUrl() returns for the link.
     */
    public static function weChatLink($link)
    {
        $crawler = new WxCrawler();

        return $crawler->crawByUrl($link);
    }

    /**
     * Generic news-page scrape.
     *
     * Takes the inner HTML of the parent of the first <p> element as the
     * article body and the text of <title> as the article title. The page is
     * passed to QueryList as downloaded; no charset conversion is performed.
     *
     * @param string $link Page URL; only http and https links are fetched.
     *
     * @return array Array with the keys `content` and `title`; both are empty
     *               strings when the link is not http(s) or the download fails.
     */
    public static function newsDetail($link)
    {
        $emptyResult = [
            'content' => '',
            'title' => '',
        ];

        // file_get_contents() honours every registered stream wrapper
        // (file://, phar://, php://, ...). The link comes from the caller, so
        // only plain web URLs are allowed through.
        if (!self::isHttpLink($link)) {
            return $emptyResult;
        }

        $html = file_get_contents($link);
        if ($html === false) {
            // Download failed; there is nothing to parse.
            return $emptyResult;
        }

        $ql = QueryList::html($html);

        // Body: inner HTML of the element that contains the first <p>.
        $content = $ql->find('p:first')->parent()->html();
        $title = $ql->find('title')->text();

        return [
            'content' => $content,
            'title' => $title,
        ];
    }

    /**
     * Tell whether a link uses the http or https scheme (case-insensitive).
     *
     * @param string $link Link to inspect.
     *
     * @return bool
     */
    private static function isHttpLink($link)
    {
        $scheme = parse_url($link, PHP_URL_SCHEME);

        return is_string($scheme)
            && in_array(strtolower($scheme), ['http', 'https'], true);
    }
}
|