houxiaohua
/
queryList


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
							<?php
namespace App\Service;

use QL\QueryList;
use App\Log;

/**
 * Fetches article details (title, content, ...) for a link.
 *
 * Links hosted on mp.weixin.qq.com are delegated to WxCrawler; every other
 * host goes through a generic QueryList-based scraper.
 */
class ArticleService
{
    /**
     * Fetch the article details behind a link.
     *
     * @param string $link Absolute http(s) URL of the article.
     * @param mixed  $data Output parameter. On success it is set to an array that
     *                     always contains the keys title, digest, content_url,
     *                     cover and content, overlaid with whatever the crawler
     *                     returned. Left untouched when an error code is returned.
     *
     * @return int 0 on success, 1001 when the link has no host or is not an
     *             http/https URL.
     */
    public static function getDetailByLink($link, &$data)
    {
        $linkInfo = parse_url($link);
        // parse_url() yields false for seriously malformed URLs.
        if (!is_array($linkInfo)) {
            return 1001;
        }

        // Host names are case-insensitive, so normalise before comparing.
        $host = isset($linkInfo['host']) ? strtolower($linkInfo['host']) : '';
        if ($host === '') {
            return 1001;
        }

        // The link ends up in file_get_contents(); without this check stream
        // wrappers such as php://filter/... or file://localhost/... (which do
        // parse with a "host") would let a caller read local resources.
        $scheme = isset($linkInfo['scheme']) ? strtolower($linkInfo['scheme']) : '';
        if (!in_array($scheme, ['http', 'https'], true)) {
            return 1001;
        }

        // Defaults guaranteeing a stable result shape for callers.
        $basicInfo = [
            'title' => '',
            'digest' => '',
            'content_url' => $link,
            'cover' => '',
            'content' => '',
        ];

        if ($host === 'mp.weixin.qq.com') {
            $fetched = self::weChatLink($link);
        } else {
            $fetched = self::newsDetail($link);
        }

        // A crawler that failed may hand back a non-array (null/false);
        // array_merge() would choke on that, so fall back to the defaults.
        if (!is_array($fetched)) {
            $fetched = [];
        }

        $data = array_merge($basicInfo, $fetched);
        return 0;
    }

    /**
     * Fetch the details of a WeChat official-account article.
     *
     * @param string $link Article URL on mp.weixin.qq.com.
     *
     * @return mixed Whatever WxCrawler::crawByUrl() returns (presumably an array
     *               of article fields; verify against WxCrawler).
     */
    public static function weChatLink($link)
    {
        $crawler = new WxCrawler();
        return $crawler->crawByUrl($link);
    }

    /**
     * Generic news-article scraper.
     *
     * Downloads the page and takes the inner HTML of the parent element of the
     * first <p> as the content, and the text of <title> as the title.
     *
     * NOTE: the page is used as downloaded; pages served in a non-UTF-8
     * encoding (e.g. GBK) are not converted.
     *
     * @param string $link URL of the page to scrape.
     *
     * @return array Array with the keys "content" and "title"; both are empty
     *               strings when the page could not be downloaded.
     */
    public static function newsDetail($link)
    {
        $html = file_get_contents($link);
        if ($html === false) {
            // Download failed: keep the result shape instead of feeding
            // `false` into the HTML parser.
            return [
                'content' => '',
                'title' => '',
            ];
        }

        $ql = QueryList::html($html);
        // Inner HTML of the element that wraps the first paragraph.
        $content = $ql->find('p:first')->parent()->html();
        $title = $ql->find('title')->text();

        return [
            'content' => $content,
            'title' => $title,
        ];
    }
}