Search
Close this search box.

ThinkPHP 5.1 QueryList 采集后数据处理

<?php
namespace app\index\controller;
use QL\QueryList;
/**
 * Demo controller: the stock ThinkPHP welcome actions plus two QueryList
 * scraping examples that print what they collect.
 */
class Index
{
    /**
     * Default action: returns the static ThinkPHP 5.1 welcome page markup.
     *
     * @return string
     */
    public function index()
    {
        return '<style type="text/css">*{ padding: 0; margin: 0; } div{ padding: 4px 48px;} a{color:#2E5CD5;cursor: pointer;text-decoration: none} a:hover{text-decoration:underline; } body{ background: #fff; font-family: "Century Gothic","Microsoft yahei"; color: #333;font-size:18px;} h1{ font-size: 100px; font-weight: normal; margin-bottom: 12px; } p{ line-height: 1.6em; font-size: 42px }</style><div style="padding: 24px 48px;"> <h1>:) </h1><p> ThinkPHP V5.1<br/><span style="font-size:30px">12载初心不改(2006-2018) - 你值得信赖的PHP框架</span></p></div><script type="text/javascript" src="https://tajs.qq.com/stats?sId=64890268" charset="UTF-8"></script><script type="text/javascript" src="https://e.topthink.com/Public/static/client.js"></script><think id="eab4b9f840753f8e7"></think>';
    }

    /**
     * Greets the given name.
     *
     * @param string $name Name appended to the greeting.
     * @return string
     */
    public function hello($name = 'ThinkPHP5')
    {
        return 'hello,' . $name;
    }

    /**
     * Scrapes the list page, follows every entry link to collect the
     * entry's content HTML, then regroups the per-field columns into
     * per-entry rows and prints them.
     *
     * Each printed row holds, in order: title text, link href, time text,
     * content text and the list of content HTML fragments from the linked
     * page. The method prints and then terminates the script.
     */
    public function test()
    {
        $list = $this->fetchDocument('https://www.1001shema.com/');

        // One column per field. ->all() unwraps each result collection into
        // a plain array (same call testone() uses on its result).
        $columns = [
            $list->find('.entry-title')->texts()->all(),
            $list->find('.entry-title a')->attrs('href')->all(),
            $list->find('time')->texts()->all(),
            $list->find('.entry-content')->texts()->all(),
        ];

        // Initialised up front so an empty link list yields an empty column
        // instead of an undefined variable.
        $details = [];
        foreach ($columns[1] as $href) {
            if ($href === null || $href === '') {
                // No usable link: keep the slot so later rows stay aligned.
                $details[] = [];
                continue;
            }
            $details[] = $this->fetchDocument($href)->find('.entry-content')->htmls()->all();
        }
        $columns[] = $details;

        // Transpose: row $i is made of element $i of every column. Columns
        // that have no element $i are simply skipped by array_column().
        $rows = [];
        $total = count($columns[0]);
        for ($i = 0; $i < $total; $i++) {
            $rows[] = array_column($columns, $i);
        }

        print_r('<pre>');
        print_r($rows);
        die;
    }

    /**
     * Scrapes the home page with a declarative rule set (title, link and
     * content) and prints the collected data.
     */
    public function testone()
    {
        $url = 'https://www.1001shema.com/';
        // Scraping rules: field name => [selector, attribute to read].
        $rules = [
            // Entry title text.
            'title' => ['.entry-title', 'text'],
            // Entry link (href of the anchor inside the title).
            'href' => ['.entry-title a', 'href'],
            // Entry content text.
            'content' => ['.entry-content', 'text']
        ];
        $rt = QueryList::get($url)->rules($rules)->query()->getData();
        print_r('<pre>');
        print_r($rt->all());
    }

    /**
     * Fetches a URL and returns a QueryList document built from the HTML
     * that the fetch produced.
     *
     * NOTE(review): re-parsing the fetched HTML looks redundant; it is kept
     * so the selectors run against the same document as before — confirm
     * whether the object returned by get() can be queried directly.
     *
     * @param string $url Page to download.
     * @return QueryList
     */
    private function fetchDocument($url)
    {
        $fetched = QueryList::get($url);

        return QueryList::html($fetched->getHtml());
    }

}

发表评论

Optimized by WPJAM Basic