写爬虫时,经常遇到网页数据是动态渲染的情况:抓取时数据还没有渲染出来,也无法知道数据何时全部渲染完成,结果抓到的只是未渲染的html,或者根本抓不到数据。还好找到了可以解决这个问题的第三方包,这里用王者荣耀官网来做示例,最终数据展示可在如下小程序中看到:
// 基本功能包
composer require jaeger/querylist
// JS动态渲染网页爬取插件(抓取动态渲染网页还需要下载工具:https://phantomjs.org/download.html)
composer require jaeger/querylist-phantomjs
// Minimal usage: load a JavaScript-rendered page through the PhantomJs plugin, then query the HTML.
$url = 'www.litblc.com'; // address of the page to scrape
$phantomPath = 'E:/githubShyzhen/FakePHP/phantomjs-2.1.1-windows/bin/phantomjs.exe'; // path to the downloaded PhantomJS executable
$ql = QueryList::getInstance();
// Register the PhantomJs plugin, handing it the executable path.
$ql->use(PhantomJs::class, $phantomPath);
// Fetch the page via the plugin's browser() and take the resulting HTML.
$html = $ql->browser($url)->getHtml();
// Build a queryable document from that HTML.
$dom = QueryList::html($html);
// Read the text of the element(s) matching .title-name.
// NOTE(review): the return value is neither stored nor printed in this snippet.
$dom->find('.title-name')->text();
...
/**
 * Entry point: run the scraper for a single, hard-coded hero id (105).
 */
public function spader()
{
    $heroId = 105;
    $this->handleSpader($heroId);
}
/**
 * Scrape one hero detail page from pvp.qq.com and print an INSERT statement
 * for the `wangzhe_hero_tutorial` table.
 *
 * The page is loaded through the PhantomJs plugin, then three JSON columns are
 * built from the DOM: the suggested "ming" entries, the first equipment list,
 * and the two heroes shown in the second `.hero-info` box.
 *
 * This method echoes the SQL and then terminates the script with `exit`.
 *
 * @param int|string $id Hero id; used in the detail-page URL and as `hero_id`.
 */
public function handleSpader($id)
{
    $url = 'https://pvp.qq.com/web201605/herodetail/' . $id . '.shtml';

    $ql = QueryList::getInstance();
    $ql->use(PhantomJs::class, 'E:/githubShyzhen/FakePHP/phantomjs-2.1.1-windows/bin/phantomjs.exe');
    $html = $ql->browser($url)->getHtml();
    $dom = QueryList::html($html);

    $mingTips = $dom->find('.sugg-tips')->text();
    $equipTips = $dom->find('.equip-tips')->eq(0)->text();

    // ming JSON: ids come from the "a|b|c" data-ming attribute, details from the <li> items.
    // The cast keeps explode() from receiving null when the attribute is absent.
    $mingIds = explode('|', (string) $dom->find('.sugg-u1')->attr('data-ming'));
    $mingRes = [];
    for ($i = 0; $i < 3; $i++) {
        $item = $dom->find('.sugg-u1 li')->eq($i);

        // <p> 0 is the name, <p> 1..3 are intro lines. Blank lines are dropped
        // so the joined string never contains empty "||" segments.
        $introLines = [];
        for ($j = 1; $j <= 3; $j++) {
            $line = (string) $item->find('p')->eq($j)->text();
            if ($line !== '') {
                $introLines[] = $line;
            }
        }

        $mingRes[] = [
            // A missing id becomes null instead of raising an undefined-offset warning.
            'id' => $mingIds[$i] ?? null,
            'name' => $item->find('p')->eq(0)->text(),
            'intro' => implode('|', $introLines),
        ];
    }
    $mingJson = json_encode($mingRes, JSON_UNESCAPED_UNICODE);

    // equipment JSON: six items from the first .equip-list, ids from its data-item attribute.
    $equipmentDom = $dom->find('.equip-list')->eq(0);
    $eIds = explode('|', (string) $equipmentDom->attr('data-item'));
    $eRes = [];
    for ($i = 0; $i < 6; $i++) {
        $eRes[] = [
            'id' => $eIds[$i] ?? null,
            'name' => $equipmentDom->find('#Jname')->eq($i)->text(),
            'intro' => '',
        ];
    }
    $eJson = json_encode($eRes, JSON_UNESCAPED_UNICODE);

    // counterHero JSON: the hero id is the image file name without its extension.
    $heroDom = $dom->find('.hero-info-box')->find('.hero-info')->eq(1);
    $heroRes = [];
    for ($i = 0; $i < 2; $i++) {
        $imgSrc = (string) $heroDom->find('img')->eq($i)->src;
        $counterId = pathinfo($imgSrc, PATHINFO_FILENAME);

        $heroRes[] = [
            'id' => $counterId,
            'name' => $this->handleHeroName($counterId),
            'intro' => $heroDom->find('.hero-list-desc')->find('p')->eq($i)->text(),
        ];
    }
    $heroJson = json_encode($heroRes, JSON_UNESCAPED_UNICODE);

    // Escape backslashes and single quotes so scraped text (and the backslash
    // escapes inside the JSON) survive as single-quoted SQL string literals.
    // NOTE(review): assumes MySQL with backslash escapes enabled (the default);
    // if the statement is ever executed from PHP, use a prepared statement instead.
    $quote = static function ($value) {
        return strtr((string) $value, ['\\' => '\\\\', "'" => "\\'"]);
    };
    $values = array_map($quote, [$id, $mingJson, $mingTips, $eJson, $equipTips, $heroJson]);

    // Build the SQL; created_at / updated_at stay the fixed timestamps the original used.
    $template = "INSERT INTO `wangzhe_hero_tutorial` (`hero_id`,`ming`,`ming_tips`,`equipment`,`equipment_tips`,`counter_hero`, `created_at`, `updated_at`) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '2022-03-29 16:29:53', '2022-03-29 16:29:53');";
    $sql = vsprintf($template, $values);

    echo $sql;
    exit;
}
/**
 * Look up a hero's display name by id in the built-in id => name map.
 *
 * @param int|string $heroId Hero id, e.g. 105 or '105'.
 * @return string|null The hero name, or null when the id is not in the map.
 */
public function handleHeroName($heroId)
{
    // Only the ids listed in this JSON object are known.
    $json = '{"105": "廉颇","106": "小乔"}';
    $heroArr = json_decode($json, true);

    // Unknown ids yield null without an "undefined array key" warning.
    return $heroArr[$heroId] ?? null;
}