113 lines
3.2 KiB
PHP
113 lines
3.2 KiB
PHP
<?php
|
|
|
|
namespace app\api\controller\Crawler;
|
|
|
|
use app\Request;
|
|
use app\common\model\Flow\Flow as ModelFlow;
|
|
use think\db\Where;
|
|
use think\facade\Validate;
|
|
use app\BaseController;
|
|
use app\exception\ErrorMsg;
|
|
use think\facade\Filesystem;
|
|
use app\common\arw\adjfut\src\UploadFile;
|
|
use app\common\exception\Tool;
|
|
use Goutte\Client;
|
|
|
|
class Crawler extends BaseController
{
    /**
     * Listing page fetched by the crawler test action.
     */
    private const LIST_URL = 'http://www.zs521.com/article_categories/guangzhou/articles';

    /**
     * CSS selector matching one product entry on the listing page.
     */
    private const PRODUCT_SELECTOR = '.productsBody .productItem .products';

    /**
     * Crawler test action.
     *
     * Issues a GET request for the listing page through a Goutte client and
     * collects the raw text content of every element matched by the product
     * selector.
     *
     * Only the matched nodes are iterated; no single-node accessor such as
     * text() or attr() is called, so a page with no matching elements yields
     * an empty array instead of raising an "empty node list" exception.
     *
     * @param Request $request Current request; not read by this action.
     *
     * @return array List of strings, one `textContent` value per matched element;
     *               empty when the selector matches nothing.
     */
    public function Test(Request $request): array
    {
        // Create the Goutte client and fetch the listing page.
        $client = new Client();
        $crawler = $client->request('GET', self::LIST_URL);

        // Walk the matched DOM nodes and keep their raw (un-normalised) text.
        $titleArr = [];
        foreach ($crawler->filter(self::PRODUCT_SELECTOR) as $node) {
            $titleArr[] = $node->textContent;
        }

        return $titleArr;
    }
}
|