drag-create-api/app/api/controller/Crawler/Crawler.php
2023-06-25 08:51:24 +08:00

113 lines
3.2 KiB
PHP

<?php
namespace app\api\controller\Crawler;
use app\Request;
use app\common\model\Flow\Flow as ModelFlow;
use think\db\Where;
use think\facade\Validate;
use app\BaseController;
use app\exception\ErrorMsg;
use think\facade\Filesystem;
use app\common\arw\adjfut\src\UploadFile;
use app\common\exception\Tool;
use Goutte\Client;
class Crawler extends BaseController
{
    /** Root of the crawled site; relative image paths are resolved against it. */
    private const BASE_URL = 'http://www.zs521.com/';

    /** Listing page whose product entries are scraped. */
    private const LIST_URL = 'http://www.zs521.com/article_categories/guangzhou/articles';

    /** Sub-directory of the public "uploads" folder that receives downloaded images. */
    private const IMAGE_DIR = 'productImg';

    /**
     * Whether Test() also downloads the product images to disk.
     *
     * Off by default: the action previously returned the titles before its
     * download loop could run, so `false` keeps the observable behaviour
     * (no network downloads, no files written) unchanged.
     */
    private const DOWNLOAD_IMAGES = false;

    /**
     * Crawler test: fetches the listing page and collects the text content of
     * every element matching `.productsBody .productItem .products`.
     *
     * @param Request $request Current request (not read by this action).
     *
     * @return mixed The list of collected text contents. When image downloading
     *               is enabled and the target directory cannot be prepared,
     *               whatever Tool::mkdir() returned instead.
     */
    public function Test(Request $request)
    {
        // Create the Goutte client and request the listing page.
        $client = new Client();
        $crawler = $client->request('GET', self::LIST_URL);

        // Iterate the matched nodes directly; an empty match simply yields an
        // empty list instead of raising, unlike text()/attr() on an empty set.
        $titleArr = [];
        foreach ($crawler->filter('.productsBody .productItem .products') as $title) {
            $titleArr[] = $title->textContent;
        }

        if (self::DOWNLOAD_IMAGES) {
            // public_path('uploads') is concatenated directly with the folder
            // name, as before; only the trailing separator is made portable.
            $fileSaveLocation = public_path('uploads') . self::IMAGE_DIR . DIRECTORY_SEPARATOR;

            // The directory is the same for every image, so prepare it once.
            if (true !== $res = Tool::mkdir($fileSaveLocation)) {
                return $res;
            }

            $imgs = $crawler->filter('.productsBody .productItem .products .productImg');
            $this->downloadImages($imgs, $fileSaveLocation);
        }

        return $titleArr;
    }

    /**
     * Downloads every image referenced by the `lazyload` attribute of the given
     * nodes into $fileSaveLocation.
     *
     * Nodes without a usable URL or file name, and images that cannot be read
     * or written, are skipped so one bad entry does not abort the whole run.
     *
     * @param iterable $imgs             DOM elements carrying a `lazyload` attribute.
     * @param string   $fileSaveLocation Existing target directory, with trailing separator.
     *
     * @return array Public URLs ("/uploads/<dir>/<file>") of the images that were saved.
     */
    private function downloadImages(iterable $imgs, string $fileSaveLocation): array
    {
        $savedUrls = [];

        foreach ($imgs as $img) {
            $imgUrl = $this->resolveImageUrl((string) $img->getAttribute('lazyload'));
            if ($imgUrl === null) {
                continue;
            }

            // Take the name from the URL path only, so a query string does not
            // end up in the file name.
            $fileName = basename((string) parse_url($imgUrl, PHP_URL_PATH));
            if ($fileName === '' || $fileName === '.' || $fileName === '..') {
                continue;
            }

            // Fetch the binary image data.
            $imageContent = file_get_contents($imgUrl);
            if ($imageContent === false) {
                continue;
            }

            // Persist the image to the local file system.
            if (file_put_contents($fileSaveLocation . $fileName, $imageContent) === false) {
                continue;
            }

            $savedUrls[] = '/uploads/' . self::IMAGE_DIR . '/' . $fileName;
        }

        return $savedUrls;
    }

    /**
     * Turns a `lazyload` attribute value into an absolute URL.
     *
     * @param string $src Raw attribute value.
     *
     * @return string|null Absolute URL, or null when the attribute is empty.
     */
    private function resolveImageUrl(string $src): ?string
    {
        $src = trim($src);
        if ($src === '') {
            // Without this guard the site root itself would be downloaded.
            return null;
        }

        if (strpos($src, 'http') === 0) {
            return $src;
        }

        if (strpos($src, '//') === 0) {
            // Protocol-relative URL: reuse the scheme of the crawled site.
            return 'http:' . $src;
        }

        // Join with exactly one slash regardless of how $src starts.
        return rtrim(self::BASE_URL, '/') . '/' . ltrim($src, '/');
    }
}