130 lines
3.7 KiB
PHP
130 lines
3.7 KiB
PHP
<?php
|
|
|
|
namespace app\api\controller\Crawler;
|
|
|
|
use app\Request;
|
|
use app\common\model\Flow\Flow as ModelFlow;
|
|
use app\common\model\Home\HomeEnv as ModelHomeEnv;
|
|
use app\common\model\Home\HomeWorks as ModelHomeWorks;
|
|
use think\db\Where;
|
|
use think\facade\Validate;
|
|
use app\BaseController;
|
|
use app\exception\ErrorMsg;
|
|
use think\facade\Filesystem;
|
|
use app\common\arw\adjfut\src\UploadFile;
|
|
use app\common\exception\Tool;
|
|
use Goutte\Client;
|
|
|
|
class CrawlerHoude extends BaseController
{
    /**
     * File extensions the crawler is willing to write into the public uploads
     * directory. The file name comes from a remote page, so anything outside
     * this list (for example ".php") is skipped instead of being stored.
     */
    private const ALLOWED_IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp'];

    /**
     * Crawler test.
     *
     * Fetches a fixed article listing page, selects every `.grid img` element,
     * downloads each image into `uploads/HomeWorksImg/<Ymd>/` and creates one
     * HomeWorks record per stored image.
     *
     * Images that cannot be resolved, downloaded or written are skipped; they
     * do not abort the run and do not produce a database record.
     *
     * @param Request $request Current request (not read by this action).
     *
     * @return mixed An empty array on completion, or whatever non-true value
     *               Tool::mkdir() returned when a target directory could not
     *               be prepared.
     */
    public function Test(Request $request)
    {
        // Site root used to resolve relative image paths, and the page to crawl.
        $base_url = 'http://www.zs521.com/';
        $url = 'http://www.zs521.com/article_categories/guangzhou/articles';

        // Request the page and pick the image elements to mirror.
        $client = new Client();
        $crawler = $client->request('GET', $url);
        $imgs = $crawler->filter('.grid img');

        // Kept for interface compatibility: the action has always returned
        // this (empty) array on completion.
        $titleArr = [];

        // Nothing matched: return before touching the filesystem, exactly as
        // the per-image loop would have done with zero iterations.
        if ($imgs->count() === 0) {
            return $titleArr;
        }

        $dirName = 'HomeWorksImg';
        // Computed once so the on-disk folder and the stored URL always agree,
        // even if the crawl runs across midnight.
        $dateDir = date('Ymd');

        // Target directories are the same for every image, so prepare them once.
        // NOTE(review): public_path('uploads') is assumed to end with a
        // directory separator, as the original concatenation implied - confirm.
        $fileSaveLocation = public_path('uploads') . $dirName . DIRECTORY_SEPARATOR;
        if (true !== $res = Tool::mkdir($fileSaveLocation)) {
            return $res;
        }

        $saveFileSaveLocation = $fileSaveLocation . $dateDir . DIRECTORY_SEPARATOR;
        if (true !== $res = Tool::mkdir($saveFileSaveLocation)) {
            return $res;
        }

        foreach ($imgs as $img) {
            $img_url = $this->resolveImageUrl($base_url, $img->getAttribute('src'));
            if ($img_url === null) {
                continue;
            }

            $fileName = $this->localFileName($img_url);
            if ($fileName === null) {
                continue;
            }

            $imageContent = $this->downloadImage($img_url);
            if ($imageContent === null) {
                continue;
            }

            // Only record the image once it is really on disk.
            if (file_put_contents($saveFileSaveLocation . $fileName, $imageContent) === false) {
                continue;
            }

            // Public URL always uses forward slashes, independent of the OS.
            $res_img_url = '/uploads/' . $dirName . '/' . $dateDir . '/' . $fileName;

            ModelHomeWorks::create([
                'home_works_img' => $res_img_url,
                'home_works_sort' => 1,
            ]);
        }

        return $titleArr;
    }

    /**
     * Turn an <img src> value into an absolute http(s) URL.
     *
     * Relative values are resolved against $baseUrl (the site root), not
     * against the crawled page's own path.
     *
     * @param string $baseUrl Site root, with or without a trailing slash.
     * @param string $src     Raw value of the src attribute.
     *
     * @return string|null Absolute URL, or null when the value is empty or
     *                     uses a non-http scheme such as "data:".
     */
    private function resolveImageUrl(string $baseUrl, string $src): ?string
    {
        $src = trim($src);
        if ($src === '') {
            return null;
        }

        // Already absolute.
        if (preg_match('#^https?://#i', $src) === 1) {
            return $src;
        }

        // Protocol-relative ("//cdn.example.com/a.png"): reuse the base scheme.
        if (strpos($src, '//') === 0) {
            $scheme = parse_url($baseUrl, PHP_URL_SCHEME);

            return (is_string($scheme) && $scheme !== '' ? $scheme : 'http') . ':' . $src;
        }

        // Any other explicit scheme (data:, javascript:, ...) is not downloadable.
        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $src) === 1) {
            return null;
        }

        // Join with exactly one slash regardless of how either side is written.
        return rtrim($baseUrl, '/') . '/' . ltrim($src, '/');
    }

    /**
     * Derive the local file name for a downloaded image.
     *
     * Only the path component of the URL is used, so query strings and
     * fragments never end up in the file name.
     *
     * @param string $imgUrl Absolute image URL.
     *
     * @return string|null File name, or null when the URL has no usable name
     *                     or its extension is not an allowed image type.
     */
    private function localFileName(string $imgUrl): ?string
    {
        $path = parse_url($imgUrl, PHP_URL_PATH);
        if (!is_string($path) || $path === '') {
            return null;
        }

        $name = basename($path);
        if ($name === '') {
            return null;
        }

        $extension = strtolower(pathinfo($name, PATHINFO_EXTENSION));
        if (!in_array($extension, self::ALLOWED_IMAGE_EXTENSIONS, true)) {
            return null;
        }

        return $name;
    }

    /**
     * Download the raw bytes of an image.
     *
     * @param string $imgUrl Absolute image URL.
     *
     * @return string|null Image bytes, or null when the download failed or
     *                     returned an empty body.
     */
    private function downloadImage(string $imgUrl): ?string
    {
        try {
            $content = file_get_contents($imgUrl);
        } catch (\ErrorException $e) {
            // NOTE(review): the framework's error handler is believed to turn
            // the warning emitted on a failed request into an ErrorException -
            // confirm. Either way a single bad image must not abort the crawl.
            return null;
        }

        if ($content === false || $content === '') {
            return null;
        }

        return $content;
    }
}
|