emoticon_api/ARW.Repository/Business/Crawler/CrawlRepository.cs
2023-06-02 21:15:33 +08:00

144 lines
5.9 KiB
C#

using Infrastructure.Attribute;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using ARW.Model.Models.Business.Crawler;
using AngleSharp.Html.Parser;
using RestSharp;
namespace ARW.Repository.Business
{
/// <summary>
/// Crawler repository service. Downloads listing pages from the supported sites,
/// parses them with AngleSharp and hands every discovered item to the inherited
/// <c>Add</c> method (presumably persisting it; the base class is not visible here).
/// </summary>
[AppService(ServiceLifetime = LifeTime.Transient)]
public class CrawlRepository : BaseRepository<Crawl>
{
    /// <summary>HTML parser instance shared by all parse methods.</summary>
    protected static HtmlParser htmlParser = new HtmlParser();

    /// <summary>Prefix prepended to the relative links found on Buxiuse listing pages.</summary>
    private const string BuxiuseLinkPrefix = "https://www.buxiuse.com";

    /// <summary>Prefix prepended to the relative links found on "Yellow" listing pages.</summary>
    private const string YellowLinkPrefix = "https://x4dgjgj002d5c.com:58002/";

    /// <summary>
    /// Title keywords used to filter "Yellow" items; an item is kept when its title
    /// contains at least one of them (case-sensitive, ordinal match).
    /// </summary>
    private static readonly string[] YellowKeywords = { "福利姬", "swag", "丝", "jk", "可爱", "制服", "网红" };

    // ---------------------------------------------------------------- Buxiuse (buxiuse.com)

    /// <summary>
    /// Downloads the HTML of a Buxiuse page.
    /// </summary>
    /// <param name="url">Absolute URL of the page to download.</param>
    /// <returns>The response body, or an empty string when the request fails.</returns>
    public string LoadBuxiuseHTML(string url)
    {
        return DownloadHtml(url, request =>
        {
            request.AddHeader("cookie", "SESSION=YmNhNjgxNTctNzk3MS00YTVkLThmM2YtMDBjYzhjZDNiNzNm; Hm_lvt_479b5d690f3b5d1eae450ce953f78480=1660291808,1660530399; Hm_lpvt_479b5d690f3b5d1eae450ce953f78480=1660530399");
            request.AddHeader("accept-encoding", "gzip, deflate, br");
            request.AddHeader("accept", "application/json, text/javascript, */*; q=0.01");
            request.AddHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36");
            request.AddHeader("upgrade-insecure-requests", "1");
            request.AddHeader("referer", "https://www.buxiuse.com/");
            request.AddHeader("cache-control", "max-age=0,no-cache");
            request.AddHeader("authority", "www.buxiuse.com");
        });
    }

    /// <summary>
    /// Parses a Buxiuse listing page and passes every picture entry to <c>Add</c>.
    /// </summary>
    /// <param name="html">HTML of the listing page; may be null or empty.</param>
    /// <returns>1 when at least one entry was found, otherwise 0.</returns>
    public int ParseBuxiusePic(string html)
    {
        // The Load* methods return an empty string on failure; nothing to parse then.
        if (string.IsNullOrEmpty(html))
        {
            return 0;
        }

        var dom = htmlParser.ParseDocument(html);
        var stored = 0;

        foreach (var li in dom.QuerySelectorAll(".panel-body li"))
        {
            var anchor = li.QuerySelector("a");
            var image = li.QuerySelector("img");

            // QuerySelector yields null when nothing matches; skip list items that
            // are not picture entries instead of failing on a null dereference.
            if (anchor == null || image == null)
            {
                continue;
            }

            Add(new Crawl()
            {
                Name = image.GetAttribute("title"),
                Cover = image.GetAttribute("src"),
                Link = BuxiuseLinkPrefix + anchor.GetAttribute("href"),
                Type = "图片",
                PublishTime = DateTime.Now,
            });
            stored++;
        }

        return stored == 0 ? 0 : 1;
    }

    // ---------------------------------------------------------------- Yellow

    /// <summary>
    /// Downloads the HTML of a "Yellow" page.
    /// </summary>
    /// <param name="url">Absolute URL of the page to download; also sent as the referer.</param>
    /// <returns>The response body, or an empty string when the request fails.</returns>
    public string LoadYellowHTML(string url)
    {
        return DownloadHtml(url, request =>
        {
            request.AddHeader("cookie", "Hm_lvt_07f2c7e5bd9592209d606f0184fc3d8f=1660568257; recente=%5B%7B%22vod_name%22%3A%22%E5%9B%BD%E4%BA%A7AV%E5%89%A7%E6%83%85-%E4%B8%88%E5%A4%AB%E7%9A%84%22%2C%22vod_url%22%3A%22https%3A%2F%2Frra5ii1x6k3in.com%3A58002%2Findex.php%2Fvod%2Fplay%2Fid%2F138577%2Fsid%2F1%2Fnid%2F1.html%22%2C%22vod_part%22%3A%22%E7%AC%AC1%E9%9B%86%22%7D%5D; Hm_lvt_36ab7abfe863c7133f2af34068ebfc82=1660568383; Hm_lpvt_36ab7abfe863c7133f2af34068ebfc82=1660568383; Hm_lpvt_07f2c7e5bd9592209d606f0184fc3d8f=1660570678");
            request.AddHeader("accept-encoding", "gzip, deflate, br");
            request.AddHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
            request.AddHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36");
            request.AddHeader("upgrade-insecure-requests", "1");
            request.AddHeader("referer", url);
            request.AddHeader("cache-control", "max-age=0");
        });
    }

    /// <summary>
    /// Parses a "Yellow" listing page and passes every entry whose title contains
    /// one of the configured keywords to <c>Add</c>. Each entry is added at most once,
    /// even when its title matches several keywords.
    /// </summary>
    /// <param name="html">HTML of the listing page; may be null or empty.</param>
    /// <returns>1 when at least one entry was kept, otherwise 0.</returns>
    public int ParseYellowPic(string html)
    {
        // The Load* methods return an empty string on failure; nothing to parse then.
        if (string.IsNullOrEmpty(html))
        {
            return 0;
        }

        var dom = htmlParser.ParseDocument(html);
        var stored = 0;

        var items = dom.QuerySelectorAll(".stui-pannel_bd")
                       .SelectMany(panel => panel.QuerySelectorAll("li"));

        foreach (var li in items)
        {
            var anchor = li.QuerySelector("a");
            if (anchor == null)
            {
                continue;
            }

            // GetAttribute yields null when the attribute is absent; an entry without
            // a title cannot match any keyword.
            var name = anchor.GetAttribute("title");
            if (string.IsNullOrEmpty(name) || !YellowKeywords.Any(keyword => name.Contains(keyword)))
            {
                continue;
            }

            Add(new Crawl()
            {
                Name = name,
                Link = YellowLinkPrefix + anchor.GetAttribute("href"),
                Type = "Yellow",
                Cover = anchor.GetAttribute("data-original"),
            });
            stored++;
        }

        return stored == 0 ? 0 : 1;
    }

    // ---------------------------------------------------------------- helpers

    /// <summary>
    /// Performs a GET request against <paramref name="url"/> and returns the body.
    /// Any failure (invalid URL, network error, non-success status) is logged to the
    /// console where an exception is involved and reported as an empty string.
    /// </summary>
    /// <param name="url">Absolute URL to request.</param>
    /// <param name="addHeaders">Callback that adds the site-specific headers to the request.</param>
    /// <returns>The response body, or an empty string on failure.</returns>
    private static string DownloadHtml(string url, Action<RestRequest> addHeaders)
    {
        try
        {
            // Dispose the client so the underlying HTTP resources are released
            // after every call instead of piling up.
            using (var client = new RestClient(url))
            {
                var request = new RestRequest(url, Method.Get);
                addHeaders(request);

                RestResponse response = client.Execute(request);
                return response.IsSuccessful ? (response.Content ?? string.Empty) : string.Empty;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"LoadHTML fail,url:{url},ex:{ex.ToString()}");
            return string.Empty;
        }
    }
}
}