写在前面
类库介绍:PuppeteerSharp 是一个用于在 .NET 平台上控制无头(Headless) Chrome 浏览器的库。它允许开发者通过编程方式模拟浏览器行为,执行网页操作,截取网页截图,获取网页内容,可以把网页转换成PDF等等。PuppeteerSharp 是 Puppeteer 在 .NET 环境中的等效实现,Puppeteer 是由 Google 提供的一个用于控制无界面浏览器 Chrome 的工具。
老规矩,通过 NuGet 安装 PuppeteerSharp 包
代码实现
/// <summary>
/// Loads <paramref name="url"/> in a headless browser with a 1920x1080 viewport and
/// writes a screenshot to "test.jpg" inside the application's base directory.
/// </summary>
/// <param name="url">Address of the page to capture.</param>
/// <remarks>
/// NOTE(review): the method is <c>async void</c>, so callers cannot await its completion
/// or catch exceptions thrown after the first <c>await</c>.
/// </remarks>
public static async void DoScreenshot(string url)
{
    // Fetch the browser build through BrowserFetcher before launching.
    using var fetcher = new BrowserFetcher();
    await fetcher.DownloadAsync();

    var launchOptions = new LaunchOptions { Headless = true };
    await using var chrome = await Puppeteer.LaunchAsync(launchOptions);
    await using var tab = await chrome.NewPageAsync();

    // Fix the viewport size before navigating.
    var viewport = new ViewPortOptions { Width = 1920, Height = 1080 };
    await tab.SetViewportAsync(viewport);
    await tab.GoToAsync(url);

    string outputFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "test.jpg");
    await tab.ScreenshotAsync(outputFile);

    await chrome.CloseAsync();
}
/// <summary>
/// Loads <paramref name="url"/> in a headless browser and writes the page as "test.pdf"
/// inside the application's base directory.
/// </summary>
/// <param name="url">Address of the page to export.</param>
/// <remarks>
/// NOTE(review): the method is <c>async void</c>, so callers cannot await its completion
/// or catch exceptions thrown after the first <c>await</c>.
/// </remarks>
public static async void ExportAsPdf(string url)
{
    // Fetch the browser build through BrowserFetcher before launching.
    using var fetcher = new BrowserFetcher();
    await fetcher.DownloadAsync();

    var launchOptions = new LaunchOptions { Headless = true };
    await using var chrome = await Puppeteer.LaunchAsync(launchOptions);
    await using var tab = await chrome.NewPageAsync();
    await tab.GoToAsync(url);

    // Wait until the page's fonts have finished loading before exporting.
    const string fontsReadyExpression = "document.fonts.ready";
    await tab.EvaluateExpressionHandleAsync(fontsReadyExpression);

    string outputFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "test.pdf");
    await tab.PdfAsync(outputFile);

    await chrome.CloseAsync();
}
/// <summary>
/// Loads <paramref name="url"/> in a headless browser, collects the <c>href</c> of every
/// anchor element on the page and writes them, one per line and prefixed with "Url: ",
/// to "urls.txt" inside the application's base directory.
/// </summary>
/// <param name="url">Address of the page whose links are collected.</param>
/// <remarks>
/// NOTE(review): the method is <c>async void</c>, so callers cannot await its completion
/// or catch exceptions thrown after the first <c>await</c>.
/// </remarks>
public static async void FetchUrls(string url)
{
    // Dispose the fetcher like the sibling helpers do instead of leaking it.
    using var browserFetcher = new BrowserFetcher();
    await browserFetcher.DownloadAsync();

    // Browser configuration.
    var options = new LaunchOptions { Headless = true };

    // Open the browser; await using matches the asynchronous disposal used by the other helpers.
    await using var browser = await Puppeteer.LaunchAsync(options);
    await using var page = await browser.NewPageAsync();

    // Load the page.
    await page.GoToAsync(url);

    // Collect every hyperlink target in the page context.
    var jsSelectAllAnchors = @"Array.from(document.querySelectorAll('a')).map(a => a.href);";
    var urls = await page.EvaluateExpressionAsync<string[]>(jsSelectAllAnchors);

    // Format each link as one output line.
    var lines = Array.ConvertAll(urls, u => $"Url: {u}");

    var urlPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "urls.txt");
    File.WriteAllLines(urlPath, lines);

    await browser.CloseAsync();
}
/// <summary>
/// Loads <paramref name="url"/> in a headless browser, types a fixed query into the
/// element matching ".searchbox input", clicks ".searchIcon" and waits for the
/// resulting navigation before reading the page HTML.
/// </summary>
/// <param name="url">Address of the page that hosts the search box.</param>
/// <remarks>
/// NOTE(review): the method is <c>async void</c>, so callers cannot await its completion
/// or catch exceptions thrown after the first <c>await</c>.
/// </remarks>
public static async void SearchTest(string url)
{
    using var browserFetcher = new BrowserFetcher();
    await browserFetcher.DownloadAsync();

    // NOTE(review): the original hint "Devtools = true" (opens Chrome DevTools and
    // automatically disables headless mode) appears to be a LaunchOptions setting,
    // not a viewport setting - confirm against the PuppeteerSharp version in use.
    await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true });
    await using var page = await browser.NewPageAsync();
    await page.SetViewportAsync(new ViewPortOptions
    {
        Width = 1920,
        Height = 1080,
    });

    // Navigation completion modes (WaitUntilNavigation):
    //   load:             done when window.onload fires; in some cases it never fires.
    //   domcontentloaded: done when the DOMContentLoaded event fires.
    //   networkidle0:     done once there have been no network connections for 500 ms.
    //   networkidle2:     done once there have been at most 2 network connections for 500 ms.
    // Load duration: networkidle0 > networkidle2 > load > domcontentloaded.
    // Example:
    //   await page.GoToAsync(url, new NavigationOptions { WaitUntil = new[] { WaitUntilNavigation.Networkidle0 } });
    //
    // Tracing example:
    //   await page.Tracing.StartAsync(new TracingOptions { Path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "race.json") });
    //   await page.Tracing.StopAsync();
    await page.GoToAsync(url);

    await page.WaitForSelectorAsync(".searchbox input");
    await page.FocusAsync(".searchbox input"); // focus the search box
    await page.Keyboard.TypeAsync("CN Tower,Toronto,Ontario,Canada"); // keyboard input

    // Start waiting for the navigation BEFORE clicking: if the wait is only started
    // after the click, a navigation that has already completed is missed and the
    // wait runs into its timeout.
    var navigationTask = page.WaitForNavigationAsync();
    await page.ClickAsync(".searchIcon"); // click
    await navigationTask;

    // HTML of the page after the navigation; this demo does not use it further.
    _ = await page.GetContentAsync();

    // Download configuration example:
    //   var cdp = await page.Target.CreateCDPSessionAsync();
    //   await cdp.SendAsync("Page.setDownloadBehavior", new
    //   {
    //       behavior = "allow",
    //       downloadPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "testing_downloads")
    //   });
    //   await cdp.DetachAsync();
}
调用示例
// Each helper below is declared `async void`, so these calls cannot be awaited and
// return to the caller before the browser work has finished.
// NOTE(review): make sure the host process stays alive until the output files are written.
PuppeteerHelper.DoScreenshot("https://www.baidu.com/");
PuppeteerHelper.ExportAsPdf("https://www.baidu.com/");
PuppeteerHelper.FetchUrls("https://www.baidu.com/");
执行结果如下:
需要注意的是,运行时需要联网下载 Chrome(记得科学上网);否则需要先手动将 Chrome 包下载好,下载后的内容是这样的:
官网源码: https://github.com/hardkoded/puppeteer-sharp
原文地址:https://blog.csdn.net/rjcql/article/details/134843842
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。