如何在C#中利用CefSharp进行网页资源整站下载
有时候在网上发现很多好看的网页,想要下载下来珍藏一番,可是另存为的东西将图片、js、css等都放在一起,比较混乱,如何将整个网站的资源(html、js、css、图片、视频、字体)全部按照类别分类下载呢,手动的话比较麻烦,而且还耗时,今天教大家利用cefsharp进行整站资源下载。
如果不知道如何在C#中引入CefSharp的话,可以看看这篇文章 http://blog.bfw.wiki/user6/15572101669805510054.html
引入cefsharp后,我们来试试看
一、自定义cefsharp的IRequestHandler
初始化浏览器
// Create the embedded Chromium control, pointed at the start page,
// and let it fill its parent container.
browser = new ChromiumWebBrowser("http://www.baidu.com/");
browser.Dock = DockStyle.Fill;

// Route the browser's requests through our custom handler.
browser.RequestHandler = new MyRequestHandler();
自定义irequesthandler
/// <summary>
/// Request handler that mirrors every page and resource the browser requests
/// into the current working directory, keeping the site's folder layout.
/// </summary>
/// <remarks>
/// Downloads are performed synchronously inside the CEF callbacks, so page loading
/// is delayed while each file is being fetched.
/// </remarks>
public class MyRequestHandler : IRequestHandler
{
    // Base URL of the page being browsed: everything up to and including the last '/'
    // of the most recent navigation URL (query string and fragment excluded).
    // Stays empty until a navigation URL containing a '/' has been seen.
    private String hosturl = "";

    /// <summary>Returns false, i.e. cookies are not sent with the request.</summary>
    public bool CanGetCookies(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request)
    {
        return false;
    }

    /// <summary>Returns false, i.e. cookies received from the server are not stored.</summary>
    public bool CanSetCookie(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, Cookie cookie)
    {
        return false;
    }

    /// <summary>Declines to supply credentials; the callback is released like in the other handlers.</summary>
    public bool GetAuthCredentials(IWebBrowser browserControl, IBrowser browser, IFrame frame, bool isProxy, string host, int port, string realm, string scheme, IAuthCallback callback)
    {
        callback.Dispose();
        return false;
    }

    /// <summary>No response filtering is applied.</summary>
    public IResponseFilter GetResourceResponseFilter(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, IResponse response)
    {
        return null;
    }

    /// <summary>
    /// Remembers the base URL of the page being navigated to and saves the page itself.
    /// </summary>
    /// <returns>Always false, so the navigation proceeds.</returns>
    public bool OnBeforeBrowse(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, bool userGesture, bool isRedirect)
    {
        // Ignore the query/fragment so a '/' inside them cannot be mistaken for the
        // end of the directory part. "LastIndexOf + 1" yields an empty base (instead
        // of throwing) for URLs without any '/', such as "about:blank".
        string page = StripQueryAndFragment(request.Url);
        hosturl = page.Substring(0, page.LastIndexOf('/') + 1);

        SaveLocally(request.Url);
        return false;
    }

    /// <summary>
    /// Saves the requested static resource to disk, then lets the request continue.
    /// </summary>
    public CefReturnValue OnBeforeResourceLoad(IWebBrowser browserControl, IBrowser browser, IFrame frame, IRequest request, IRequestCallback callback)
    {
        SaveLocally(request.Url);
        callback.Dispose();
        return CefReturnValue.Continue;
    }

    public bool OnCertificateError(IWebBrowser browserControl, IBrowser browser, CefErrorCode errorCode, string requestUrl, ISslInfo sslInfo, IRequestCallback callback)
    {
        callback.Dispose();
        return false;
    }

    public bool OnOpenUrlFromTab(IWebBrowser browserControl, IBrowser browser, IFrame frame, string targetUrl, WindowOpenDisposition targetDisposition, bool userGesture)
    {
        return false;
    }

    public void OnPluginCrashed(IWebBrowser browserControl, IBrowser browser, string pluginPath)
    {
    }

    public bool OnProtocolExecution(IWebBrowser browserControl, IBrowser browser, string url)
    {
        return false;
    }

    public bool OnQuotaRequest(IWebBrowser browserControl, IBrowser browser, string originUrl, long newSize, IRequestCallback callback)
    {
        callback.Dispose();
        return false;
    }

    public void OnRenderProcessTerminated(IWebBrowser browserControl, IBrowser browser, CefTerminationStatus status)
    {
    }

    public void OnRenderViewReady(IWebBrowser browserControl, IBrowser browser)
    {
    }

    public void OnResourceLoadComplete(IWebBrowser browserControl, IBrowser browser, IFrame frame, IRequest request, IResponse response, UrlRequestStatus status, long receivedContentLength)
    {
        // The request URL could also be inspected here.
    }

    public void OnResourceRedirect(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, IResponse response, ref string newUrl)
    {
    }

    public bool OnResourceResponse(IWebBrowser browserControl, IBrowser browser, IFrame frame, IRequest request, IResponse response)
    {
        return false;
    }

    /// <summary>
    /// Declines to pick a client certificate instead of throwing, so sites that ask
    /// for one do not crash the handler.
    /// </summary>
    public bool OnSelectClientCertificate(IWebBrowser chromiumWebBrowser, IBrowser browser, bool isProxy, string host, int port, X509Certificate2Collection certificates, ISelectClientCertificateCallback callback)
    {
        callback.Dispose();
        return false;
    }

    // Downloads url into the mirror directory when it maps to a usable local path.
    private void SaveLocally(string url)
    {
        string localPath = GetLocalPath(url);
        if (localPath != null)
        {
            HttpDownFile dt = new HttpDownFile();
            dt.Download(url, localPath);
        }
    }

    // Maps a request URL to a file path below the current directory, or returns null
    // when the URL should not be saved (no base URL yet, non-HTTP scheme, or a URL
    // that names a directory rather than a file).
    private string GetLocalPath(string url)
    {
        if (string.IsNullOrEmpty(hosturl) || string.IsNullOrEmpty(url))
        {
            return null;
        }

        // data:, blob:, chrome-devtools: and similar URLs cannot be fetched over HTTP.
        if (!url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
            !url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        string relative;
        if (url.StartsWith(hosturl, StringComparison.OrdinalIgnoreCase))
        {
            relative = url.Substring(hosturl.Length);
        }
        else
        {
            // Resource outside the page's folder (CDN, other host, parent directory):
            // file it under a folder named after its host so the path stays valid.
            Uri uri;
            if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
            {
                return null;
            }
            relative = uri.Host + uri.AbsolutePath;
        }

        relative = StripQueryAndFragment(relative);
        if (relative.Length == 0 || relative.EndsWith("/", StringComparison.Ordinal))
        {
            return null;
        }

        char[] invalidChars = Path.GetInvalidFileNameChars();
        string[] segments = relative.Split('/');
        for (int i = 0; i < segments.Length; i++)
        {
            string segment = segments[i];
            if (segment == "." || segment == "..")
            {
                // Never let a URL climb out of the mirror directory.
                segment = "_";
            }
            foreach (char c in invalidChars)
            {
                segment = segment.Replace(c, '_');
            }
            segments[i] = segment;
        }

        // HttpDownFile locates the target directory by the last backslash,
        // so backslashes are used as separators throughout.
        return Directory.GetCurrentDirectory() + "\\" + string.Join("\\", segments);
    }

    // Returns value without its "?query" and "#fragment" parts.
    private static string StripQueryAndFragment(string value)
    {
        int cut = value.IndexOfAny(new[] { '?', '#' });
        return cut >= 0 ? value.Substring(0, cut) : value;
    }
}
这里面我们看到了OnBeforeBrowse与OnBeforeResourceLoad,一个是请求html页面的,一个是加载资源的,那么我们在请求html的时候将主路径记录下来,然后在请求资源的时候按照路径下载到本地的目录中,就完成了
附上下载资源的类
using System;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Text;

namespace CefSharp.MinimalExample.WinForms
{
    /// <summary>
    /// Small helper for saving HTTP resources and text to local files.
    /// </summary>
    class HttpDownFile
    {
        /// <summary>
        /// Downloads a file over HTTP, resuming a previous partial download when the
        /// server honours the Range request.
        /// </summary>
        /// <param name="url">HTTP address of the file.</param>
        /// <param name="localfile">Local file to write; missing parent directories are created.</param>
        /// <returns>true when the download succeeded; false on any failure.</returns>
        public bool Download(string url, string localfile)
        {
            try
            {
                // Bytes already on disk from an earlier attempt; 0 means start from scratch.
                long startPosition = File.Exists(localfile) ? new FileInfo(localfile).Length : 0;

                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
                if (startPosition > 0)
                {
                    // Ask the server to skip what we already have (long overload: no 2 GB limit).
                    myRequest.AddRange(startPosition);
                }

                using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
                using (Stream readStream = response.GetResponseStream())
                {
                    // Only append when the server really sent the requested tail (206).
                    // A plain 200 carries the whole file again, so the old bytes must be
                    // replaced rather than extended.
                    bool resume = startPosition > 0 && response.StatusCode == HttpStatusCode.PartialContent;

                    // The file is opened only once a response has arrived, so a failed
                    // request does not leave an empty file behind.
                    EnsureParentDirectory(localfile);
                    FileMode mode = resume ? FileMode.Append : FileMode.Create;
                    using (FileStream writeStream = new FileStream(localfile, mode, FileAccess.Write))
                    {
                        readStream.CopyTo(writeStream);
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // Failure is reported through the return value; keep a trace for diagnosis.
                Trace.WriteLine("Download failed for " + url + ": " + ex.Message);
                return false;
            }
        }

        /// <summary>
        /// Creates (or overwrites) a file with the given text, encoded with the
        /// system default encoding.
        /// </summary>
        /// <param name="path">File path; missing parent directories are created.</param>
        /// <param name="str">File content.</param>
        public static void Save(string path, string str)
        {
            EnsureParentDirectory(path);
            byte[] data = Encoding.Default.GetBytes(str);
            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite))
            {
                fs.Write(data, 0, data.Length);
            }
        }

        // Creates the directory part of filePath if there is one. A bare file name
        // (no separator) needs no directory and is left alone.
        private static void EnsureParentDirectory(string filePath)
        {
            int separator = filePath.LastIndexOfAny(new[] { '\\', '/' });
            if (separator > 0)
            {
                Directory.CreateDirectory(filePath.Substring(0, separator));
            }
        }
    }
}
过几天附上源码
网友评论0