如何用 C# 利用 CefSharp 进行网页资源整站下载

如何用 C# 利用 CefSharp 进行网页资源整站下载

如何用 C# 利用 CefSharp 进行网页资源整站下载

有时候在网上发现很多好看的网页,想要下载下来珍藏一番,可是另存为的东西将图片、js、css等都放在一起,比较混乱,如何将整个网站的资源(html、js、css、图片、视频、字体)全部按照类别分类下载呢,手动的话比较麻烦,而且还耗时,今天教大家利用cefsharp进行整站资源下载。

如果不知道如何在 C# 中引入 CefSharp 的话,可以看看这篇文章 http://blog.bfw.wiki/user6/15572101669805510054.html

引入cefsharp后,我们来试试看

一、自定义cefsharp的IRequestHandler

初始化浏览器

 // Create the embedded browser, let it fill its parent control, and attach the
 // custom request handler that mirrors every requested resource to disk.
 browser = new ChromiumWebBrowser("http://www.baidu.com/")
 {
     Dock = DockStyle.Fill,
     RequestHandler = new MyRequestHandler(),
 };

自定义 IRequestHandler
    /// <summary>
    /// Request handler that mirrors every HTTP(S) GET resource requested by the browser into the
    /// current working directory. Resources located under the directory of the page being browsed
    /// keep their relative layout; everything else is stored under a folder named after its host.
    /// </summary>
    public class MyRequestHandler : IRequestHandler
    {
        /// <summary>File name used for URLs that address a directory (empty path or trailing '/').</summary>
        private const string DefaultDocumentName = "index.html";

        /// <summary>
        /// Directory URL (always ending with '/') of the most recent main-frame navigation,
        /// or an empty string before the first navigation.
        /// </summary>
        private string hosturl = "";

        /// <summary>Cookie read hook; this handler always answers false, exactly as the original did.</summary>
        public bool CanGetCookies(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request)
        {
            return false;
        }

        /// <summary>Cookie write hook; this handler always answers false, exactly as the original did.</summary>
        public bool CanSetCookie(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, Cookie cookie)
        {
            return false;
        }

        /// <summary>No credentials are supplied for authentication challenges.</summary>
        public bool GetAuthCredentials(IWebBrowser browserControl, IBrowser browser, IFrame frame, bool isProxy, string host, int port, string realm, string scheme, IAuthCallback callback)
        {
            return false;
        }

        /// <summary>No response filter is installed.</summary>
        public IResponseFilter GetResourceResponseFilter(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, IResponse response)
        {
            return null;
        }

        /// <summary>
        /// Records the directory of the page being navigated to, so that later resource requests
        /// can be stored relative to it. The navigation itself is never cancelled.
        /// </summary>
        /// <returns>Always false, so the navigation proceeds.</returns>
        public bool OnBeforeBrowse(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, bool userGesture, bool isRedirect)
        {
            // Sub-frame (iframe) navigations must not replace the base of the page being mirrored.
            if (frame != null && !frame.IsMain)
            {
                return false;
            }

            Uri pageUri;
            if (Uri.TryCreate(request.Url, UriKind.Absolute, out pageUri) && IsHttp(pageUri))
            {
                // Drop query/fragment first: a '/' inside a query string must not be mistaken for
                // a directory separator.
                string page = pageUri.GetLeftPart(UriPartial.Path);
                hosturl = page.Substring(0, page.LastIndexOf('/') + 1);
            }

            // The document is not downloaded here: its request also passes through
            // OnBeforeResourceLoad, and fetching it in both places stored the page twice.
            return false;
        }

        /// <summary>
        /// Saves a copy of the requested resource to disk and lets the browser load it normally.
        /// </summary>
        /// <returns>Always <c>CefReturnValue.Continue</c>.</returns>
        public CefReturnValue OnBeforeResourceLoad(IWebBrowser browserControl, IBrowser browser, IFrame frame, IRequest request, IRequestCallback callback)
        {
            // Only plain GET requests can be repeated safely by a second, out-of-band HTTP client.
            if (string.Equals(request.Method, "GET", StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): the download is synchronous and delays this request until the
                // copy has finished; move it to a background queue if page load speed matters.
                SaveResource(request.Url);
            }

            callback.Dispose();
            return CefReturnValue.Continue;
        }

        /// <summary>Certificate errors are not overridden.</summary>
        public bool OnCertificateError(IWebBrowser browserControl, IBrowser browser, CefErrorCode errorCode, string requestUrl, ISslInfo sslInfo, IRequestCallback callback)
        {
            callback.Dispose();
            return false;
        }

        /// <summary>Opening URLs from a tab is not intercepted.</summary>
        public bool OnOpenUrlFromTab(IWebBrowser browserControl, IBrowser browser, IFrame frame, string targetUrl, WindowOpenDisposition targetDisposition, bool userGesture)
        {
            return false;
        }

        /// <summary>Plugin crashes are ignored.</summary>
        public void OnPluginCrashed(IWebBrowser browserControl, IBrowser browser, string pluginPath)
        {
        }

        /// <summary>External protocol execution is not intercepted.</summary>
        public bool OnProtocolExecution(IWebBrowser browserControl, IBrowser browser, string url)
        {
            return false;
        }

        /// <summary>Quota requests are not handled.</summary>
        public bool OnQuotaRequest(IWebBrowser browserControl, IBrowser browser, string originUrl, long newSize, IRequestCallback callback)
        {
            callback.Dispose();
            return false;
        }

        /// <summary>Render process termination is ignored.</summary>
        public void OnRenderProcessTerminated(IWebBrowser browserControl, IBrowser browser, CefTerminationStatus status)
        {
        }

        /// <summary>Nothing to do when the render view becomes ready.</summary>
        public void OnRenderViewReady(IWebBrowser browserControl, IBrowser browser)
        {
        }

        /// <summary>Nothing to do when a resource finishes loading.</summary>
        public void OnResourceLoadComplete(IWebBrowser browserControl, IBrowser browser, IFrame frame, IRequest request, IResponse response, UrlRequestStatus status, long receivedContentLength)
        {
        }

        /// <summary>Redirects are followed unchanged.</summary>
        public void OnResourceRedirect(IWebBrowser chromiumWebBrowser, IBrowser browser, IFrame frame, IRequest request, IResponse response, ref string newUrl)
        {
        }

        /// <summary>Responses are passed through unchanged.</summary>
        public bool OnResourceResponse(IWebBrowser browserControl, IBrowser browser, IFrame frame, IRequest request, IResponse response)
        {
            return false;
        }

        /// <summary>
        /// No client certificate is chosen explicitly. Returning false instead of throwing keeps
        /// sites that ask for a client certificate from raising an exception inside the browser callback.
        /// </summary>
        public bool OnSelectClientCertificate(IWebBrowser chromiumWebBrowser, IBrowser browser, bool isProxy, string host, int port, X509Certificate2Collection certificates, ISelectClientCertificateCallback callback)
        {
            callback.Dispose();
            return false;
        }

        /// <summary>
        /// Downloads <paramref name="url"/> below the current working directory. Mirroring is
        /// best effort: any failure is traced and swallowed so it cannot break page loading.
        /// </summary>
        private void SaveResource(string url)
        {
            try
            {
                string relativePath = MapToRelativePath(url);
                if (relativePath == null)
                {
                    return;
                }

                string localFile = Path.Combine(Directory.GetCurrentDirectory(), relativePath);
                new HttpDownFile().Download(url, localFile);
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceWarning("Could not mirror {0}: {1}", url, ex.Message);
            }
        }

        /// <summary>
        /// Translates a resource URL into a relative file path, or returns null when the URL is
        /// not an absolute http/https URL (for example data: or blob: URLs).
        /// </summary>
        private string MapToRelativePath(string url)
        {
            Uri uri;
            if (!Uri.TryCreate(url, UriKind.Absolute, out uri) || !IsHttp(uri))
            {
                return null;
            }

            // Read the field once so a navigation happening meanwhile cannot change it mid-way.
            string baseUrl = hosturl;

            // Query string and fragment are not part of the file name.
            string withoutQuery = uri.GetLeftPart(UriPartial.Path);

            string relative;
            if (baseUrl.Length > 0 && withoutQuery.StartsWith(baseUrl, StringComparison.Ordinal))
            {
                relative = withoutQuery.Substring(baseUrl.Length);
            }
            else
            {
                // Other hosts (CDNs) and paths outside the page's directory get their own
                // folder named after the host instead of producing an invalid "http:\..." path.
                relative = uri.Host + uri.AbsolutePath;
            }

            bool addressesDirectory = relative.Length == 0 || relative.EndsWith("/", StringComparison.Ordinal);

            string localPath = "";
            foreach (string segment in relative.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries))
            {
                string name = ToSafeFileName(segment);
                if (name.Length > 0)
                {
                    localPath = Path.Combine(localPath, name);
                }
            }

            if (addressesDirectory)
            {
                localPath = Path.Combine(localPath, DefaultDocumentName);
            }

            return localPath;
        }

        /// <summary>
        /// Decodes one URL path segment and replaces characters that are not allowed in file
        /// names. Returns an empty string for "." and "..", which must never reach the file system.
        /// </summary>
        private static string ToSafeFileName(string segment)
        {
            string name = Uri.UnescapeDataString(segment);
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            return (name == "." || name == "..") ? "" : name;
        }

        /// <summary>True when <paramref name="uri"/> uses the http or https scheme.</summary>
        private static bool IsHttp(Uri uri)
        {
            return uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;
        }
    }

这里面我们看到了OnBeforeBrowse与OnBeforeResourceLoad,一个是请求html页面的,一个是加载资源的,那么我们在请求html的时候将主路径记录下来,然后在请求资源的时候按照路径下载到本地的目录中,就完成了

附上下载资源的类

using System;
using System.IO;
using System.Net;

namespace CefSharp.MinimalExample.WinForms
{
    /// <summary>
    /// Small helper for saving remote files and text to disk.
    /// </summary>
    class HttpDownFile
    {
        /// <summary>
        /// Downloads a file over HTTP. When <paramref name="localfile"/> already exists, a ranged
        /// request is sent to resume from its current length; if the server does not honour the
        /// range, the file is rewritten from the start.
        /// </summary>
        /// <param name="url">HTTP address of the remote file.</param>
        /// <param name="localfile">Path of the local file to write.</param>
        /// <returns>true when the download completed; false when any step failed.</returns>
        public bool Download(string url, string localfile)
        {
            try
            {
                // Length of a previous partial download, or 0 when starting fresh.
                long startPosition = File.Exists(localfile) ? new FileInfo(localfile).Length : 0;

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                if (startPosition > 0)
                {
                    // The long overload avoids the overflow of the former (int) cast for files over 2 GB.
                    request.AddRange(startPosition);
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream readStream = response.GetResponseStream())
                {
                    // Only 206 means the body starts at startPosition. A plain 200 carries the
                    // whole file, and appending it would corrupt the local copy.
                    bool resumed = startPosition > 0 && response.StatusCode == HttpStatusCode.PartialContent;

                    // The file system is touched only once there is data to write, so a failed
                    // request no longer leaves an empty file behind.
                    string directory = Path.GetDirectoryName(localfile);
                    if (!string.IsNullOrEmpty(directory))
                    {
                        Directory.CreateDirectory(directory);
                    }

                    FileMode mode = resumed ? FileMode.Append : FileMode.Create;
                    using (FileStream writeStream = new FileStream(localfile, mode, FileAccess.Write))
                    {
                        readStream.CopyTo(writeStream);
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // The method reports failure through its return value; the cause is traced
                // rather than silently dropped.
                System.Diagnostics.Trace.TraceWarning("Download of {0} to {1} failed: {2}", url, localfile, ex.Message);
                return false;
            }
        }


        /// <summary>
        /// Writes text to a file, creating the parent directory if needed and overwriting any
        /// existing file. The text is encoded with <c>System.Text.Encoding.Default</c>.
        /// </summary>
        /// <param name="path">Path of the file to write.</param>
        /// <param name="str">Text content of the file.</param>
        public static void Save(string path, string str)
        {
            // GetDirectoryName copes with both separator styles and with bare file names,
            // where the former LastIndexOf('\\') + Substring threw.
            string directory = Path.GetDirectoryName(path);
            if (!string.IsNullOrEmpty(directory))
            {
                Directory.CreateDirectory(directory);
            }

            byte[] data = System.Text.Encoding.Default.GetBytes(str);
            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite))
            {
                fs.Write(data, 0, data.Length);
            }
        }
    }
}



 过几天附上源码

{{collectdata}}

网友评论0