none
C# Scraper RRS feed

  • Question

  • Can anyone help me and make this work? I've been stock for days now :(
    I'ts my first time scraping website with captcha. It always returns "Your CAPTCHA response was incorrect. Please try again.".
    using System;
    using System.Drawing;
    using System.IO;
    using System.Net;
    using System.Threading;
    using System.Windows.Forms;
    using HtmlAgilityPack;
    using System.Text.RegularExpressions;
    
    namespace Captchas
    {
        public partial class Form1 : Form
        {
            CookieContainer cookies;
            HttpWebRequest webRequest;
            HttpWebResponse response;
            StreamReader responseReader;
            StreamWriter stOut;
            
            string strUrl = string.Empty;
            string postData = string.Empty;
            string responseData = string.Empty;
            string captchID = string.Empty;
    
            HtmlAgilityPack.HtmlDocument _doc;
            HtmlNode _node;
    
            public Form1()
            {
                InitializeComponent();
    
                strUrl = "https://apps.huroncountyclerk.com/";
                postData = "";
                cookies = new CookieContainer();
                webRequest = (HttpWebRequest)WebRequest.Create(strUrl);
                webRequest.Method = "GET";
                webRequest.ContentType = "application/x-www-form-urlencoded";
                webRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36";
                webRequest.ContentLength = postData.Length;
                webRequest.CookieContainer = cookies;
                webRequest.Timeout = Timeout.Infinite;
                webRequest.KeepAlive = true;
                response = (HttpWebResponse)webRequest.GetResponse();
                response.Cookies = webRequest.CookieContainer.GetCookies(webRequest.RequestUri);
                responseReader = new StreamReader(response.GetResponseStream(), true);
                responseData = responseReader.ReadToEnd();
                response.Close();
                responseReader.Close();
    
                strUrl = "https://apps.huroncountyclerk.com/recordSearch.php?k=acceptAgreementsearchForm3901";
                postData = "";
                cookies = new CookieContainer();
                webRequest = (HttpWebRequest)WebRequest.Create(strUrl);
                webRequest.Method = "GET";
                webRequest.ContentType = "application/x-www-form-urlencoded";
                webRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36";
                webRequest.ContentLength = postData.Length;
                webRequest.CookieContainer = cookies;
                webRequest.Timeout = Timeout.Infinite;
                webRequest.KeepAlive = true;
                foreach (Cookie ck in response.Cookies)
                    webRequest.CookieContainer.Add(ck);
                webRequest.CookieContainer.Add(new Uri(strUrl), new Cookie("_ga", "GA1.2.1500688136.1565216581"));
                webRequest.CookieContainer.Add(new Uri(strUrl), new Cookie("_gid", "GA1.2.1260386379.1565216581"));
                webRequest.CookieContainer.Add(new Uri(strUrl), new Cookie("_gat", "1"));
                response = (HttpWebResponse)webRequest.GetResponse();
                response.Cookies = webRequest.CookieContainer.GetCookies(webRequest.RequestUri);
                responseReader = new StreamReader(response.GetResponseStream(), true);
                responseData = responseReader.ReadToEnd();
                response.Close();
                responseReader.Close();
    
                _doc = new HtmlAgilityPack.HtmlDocument();
                _doc.LoadHtml(responseData);
                _node = _doc.DocumentNode.SelectSingleNode("//img[@id='captchaImage']");
                            
                pictureBox1.ImageLocation = $"https://apps.huroncountyclerk.com{_node.GetAttributeValue("src", "")}";
            }        
    
            private void button1_Click(object sender, EventArgs e)
            {
                captchID = Regex.Match(responseData, "<input type=\"hidden\" name=\"k\" id=\"k\" value=\"(.+?)\" />").Groups[1].Value;
                var caseNumber = "20190628";
    
                strUrl = "https://apps.huroncountyclerk.com/recordSearch.php";
                postData = $"searchName=&searchHMonth=&searchHDay=&searchHYear=&searchCase={ caseNumber }&searchFMonth=&searchFDay=&searchFYear=&searchAgency%5B%5D=3901&searchCaseType%5B%5D=CA&searchCaseType%5B%5D=CV&searchCaseType%5B%5D=CR&searchCaseType%5B%5D=DR&searchCaseType%5B%5D=JL&searchCaseType%5B%5D=MI&searchBlock=25&captchaResponse={ textBox1.Text }&searchType=mainSearch&k={ captchID }";
                webRequest = (HttpWebRequest)WebRequest.Create(strUrl);
                webRequest.Method = "POST";
                webRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3";
                webRequest.ContentType = "application/x-www-form-urlencoded";
                webRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36";
                webRequest.ContentLength = postData.Length;
                webRequest.CookieContainer = cookies;
                webRequest.Referer = "https://apps.huroncountyclerk.com/recordSearch.php?k=acceptAgreementsearchForm3901";
                webRequest.Host = "apps.huroncountyclerk.com";
                webRequest.Headers.Set(HttpRequestHeader.CacheControl, "max-age=0");
                webRequest.Headers.Set(HttpRequestHeader.AcceptEncoding, "gzip, deflate, br");
                webRequest.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-US,en;q=0.9");
                webRequest.Timeout = 120 * 1000; //2 minutes
                webRequest.KeepAlive = true;
                webRequest.AllowAutoRedirect = true;
                foreach (Cookie ck in response.Cookies)
                    webRequest.CookieContainer.Add(ck);
                webRequest.CookieContainer.Add(new Uri(strUrl), new Cookie("_ga", "GA1.2.1500688136.1565216581"));
                webRequest.CookieContainer.Add(new Uri(strUrl), new Cookie("_gid", "GA1.2.1260386379.1565216581"));
                webRequest.CookieContainer.Add(new Uri(strUrl), new Cookie("_gat", "1"));
                stOut = new StreamWriter(webRequest.GetRequestStream());
                stOut.Write(postData);
                stOut.Flush();
                stOut.Close();
                stOut = null;
                response = (HttpWebResponse)webRequest.GetResponse();
                responseReader = new StreamReader(response.GetResponseStream(), true);
                responseData = responseReader.ReadToEnd();
                response.Close();
                responseReader.Close();
            }
        }
    }
    

    Thursday, August 8, 2019 4:40 AM

All replies

  • Hi,

    Did you confirm if you can access the captcha by your network? Is it not blocked by some gateways?

    Regards,

    Kyle


    MSDN Community Support
    Please remember to click "Mark as Answer" the responses that resolved your issue, and to click "Unmark as Answer" if not. This can be beneficial to other community members reading this thread. If you have any compliments or complaints to MSDN Support, feel free to contact MSDNFSF@microsoft.com.

    Thursday, August 8, 2019 9:02 AM