Read web page and extract URLs


/ Published in: VB.NET
Save to your folder(s)



Copy this code and paste it in your HTML
  1. 'http://www.dreamincode.net/code/snippet1249.htm
  2. Option Explicit Off
  3. Imports System
  4. Imports System.IO
  5. Imports System.Net
  6.  
  ''' <summary>
  ''' Form with three actions: run a web search and list the results in a grid,
  ''' download the listed result pages to disk, and scan the downloaded pages
  ''' for anchor (href) targets.
  ''' </summary>
  ''' <remarks>
  ''' The controls used here (Btn_Search, Btn_Download, Btn_Read, Txt_Text,
  ''' DataGridView1, ListBox1) and the com.google.api web-service proxy are not
  ''' declared in this file; presumably they live in the designer partial class
  ''' and a project web reference - verify against the project.
  ''' </remarks>
  Public Class Form1

      ' Number of results requested from the search service, and therefore the
      ' number of SpiderN.html files that can exist.
      Private Const MaxResults As Integer = 10

      ' Downloaded pages are written to <prefix><row index>.html.
      Private Const DownloadPathPrefix As String = "E:\Balagurunathan\Temp\API\Spider"

      ' Grid column that search_url fills with the result URL.
      Private Const UrlColumnIndex As Integer = 2

      ' Matches the href attribute of an <a> tag; the value is captured in
      ' group 1 (double-quoted), group 2 (single-quoted) or group 3 (unquoted).
      Private Shared ReadOnly HrefPattern As New System.Text.RegularExpressions.Regex( _
          "<a\b[^>]*?\bhref\s*=\s*(?:""([^""]*)""|'([^']*)'|([^\s>]+))", _
          System.Text.RegularExpressions.RegexOptions.IgnoreCase)

      ' Delegates used to marshal control access onto the UI thread.
      Private Delegate Function QueryReader() As String
      Private Delegate Function UrlReader() As String()
      Private Delegate Sub ResultsWriter(ByVal table As DataTable)

      ' Worker threads for the download (t1) and search (t2) actions.
      Dim t1 As Threading.Thread = Nothing
      Dim t2 As Threading.Thread = Nothing
      Dim ref() As String

      ''' <summary>Starts the search on a worker thread.</summary>
      Private Sub Btn_Search_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Btn_Search.Click
          t2 = New Threading.Thread(AddressOf search_url)
          t2.Start()
      End Sub

      ''' <summary>Starts the page download on a worker thread.</summary>
      Private Sub Btn_Download_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Btn_Download.Click
          t1 = New Threading.Thread(AddressOf download_url)
          t1.Start()
      End Sub

      ''' <summary>
      ''' Downloads the URL found in each of the first MaxResults grid rows to
      ''' DownloadPathPrefix + row index + ".html", then reports the outcome.
      ''' </summary>
      ''' <remarks>
      ''' Each download is attempted independently so one bad URL does not cancel
      ''' the rest. The thread simply returns when done; it no longer aborts
      ''' itself, which previously prevented the completion message from showing.
      ''' </remarks>
      Public Sub download_url()
          Dim urls() As String
          Try
              ' The grid belongs to the UI thread, so read it there.
              If Me.InvokeRequired Then
                  urls = DirectCast(Me.Invoke(New UrlReader(AddressOf ReadGridUrls)), String())
              Else
                  urls = ReadGridUrls()
              End If
          Catch ex As Exception
              MsgBox("Could not read the URL list: " & ex.Message)
              Return
          End Try

          Dim attempted As Integer = 0
          Dim failed As Integer = 0
          Dim lastError As String = ""

          Using client As New System.Net.WebClient()
              For i As Integer = 0 To urls.Length - 1
                  If String.IsNullOrEmpty(urls(i)) Then
                      Continue For
                  End If

                  attempted += 1
                  Try
                      client.DownloadFile(urls(i), DownloadPathPrefix & i.ToString() & ".html")
                  Catch ex As Exception
                      ' Best effort: remember the failure and carry on with the next row.
                      failed += 1
                      lastError = ex.Message
                  End Try
              Next
          End Using

          If failed = 0 Then
              MsgBox("Download has been Finished")
          Else
              MsgBox("Download has been Finished (" & failed.ToString() & " of " & _
                     attempted.ToString() & " failed). Last error: " & lastError)
          End If
      End Sub

      ''' <summary>
      ''' Runs the web search for the text in Txt_Text and binds a table of
      ''' Title / Summary / URL rows to DataGridView1.
      ''' </summary>
      ''' <remarks>
      ''' Iterates over the results actually returned instead of a fixed 0..50
      ''' range, which used to overrun the array and leave the grid empty.
      ''' </remarks>
      Public Sub search_url()
          Try
              ' Txt_Text belongs to the UI thread, so read it there.
              Dim query As String
              If Me.InvokeRequired Then
                  query = CStr(Me.Invoke(New QueryReader(AddressOf ReadQueryText)))
              Else
                  query = ReadQueryText()
              End If

              Dim s As com.google.api.GoogleSearchService = New Sample1.com.google.api.GoogleSearchService()
              Dim r As com.google.api.GoogleSearchResult = s.doGoogleSearch("Add your Licence Code here", query, 0, MaxResults, False, "", True, "", "", "")

              Dim dtResults As New DataTable()
              dtResults.Columns.Add(New DataColumn("Title", GetType(String)))
              dtResults.Columns.Add(New DataColumn("Summary", GetType(String)))
              dtResults.Columns.Add(New DataColumn("URL", GetType(String)))

              If r IsNot Nothing AndAlso r.resultElements IsNot Nothing Then
                  For i As Integer = 0 To r.resultElements.Length - 1
                      Dim dr As DataRow = dtResults.NewRow()
                      dr(0) = r.resultElements(i).title
                      dr(1) = r.resultElements(i).snippet
                      dr(2) = r.resultElements(i).URL
                      dtResults.Rows.Add(dr)
                  Next
              End If

              ' Bind on the UI thread.
              If Me.InvokeRequired Then
                  Me.Invoke(New ResultsWriter(AddressOf ShowResults), New Object() {dtResults})
              Else
                  ShowResults(dtResults)
              End If
          Catch ex As Exception
              MsgBox("Search failed: " & ex.Message)
          End Try
      End Sub

      ''' <summary>
      ''' Scans each existing downloaded page (indices 0 to MaxResults - 1) and
      ''' lists every distinct anchor href value in ListBox1.
      ''' </summary>
      ''' <remarks>
      ''' Missing files are skipped. All files are processed; the earlier version
      ''' left the outer loop as soon as the first file had been read.
      ''' </remarks>
      Private Sub Btn_Read_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Btn_Read.Click
          Me.ListBox1.Items.Clear()

          Dim unreadable As Integer = 0

          For j As Integer = 0 To MaxResults - 1
              Dim fileName As String = DownloadPathPrefix & j.ToString() & ".html"
              If Not File.Exists(fileName) Then
                  Continue For
              End If

              Dim html As String = Nothing
              Try
                  html = File.ReadAllText(fileName)
              Catch ex As IOException
                  unreadable += 1
                  Continue For
              Catch ex As UnauthorizedAccessException
                  unreadable += 1
                  Continue For
              End Try

              For Each link As String In ExtractLinks(html)
                  If Not Me.ListBox1.Items.Contains(link) Then
                      Me.ListBox1.Items.Add(link)
                      ' Advance the selection with each added item, as before.
                      Me.ListBox1.SelectedIndex = Me.ListBox1.SelectedIndex + 1
                  End If
              Next
          Next

          If unreadable > 0 Then
              MsgBox(unreadable.ToString() & " downloaded file(s) could not be read.")
          End If
      End Sub

      ' Returns the current search text; must run on the UI thread.
      Private Function ReadQueryText() As String
          Return Txt_Text.Text
      End Function

      ' Binds the result table to the grid; must run on the UI thread.
      Private Sub ShowResults(ByVal table As DataTable)
          DataGridView1.DataSource = table
      End Sub

      ' Returns the URL cell of the first MaxResults grid rows, indexed by row;
      ' entries are Nothing where a row has no URL. Must run on the UI thread.
      Private Function ReadGridUrls() As String()
          If Me.DataGridView1.ColumnCount <= UrlColumnIndex Then
              Return New String() {}
          End If

          Dim rowCount As Integer = Math.Min(Me.DataGridView1.Rows.Count, MaxResults)
          Dim urls() As String = New String(rowCount - 1) {}

          For i As Integer = 0 To rowCount - 1
              Dim row As DataGridViewRow = Me.DataGridView1.Rows(i)
              If row.IsNewRow Then
                  Continue For
              End If

              Dim cellValue As Object = row.Cells(UrlColumnIndex).Value
              If cellValue IsNot Nothing AndAlso cellValue IsNot DBNull.Value Then
                  urls(i) = cellValue.ToString()
              End If
          Next

          Return urls
      End Function

      ' Returns the non-empty href values of all <a> tags in html, in document order.
      Private Shared Function ExtractLinks(ByVal html As String) As System.Collections.Generic.List(Of String)
          Dim links As New System.Collections.Generic.List(Of String)()
          If String.IsNullOrEmpty(html) Then
              Return links
          End If

          For Each m As System.Text.RegularExpressions.Match In HrefPattern.Matches(html)
              Dim target As String
              If m.Groups(1).Success Then
                  target = m.Groups(1).Value
              ElseIf m.Groups(2).Success Then
                  target = m.Groups(2).Value
              Else
                  target = m.Groups(3).Value
              End If

              If target.Length > 0 Then
                  links.Add(target)
              End If
          Next

          Return links
      End Function

  End Class

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.