Revision: 50500
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 25, 2011 00:15 by danfsmith
Initial Code
<#
.SYNOPSIS
    Lists links whose href contains "pdf" in the .htm and .aspx files under a folder.

.DESCRIPTION
    Recursively collects *.htm and *.aspx files below $path, parses each one with
    HtmlAgilityPack, and emits one object per <a> element whose href contains the
    text "pdf" (case-insensitive, anywhere in the value). The objects are written
    to the pipeline sorted by PdfLink.

.PARAMETER path
    Root folder to scan. May be absolute or relative to the current location.

.PARAMETER urlpath
    Text substituted for the root folder when building the FileName column, so a
    local file path is reported as the URL it is served from.

.PARAMETER agilityPackPath
    Location of HtmlAgilityPack.dll. Defaults to the path the script originally
    hard-coded, so existing two-argument invocations keep working.

.OUTPUTS
    PsObject with the properties PdfLink, FileName and LineNumber.
#>
param (
    $path,
    $urlpath,
    $agilityPackPath = 'f:\dan\tools\html-agility-pack\HtmlAgilityPack.dll'
)

Add-Type -Path $agilityPackPath

# FileInfo.FullName is always absolute, so a relative $path would never match in
# the Replace() below and FileName would keep the local disk path. Rooted paths
# are used exactly as supplied so existing callers see unchanged output.
$root = $path
if ($path -and -not [System.IO.Path]::IsPathRooted($path)) {
    $root = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($path)
}

$files = Get-ChildItem -Include *.htm,*.aspx -Path $path -Recurse

# A single document instance is reused; each Load() call re-parses into it.
$doc = New-Object HtmlAgilityPack.HtmlDocument

$result = $files | ForEach-Object {
    Write-Host "Checking $_"

    # Map the local file path onto its URL form: swap the root, then flip slashes.
    $name = $_.FullName.Replace($root, $urlpath).Replace("\", "/")

    # Load() returns nothing, so there is no value worth capturing.
    $doc.Load($_.FullName)

    # SelectNodes yields $null when the document has no <a> elements.
    $linknodes = $doc.DocumentNode.SelectNodes("//a")
    if ($linknodes) {
        foreach ($node in $linknodes) {
            # Read the attribute once; "" is the fallback when href is absent.
            $href = $node.GetAttributeValue("href", "")
            if ($href.ToLower().Contains("pdf")) {
                Write-Host "Found" $href
                New-Object PsObject -Property @{
                    PdfLink    = $href
                    FileName   = $name
                    LineNumber = $node.Line
                }
            }
        }
    }
}

$result | Sort-Object PdfLink
Initial URL
Initial Description
Initial Title
Get All PDF links from HTML files
Initial Tags
html
Initial Language
Windows PowerShell