12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- ##
- # $Id$
- ##
-
- ##
- # This file is part of the Metasploit Framework and may be subject to
- # redistribution and commercial restrictions. Please see the Metasploit
- # Framework web site for more information on licensing and terms of use.
- # http://metasploit.com/framework/
- ##
-
- # $Revision$
-
- require 'rubygems'
- require 'pathname'
- require 'nokogiri'
- require 'uri'
-
# Link-extraction parser for the crawler: scans an HTML response for anchor
# tags and hands each followable href to the inherited path-queueing helpers.
class CrawlerSimple < BaseParser
  # Extracts anchor hrefs from an HTML response and registers each one as a
  # new GET request via +urltohash+ and +insertnewpath+.
  #
  # Responses whose Content-Type is missing or does not mention text/html
  # are ignored.
  #
  # @param request [#[]] the request that produced +result+; its 'uri' entry
  #   is passed to +urltohash+ with every href (presumably as the base for
  #   resolving relative links -- confirm against BaseParser)
  # @param result [#[], #body] the response; only its 'Content-Type' entry
  #   and +body+ are read here
  # @return [Object, nil] nil when the response is skipped; otherwise the
  #   value of the anchor iteration, which callers should not rely on
  def parse(request, result)
    # A response may carry no Content-Type header at all; #to_s turns that
    # nil into "" so it is skipped instead of raising NoMethodError.
    # Media types are case-insensitive, hence the downcase.
    content_type = result['Content-Type'].to_s.downcase
    return unless content_type.include?('text/html')

    doc = Nokogiri::HTML(result.body.to_s)
    doc.css('a').each do |anchor_tag|
      href = anchor_tag['href']

      # Skip anchors without an href, in-page fragments and javascript:
      # pseudo-links -- none of them name a new path to crawl.
      next if href.nil? || href.match(/^(\#|javascript\:)/)

      begin
        hreq = urltohash('GET', href, request['uri'], nil)
        insertnewpath(hreq)
      rescue URI::InvalidURIError
        # Malformed hrefs are common in crawled pages; drop this link and
        # keep processing the remaining anchors.
      end
    end
  end
end
|