Browse Source

Replace hpricot by nokogiri

Sam 5 years ago
parent
commit
8cabc753a9

+ 1
- 0
.gitignore View File

@@ -3,6 +3,7 @@ Gemfile.local
3 3
 Gemfile.local.lock
4 4
 # Rubymine project directory
5 5
 .idea
6
+.ruby-version
6 7
 # Sublime Text project directory (not created by ST by default)
7 8
 .sublime-project
8 9
 # RVM control file, keep this to avoid backdooring Metasploit

+ 1
- 1
.ruby-version View File

@@ -1 +1 @@
1
-1.9.3-p547
1
+1.9.3-p545

+ 13
- 16
data/msfcrawler/basic.rb View File

@@ -13,7 +13,7 @@
13 13
 
14 14
 require 'rubygems'
15 15
 require 'pathname'
16
-require 'hpricot'
16
+require 'nokogiri'
17 17
 require 'uri'
18 18
 
19 19
 class CrawlerSimple < BaseParser
@@ -24,23 +24,20 @@ class CrawlerSimple < BaseParser
24 24
       return
25 25
     end
26 26
 
27
-    doc = Hpricot(result.body.to_s)
28
-    doc.search('a').each do |link|
29
-
30
-    hr = link.attributes['href']
31
-
32
-    if hr and !hr.match(/^(\#|javascript\:)/)
33
-      begin
34
-        hreq = urltohash('GET',hr,request['uri'],nil)
35
-
36
-        insertnewpath(hreq)
37
-
38
-      rescue URI::InvalidURIError
39
-        #puts "Parse error"
40
-        #puts "Error: #{link[0]}"
27
+    # doc = Hpricot(result.body.to_s)
28
+    doc = Nokogiri::HTML(result.body.to_s)
29
+    doc.css('a').each do |anchor_tag|
30
+      hr = anchor_tag['href']
31
+      if hr && !hr.match(/^(\#|javascript\:)/)
32
+        begin
33
+          hreq = urltohash('GET', hr, request['uri'], nil)
34
+          insertnewpath(hreq)
35
+        rescue URI::InvalidURIError
36
+          #puts "Parse error"
37
+          #puts "Error: #{link[0]}"
38
+        end
41 39
       end
42 40
     end
43
-    end
44 41
   end
45 42
 end
46 43
 

+ 11
- 30
data/msfcrawler/forms.rb View File

@@ -13,7 +13,7 @@
13 13
 
14 14
 require 'rubygems'
15 15
 require 'pathname'
16
-require 'hpricot'
16
+require 'nokogiri'
17 17
 require 'uri'
18 18
 
19 19
 class CrawlerForms < BaseParser
@@ -27,49 +27,30 @@ class CrawlerForms < BaseParser
27 27
     hr = ''
28 28
     m = ''
29 29
 
30
-    doc = Hpricot(result.body.to_s)
31
-    doc.search('form').each do |f|
32
-      hr = f.attributes['action']
30
+    doc = Nokogiri::HTML(result.body.to_s)
31
+    doc.css('form').each do |f|
32
+      hr = f['action']
33 33
 
34
-      fname = f.attributes['name']
35
-      if fname.empty?
36
-        fname = "NONE"
37
-      end
38
-
39
-      m = "GET"
40
-      if !f.attributes['method'].empty?
41
-        m = f.attributes['method'].upcase
42
-      end
34
+      fname = f['name']
35
+      fname = "NONE" if fname.empty?
43 36
 
44
-      #puts "Parsing form name: #{fname} (#{m})"
37
+      m = f['method'].empty? ? 'GET' : f['method'].upcase
45 38
 
46
-      htmlform = Hpricot(f.inner_html)
39
+      htmlform = Nokogiri::HTML(f.inner_html)
47 40
 
48 41
       arrdata = []
49 42
 
50
-      htmlform.search('input').each do |p|
51
-        #puts p.attributes['name']
52
-        #puts p.attributes['type']
53
-        #puts p.attributes['value']
54
-
55
-        #raw_request has uri_encoding disabled as it encodes '='.
56
-        arrdata << (p.attributes['name'] + "=" + Rex::Text.uri_encode(p.attributes['value']))
43
+      htmlform.css('input').each do |p|
44
+        arrdata << "#{p['name']}=#{Rex::Text.uri_encode(p['value'])}"
57 45
       end
58 46
 
59 47
       data = arrdata.join("&").to_s
60 48
 
61
-
62 49
       begin
63
-        hreq = urltohash(m,hr,request['uri'],data)
64
-
50
+        hreq = urltohash(m, hr, request['uri'], data)
65 51
         hreq['ctype'] = 'application/x-www-form-urlencoded'
66
-
67 52
         insertnewpath(hreq)
68
-
69
-
70 53
       rescue URI::InvalidURIError
71
-        #puts "Parse error"
72
-        #puts "Error: #{link[0]}"
73 54
       end
74 55
     end
75 56
   end

+ 13
- 17
data/msfcrawler/frames.rb View File

@@ -9,33 +9,29 @@
9 9
 
10 10
 require 'rubygems'
11 11
 require 'pathname'
12
-require 'hpricot'
12
+require 'nokogiri'
13 13
 require 'uri'
14 14
 
15 15
 class CrawlerFrames < BaseParser
16 16
 
17 17
   def parse(request,result)
18 18
 
19
-    if !result['Content-Type'].include? "text/html"
20
-      return
21
-    end
22
-
23
-    doc = Hpricot(result.body.to_s)
24
-    doc.search('iframe').each do |ifra|
25
-
26
-    ir = ifra.attributes['src']
27
-
28
-    if ir and !ir.match(/^(\#|javascript\:)/)
29
-      begin
30
-        hreq = urltohash('GET',ir,request['uri'],nil)
19
+    return unless result['Content-Type'].include?('text/html')
31 20
 
32
-        insertnewpath(hreq)
21
+    doc = Nokogiri::HTML(result.body.to_s)
22
+    doc.css('iframe').each do |ifra|
23
+      ir = ifra['src']
33 24
 
34
-      rescue URI::InvalidURIError
35
-        #puts "Error"
25
+      if ir && !ir.match(/^(\#|javascript\:)/)
26
+        begin
27
+          hreq = urltohash('GET', ir, request['uri'], nil)
28
+          insertnewpath(hreq)
29
+        rescue URI::InvalidURIError
30
+        end
36 31
       end
37
-    end
32
+
38 33
     end
39 34
   end
35
+
40 36
 end
41 37
 

+ 13
- 20
data/msfcrawler/image.rb View File

@@ -10,33 +10,26 @@
10 10
 
11 11
 require 'rubygems'
12 12
 require 'pathname'
13
-require 'hpricot'
13
+require 'nokogiri'
14 14
 require 'uri'
15 15
 
16 16
 class CrawlerImage < BaseParser
17 17
 
18 18
   def parse(request,result)
19 19
 
20
-    if !result['Content-Type'].include? "text/html"
21
-      return
22
-    end
23
-
24
-    doc = Hpricot(result.body.to_s)
25
-    doc.search('img').each do |i|
26
-
27
-    im = i.attributes['src']
28
-
29
-    if im and !im.match(/^(\#|javascript\:)/)
30
-      begin
31
-        hreq = urltohash('GET',im,request['uri'],nil)
32
-
33
-        insertnewpath(hreq)
34
-
35
-      rescue URI::InvalidURIError
36
-        #puts "Parse error"
37
-        #puts "Error: #{i[0]}"
20
+    return unless result['Content-Type'].include?('text/html')
21
+
22
+    doc = Nokogiri::HTML(result.body.to_s)
23
+    doc.css('img').each do |i|
24
+      im = i['src']
25
+      if im && !im.match(/^(\#|javascript\:)/)
26
+        begin
27
+          hreq = urltohash('GET', im, request['uri'], nil)
28
+          insertnewpath(hreq)
29
+        rescue URI::InvalidURIError
30
+        end
38 31
       end
39
-    end
32
+
40 33
     end
41 34
   end
42 35
 end

+ 13
- 21
data/msfcrawler/link.rb View File

@@ -10,33 +10,25 @@
10 10
 
11 11
 require 'rubygems'
12 12
 require 'pathname'
13
-require 'hpricot'
13
+require 'nokogiri'
14 14
 require 'uri'
15 15
 
16 16
 class CrawlerLink < BaseParser
17 17
 
18 18
   def parse(request,result)
19
-
20
-    if !result['Content-Type'].include? "text/html"
21
-      return
22
-    end
23
-
24
-    doc = Hpricot(result.body.to_s)
25
-    doc.search('link').each do |link|
26
-
27
-    hr = link.attributes['href']
28
-
29
-    if hr and !hr.match(/^(\#|javascript\:)/)
30
-      begin
31
-        hreq = urltohash('GET',hr,request['uri'],nil)
32
-
33
-        insertnewpath(hreq)
34
-
35
-      rescue URI::InvalidURIError
36
-        #puts "Parse error"
37
-        #puts "Error: #{link[0]}"
19
+    return unless result['Content-Type'].include?('text/html')
20
+
21
+    doc = Nokogiri::HTML(result.body.to_s)
22
+    doc.css('link').each do |link|
23
+      hr = link['href']
24
+      if hr && !hr.match(/^(\#|javascript\:)/)
25
+        begin
26
+          hreq = urltohash('GET', hr, request['uri'], nil)
27
+          insertnewpath(hreq)
28
+        rescue URI::InvalidURIError
29
+        end
38 30
       end
39
-    end
31
+
40 32
     end
41 33
   end
42 34
 end

+ 6
- 17
data/msfcrawler/objects.rb View File

@@ -13,36 +13,25 @@
13 13
 
14 14
 require 'rubygems'
15 15
 require 'pathname'
16
-require 'hpricot'
16
+require 'nokogiri'
17 17
 require 'uri'
18 18
 
19 19
 class CrawlerObjects < BaseParser
20 20
 
21 21
   def parse(request,result)
22
-
23
-    if !result['Content-Type'].include? "text/html"
24
-      return
25
-    end
26
-
22
+    return unless result['Content-Type'].include?('text/html') # TODO: use MIXIN
27 23
     hr = ''
28 24
     m = ''
29
-
30
-    doc = Hpricot(result.body.to_s)
31
-    doc.search("//object/embed").each do |obj|
32
-
25
+    doc = Nokogiri::HTML(result.body.to_s)
26
+    doc.xpath("//object/embed").each do |obj|
33 27
       s = obj['src']
34
-
35 28
       begin
36
-        hreq = urltohash('GET',s,request['uri'],nil)
37
-
29
+        hreq = urltohash('GET', s, request['uri'], nil)
38 30
         insertnewpath(hreq)
39
-
40
-
41 31
       rescue URI::InvalidURIError
42
-        #puts "Parse error"
43
-        #puts "Error: #{link[0]}"
44 32
       end
45 33
     end
46 34
   end
35
+
47 36
 end
48 37
 

+ 7
- 16
data/msfcrawler/scripts.rb View File

@@ -13,36 +13,27 @@
13 13
 
14 14
 require 'rubygems'
15 15
 require 'pathname'
16
-require 'hpricot'
16
+require 'nokogiri'
17 17
 require 'uri'
18 18
 
19 19
 class CrawlerScripts < BaseParser
20 20
 
21 21
   def parse(request,result)
22
-
23
-    if !result['Content-Type'].include? "text/html"
24
-      return
25
-    end
22
+    return unless result['Content-Type'].include? "text/html"
26 23
 
27 24
     hr = ''
28 25
     m = ''
29
-
30
-    doc = Hpricot(result.body.to_s)
31
-    doc.search("//script").each do |obj|
32
-
26
+    doc = Nokogiri::HTML(result.body.to_s)
27
+    doc.xpath("//script").each do |obj|
33 28
       s = obj['src']
34
-
35 29
       begin
36
-        hreq = urltohash('GET',s,request['uri'],nil)
37
-
30
+        hreq = urltohash('GET', s, request['uri'], nil)
38 31
         insertnewpath(hreq)
39
-
40
-
41 32
       rescue URI::InvalidURIError
42
-        #puts "Parse error"
43
-        #puts "Error: #{link[0]}"
44 33
       end
45 34
     end
35
+
46 36
   end
37
+
47 38
 end
48 39
 

Loading…
Cancel
Save