Class: PublicStorage::Crawler
- Inherits: Object
  - Object
    - PublicStorage::Crawler
- Defined in:
- lib/publicstorage/crawler.rb
Overview
Used to fetch and parse either HTML or XML via a URL.
Constant Summary collapse
- HOST =
'https://www.publicstorage.com'
Class Method Summary collapse
- .html(url:) ⇒ Nokogiri::HTML::Document
- .xml(url:) ⇒ Nokogiri::XML::Document
Instance Method Summary collapse
- #connection ⇒ HTTP::Client
- #fetch(url:) ⇒ HTTP::Response
- #html(url:) ⇒ Nokogiri::HTML::Document
- #xml(url:) ⇒ Nokogiri::XML::Document
Class Method Details
.html(url:) ⇒ Nokogiri::HTML::Document
11 12 13 |
# File 'lib/publicstorage/crawler.rb', line 11
#
# Class-level shortcut: instantiates a crawler and delegates to its #html.
#
# @param url the location to fetch; handed unchanged to the instance method
# @return [Nokogiri::HTML::Document]
def self.html(url:)
  crawler = new
  crawler.html(url:)
end
.xml(url:) ⇒ Nokogiri::XML::Document
18 19 20 |
# File 'lib/publicstorage/crawler.rb', line 18
#
# Class-level shortcut: instantiates a crawler and delegates to its #xml.
#
# @param url the location to fetch; handed unchanged to the instance method
# @return [Nokogiri::XML::Document]
def self.xml(url:)
  crawler = new
  crawler.xml(url:)
end
Instance Method Details
#connection ⇒ HTTP::Client
23 24 25 26 27 28 29 30 31 32 |
# File 'lib/publicstorage/crawler.rb', line 23
#
# Returns the HTTP client for this crawler, building it on first use and
# caching it in @connection for subsequent calls.
#
# The client is a persistent connection to HOST. A User-Agent header and a
# timeout are applied only when the corresponding PublicStorage.config
# values are set.
#
# @return [HTTP::Client]
def connection
  return @connection if @connection

  settings = PublicStorage.config
  client = HTTP.persistent(HOST)
  client = client.headers('User-Agent' => settings.user_agent) if settings.user_agent
  client = client.timeout(settings.timeout) if settings.timeout
  @connection = client
end
#fetch(url:) ⇒ HTTP::Response
36 37 38 39 40 41 |
# File 'lib/publicstorage/crawler.rb', line 36
#
# Issues a GET for +url+ over the cached connection and returns the response
# when its status reports ok?.
#
# @param url the location to request
# @return [HTTP::Response]
# @raise [FetchError] when the response status is not ok?; the error is
#   built with the url and the flushed response
def fetch(url:)
  reply = connection.get(url)
  return reply if reply.status.ok?

  # NOTE(review): flush presumably drains the body so the persistent
  # connection stays reusable -- confirm against the http gem docs.
  raise FetchError.new(url:, response: reply.flush)
end
#html(url:) ⇒ Nokogiri::HTML::Document
46 47 48 |
# File 'lib/publicstorage/crawler.rb', line 46
#
# Fetches +url+ and parses the response body as HTML.
#
# @param url the location to fetch
# @return [Nokogiri::HTML::Document]
# @raise [FetchError] propagated from #fetch
def html(url:)
  markup = String(fetch(url:).body)
  Nokogiri::HTML(markup)
end
#xml(url:) ⇒ Nokogiri::XML::Document
53 54 55 |
# File 'lib/publicstorage/crawler.rb', line 53
#
# Fetches +url+ and parses the response body as XML.
#
# @param url the location to fetch
# @return [Nokogiri::XML::Document]
# @raise [FetchError] propagated from #fetch
def xml(url:)
  payload = String(fetch(url:).body)
  Nokogiri::XML(payload)
end