Class: PublicStorage::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/publicstorage/crawler.rb

Overview

Used to fetch and parse either HTML or XML via a URL.

Constant Summary collapse

HOST =
'https://www.publicstorage.com'

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.html(url:) ⇒ Nokogiri::HTML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::HTML::Document)

Raises:

  • (FetchError)

11
12
13
# File 'lib/publicstorage/crawler.rb', line 11

# Class-level shortcut: builds a throwaway crawler and delegates to its
# instance-level #html.
#
# @param url [String]
# @return [Nokogiri::HTML::Document]
def self.html(url:)
  crawler = new
  crawler.html(url: url)
end

.xml(url:) ⇒ Nokogiri::XML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::XML::Document)

Raises:

  • (FetchError)

18
19
20
# File 'lib/publicstorage/crawler.rb', line 18

# Class-level shortcut: builds a throwaway crawler and delegates to its
# instance-level #xml.
#
# @param url [String]
# @return [Nokogiri::XML::Document]
def self.xml(url:)
  crawler = new
  crawler.xml(url: url)
end

Instance Method Details

#connection ⇒ HTTP::Client

Returns:

  • (HTTP::Client)


23
24
25
26
27
28
29
30
31
32
# File 'lib/publicstorage/crawler.rb', line 23

# Lazily builds and memoises the HTTP client used for every request made by
# this crawler instance.
#
# The client is a persistent connection to HOST. A User-Agent header and a
# timeout are applied only when the corresponding PublicStorage.config
# values are set.
#
# @return [HTTP::Client]
def connection
  return @connection if @connection

  settings = PublicStorage.config
  client = HTTP.persistent(HOST)
  client = client.headers('User-Agent' => settings.user_agent) if settings.user_agent
  client = client.timeout(settings.timeout) if settings.timeout

  @connection = client
end

#fetch(url:) ⇒ HTTP::Response

Parameters:

  • url (String)

Returns:

  • (HTTP::Response)

Raises:

  • (FetchError)

36
37
38
39
40
41
# File 'lib/publicstorage/crawler.rb', line 36

# Issues a GET for +url+ over the memoised connection and returns the
# response when its status reports ok?.
#
# On any other status the response is flushed and wrapped in a FetchError
# together with the requested URL.
#
# @param url [String]
# @return [HTTP::Response]
# @raise [FetchError] when the response status is not ok?
def fetch(url:)
  reply = connection.get(url)
  return reply if reply.status.ok?

  raise FetchError.new(url: url, response: reply.flush)
end

#html(url:) ⇒ Nokogiri::HTML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::HTML::Document)

Raises:

  • (FetchError)

46
47
48
# File 'lib/publicstorage/crawler.rb', line 46

# Fetches +url+ and parses the response body as HTML.
#
# The body is coerced with Kernel#String before being handed to Nokogiri.
#
# @param url [String]
# @return [Nokogiri::HTML::Document]
# @raise [FetchError] propagated from #fetch when the response status is not ok?
def html(url:)
  markup = String(fetch(url: url).body)
  Nokogiri::HTML(markup)
end

#xml(url:) ⇒ Nokogiri::XML::Document

Parameters:

  • url (String)

Returns:

  • (Nokogiri::XML::Document)

Raises:

  • (FetchError)

53
54
55
# File 'lib/publicstorage/crawler.rb', line 53

# Fetches +url+ and parses the response body as XML.
#
# The body is coerced with Kernel#String before being handed to Nokogiri.
#
# @param url [String]
# @return [Nokogiri::XML::Document]
# @raise [FetchError] propagated from #fetch when the response status is not ok?
def xml(url:)
  fetch(url: url).body.then { |payload| Nokogiri::XML(String(payload)) }
end