class Mechanize::Page

This class encapsulates an HTML page. If Mechanize finds a content type of 'text/html', this class will be instantiated and returned.

Example:

require 'mechanize'

agent = Mechanize.new
agent.get('http://google.com/').class # => Mechanize::Page

Constants

DEFAULT_RESPONSE

Attributes

encodings[R]

Possible encodings for this page based on HTTP headers and meta elements

mech[RW]

Public Class Methods

charset(content_type) click to toggle source
# File lib/mechanize/page.rb, line 574
# Extracts the +charset+ parameter from a Content-Type style string.
#
# Returns the charset name as written in +content_type+, or nil when no
# charset parameter is present or when the parameter is the literal
# string 'none'.
def charset content_type
  name = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\"\/\[\]?={}\s]+)/i, 1]
  name == 'none' ? nil : name
end
Also aliased as: charset_from_content_type
charset_from_content_type(content_type)
Alias for: charset
meta_charset(body) click to toggle source

Retrieves all charsets from meta tags in body

# File lib/mechanize/page.rb, line 596
# Collects every charset declared by a <meta> tag in +body+, in document
# order.
#
# Two forms are recognised per tag:
# * a quoted +charset+ attribute, whose value is used as is;
# * an +http-equiv+ content-type tag, whose +content+ attribute is passed
#   to +charset+ to pull out the charset parameter.
#
# Returns an Array of Strings (empty when nothing is declared).
def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    # The value is matched lazily so it stops at the first closing quote;
    # a greedy match would run on to the last quote in the tag and swallow
    # any attributes that follow the charset.
    direct = meta[/charset\s*=\s*(["'])?\s*(.+?)\s*\1/i, 2]
    next direct if direct

    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    content = meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]
    charset content if content
  end.compact
end
meta_content_type(body) click to toggle source

Retrieves the last content-type set by a meta tag in body

# File lib/mechanize/page.rb, line 614
# Returns the +content+ value of the last <meta http-equiv="content-type">
# tag in +body+, or nil when there is no such tag.
#
# Only the last matching tag is consulted: if its +content+ attribute
# cannot be read, nil is returned without looking at earlier tags.
def self.meta_content_type body
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    # Whitespace around "=" is tolerated here just as it is for the
    # http-equiv attribute above.
    return meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]
  end

  nil
end
new(uri=nil, response=nil, body=nil, code=nil, mech=nil) click to toggle source
Calls superclass method Mechanize::File.new
# File lib/mechanize/page.rb, line 27
# Builds a page from +uri+, +response+, +body+ and +code+, which are handed
# to the superclass initializer once the encoding candidates are collected.
#
# +response+ falls back to DEFAULT_RESPONSE. +mech+, when given, must be a
# Mechanize instance; anything else raises a RuntimeError.
#
# @encodings is filled in this order: a nil placeholder, the charset
# detected from the body, charsets from the response headers, charsets from
# <meta> tags, and finally the agent's default encoding. The order matters
# because #parser walks the list from the end.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  response ||= DEFAULT_RESPONSE

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]
  raise 'no' if mech && !(Mechanize === mech)
  @mech = mech

  reset

  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat(self.class.response_header_charset(response))

  if body
    # The body is switched to binary in place so the <meta> regular
    # expressions can run regardless of its real encoding.
    body.force_encoding(Encoding::ASCII_8BIT)

    @encodings.concat(self.class.meta_charset(body))

    declared_type = self.class.meta_content_type(body)
    @meta_content_type = declared_type if declared_type
  end

  @encodings << mech.default_encoding if mech && mech.default_encoding

  super(uri, response, body, code)
end
response_header_charset(response) click to toggle source
# File lib/mechanize/page.rb, line 583
# Returns the charsets announced by the 'content-type' headers of
# +response+, as an Array in iteration order.
#
# +response+ only needs to respond to +each+ yielding header/value pairs.
# Values that do not mention "charset" are skipped; for the others the
# result of +charset+ is stored.
def self.response_header_charset response
  found = []
  response.each do |name, content|
    found << charset(content) if name == 'content-type' && content =~ /charset/i
  end
  found
end

Public Instance Methods

%()
Alias for: at
/()
Alias for: search
at() click to toggle source

Shorthand for parser.at.

See also Nokogiri::XML::Node#at for details.

# File lib/mechanize/page.rb, line 222
  
Also aliased as: %
at_css() click to toggle source

Shorthand for parser.at_css.

See also Nokogiri::XML::Node#at_css for details.

# File lib/mechanize/page.rb, line 229
  
at_xpath() click to toggle source

Shorthand for parser.at_xpath.

See also Nokogiri::XML::Node#at_xpath for details.

# File lib/mechanize/page.rb, line 236
# Forwards search, css, xpath, at, at_css and at_xpath to the object
# returned by #parser.
# NOTE(review): def_delegators is presumably Forwardable's -- confirm.
def_delegators :parser, :search, :css, :xpath, :at, :at_css, :at_xpath
base_with(criteria) click to toggle source
base_with(criteria) { |base| ... }

Find a single base tag matching criteria. See forms_with for details of criteria, where for “form(s)” read “base tag(s)”.

Example:

page.base_with(href: /foo/).click
# File lib/mechanize/page.rb, line 353
  
base_with!(criteria) click to toggle source
base_with!(criteria) { |base| ... }

Same as base_with but raises an ElementNotFoundError if no button matches criteria

# File lib/mechanize/page.rb, line 363
  
bases() click to toggle source

Return a list of all base tags

# File lib/mechanize/page.rb, line 525
# Returns one Base object per <base> element found by #search.
# The list is built on first use and cached in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end
bases_with(criteria) click to toggle source

Find all base tags matching criteria. See forms_with for details of criteria, where for “form(s)” read “base tag(s)”.

Example:

page.bases_with(href: /foo/).each do |base|
  puts base.href
end
# File lib/mechanize/page.rb, line 376
# NOTE(review): this macro call appears to be what generates bases_with (and
# presumably base_with / base_with!); elements_with itself is not shown
# here -- confirm against its definition.
elements_with :base
canonical_uri() click to toggle source

Return the canonical URI for the page if there is a link tag with rel="canonical" and an href attribute.

# File lib/mechanize/page.rb, line 179
# Returns the URI given by the page's <link rel="canonical" href="...">
# element, or nil when there is no such element.
#
# An href that URI rejects is escaped with Mechanize::Util.uri_escape and
# parsed again.
def canonical_uri
  begin
    canonical = at('link[@rel="canonical"][@href]')
    return unless canonical

    target = canonical['href']

    URI(target)
  rescue URI::InvalidURIError
    URI(Mechanize::Util.uri_escape(target))
  end
end
content_type() click to toggle source

Get the content type

# File lib/mechanize/page.rb, line 190
# Returns the content type declared by a <meta> tag when one was recorded
# at construction time, otherwise the response's 'content-type' header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
css() click to toggle source

Shorthand for parser.css.

See also Nokogiri::XML::Node#css for details.

# File lib/mechanize/page.rb, line 208
  
detected_encoding() click to toggle source
# File lib/mechanize/page.rb, line 74
# Returns whatever Mechanize::Util.detect_charset reports for this page's
# body. The result is not cached; detection runs on every call.
def detected_encoding
  Mechanize::Util.detect_charset(body)
end
encoding() click to toggle source
# File lib/mechanize/page.rb, line 94
# Returns the encoding reported by the parser, or nil when it cannot be
# obtained.
#
# #parser returns nil for a page without a body, so the resulting
# NoMethodError is turned into nil. Note that any NoMethodError raised
# while building the parser is swallowed the same way.
def encoding
  begin
    parser.encoding
  rescue NoMethodError
    nil
  end
end
encoding=(encoding) click to toggle source
# File lib/mechanize/page.rb, line 78
# Forces the page to be parsed with +encoding+ from now on.
#
# Everything derived from the previous parse is dropped through #reset,
# which also discards the parser itself, so the document is lazily
# re-parsed with the new encoding on the next call to #parser. (The former
# "compare with the current parser encoding" step ran after #reset had
# already cleared @parser and therefore could never execute.)
#
# Returns +encoding+.
def encoding=(encoding)
  reset

  @encoding = encoding

  encoding
end
encoding_error?(parser=nil) click to toggle source

Return whether the parser result has errors related to encoding. false only indicates that the parser reported no encoding errors, not that the encoding is valid.

# File lib/mechanize/page.rb, line 102
# Tells whether +parser+ (this page's own parser when omitted) reported an
# error whose message looks encoding related.
#
# Returns false when there are no errors at all or when none of the
# messages match; true otherwise.
def encoding_error?(parser=nil)
  parser ||= self.parser
  reported = parser.errors
  return false if reported.empty?

  reported.any? do |error|
    error.message =~ /(indicate\ encoding)|
                      (Invalid\ char)|
                      (input\ conversion\ failed)/x
  end
end
form_with(criteria) click to toggle source
form_with(criteria) { |form| ... }

Find a single form matching criteria. See forms_with for details of criteria.

Examples:

page.form_with(action: '/post/login.php') do |f|
  ...
end
# File lib/mechanize/page.rb, line 256
  
form_with!(criteria) click to toggle source
form_with!(criteria) { |form| ... }

Same as form_with but raises an ElementNotFoundError if no form matches criteria

# File lib/mechanize/page.rb, line 266
  
forms() click to toggle source

Return a list of all form tags

# File lib/mechanize/page.rb, line 504
# Returns one Mechanize::Form per <form> element found by #search.
# A form without an action gets this page's URI (as a String) as its
# action. The list is built on first use and cached in @forms.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    Mechanize::Form.new(form_node, @mech, self).tap do |built|
      built.action ||= @uri.to_s
    end
  end
end
forms_with(name) click to toggle source
forms_with(name: name_matcher, id: id_matcher, class: class_matcher,
search: search_expression, xpath: xpath_expression, css: css_expression,
action: action_matcher, ...)

Find all forms matching criteria. If a string is given, it is taken as a name attribute value. If a hash is given, forms are narrowed by the key-value pairs as follows.

:id, :dom_id: selects forms with a dom_id value that matches this value.

:class, :dom_class: selects forms with a dom_class value that matches this value.

:search: only selects forms matching this selector expression.

:xpath: only selects forms matching this XPath expression.

:css: only selects forms matching this CSS selector expression.

:action, :method, etc.: narrows forms by a given attribute value using the === operator.

Example:

page.forms_with(css: '#content table.login_box form', method: /\APOST\z/i, ).each do |f|
  ...
end
# File lib/mechanize/page.rb, line 299
# NOTE(review): this macro call appears to be what generates forms_with (and
# presumably form_with / form_with!); elements_with itself is not shown
# here -- confirm against its definition.
elements_with :form
frame_with(criteria) click to toggle source
frame_with(criteria) { |frame| ... }

Find a single frame tag matching criteria. See forms_with for details of criteria, where for “form(s)” read “frame tag(s)”.

Example:

page.frame_with(src: /foo/).click
# File lib/mechanize/page.rb, line 391
  
frame_with!(criteria) click to toggle source
frame_with!(criteria) { |frame| ... }

Same as frame_with but raises an ElementNotFoundError if no frame tag matches criteria

# File lib/mechanize/page.rb, line 401
  
frames() click to toggle source

Return a list of all frame tags

# File lib/mechanize/page.rb, line 532
# Returns one Frame object per <frame> element found by #search.
# The list is built on first use and cached in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end
frames_with(criteria) click to toggle source

Find all frame tags matching criteria. See forms_with for details of criteria, where for “form(s)” read “frame tag(s)”.

Example:

page.frames_with(src: /foo/).each do |frame|
  p frame.src
end
# File lib/mechanize/page.rb, line 414
# NOTE(review): this macro call appears to be what generates frames_with
# (and presumably frame_with / frame_with!); elements_with itself is not
# shown here -- confirm against its definition.
elements_with :frame
iframe_with(criteria) click to toggle source
iframe_with(criteria) { |iframe| ... }

Find a single iframe tag matching criteria. See forms_with for details of criteria, where for “form(s)” read “iframe tag(s)”.

Example:

page.iframe_with(src: /foo/).click
# File lib/mechanize/page.rb, line 429
  
iframe_with!(criteria) click to toggle source
iframe_with!(criteria) { |iframe| ... }

Same as iframe_with but raises an ElementNotFoundError if no iframe tag matches criteria

# File lib/mechanize/page.rb, line 439
  
iframes() click to toggle source

Return a list of all iframe tags

# File lib/mechanize/page.rb, line 539
# Returns one Frame object per <iframe> element found by #search (iframes
# are wrapped in the same Frame class as frames).
# The list is built on first use and cached in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end
iframes_with(criteria) click to toggle source

Find all iframe tags matching criteria. See forms_with for details of criteria, where for “form(s)” read “iframe tag(s)”.

Example:

page.iframes_with(src: /foo/).each do |iframe|
  p iframe.src
end
# File lib/mechanize/page.rb, line 452
# NOTE(review): this macro call appears to be what generates iframes_with
# (and presumably iframe_with / iframe_with!); elements_with itself is not
# shown here -- confirm against its definition.
elements_with :iframe
image_urls() click to toggle source
# File lib/mechanize/page.rb, line 551
# Returns the distinct +url+ values of #images, in first-seen order.
# The list is built on first use and cached in @image_urls.
def image_urls
  return @image_urls if @image_urls

  @image_urls = images.map { |image| image.url }.uniq
end
image_with(criteria) click to toggle source
image_with(criteria) { |image| ... }

Find a single image matching criteria. See forms_with for details of criteria, where for “form(s)” read “image(s)”.

Example:

page.image_with(alt: /main/).fetch.save
# File lib/mechanize/page.rb, line 467
  
image_with!(criteria) click to toggle source
image_with!(criteria) { |image| ... }

Same as image_with but raises an ElementNotFoundError if no image matches criteria

# File lib/mechanize/page.rb, line 477
  
images() click to toggle source

Return a list of all img tags

# File lib/mechanize/page.rb, line 546
# Returns one Image object per <img> element found by #search.
# The list is built on first use and cached in @images.
def images
  return @images if @images

  @images = search('img').map do |img_node|
    Image.new(img_node, self)
  end
end
images_with(criteria) click to toggle source

Find all images matching criteria. See forms_with for details of criteria, where for “form(s)” read “image(s)”.

Example:

page.images_with(src: /jpg\Z/).each do |img|
  img.fetch.save
end
# File lib/mechanize/page.rb, line 490
# NOTE(review): this macro call appears to be what generates images_with
# (and presumably image_with / image_with!); elements_with itself is not
# shown here -- confirm against its definition.
elements_with :image
labels() click to toggle source

Return a list of all label tags

# File lib/mechanize/page.rb, line 557
# Returns one Label object per <label> element found by #search.
# The list is built on first use and cached in @labels.
def labels
  return @labels if @labels

  @labels = search('label').map do |label_node|
    Label.new(label_node, self)
  end
end
labels_hash() click to toggle source
# File lib/mechanize/page.rb, line 562
# Returns a Hash mapping each label's 'for' attribute value to its Label.
# Labels whose +for+ is nil/false are left out; when several labels share a
# value the last one wins. The Hash is built on first use and cached in
# @labels_hash.
def labels_hash
  @labels_hash ||= labels.each_with_object({}) do |label, by_target|
    by_target[label.node['for']] = label if label.for
  end
end
meta_charset() click to toggle source
# File lib/mechanize/page.rb, line 70
# Charsets declared by <meta> tags in this page's body, as computed by the
# class-level meta_charset. Not cached; the body is scanned on every call.
def meta_charset
  self.class.meta_charset(body)
end
meta_refresh() click to toggle source

Return a list of all meta refresh elements

# File lib/mechanize/page.rb, line 515
# Returns the MetaRefresh objects built from this page's <meta> elements;
# nodes for which MetaRefresh.from_node returns nil are dropped.
#
# Only <meta> elements directly under <head> are considered unless the
# agent's follow_meta_refresh setting is :anywhere. The agent setting is
# read on every call, even when the cached list in @meta_refresh is
# returned.
def meta_refresh
  selector = 'head > meta'
  selector = 'meta' if @mech.follow_meta_refresh == :anywhere
  return @meta_refresh if @meta_refresh

  candidates = search(selector).map { |meta_node| MetaRefresh.from_node(meta_node, self) }
  @meta_refresh = candidates.compact
end
parser() click to toggle source
# File lib/mechanize/page.rb, line 112
# Returns the parsed document for this page, building it on first use.
# Returns nil when the page has no body.
#
# The encoding handed to the agent's html_parser is chosen as follows:
# 1. an encoding set explicitly on the page (@encoding);
# 2. the agent's default encoding when force_default_encoding is set;
# 3. otherwise each entry of @encodings, last to first, keeping the first
#    parse that #encoding_error? accepts (or the final attempt if none is
#    accepted).
def parser
  return @parser if @parser
  return nil unless @body

  if @encoding
    @parser = @mech.html_parser.parse(html_body, nil, @encoding)
  elsif mech.force_default_encoding
    @parser = @mech.html_parser.parse(html_body, nil, @mech.default_encoding)
  else
    @encodings.reverse_each do |candidate|
      @parser = @mech.html_parser.parse(html_body, nil, candidate)

      break unless encoding_error?(@parser)
    end
  end

  @parser
end
Also aliased as: root
reset() click to toggle source
# File lib/mechanize/page.rb, line 164
# Drops the parser and every memoized value derived from it, so that each
# is rebuilt from the (possibly re-encoded) document on next access.
#
# @images and @image_urls are cleared together with the other element
# lists; they are memoized from the parsed document in the same way and
# would otherwise keep pointing at nodes of a discarded parse.
#
# Returns nil.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
response_header_charset() click to toggle source
# File lib/mechanize/page.rb, line 66
# Charsets announced by this page's response headers, as computed by the
# class-level response_header_charset. Not cached.
def response_header_charset
  self.class.response_header_charset(response)
end
root()
Alias for: parser
title() click to toggle source
# File lib/mechanize/page.rb, line 58
# Returns the text of the document's <title> element(s), or nil when the
# page has no parser or the text is empty.
#
# A non-nil title is cached in @title; a nil result is looked up again on
# the next call.
def title
  return @title if @title

  doc = parser
  return (@title = nil) unless doc

  text = doc.search('title').inner_text
  @title = text.empty? ? nil : text
end
xpath() click to toggle source

Shorthand for parser.xpath.

See also Nokogiri::XML::Node#xpath for details.

# File lib/mechanize/page.rb, line 215