class Mechanize::Page
This class encapsulates an HTML page. If Mechanize finds a content type of 'text/html', this class will be instantiated and returned.
Example:
require 'mechanize' agent = Mechanize.new agent.get('http://google.com/').class # => Mechanize::Page
Constants
- DEFAULT_RESPONSE
Attributes
Possible encodings for this page based on HTTP headers and meta elements
Public Class Methods
# File lib/mechanize/page.rb, line 574
# Extracts the charset parameter from a Content-Type style string.
#
# content_type:: a header or meta content value such as
#                "text/html; charset=utf-8"
#
# Returns the charset token, or nil when no ";charset=" parameter is present
# or when the parameter is the literal 'none'.
def charset content_type
  name = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\"\/\[\]?={}\s]+)/i, 1]
  name == 'none' ? nil : name
end
Retrieves all charsets from meta tags in body.
# File lib/mechanize/page.rb, line 596
# Collects the charsets declared by <meta> tags in +body+, in document order.
#
# A tag contributes either through a charset=... attribute, or, when it is an
# http-equiv content-type tag, through the charset parsed out of its content
# attribute. Tags that declare no charset contribute nothing.
#
# Returns an Array of charset strings (possibly empty).
def self.meta_charset body
  found = []

  body.scan(/<meta .*?>/i).each do |tag|
    if tag =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i
      found << $2
    elsif tag =~ /http-equiv\s*=\s*(["'])?content-type\1/i
      content = tag[/content\s*=\s*(["'])?(.*?)\1/i, 2]
      declared = content && charset(content)
      found << declared if declared
    end
  end

  found
end
Retrieves the last content-type set by a meta tag in body.
# File lib/mechanize/page.rb, line 614
# Returns the content attribute of the last <meta http-equiv="content-type">
# tag found in +body+, or nil when the body has no such tag.
#
# Tags are visited from last to first, so the final declaration wins. If the
# last matching tag has no parsable content attribute, nil is returned.
#
# Whitespace around the "=" of the content attribute is accepted, matching
# what meta_charset already tolerates for the same attribute.
def self.meta_content_type body
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    return meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]
  end

  nil
end
# File lib/mechanize/page.rb, line 27
# Builds a page and gathers the candidate encodings for its body.
#
# uri::      page location, passed through to super
# response:: response headers; DEFAULT_RESPONSE is used when nil
# body::     raw page body; when given it is forced to ASCII-8BIT in place
# code::     response code, passed through to super
# mech::     the owning Mechanize instance, or nil
#
# @encodings is filled in this order: nil, the charset detected from the
# body, charsets from the response headers, charsets from meta tags, and the
# agent's default encoding. #parser tries them in reverse order.
#
# Raises RuntimeError when +mech+ is given but is not a Mechanize.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  response ||= DEFAULT_RESPONSE

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]

  raise 'no' if mech && !(Mechanize === mech)
  @mech = mech

  reset

  @encodings.push(Mechanize::Util.detect_charset(body)) if body

  @encodings.concat(self.class.response_header_charset(response))

  if body
    # The body is treated as binary so the regular expressions below can run
    # on it; the detected encoding is applied later.
    body.force_encoding(Encoding::ASCII_8BIT)

    @encodings.concat(self.class.meta_charset(body))

    @meta_content_type = self.class.meta_content_type(body) || @meta_content_type
  end

  @encodings.push(mech.default_encoding) if mech && mech.default_encoding

  super(uri, response, body, code)
end
# File lib/mechanize/page.rb, line 583
# Collects the charsets named by the content-type headers of +response+.
#
# response:: any object whose #each yields (header, value) pairs
#
# Only headers named exactly 'content-type' whose value mentions "charset"
# are considered. Each one contributes the result of charset(value), which
# may be nil.
#
# Returns an Array (possibly empty).
def self.response_header_charset response
  found = []

  response.each do |name, value|
    found << charset(value) if name == 'content-type' && value =~ /charset/i
  end

  found
end
Public Instance Methods
Shorthand for parser.at
.
See also Nokogiri::XML::Node#at for details.
# File lib/mechanize/page.rb, line 222
Shorthand for parser.at_css
.
See also Nokogiri::XML::Node#at_css for details.
# File lib/mechanize/page.rb, line 229
Shorthand for parser.at_xpath
.
See also Nokogiri::XML::Node#at_xpath for details.
# File lib/mechanize/page.rb, line 236 def_delegators :parser, :search, :css, :xpath, :at, :at_css, :at_xpath
Find a single base tag matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “base tag(s)”.
Example:
page.base_with(href: /foo/).click
# File lib/mechanize/page.rb, line 353
Same as base_with but raises an ElementNotFoundError if no base tag matches criteria
# File lib/mechanize/page.rb, line 363
Return a list of all base tags
# File lib/mechanize/page.rb, line 525
# Returns every element matched by search('base'), each wrapped in a Base.
# The list is built on first use and cached in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end
Find all base tags matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “base tag(s)”.
Example:
page.bases_with(href: /foo/).each do |base| puts base.href end
# File lib/mechanize/page.rb, line 376 elements_with :base
Return the canonical URI for the page if there is a link tag with rel="canonical" and an href attribute.
# File lib/mechanize/page.rb, line 179
# Returns the page's canonical location as a URI, taken from the href of the
# first element matching link[@rel="canonical"][@href].
#
# Returns nil when the page has no such element. When the href is not a valid
# URI it is passed through Mechanize::Util.uri_escape and parsed again.
def canonical_uri
  canonical = at('link[@rel="canonical"][@href]')
  return unless canonical

  href = canonical['href']

  URI(href)
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(href))
end
Get the content type
# File lib/mechanize/page.rb, line 190
# Returns the page's content type: the value captured from a meta tag when
# one was found, otherwise the response's 'content-type' header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
Shorthand for parser.css
.
See also Nokogiri::XML::Node#css for details.
# File lib/mechanize/page.rb, line 208
# File lib/mechanize/page.rb, line 74
# Returns whatever Mechanize::Util.detect_charset reports for this page's
# body.
def detected_encoding
  raw = body
  Mechanize::Util.detect_charset(raw)
end
# File lib/mechanize/page.rb, line 94
# Returns the encoding reported by the parser.
#
# Any NoMethodError raised while obtaining it (for example when #parser
# returns nil) is swallowed and nil is returned instead.
def encoding
  begin
    parser.encoding
  rescue NoMethodError
    nil
  end
end
# File lib/mechanize/page.rb, line 78
# Forces the encoding used the next time the page is parsed.
#
# encoding:: the encoding name to parse with; stored in @encoding
#
# reset is called first, which drops the cached parser and the cached element
# lists, so the page is re-parsed lazily on the next call to #parser.
#
# The previous implementation compared the old parser's encoding with the new
# one after calling reset. reset always sets @parser to nil, so that branch
# could never run; it has been removed without changing behavior.
#
# Returns +encoding+.
def encoding=(encoding)
  reset

  @encoding = encoding
end
Return whether the parser result has errors related to encoding. A false result only means the parser reported no encoding errors; it does not guarantee that the encoding is valid.
# File lib/mechanize/page.rb, line 102
# Reports whether a parse result contains encoding-related errors.
#
# parser:: the parse result to inspect; defaults to this page's #parser
#
# An error counts as encoding-related when its message contains
# "indicate encoding", "Invalid char" or "input conversion failed".
#
# Returns true or false.
def encoding_error?(parser=nil)
  parser ||= self.parser

  return false if parser.errors.empty?

  parser.errors.each do |problem|
    return true if problem.message =~ /(indicate\ encoding)| (Invalid\ char)| (input\ conversion\ failed)/x
  end

  false
end
Find a single form matching criteria
. See
forms_with
for details of criteria
.
Examples:
page.form_with(action: '/post/login.php') do |f| ... end
# File lib/mechanize/page.rb, line 256
Same as form_with but raises an ElementNotFoundError if no form matches criteria
# File lib/mechanize/page.rb, line 266
Return a list of all form tags
# File lib/mechanize/page.rb, line 504
# Returns every element matched by search('form') wrapped in a
# Mechanize::Form. A form without an action gets this page's URI as its
# action. The list is built on first use and cached in @forms.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    Mechanize::Form.new(form_node, @mech, self).tap do |form|
      form.action ||= @uri.to_s
    end
  end
end
Find all forms matching criteria. If a string is given, it is taken as a name attribute value. If a hash is given, forms are narrowed by the key-value pairs as follows.
:id, :dom_id: selects forms with a dom_id value that matches this value.
:class, :dom_class: selects forms with a dom_class value that matches this value.
:search: only selects forms matching this selector expression.
:xpath: only selects forms matching this XPath expression.
:css: only selects forms matching this CSS selector expression.
:action, :method, etc.: narrows forms by a given attribute value using the === operator.
Example:
page.forms_with(css: '#content table.login_box form', method: /\APOST\z/i, ).each do |f| ... end
# File lib/mechanize/page.rb, line 299 elements_with :form
Find a single frame tag matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “frame tag(s)”.
Example:
page.frame_with(src: /foo/).click
# File lib/mechanize/page.rb, line 391
Same as frame_with but raises an ElementNotFoundError if no frame tag matches criteria
# File lib/mechanize/page.rb, line 401
Return a list of all frame tags
# File lib/mechanize/page.rb, line 532
# Returns every element matched by search('frame'), each wrapped in a Frame.
# The list is built on first use and cached in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end
Find all frame tags matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “frame tag(s)”.
Example:
page.frames_with(src: /foo/).each do |frame| p frame.src end
# File lib/mechanize/page.rb, line 414 elements_with :frame
Find a single iframe tag matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “iframe tag(s)”.
Example:
page.iframe_with(src: /foo/).click
# File lib/mechanize/page.rb, line 429
Same as iframe_with but raises an ElementNotFoundError if no iframe tag matches criteria
# File lib/mechanize/page.rb, line 439
Return a list of all iframe tags
# File lib/mechanize/page.rb, line 539
# Returns every element matched by search('iframe'), each wrapped in a Frame.
# The list is built on first use and cached in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end
Find all iframe tags matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “iframe tag(s)”.
Example:
page.iframes_with(src: /foo/).each do |iframe| p iframe.src end
# File lib/mechanize/page.rb, line 452 elements_with :iframe
# File lib/mechanize/page.rb, line 551
# Returns the distinct values of #url across #images, in first-seen order.
# The list is built on first use and cached in @image_urls.
def image_urls
  return @image_urls if @image_urls

  @image_urls = images.map { |image| image.url }.uniq
end
Find a single image matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “image(s)”.
Example:
page.image_with(alt: /main/).fetch.save
# File lib/mechanize/page.rb, line 467
Same as image_with but raises an ElementNotFoundError if no image matches criteria
# File lib/mechanize/page.rb, line 477
Return a list of all img tags
# File lib/mechanize/page.rb, line 546
# Returns every element matched by search('img'), each wrapped in an Image.
# The list is built on first use and cached in @images.
def images
  return @images if @images

  @images = search('img').map do |img_node|
    Image.new(img_node, self)
  end
end
Find all images matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “image(s)”.
Example:
page.images_with(src: /jpg\Z/).each do |img| img.fetch.save end
# File lib/mechanize/page.rb, line 490 elements_with :image
Return a list of all label tags
# File lib/mechanize/page.rb, line 557
# Returns every element matched by search('label'), each wrapped in a Label.
# The list is built on first use and cached in @labels.
def labels
  return @labels if @labels

  @labels = search('label').map do |label_node|
    Label.new(label_node, self)
  end
end
# File lib/mechanize/page.rb, line 562
# Returns a Hash mapping each label's 'for' attribute to its Label object.
# Labels whose #for is falsy are left out. When several labels share a 'for'
# value the last one wins. The hash is built on first use and cached in
# @labels_hash.
def labels_hash
  @labels_hash ||= labels.each_with_object({}) do |label, by_target|
    by_target[label.node['for']] = label if label.for
  end
end
Find a single link matching criteria
. See
forms_with
for details of criteria
, where for
“form(s)” read “link(s)”.
Example:
page.link_with(href: /foo/).click
# File lib/mechanize/page.rb, line 314
Same as link_with but raises an ElementNotFoundError if no link matches criteria
# File lib/mechanize/page.rb, line 324
Return a list of all links on the page, built from its a and area tags
# File lib/mechanize/page.rb, line 494
# Returns a Link for every element matched by search('a') followed by every
# element matched by search('area'). The list is built on first use and
# cached in @links.
def links
  return @links if @links

  @links = %w{ a area }.flat_map do |tag|
    search(tag).map { |node| Link.new(node, @mech, self) }
  end
end
Find all links matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “link(s)”.
Example:
page.links_with(href: /foo/).each do |link| puts link.href end
# File lib/mechanize/page.rb, line 338 elements_with :link
# File lib/mechanize/page.rb, line 70
# Returns the result of the class-level meta_charset applied to this page's
# body.
def meta_charset
  page_class = self.class
  page_class.meta_charset(body)
end
Return a list of all meta refresh elements
# File lib/mechanize/page.rb, line 515
# Returns the page's meta refresh elements.
#
# When the agent's follow_meta_refresh setting is :anywhere every meta
# element is considered; otherwise only 'head > meta' elements are. Each
# candidate is handed to MetaRefresh.from_node and nil results are dropped.
# The list is built on first use and cached in @meta_refresh.
def meta_refresh
  # The selector is computed before the cache check, so @mech is consulted on
  # every call.
  selector = if @mech.follow_meta_refresh == :anywhere
               'meta'
             else
               'head > meta'
             end

  return @meta_refresh if @meta_refresh

  candidates = search(selector).map { |meta| MetaRefresh.from_node(meta, self) }
  @meta_refresh = candidates.compact
end
# File lib/mechanize/page.rb, line 112
# Returns the parsed document for this page, parsing the body on first use.
#
# Returns nil when the page has no body. The encoding passed to the agent's
# html_parser is chosen as follows:
#
# 1. @encoding, when one has been set explicitly;
# 2. the agent's default_encoding, when force_default_encoding is on;
# 3. otherwise each entry of @encodings from last to first, stopping at the
#    first parse for which encoding_error? is false. If every candidate
#    produces encoding errors, the result of the final attempt is kept.
def parser
  return @parser if @parser
  return nil unless @body

  if @encoding
    return @parser = @mech.html_parser.parse(html_body, nil, @encoding)
  end

  if mech.force_default_encoding
    return @parser = @mech.html_parser.parse(html_body, nil, @mech.default_encoding)
  end

  @encodings.reverse_each do |candidate|
    @parser = @mech.html_parser.parse(html_body, nil, candidate)
    break unless encoding_error?(@parser)
  end

  @parser
end
# File lib/mechanize/page.rb, line 164
# Drops the cached parser and every cached value derived from it, so each is
# rebuilt on next access.
#
# @images and @image_urls are cleared as well. #images and #image_urls
# memoize their results just like the other element lists, so leaving them
# out kept stale image data alive after a reset (for example after the
# encoding was changed).
#
# Returns nil.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
# File lib/mechanize/page.rb, line 66
# Returns the result of the class-level response_header_charset applied to
# this page's response.
def response_header_charset
  page_class = self.class
  page_class.response_header_charset(response)
end
Shorthand for parser.search
.
See Nokogiri::XML::Node#search for details.
# File lib/mechanize/page.rb, line 201
# File lib/mechanize/page.rb, line 58
# Returns the combined inner text of the page's title element(s).
#
# Returns nil when the page cannot be parsed or when the title text is empty.
# A non-nil title is cached in @title.
def title
  return @title if @title

  doc = parser
  return @title = nil unless doc

  text = doc.search('title').inner_text
  @title = text.empty? ? nil : text
end
Shorthand for parser.xpath
.
See also Nokogiri::XML::Node#xpath for details.
# File lib/mechanize/page.rb, line 215