class Mechanize::Util

Constants

CODE_DIC

Public Class Methods

build_query_string(parameters, enc=nil)
# File lib/mechanize/util.rb, line 12
# Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
#
# parameters - an enumerable of key/value pairs (a Hash or an Array of
#              two-element arrays). Pairs whose key is nil or false are
#              dropped.
# enc        - accepted for interface compatibility; not used here.
#
# Keys and values are stringified with to_s and escaped with CGI.escape.
# (WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.)
def build_query_string(parameters, enc=nil)
  pairs = []
  parameters.each do |key, value|
    next unless key

    escaped = [key, value].map { |part| CGI.escape(part.to_s) }
    pairs << escaped.join("=")
  end
  pairs.join('&')
end
detect_charset(src)
# File lib/mechanize/util.rb, line 58
# Guesses the charset name of +src+ using NKF.guess.
#
# src - the text to inspect; when nil, "<html></html>" is inspected instead.
#
# On Ruby >= 1.9.0 the guess is stringified and upper-cased. On older
# interpreters the NKF constant whose value equals the guess is located and
# translated through CODE_DIC. Returns "ISO-8859-1" when no name results.
def detect_charset(src)
  guessed = NKF.guess(src || "<html></html>")

  charset =
    if RUBY_VERSION >= "1.9.0"
      guessed.to_s.upcase
    else
      # Map the guessed value back to the name of the matching NKF constant.
      const_name = NKF.constants.find { |name| NKF.const_get(name) == guessed }
      CODE_DIC[const_name.intern]
    end

  charset || "ISO-8859-1"
end
from_native_charset(s, code)
# File lib/mechanize/util.rb, line 29
# Converts +s+ from UTF-8 to the charset named by +code+.
#
# s    - the String to convert; returned as-is when nil.
# code - the target charset; when nil, +s+ is returned as-is.
#
# The conversion only happens when Mechanize.html_parser is Nokogiri::HTML;
# for any other parser +s+ is returned untouched.
#
# Conversion is best-effort: if it fails, the original +s+ is returned
# instead of raising.
def from_native_charset(s, code)
  return s unless s && code
  return s unless Mechanize.html_parser == Nokogiri::HTML

  if RUBY_VERSION < '1.9.2'
    begin
      Iconv.iconv(code.to_s, "UTF-8", s).join("")
    rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
      s
    end
  else
    begin
      # Encode to the requested charset, mirroring the Iconv branch above.
      # (Previously this encoded to UTF-8 and silently ignored +code+.)
      s.encode(code.to_s)
    rescue StandardError
      s
    end
  end
end
html_unescape(s)
# File lib/mechanize/util.rb, line 44
# Replaces HTML character references in +s+ with the characters they denote.
#
# s - the String to unescape; nil (or false) is returned unchanged.
#
# Handles named references ("&amp;"), looked up in the NamedCharacters table
# of Mechanize.html_parser, and decimal numeric references ("&#38;").
# A reference is left verbatim when the name is unknown or when the code
# point cannot be packed as a UTF-8 character.
def html_unescape(s)
  return s unless s

  s.gsub(/&(\w+|#[0-9]+);/) do |entity|
    # The outer capture is either a name or "#" followed by decimal digits.
    ref = $1
    codepoint =
      if ref[0, 1] == '#'
        ref[1..-1].to_i
      else
        Mechanize.html_parser::NamedCharacters[ref]
      end

    next entity unless codepoint

    begin
      [codepoint].pack('U')
    rescue
      entity
    end
  end
end
to_native_charset(s, code=nil)
# File lib/mechanize/util.rb, line 19
# Converts +s+ from the charset named by +code+ to UTF-8.
#
# s    - the String to convert. With the Nokogiri::HTML parser, nil yields nil.
# code - the source charset; when nil it is guessed with detect_charset.
#
# The conversion only happens when Mechanize.html_parser is Nokogiri::HTML;
# for any other parser +s+ is returned untouched.
#
# Iconv is used whenever it is loaded, so environments that relied on it keep
# the same behavior and error classes. Where Iconv is unavailable (it was
# removed from the standard library in Ruby 2.0) String#encode is used
# instead, which raises EncodingError subclasses on invalid input.
def to_native_charset(s, code=nil)
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return unless s

  code ||= detect_charset(s)
  if defined?(Iconv)
    Iconv.iconv("UTF-8", code, s).join("")
  else
    # Interpret the bytes of s as +code+ regardless of its tagged encoding,
    # which is how Iconv treated its input.
    s.encode("UTF-8", code.to_s)
  end
end