module Hpricot::Traverse
Public Class Methods
# File lib/hpricot/elements.rb, line 375
# Registers +blk+ as a method named "filter[TOKEN]". TOKEN is +tok+ itself
# when it is a String, and +tok.inspect+ for anything else.
def self.filter(tok, &blk)
  label = tok.is_a?(String) ? tok : tok.inspect
  define_method("filter[#{label}]", &blk)
end
Public Instance Methods
Adds elements immediately after this element, contained in the
html
string.
# File lib/hpricot/traverse.rb, line 121
# Builds nodes from +html+ (or from the block) with #make and asks the parent
# to insert them right after this node. Returns whatever insert_after returns.
def after(html = nil, &blk)
  container = parent
  fresh_nodes = make(html, &blk)
  container.insert_after(fresh_nodes, self)
end
Find the first matching node for the CSS or XPath expr
string.
# File lib/hpricot/traverse.rb, line 341
# Runs #search with +expr+ and returns only the first element of the result
# (nil when the result is empty).
def at(expr)
  matches = search(expr)
  matches.first
end
Adds elements immediately before this element, contained in the
html
string.
# File lib/hpricot/traverse.rb, line 126
# Builds nodes from +html+ (or from the block) with #make and asks the parent
# to insert them right before this node. Returns whatever insert_before returns.
def before(html = nil, &blk)
  container = parent
  fresh_nodes = make(html, &blk)
  container.insert_before(fresh_nodes, self)
end
Is this object a stranded end tag?
# File lib/hpricot/traverse.rb, line 21
# True when this object is a BogusETag::Trav.
def bogusetag?
  is_a?(BogusETag::Trav)
end
Find children of a given tag_name
.
ele.children_of_type('p') #=> [...array of paragraphs...]
# File lib/hpricot/traverse.rb, line 390
# Returns the children whose +pathname+ equals +tag_name+. Children that do
# not respond to +pathname+ are skipped. Returns nil when this object has no
# +children+ method at all.
def children_of_type(tag_name)
  return nil unless respond_to? :children

  children.find_all do |child|
    child.respond_to?(:pathname) && child.pathname == tag_name
  end
end
# File lib/hpricot/traverse.rb, line 203
# Returns a copy of +path+ with whitespace removed at the start and end of
# each line (the anchors used are the line-based ^ and $).
def clean_path(path)
  edge_whitespace = /^\s+|\s+$/
  path.gsub(edge_whitespace, '')
end
Is this object a comment?
# File lib/hpricot/traverse.rb, line 19
# True when this object is a Comment::Trav.
def comment?
  is_a?(Comment::Trav)
end
Builds a unique CSS string for this node, from the root of the document containing it.
# File lib/hpricot/traverse.rb, line 226
# Builds a CSS selector string for this node.
#
# An element with an +id+ attribute yields "#<id>". Otherwise the parent's
# css_path is extended with " > <pathname>", and ":nth(<n>)" is appended when
# at least two siblings share this node's pathname. <n> is the zero-based
# count of same-named siblings that precede this node.
#
# Cleanup over the original: the unused local +i+ is gone and the result is
# no longer held in a local named +p+, which shadowed Kernel#p.
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  sim, id = 0, 0
  siblings = parent.children
  if siblings
    siblings.each do |e|
      # Record how many same-named siblings were seen before reaching self.
      id = sim if e == self
      sim += 1 if e.pathname == self.pathname
    end
  end

  base = parent.css_path
  path = base ? "#{base} > #{self.pathname}" : self.pathname
  path += ":nth(#{id})" if sim >= 2
  path
end
Is this object the enclosing HTML or XML document?
# File lib/hpricot/traverse.rb, line 7
# True when this object is a Doc::Trav.
def doc?
  is_a?(Doc::Trav)
end
Is this object a doctype tag?
# File lib/hpricot/traverse.rb, line 15
# True when this object is a DocType::Trav.
def doctype?
  is_a?(DocType::Trav)
end
Is this object an HTML or XML element?
# File lib/hpricot/traverse.rb, line 9
# True when this object is an Elem::Trav.
def elem?
  is_a?(Elem::Trav)
end
Find all nodes which follow the current one.
# File lib/hpricot/traverse.rb, line 114
# Returns an Elements holding every sibling that comes after this node in the
# parent's children list.
def following
  siblings = parent.children
  here = siblings.index(self)
  Elements[*siblings[(here + 1)..-1]]
end
# File lib/hpricot/traverse.rb, line 138
# Walks down from this node by applying get_subnode_internal once per entry
# in +indexes+, each time on the node reached so far. With no indexes the
# node itself is returned.
def get_subnode(*indexes)
  indexes.inject(self) do |node, index|
    node.get_subnode_internal(index)
  end
end
Builds an HTML string from the contents of this node.
# File lib/hpricot/traverse.rb, line 168
# Reader and writer for this node's inner markup.
#
# With no argument and no block: joins +output("")+ of every child into one
# string, or returns "" when there is no +children+ method or it returns nil.
#
# With +inner+ or a block: marks the node altered, replaces the children
# (an Array is used as-is; anything else goes through #make) and reparents
# the new children to this node.
def html(inner = nil, &blk)
  unless inner or blk
    return "" unless respond_to?(:children) and children
    return children.map { |child| child.output("") }.join
  end

  altered!
  self.children = Array === inner ? inner : make(inner, &blk)
  reparent self.children
end
# File lib/hpricot/traverse.rb, line 47
# Returns the position of the first child matching +name+, or -1 if none does.
# "*" always yields 0. "text()" matches the first child for which text? is
# true. Any other name is compared against the +name+ of children that
# respond to it.
def index(name)
  return 0 if name == "*"

  (children || []).each_with_index do |child, pos|
    named = child.respond_to?(:name) && name == child.name
    return pos if named || (child.text? && name == "text()")
  end
  -1
end
Inserts new contents into the current node, based on the HTML contained in
string inner
.
# File lib/hpricot/traverse.rb, line 191
# Replaces this node's contents by delegating to #html. A nil or false value
# is passed on as an empty array.
def inner_html=(inner)
  replacement = inner || []
  html(replacement)
end
Builds a string from the text contained in this node. All HTML elements are removed.
# File lib/hpricot/traverse.rb, line 158
# Concatenates the inner_text of every child. Returns "" when this object has
# no +children+ method or when it returns nil.
def inner_text
  return "" unless respond_to?(:children) and children

  children.map { |child| child.inner_text }.join
end
Parses an HTML string, making an HTML fragment based on the options used to create the container document.
# File lib/hpricot/traverse.rb, line 25
# Turns +input+ (or the block) into nodes. The call is forwarded to the
# parent when there is one and it responds to +make+. Otherwise Hpricot.make
# is used and the children of its result are returned.
def make(input = nil, &blk)
  owner = parent
  return owner.make(input, &blk) if owner && owner.respond_to?(:make)

  Hpricot.make(input, &blk).children
end
Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.
# File lib/hpricot/traverse.rb, line 91
# Returns the sibling that directly follows this node in the parent's
# children list, or nil when this node is last or has no parent.
#
# The original only tested +parent+ after it had already called
# parent.children, so a parentless node raised NoMethodError instead of
# returning nil. The guard now runs first.
def next
  return unless parent

  sib = parent.children
  sib[sib.index(self) + 1]
end
# File lib/hpricot/traverse.rb, line 242
# Returns this node's index within the parent's full children list.
def node_position
  siblings = parent.children
  siblings.index(self)
end
Puts together an array of neighboring nodes based on their proximity to
this node. So, for example, to get the next node, you could use
nodes_at(1). Or, to get the previous node, use
nodes_at(-1).
This method also accepts ranges and sets of numbers.
ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node ele.nodes_at(0, 5..6) # the current node and two others
# File lib/hpricot/traverse.rb, line 67
# Collects siblings (this node included) whose offset from this node matches
# any entry in +pos+. Entries are compared with ===, so integers and integer
# ranges both work. A Range whose begin is a String is first translated into
# an offset range by looking both ends up with parent.index.
#
# Returns an Elements in document order.
#
# The original contained a leftover debugging statement (+p pos+) that wrote
# to stdout on every call. It has been removed.
def nodes_at(*pos)
  sib = parent.children
  si = sib.index(self)

  # Rewrite name-based ranges ("a".."b") as offsets relative to this node.
  pos.map! do |r|
    if r.is_a?(Range) and r.begin.is_a?(String)
      Range.new(parent.index(r.begin) - si, parent.index(r.end) - si, r.exclude_end?)
    else
      r
    end
  end

  picked = []
  sib.each_with_index do |node, i|
    offset = i - si
    picked << node if pos.any? { |r| r === offset }
  end
  Elements[*picked]
end
# File lib/hpricot/traverse.rb, line 246
# Returns this node's index among the parent's children that share its
# pathname (as reported by parent.children_of_type).
def position
  same_kind = parent.children_of_type(self.pathname)
  same_kind.index(self)
end
Find all preceding nodes.
# File lib/hpricot/traverse.rb, line 107
# Returns an Elements holding every sibling that comes before this node in
# the parent's children list.
def preceding
  siblings = parent.children
  Elements[*siblings[0...siblings.index(self)]]
end
Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.
# File lib/hpricot/traverse.rb, line 99
# Returns the sibling that directly precedes this node in the parent's
# children list, or nil when this node is first, has no parent, or the
# parent has no children list.
#
# The original tested +sib+ only after calling sib.index, so its nil check
# could never prevent the NoMethodError it was written for, and it had no
# parent check at all. Both guards now run before any lookup.
def previous
  return unless parent

  sib = parent.children
  return unless sib

  x = sib.index(self) - 1
  # A negative index would wrap around to the end of the array.
  sib[x] if x >= 0
end
Is this object an XML processing instruction?
# File lib/hpricot/traverse.rb, line 17
# True when this object is a ProcIns::Trav.
def procins?
  is_a?(ProcIns::Trav)
end
Searches this node for all elements matching the CSS or XPath
expr
. Returns an Elements array containing the matching
nodes. If blk
is given, it is used to iterate through the
matching set.
# search(expr, &blk): evaluates the selector +expr+ starting from this node.
#
# * A Range +expr+ short-circuits: the result is
#   Elements.expand(at(expr.begin), at(expr.end), expr.exclude_end?).
# * Anything else is converted with to_s and consumed from the left in a loop
#   that runs until the string is empty or stops shrinking (+hist+ keeps the
#   expression after each step; two equal consecutive entries break the loop).
# * Each pass strips whitespace with clean_path, drops a leading "//", and
#   then dispatches on the leading token:
#     ".." (optionally after "/")  -> each node is replaced by its parent
#     ">" or "/"                   -> nodes are replaced by their children
#     "+"                          -> each node is replaced by its next sibling
#     "~"                          -> each node is replaced by all later siblings
#     "|" or ","                   -> the nodes found so far are moved to +done+
#                                     (minus a leading self) and matching
#                                     restarts from [self]
#     otherwise                    -> an optional "#" or "." prefix plus a name
#                                     is read; "#" looks the name up with
#                                     get_element_by_id, everything else
#                                     gathers descendants via traverse_element
#                                     (for "*", an empty name, a "." prefix or
#                                     a following "()") or
#                                     get_elements_by_tag_name
# * A ":name" piece found in the remainder is folded into the tag name unless
#   it is ":not" or Traverse defines a "filter[:name]" method.
# * +last+ is non-nil only right after a combinator; when it is nil the node
#   itself is subtracted from its own tag-name matches.
# * After the dispatch, Elements.filter(nodes, expr) supplies the next
#   nodes/expr pair.
# * Result: +done+ plus the flattened, de-duplicated nodes. With a block each
#   node is yielded and self is returned; without one an Elements is returned.
#
# NOTE(review): the regexp global $' (post-match) drives the parsing, so
# statement order matters throughout. +oop+ is assigned but never read in the
# visible code.
# File lib/hpricot/traverse.rb, line 254 def search(expr, &blk) if Range === expr return Elements.expand(at(expr.begin), at(expr.end), expr.exclude_end?) end last = nil nodes = [self] done = [] expr = expr.to_s hist = [] until expr.empty? expr = clean_path(expr) expr.gsub!(%r!^//!, '') case expr when %r!^/?\.\.! last = expr = $' nodes.map! { |node| node.parent } when %r!^[>/]\s*! last = expr = $' nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact] when %r!^\+! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[siblings.index(node)+1] end nodes.compact! when %r!^~! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[(siblings.index(node)+1)..-1] end nodes.flatten! when %r!^[|,]! last = expr = " #$'" nodes.shift if nodes.first == self done += nodes nodes = [self] else m = expr.match(%r!^([#.]?)([a-z0-9\*_-]*)!i).to_a after = $' mt = after[%r!:[a-z0-9\*_-]+!i, 0] oop = false if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]") after = $' m[2] += mt expr = after end if m[1] == '#' oid = get_element_by_id(m[2]) nodes = oid ? [oid] : [] expr = after else m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "." ret = [] nodes.each do |node| case m[2] when '*' node.traverse_element { |n| ret << n } else if node.respond_to? :get_elements_by_tag_name ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)] end end end nodes = ret end last = nil end hist << expr break if hist[-1] == hist[-2] nodes, expr = Elements.filter(nodes, expr) end nodes = done + nodes.flatten.uniq if blk nodes.each(&blk) self else Elements[*nodes] end end
Replace this element and its contents with the nodes contained in the
html
string.
# File lib/hpricot/traverse.rb, line 133
# Marks the parent as altered, builds nodes from +html+ (or the block) with
# #make, and asks the parent to put them in place of this node. Returns
# whatever replace_child returns.
def swap(html = nil, &blk)
  container = parent
  container.altered!
  replacement = make(html, &blk)
  container.replace_child(self, replacement)
end
Is this object an HTML text node?
# File lib/hpricot/traverse.rb, line 11
# True when this object is a Text::Trav.
def text?
  is_a?(Text::Trav)
end
Builds an HTML string from this node and its contents. If you need to write
to a stream, try calling output(io)
as a method on this
object.
# File lib/hpricot/traverse.rb, line 36
# Returns the result of calling #output with a fresh empty string.
def to_html
  buffer = ""
  output(buffer)
end
Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.
# File lib/hpricot/traverse.rb, line 43
# Returns the result of calling #output with a fresh empty string and the
# :preserve option switched on.
def to_original_html
  buffer = ""
  output(buffer, :preserve => true)
end
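Builds a string from the text contained in this node. All HTML elements are removed. Leading and trailing whitespace is stripped from the result, and any run of two or more newlines is reduced to exactly two.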
# File lib/hpricot/traverse.rb, line 148
# Concatenates the to_plain_text of every child, strips the result, and
# reduces every run of two or more newlines to exactly two. Returns "" when
# this object has no +children+ method or when it returns nil.
def to_plain_text
  return "" unless respond_to?(:children) and children

  joined = children.map { |child| child.to_plain_text }.join
  joined.strip.gsub(/\n{2,}/, "\n\n")
end
traverse_element
traverses elements in the tree. It yields
elements in depth first order.
If names are empty, it yields all elements. If non-empty names are given, it should be list of universal names.
A nested element is yielded in depth first order as follows.
t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>') t.traverse_element("a", "c") {|e| p e} # => {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>} {emptyelem <a id="1">} {emptyelem <c id="2">}
Universal names are specified as follows.
t = Hpricot(<<'End') <html> <meta name="robots" content="index,nofollow"> <meta name="author" content="Who am I?"> </html> End t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e} # => {emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">} {emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
# File lib/hpricot/traverse.rb, line 374
# Dispatches to traverse_all_element when no names are given. Otherwise it
# builds a lookup hash ({name => true}) from +names+ and hands it to
# traverse_some_element. The block is forwarded either way. Always returns nil.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    wanted = names.inject({}) do |set, n|
      set[n] = true
      set
    end
    traverse_some_element(wanted, &block)
  end
  nil
end
traverse_text
traverses texts in the tree
# File lib/hpricot/traverse.rb, line 680
# Forwards the block to traverse_text_internal and discards its result.
# Always returns nil.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)
  return nil
end
Is this object an XML declaration?
# File lib/hpricot/traverse.rb, line 13
# True when this object is an XMLDecl::Trav.
def xmldecl?
  is_a?(XMLDecl::Trav)
end
Builds a unique XPath string for this node, from the root of the document containing it.
# File lib/hpricot/traverse.rb, line 209
# Builds an XPath string for this node.
#
# An element with an +id+ attribute yields "//<name>[@id='<id>']". Otherwise
# this node's pathname is joined onto the parent's xpath, and a one-based
# "[n]" predicate is appended when at least two siblings share the pathname.
#
# Cleanup over the original: the counters were initialised with
# "sim, id = 0, 0, 0" (three values for two variables), and the result was
# held in a local named +p+, which shadowed Kernel#p.
def xpath
  return "//#{self.name}[@id='#{get_attribute('id')}']" if elem? && has_attribute?('id')

  sim, id = 0, 0
  siblings = parent.children
  if siblings
    siblings.each do |e|
      # Record how many same-named siblings were seen before reaching self.
      id = sim if e == self
      sim += 1 if e.pathname == self.pathname
    end
  end

  # File.join is kept from the original; it supplies the "/" separator.
  path = File.join(parent.xpath, self.pathname)
  path += "[#{id + 1}]" if sim >= 2
  path
end
Private Instance Methods
# File lib/hpricot/traverse.rb, line 196
# Marks this node altered and sets it as the parent of every entry in +nodes+
# (a single node is treated as a one-element list). Does nothing and returns
# nil when +nodes+ is nil or false.
def reparent(nodes)
  if nodes
    altered!
    [*nodes].each { |node| node.parent = self }
  end
end