# Evaluates every root pattern against the current document, then keeps
# moving to a "next page" and evaluating again until there is no further
# page to visit.
#
# The next page comes from one of two sources:
# * @next_page_pattern, when set: the href of the last node matched by the
#   pattern's first filter on the current document;
# * @next_page_list otherwise: URLs are popped until one is found that is
#   not yet recorded in @processed_pages.
#
# Each visited URL is appended to @processed_pages.
#
# Returns the flat array of everything the root patterns produced, in
# visiting order.
def evaluate_extractor
  root_results = []
  current_page_count = 1
  loop do
    @processed_pages << get_current_doc_url
    @root_patterns.each do |root_pattern|
      root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
    end

    url = next_page_url_to_fetch(current_page_count)
    break if url.nil?

    restore_host_name
    FetchAction.fetch(url)
    current_page_count += 1
  end
  root_results
end

# Picks the URL evaluate_extractor should fetch next.
#
# current_page_count - number of pages evaluated so far (1 for the first).
#
# Returns the URL, or nil when crawling has to stop (page limit reached, no
# usable link on the page, link already visited, or the URL list ran dry).
def next_page_url_to_fetch(current_page_count)
  if @next_page_pattern.nil?
    # List mode: discard already-visited entries, stop when nothing is left.
    until @next_page_list.empty?
      candidate = @next_page_list.pop
      return candidate unless @processed_pages.include?(candidate)
    end
    return nil
  end

  return nil if @next_page_pattern.options[:limit] == current_page_count

  filter = @next_page_pattern.filters[0]
  return nil unless filter.generate_XPath_for_example(true)

  doc = get_hpricot_doc
  # to_a instead of a blockless map: on Ruby >= 1.9 a blockless map yields an
  # Enumerator, which does not respond to #last.
  link = (doc / filter.xpath).to_a.last
  return nil if link.nil?

  link = XPathUtils.find_nearest_node_with_attribute(link, 'href')
  href = link && link.attributes['href']
  return nil if href.nil?

  # NOTE(review): the previous code substituted '&' with '&' (a no-op); the
  # evident intent is to decode the '&amp;' entity in the raw attribute value.
  href = href.gsub('&amp;', '&')

  # The document does not change until the next fetch, so a link that leads
  # to an already-visited page would be rediscovered forever; stop instead.
  return nil if @processed_pages.include?(href)

  href
end