# Fetches the document identified by +doc_url+ and stores its parsed form in
# @@hpricot_doc (and, for non-file documents, the page in @@mechanize_doc).
#
# doc_url:: location of the document to load.
# args::    optional. When given, the first element is read as an options
#           hash with these keys:
#           :proxy          - passed to parse_and_set_proxy when present
#           :mechanize_doc  - an already-fetched page to use instead of
#                             issuing a request
#           :resolve        - forwarded to handle_relative_url
#           :basic_auth     - passed to parse_and_set_basic_auth when present
#           :user_agent     - passed to set_user_agent; a Konqueror string is
#                             used when absent
#
# Without options, no proxy/user-agent/auth setup is performed here,
# +mechanize_doc+ is nil and +resolve+ is :full.
def self.fetch(doc_url, *args)
  if args.empty?
    mechanize_doc = nil
    resolve = :full
  else
    options = args[0]
    proxy = options[:proxy]
    mechanize_doc = options[:mechanize_doc]
    # NOTE(review): resolve is nil here when :resolve is omitted, whereas the
    # no-options path uses :full -- confirm handle_relative_url accepts nil.
    resolve = options[:resolve]
    basic_auth = options[:basic_auth]
    user_agent = options[:user_agent] || "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)"
    parse_and_set_proxy(proxy) if proxy
    set_user_agent(user_agent)
    parse_and_set_basic_auth(basic_auth) if basic_auth
  end

  @@current_doc_url = doc_url
  @@current_doc_protocol = determine_protocol

  if mechanize_doc.nil? && @@current_doc_protocol != 'file'
    handle_relative_path(doc_url)
    handle_relative_url(doc_url, resolve)
    Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
    # NOTE(review): this check looks redundant with the enclosing condition
    # unless the helpers above can change @@current_doc_protocol -- kept as is.
    unless 'file' == @@current_doc_protocol
      @@mechanize_doc = @@agent.get(@@current_doc_url)
    end
  else
    # Either the caller supplied a page, or this is a 'file' document (in which
    # case this assigns whatever was supplied, possibly nil).
    @@mechanize_doc = mechanize_doc
  end

  if @@current_doc_protocol == 'file'
    # Block form of open closes the handle as soon as the content is read;
    # the previous open(...).read left it open until garbage collection.
    raw_content = open(@@current_doc_url) { |io| io.read }
    @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(raw_content))
  else
    @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc.body))
    store_host_name(self.get_current_doc_url)
  end
end