# File lib/robotex.rb, line 100
def self.get_robots_txt(uri, user_agent)
  begin
    Timeout::timeout(Robotex.timeout) do
      io = URI.join(uri.to_s, "/robots.txt").open("User-Agent" => user_agent) rescue nil
    end
  rescue Timeout::Error
    STDERR.puts "robots.txt request timed out"
  end
end
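A hedged sketch of calling this class method directly, not taken from the listing above: Robotex.timeout= is assumed to exist as the writer paired with the Robotex.timeout read used in the method, the URL and user agent are illustrative, and the return value (an open IO, or nil on failure or timeout) is inferred from the listing.

  require 'robotex'

  Robotex.timeout = 5   # assumed setter for the timeout consulted above
  io = Robotex.get_robots_txt(URI('http://example.com/'), 'MyCrawler/1.0')
  puts io.read if io    # io appears to be nil when the fetch fails or times out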
# File lib/robotex.rb, line 118
def initialize(user_agent = nil)
  user_agent = "Robotex/#{VERSION} (http://www.github.com/chriskite/robotex)" if user_agent.nil?
  @user_agent = user_agent
  @last_accessed = Time.at(1)
  @parsed = {}
end
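A hedged construction sketch based on the initializer above; the custom user agent string is purely illustrative.

  require 'robotex'

  robotex = Robotex.new   # defaults to "Robotex/<VERSION> (http://www.github.com/chriskite/robotex)"
  crawler = Robotex.new('MyCrawler/1.0 (+http://example.com/bot)')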
Download the server's robots.txt, and return true if we are allowed to access the url, false otherwise
# File lib/robotex.rb, line 133
def allowed?(uri)
  parse_host(uri).allowed?(uri, @user_agent)
end
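A hedged usage sketch for allowed?; the URL is illustrative, and the answer depends on what the target site's robots.txt says for the configured user agent.

  require 'robotex'

  robotex = Robotex.new('MyCrawler/1.0')
  url = 'http://example.com/private/page.html'
  if robotex.allowed?(url)
    # fetch the page
  else
    # robots.txt disallows this path for our user agent
  end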
Return the value of the Crawl-Delay directive, or nil if none
# File lib/robotex.rb, line 139
def delay(uri)
  parse_host(uri).delay(@user_agent)
end
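A hedged usage sketch for delay, pausing between requests when a site declares a Crawl-Delay; the URL and the sleep call are illustrative.

  require 'robotex'

  robotex = Robotex.new('MyCrawler/1.0')
  delay = robotex.delay('http://example.com/')
  sleep(delay) if delay   # nil when no Crawl-Delay directive is present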