Class | ::Utils::HttpUtil |
In: | lib/rbot/core/utils/httputil.rb |
Parent: | Object |
Class for making HTTP requests easier (mainly for plugins to use). This class can check the bot's proxy configuration to determine whether a proxy needs to be used, which includes support for per-URL proxy configuration.
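As a rough illustration of the intended use, here is a minimal sketch of a plugin fetching a page through this class, assuming the usual rbot convention of reaching the shared instance via @bot.httputil (the URL is hypothetical):

  # Hypothetical plugin snippet; assumes the shared HttpUtil instance is
  # exposed to plugins as @bot.httputil.
  class ExamplePlugin < Plugin
    def example(m, params)
      body = @bot.httputil.get('http://www.example.com/')
      m.reply(body ? "fetched #{body.length} bytes" : "request failed")
    end
  end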
Create the HttpUtil instance, associating it with Bot bot
  # File lib/rbot/core/utils/httputil.rb, line 291
  def initialize(bot)
    @bot = bot
    @cache = Hash.new
    @headers = {
      'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
      'Accept-Encoding' => 'gzip;q=1, deflate;q=1, identity;q=0.8, *;q=0.2',
      'User-Agent' =>
        "rbot http util #{$version} (#{Irc::Bot::SOURCE_URL})"
    }
    debug "starting http cache cleanup timer"
    @timer = @bot.timer.add(300) {
      self.remove_stale_cache unless @bot.config['http.no_expire_cache']
    }
  end
uri: | uri to query (URI object or String) |
Simple GET request: returns (if possible) the response body, following redirects and caching if requested, and yielding the actual response(s) to the optional block. See get_response for details on the supported options.
  # File lib/rbot/core/utils/httputil.rb, line 635
  def get(uri, options = {}, &block) # :yields: resp
    begin
      resp = get_response(uri, options, &block)
      raise "http error: #{resp}" unless Net::HTTPOK === resp ||
        Net::HTTPPartialContent === resp
      return resp.body
    rescue Exception => e
      error e
    end
    return nil
  end
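For instance, a hedged usage sketch (hypothetical URL; assumes the @bot.httputil accessor):

  # Fetch a page without caching, following at most two redirects.
  body = @bot.httputil.get('http://www.example.com/index.html',
                           :cache => false, :max_redir => 2)
  debug "no body (error or non-OK status)" unless body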
uri: | uri to query (URI object or String) |
nbytes: | number of bytes to get |
Partial GET request: returns (if possible) the first nbytes bytes of the response body, following redirects and caching if requested, and yielding the actual response(s) to the optional block. See get_response for details on the supported options.
  # File lib/rbot/core/utils/httputil.rb, line 693
  def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block) # :yields: resp
    opts = {:range => "bytes=0-#{nbytes}"}.merge(options)
    return get(uri, opts, &block)
  end
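A sketch of how this might be used to sniff a page title without downloading the whole document (hypothetical URL; assumes the @bot.httputil accessor):

  # Ask only for the first kilobyte of the page body.
  partial = @bot.httputil.get_partial('http://www.example.com/', 1024)
  title = partial[/<title>(.*?)<\/title>/im, 1] if partial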
uri: | URI to create a proxy for |
Return a net/http Proxy object, configured for proxying based on the bot's proxy configuration. See proxy_required for more details on this.
  # File lib/rbot/core/utils/httputil.rb, line 362
  def get_proxy(uri, options = {})
    opts = {
      :read_timeout => @bot.config["http.read_timeout"],
      :open_timeout => @bot.config["http.open_timeout"]
    }.merge(options)

    proxy = nil
    proxy_host = nil
    proxy_port = nil
    proxy_user = nil
    proxy_pass = nil

    if @bot.config["http.use_proxy"]
      if (ENV['http_proxy'])
        proxy = URI.parse ENV['http_proxy'] rescue nil
      end
      if (@bot.config["http.proxy_uri"])
        proxy = URI.parse @bot.config["http.proxy_uri"] rescue nil
      end
      if proxy
        debug "proxy is set to #{proxy.host} port #{proxy.port}"
        if proxy_required(uri)
          proxy_host = proxy.host
          proxy_port = proxy.port
          proxy_user = @bot.config["http.proxy_user"]
          proxy_pass = @bot.config["http.proxy_pass"]
        end
      end
    end

    h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_pass)
    h.use_ssl = true if uri.scheme == "https"

    h.read_timeout = opts[:read_timeout]
    h.open_timeout = opts[:open_timeout]
    return h
  end
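If needed, the returned object can also be driven directly; a sketch under the assumption that get_proxy is callable from plugin code (normally get/get_response do this for you; the URL is hypothetical):

  # Build a proxy-aware Net::HTTP object and issue a HEAD request manually.
  uri = URI.parse('http://www.example.com/')
  http = @bot.httputil.get_proxy(uri, :read_timeout => 10)
  http.start do |h|
    resp = h.request(Net::HTTP::Head.new(uri.request_uri))
    debug "status: #{resp.code}"
  end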
uri: | uri to query (URI object or String) |
Generic http transaction method. It will return a Net::HTTPResponse object or raise an exception
If a block is given, it will yield the response (see :yield option)
Currently supported options:
method: | request method [:get (default), :post or :head] |
open_timeout: | open timeout for the proxy |
read_timeout: | read timeout for the proxy |
cache: | should we cache results? |
yield: | if :final [default], calls the block for the final response object; if :all, calls the block for all intermediate redirects, too |
max_redir: | how many redirects to follow before raising the exception; if -1, don't follow redirects, just return them |
range: | make a ranged request (usually GET). accepts a string for the HTTP/1.1 "Range:" header (e.g. "bytes=0-1000") |
body: | request body (usually for POST requests) |
headers: | additional headers to be set for the request. Its value must be a Hash in the form { 'Header' => 'value' } |
  # File lib/rbot/core/utils/httputil.rb, line 517
  def get_response(uri_or_s, options = {}, &block) # :yields: resp
    uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
    unless URI::HTTP === uri
      if uri.scheme
        raise "#{uri.scheme.inspect} URI scheme is not supported"
      else
        raise "don't know what to do with #{uri.to_s.inspect}"
      end
    end

    opts = {
      :max_redir => @bot.config['http.max_redir'],
      :yield => :final,
      :cache => true,
      :method => :GET
    }.merge(options)

    resp = nil

    req_class = case opts[:method].to_s.downcase.intern
                when :head, :"net::http::head"
                  opts[:max_redir] = -1
                  Net::HTTP::Head
                when :get, :"net::http::get"
                  Net::HTTP::Get
                when :post, :"net::http::post"
                  opts[:cache] = false
                  opts[:body] or raise 'post request w/o a body?'
                  warning "refusing to cache POST request" if options[:cache]
                  Net::HTTP::Post
                else
                  warning "unsupported method #{opts[:method]}, doing GET"
                  Net::HTTP::Get
                end

    if req_class != Net::HTTP::Get && opts[:range]
      warning "can't request ranges for #{req_class}"
      opts.delete(:range)
    end

    cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"

    if req_class != Net::HTTP::Get && req_class != Net::HTTP::Head
      if opts[:cache]
        warning "can't cache #{req_class.inspect} requests, working w/o cache"
        opts[:cache] = false
      end
    end

    debug "get_response(#{uri}, #{opts.inspect})"

    cached = @cache[cache_key]

    if opts[:cache] && cached
      debug "got cached"
      if !cached.expired?
        debug "using cached"
        cached.use
        return handle_response(uri, cached.response, opts, &block)
      end
    end

    headers = @headers.dup.merge(opts[:headers] || {})
    headers['Range'] = opts[:range] if opts[:range]
    headers['Authorization'] = opts[:auth_head] if opts[:auth_head]

    if opts[:cache] && cached && (req_class == Net::HTTP::Get)
      cached.setup_headers headers
    end

    req = req_class.new(uri.request_uri, headers)
    if uri.user && uri.password
      req.basic_auth(uri.user, uri.password)
      opts[:auth_head] = req['Authorization']
    end
    req.body = opts[:body] if req_class == Net::HTTP::Post
    debug "prepared request: #{req.to_hash.inspect}"

    begin
      get_proxy(uri, opts).start do |http|
        http.request(req) do |resp|
          resp['x-rbot-location'] = uri.to_s
          if Net::HTTPNotModified === resp
            debug "not modified"
            begin
              cached.revalidate(resp)
            rescue Exception => e
              error e
            end
            debug "reusing cached"
            resp = cached.response
          elsif Net::HTTPServerError === resp || Net::HTTPClientError === resp
            debug "http error, deleting cached obj" if cached
            @cache.delete(cache_key)
          end

          begin
            return handle_response(uri, resp, opts, &block)
          ensure
            if cached = CachedObject.maybe_new(resp) rescue nil
              debug "storing to cache"
              @cache[cache_key] = cached
            end
          end
        end
      end
    rescue Exception => e
      error e
      raise e.message
    end
  end
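A minimal sketch of a call with a block and a few of the options above (hypothetical URL; assumes the @bot.httputil accessor):

  # Cached GET with an extra request header; the final response is yielded.
  @bot.httputil.get_response('http://www.example.com/feed.xml',
                             :cache => true,
                             :headers => {'Accept' => 'application/xml'}) do |resp|
    debug "got #{resp['content-type']}, #{resp.body.length} bytes"
  end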
Internal method used to handle the response resp received when making a request for URI uri.
It follows redirects, optionally yielding them if option :yield is :all.
Also yields and returns the final resp.
  # File lib/rbot/core/utils/httputil.rb, line 407
  def handle_response(uri, resp, opts, &block) # :yields: resp
    if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
      if resp.key?('location')
        raise 'Too many redirections' if opts[:max_redir] <= 0
        yield resp if opts[:yield] == :all && block_given?
        # some servers actually provide unescaped location, e.g.
        # http://ulysses.soup.io/post/60734021/Image%20curve%20ball
        # redirects to something like
        # http://ulysses.soup.io/post/60734021/Image curve ball?sessid=8457b2a3752085cca3fb1d79b9965446
        # causing the URI parser to (obviously) complain. We cannot just
        # escape blindly, as this would make a mess of already-escaped
        # locations, so we only do it if the URI.parse fails
        loc = resp['location']
        escaped = false
        debug "redirect location: #{loc.inspect}"
        begin
          new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
        rescue
          if escaped
            raise $!
          else
            loc = URI.escape(loc)
            escaped = true
            debug "escaped redirect location: #{loc.inspect}"
            retry
          end
        end
        new_opts = opts.dup
        new_opts[:max_redir] -= 1
        case opts[:method].to_s.downcase.intern
        when :post, :"net::http::post"
          new_opts[:method] = :get
        end
        if resp['set-cookie']
          debug "set cookie request for #{resp['set-cookie']}"
          cookie, cookie_flags = (resp['set-cookie']+'; ').split('; ', 2)
          domain = uri.host
          cookie_flags.scan(/(\S+)=(\S+);/) { |key, val|
            if key.intern == :domain
              domain = val
              break
            end
          }
          debug "cookie domain #{domain} / #{new_loc.host}"
          if new_loc.host.rindex(domain) == new_loc.host.length - domain.length
            debug "setting cookie"
            new_opts[:headers] ||= Hash.new
            new_opts[:headers]['Cookie'] = cookie
          else
            debug "cookie is for another domain, ignoring"
          end
        end
        debug "following the redirect to #{new_loc}"
        return get_response(new_loc, new_opts, &block)
      else
        warning ":| redirect w/o location?"
      end
    end
    class << resp
      undef_method :body
      alias :body :cooked_body
    end
    unless resp['content-type']
      debug "No content type, guessing"
      resp['content-type'] =
        case resp['x-rbot-location']
        when /.html?$/i
          'text/html'
        when /.xml$/i
          'application/xml'
        when /.xhtml$/i
          'application/xml+xhtml'
        when /.(gif|png|jpe?g|jp2|tiff?)$/i
          "image/#{$1.sub(/^jpg$/,'jpeg').sub(/^tif$/,'tiff')}"
        else
          'application/octetstream'
        end
    end
    if block_given?
      yield(resp)
    else
      # Net::HTTP wants us to read the whole body here
      resp.raw_body
    end
    return resp
  end
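Since :max_redir => -1 makes this method return redirects instead of following them, a caller can inspect the redirect target itself; a hedged sketch (hypothetical URL; assumes the @bot.httputil accessor):

  # Resolve a short URL one hop without following the redirect.
  @bot.httputil.get_response('http://www.example.com/short/abc123',
                             :max_redir => -1) do |resp|
    debug "redirect target: #{resp['location']}" if Net::HTTPRedirection === resp
  end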
uri: | uri to query (URI object or String) |
Simple HEAD request: returns (if possible) the response head, yielding the actual response(s) to the optional block. Note that, as implemented, HEAD requests do not follow redirects (max_redir is forced to -1). See get_response for details on the supported options.
  # File lib/rbot/core/utils/httputil.rb, line 653
  def head(uri, options = {}, &block) # :yields: resp
    opts = {:method => :head}.merge(options)
    begin
      resp = get_response(uri, opts, &block)
      # raise "http error #{resp}" if Net::HTTPClientError === resp ||
      #   Net::HTTPServerError == resp
      return resp
    rescue Exception => e
      error e
    end
    return nil
  end
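A sketch of inspecting headers without downloading the body (hypothetical URL; assumes the @bot.httputil accessor):

  # HEAD request: only the response headers are of interest.
  resp = @bot.httputil.head('http://www.example.com/big_file.iso')
  if resp
    debug "type: #{resp['content-type']}, size: #{resp['content-length']}"
  end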
uri: | uri to query (URI object or String) |
data: | body of the POST |
Simple POST request: returns (if possible) the response, following redirects and yielding the response(s) to the optional block (POST requests are never cached). See get_response for details on the supported options.
  # File lib/rbot/core/utils/httputil.rb, line 673
  def post(uri, data, options = {}, &block) # :yields: resp
    opts = {:method => :post, :body => data, :cache => false}.merge(options)
    begin
      resp = get_response(uri, opts, &block)
      raise 'http error' unless Net::HTTPOK === resp or Net::HTTPCreated === resp
      return resp
    rescue Exception => e
      error e
    end
    return nil
  end
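Since data is sent verbatim as the request body, form data must already be URL-encoded; a sketch (hypothetical URL and fields; assumes the @bot.httputil accessor):

  # POST a pre-encoded form body with an explicit content type.
  resp = @bot.httputil.post('http://www.example.com/search', 'q=rbot&lang=en',
                            :headers => {'Content-Type' => 'application/x-www-form-urlencoded'})
  debug "search returned #{resp.code}" if resp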
This method checks whether a proxy is required to access uri, by looking at the config values +http.proxy_include+ and +http.proxy_exclude+.
Each of these config values, if set, holds regular expressions against which the server name and its IP addresses are checked; +http.proxy_exclude+ disables the proxy for matching hosts, while +http.proxy_include+ re-enables it.
  # File lib/rbot/core/utils/httputil.rb, line 318
  def proxy_required(uri)
    use_proxy = true
    if @bot.config["http.proxy_exclude"].empty? && @bot.config["http.proxy_include"].empty?
      return use_proxy
    end

    list = [uri.host]
    begin
      list.concat Resolv.getaddresses(uri.host)
    rescue StandardError => err
      warning "couldn't resolve host #{uri.host}"
    end

    unless @bot.config["http.proxy_exclude"].empty?
      re = @bot.config["http.proxy_exclude"].collect{|r| Regexp.new(r)}
      re.each do |r|
        list.each do |item|
          if r.match(item)
            use_proxy = false
            break
          end
        end
      end
    end
    unless @bot.config["http.proxy_include"].empty?
      re = @bot.config["http.proxy_include"].collect{|r| Regexp.new(r)}
      re.each do |r|
        list.each do |item|
          if r.match(item)
            use_proxy = true
            break
          end
        end
      end
    end
    debug "using proxy for uri #{uri}?: #{use_proxy}"
    return use_proxy
  end
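As a purely illustrative configuration sketch (all values are made up), the proxy could be enabled globally but skipped for the local network; both lists hold regexp source strings that are matched against the host name and its resolved addresses:

  http.use_proxy     = true
  http.proxy_uri     = http://proxy.example.com:8080
  http.proxy_exclude = ["localhost", "127\.0\.0\.1", "^192\.168\."]
  http.proxy_include = []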
  # File lib/rbot/core/utils/httputil.rb, line 698
  def remove_stale_cache
    debug "Removing stale cache"
    now = Time.new
    max_last = @bot.config['http.expire_time'] * 60
    max_first = @bot.config['http.max_cache_time'] * 60
    debug "#{@cache.size} pages before"
    begin
      @cache.reject! { |k, val|
        (now - val.last_used > max_last) || (now - val.first_used > max_first)
      }
    rescue => e
      error "Failed to remove stale cache: #{e.pretty_inspect}"
    end
    debug "#{@cache.size} pages after"
  end
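To make the expiry rule concrete, a small worked example with made-up times and config values (http.expire_time and http.max_cache_time are expressed in minutes):

  now        = Time.new
  last_used  = now - 90 * 60        # entry last hit 90 minutes ago
  first_used = now - 5 * 60 * 60    # entry first cached 5 hours ago
  max_last   = 60 * 60              # http.expire_time    = 60   (minutes)
  max_first  = 24 * 60 * 60         # http.max_cache_time = 1440 (minutes)
  stale = (now - last_used > max_last) || (now - first_used > max_first)
  # => true: the entry was not used in the last hour, so the next cleanup
  #    run would drop it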