Class ::Utils::HttpUtil
In: lib/rbot/core/utils/httputil.rb
Parent: Object
User HTTPResponse BasicUserMessage Bot\n[lib/rbot/core/remote.rb\nlib/rbot/core/utils/extends.rb\nlib/rbot/core/utils/filters.rb\nlib/rbot/core/utils/wordlist.rb] HttpUtil lib/rbot/core/userdata.rb lib/rbot/core/utils/httputil.rb lib/rbot/core/utils/extends.rb lib/rbot/core/remote.rb lib/rbot/core/utils/httputil.rb ParseTime Utils (null) dot/m_15_0.png

Class for making HTTP requests easier (mainly for plugins to use). This class can check the bot proxy configuration to determine whether a proxy needs to be used, which includes support for per-URL proxy configuration.

Methods

Classes and Modules

Class ::Utils::HttpUtil::CachedObject

Public Class methods

Create the HttpUtil instance, associating it with Bot bot

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 291
# Create the HttpUtil instance, associating it with Bot +bot+.
#
# Starts with an empty response cache and a default set of request
# headers (accepted charsets, accepted encodings and a User-Agent built
# from $version and Irc::Bot::SOURCE_URL).
#
# Also registers a recurring timer on the bot (period argument: 300,
# presumably seconds -- confirm against the timer API) that purges stale
# cache entries, unless the 'http.no_expire_cache' config value is set.
# The timer handle is kept in @timer so that #cleanup can remove it.
def initialize(bot)
  @bot = bot
  @cache = {}
  user_agent = "rbot http util #{$version} (#{Irc::Bot::SOURCE_URL})"
  @headers = {
    'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
    'Accept-Encoding' => 'gzip;q=1, deflate;q=1, identity;q=0.8, *;q=0.2',
    'User-Agent' => user_agent
  }
  debug "starting http cache cleanup timer"
  @timer = @bot.timer.add(300) do
    remove_stale_cache unless @bot.config['http.no_expire_cache']
  end
end

Public Instance methods

Clean up on HttpUtil unloading, by stopping the cache cleanup timer.

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 307
# Clean up on HttpUtil unloading: logs a debug message and removes the
# cache cleanup timer (stored in @timer by #initialize) from the bot's
# timer.
def cleanup
  debug 'stopping http cache cleanup timer'
  cleanup_timer = @timer
  @bot.timer.remove(cleanup_timer)
end
uri:uri to query (URI object or String)

Simple GET request, returns (if possible) response body following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 635
# Simple GET request.
#
# uri::     uri to query (URI object or String)
# options:: options forwarded unchanged to #get_response
#
# Returns the response body when the final response is a 200 OK or a
# 206 Partial Content. Any other outcome (a different status, or a
# StandardError raised while performing the request) is logged through
# +error+ and +nil+ is returned instead.
#
# Only StandardError is rescued: Interrupt, SystemExit and other
# non-standard exceptions are deliberately left to propagate so that a
# best-effort fetch can never swallow a shutdown request.
#
# The optional block is handed to #get_response, which yields the
# response(s) to it.
def get(uri, options = {}, &block) # :yields: resp
  resp = get_response(uri, options, &block)
  unless Net::HTTPOK === resp || Net::HTTPPartialContent === resp
    raise "http error: #{resp}"
  end
  resp.body
rescue StandardError => e
  error e
  nil
end
uri:uri to query (URI object or String)
nbytes:number of bytes to get

Partial GET request, returns (if possible) the first nbytes bytes of the response body, following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 693
# Partial GET request.
#
# uri::     uri to query (URI object or String)
# nbytes::  number of bytes to get (defaults to the 'http.info_bytes'
#           config value)
# options:: extra options for #get; an explicit :range given here takes
#           precedence over the one computed from +nbytes+
#
# Returns whatever #get returns for a ranged request covering the first
# +nbytes+ bytes of the body.
#
# HTTP byte ranges are inclusive on both ends, so the first +nbytes+
# bytes are "bytes=0-(nbytes-1)". The last offset is clamped at 0 because
# a range cannot express "zero bytes"; nbytes <= 0 therefore requests a
# single byte.
def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block) # :yields: resp
  last_offset = [nbytes - 1, 0].max
  opts = {:range => "bytes=0-#{last_offset}"}.merge(options)
  return get(uri, opts, &block)
end
uri:URI to create a proxy for

Return a net/http Proxy object, configured for proxying based on the bot's proxy configuration. See proxy_required for more details on this.

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 362
# Return a Net::HTTP object for +uri+, configured for proxying based on
# the bot's proxy configuration.
#
# uri::     URI object to connect to (its host, port and scheme are read)
# options:: may carry :read_timeout and :open_timeout, overriding the
#           'http.read_timeout' / 'http.open_timeout' config values
#
# When 'http.use_proxy' is set, the proxy is taken from ENV['http_proxy']
# and then from the 'http.proxy_uri' config value, the latter winning
# when both are usable. A candidate that is unset, empty, unparsable or
# has no host is skipped, so a blank or malformed 'http.proxy_uri' no
# longer discards a valid environment proxy.
#
# The proxy is only applied when #proxy_required says so for +uri+; in
# that case the 'http.proxy_user' / 'http.proxy_pass' config values are
# passed along as credentials.
#
# SSL is enabled for "https" URIs. The connection is returned unstarted.
def get_proxy(uri, options = {})
  opts = {
    :read_timeout => @bot.config["http.read_timeout"],
    :open_timeout => @bot.config["http.open_timeout"]
  }.merge(options)

  proxy_host = nil
  proxy_port = nil
  proxy_user = nil
  proxy_pass = nil

  if @bot.config["http.use_proxy"]
    proxy = nil
    # Later candidates override earlier ones, but only when they are usable.
    [ENV['http_proxy'], @bot.config["http.proxy_uri"]].each do |candidate|
      next if !candidate || candidate.to_s.empty?
      begin
        parsed = URI.parse(candidate.to_s)
      rescue StandardError => e
        debug "ignoring unparsable proxy setting #{candidate.inspect}: #{e.message}"
        next
      end
      proxy = parsed if parsed.host
    end

    if proxy
      debug "proxy is set to #{proxy.host} port #{proxy.port}"
      if proxy_required(uri)
        proxy_host = proxy.host
        proxy_port = proxy.port
        proxy_user = @bot.config["http.proxy_user"]
        proxy_pass = @bot.config["http.proxy_pass"]
      end
    end
  end

  h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_pass)
  h.use_ssl = true if uri.scheme == "https"

  h.read_timeout = opts[:read_timeout]
  h.open_timeout = opts[:open_timeout]
  return h
end
uri:uri to query (URI object or String)

Generic http transaction method. It will return a Net::HTTPResponse object or raise an exception

If a block is given, it will yield the response (see :yield option)

Currently supported options:

method:request method [:get (default), :post or :head]
open_timeout:open timeout for the proxy
read_timeout:read timeout for the proxy
cache:should we cache results?
yield:if :final [default], calls the block for the response object; if :all, call the block for all intermediate redirects, too
max_redir:how many redirects to follow before raising the exception; if -1, don't follow redirects, just return them
range:make a ranged request (usually GET). accepts a string for HTTP/1.1 "Range:" header (i.e. "bytes=0-1000")
body:request body (usually for POST requests)
headers:additional headers to be set for the request. Its value must be a Hash in the form { 'Header' => 'value' }

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 517
# Generic http transaction method. Returns a Net::HTTPResponse object (as
# processed by #handle_response) or raises an exception.
#
# uri_or_s:: uri to query (URI object or String)
#
# If a block is given, it is passed on to #handle_response, which yields
# the response (see the :yield option).
#
# Currently supported options:
#
# method::       request method [:get (default), :post or :head]; the
#                Net::HTTP::Get / ::Post / ::Head classes are accepted
#                too. Anything else logs a warning and falls back to GET
# open_timeout:: open timeout (forwarded to #get_proxy)
# read_timeout:: read timeout (forwarded to #get_proxy)
# cache::        should we cache results? (forced off for POST)
# yield::        :final [default] or :all, interpreted by #handle_response
# max_redir::    how many redirects to follow (default: config value
#                'http.max_redir'); forced to -1 for HEAD requests
# range::        string for the "Range:" header (i.e. "bytes=0-1000");
#                dropped with a warning for non-GET requests
# body::         request body, mandatory for POST requests
# headers::      additional request headers, { 'Header' => 'value' }
# auth_head::    value for the Authorization header; it is also filled in
#                from the userinfo part of the URI when present
#
# Raises a RuntimeError for non-HTTP(S) URIs and for a POST without a
# body. Any exception raised while performing the request is logged and
# re-raised as a RuntimeError carrying only the original message (the
# original exception class is not preserved).
def get_response(uri_or_s, options = {}, &block) # :yields: resp
  uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
  unless URI::HTTP === uri
    if uri.scheme
      raise "#{uri.scheme.inspect} URI scheme is not supported"
    else
      raise "don't know what to do with #{uri.to_s.inspect}"
    end
  end

  opts = {
    :max_redir => @bot.config['http.max_redir'],
    :yield => :final,
    :cache => true,
    :method => :GET
  }.merge(options)

  # The method is normalized to a lowercase symbol, so both :head and the
  # Net::HTTP::Head class (=> :"net::http::head") select the same branch.
  req_class = case opts[:method].to_s.downcase.intern
              when :head, :"net::http::head"
                opts[:max_redir] = -1
                Net::HTTP::Head
              when :get, :"net::http::get"
                Net::HTTP::Get
              when :post, :"net::http::post"
                opts[:cache] = false
                opts[:body] or raise 'post request w/o a body?'
                warning "refusing to cache POST request" if options[:cache]
                Net::HTTP::Post
              else
                warning "unsupported method #{opts[:method]}, doing GET"
                Net::HTTP::Get
              end

  if req_class != Net::HTTP::Get && opts[:range]
    warning "can't request ranges for #{req_class}"
    opts.delete(:range)
  end

  # Ranged and non-ranged requests, as well as different request classes,
  # are cached separately.
  cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"

  debug "get_response(#{uri}, #{opts.inspect})"

  cached = @cache[cache_key]

  if opts[:cache] && cached
    debug "got cached"
    if !cached.expired?
      debug "using cached"
      cached.use
      return handle_response(uri, cached.response, opts, &block)
    end
  end

  headers = @headers.dup.merge(opts[:headers] || {})
  headers['Range'] = opts[:range] if opts[:range]
  headers['Authorization'] = opts[:auth_head] if opts[:auth_head]

  # An expired cached entry gets a chance to add its own headers to the
  # request (see CachedObject#setup_headers).
  if opts[:cache] && cached && (req_class == Net::HTTP::Get)
    cached.setup_headers headers
  end

  req = req_class.new(uri.request_uri, headers)
  if uri.user && uri.password
    req.basic_auth(uri.user, uri.password)
    # Remembered in opts so that it is carried over to followed redirects.
    opts[:auth_head] = req['Authorization']
  end
  req.body = opts[:body] if req_class == Net::HTTP::Post
  debug "prepared request: #{req.to_hash.inspect}"

  begin
    get_proxy(uri, opts).start do |http|
      http.request(req) do |response|
        response['x-rbot-location'] = uri.to_s
        if Net::HTTPNotModified === response && cached
          debug "not modified"
          begin
            cached.revalidate(response)
          rescue Exception => e
            error e
          end
          debug "reusing cached"
          response = cached.response
        elsif Net::HTTPNotModified === response
          # Nothing to revalidate: hand the 304 over as it is instead of
          # failing on a missing cache entry.
          debug "not modified, but no cached object to reuse"
        elsif Net::HTTPServerError === response || Net::HTTPClientError === response
          debug "http error, deleting cached obj" if cached
          @cache.delete(cache_key)
        end

        begin
          return handle_response(uri, response, opts, &block)
        ensure
          # Runs after the response has been handled, even on errors.
          cacheable = begin
            CachedObject.maybe_new(response)
          rescue StandardError
            nil
          end
          if cacheable
            debug "storing to cache"
            @cache[cache_key] = cacheable
          end
        end
      end
    end
  rescue Exception => e
    error e
    raise e.message
  end
end

Internal method used to handle response resp received when making a request for URI uri.

It follows redirects, optionally yielding them if option :yield is :all.

Also yields and returns the final resp.

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 407
# Internal method used to handle response +resp+ received when making a
# request for URI +uri+ with options +opts+ (as built by #get_response).
#
# Redirections carrying a Location header are followed through
# #get_response while opts[:max_redir] allows it ('Too many redirections'
# is raised when the budget is exhausted); each intermediate redirect is
# yielded when opts[:yield] is :all. A redirected POST is retried as GET.
# A cookie set by the redirecting response is forwarded only when the
# redirect target is the cookie's domain or a subdomain of it.
#
# For any other response, +body+ is rebound to +cooked_body+ on that
# response object, a missing Content-Type is guessed from the extension
# of the request location, and the response is yielded to the block (or
# its raw body is read when no block is given) and returned.
def handle_response(uri, resp, opts, &block) # :yields: resp
  if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
    if resp.key?('location')
      raise 'Too many redirections' if opts[:max_redir] <= 0
      yield resp if opts[:yield] == :all && block_given?
      # some servers actually provide unescaped location, e.g.
      # http://ulysses.soup.io/post/60734021/Image%20curve%20ball
      # redirects to something like
      # http://ulysses.soup.io/post/60734021/Image curve ball?sessid=8457b2a3752085cca3fb1d79b9965446
      # causing the URI parser to (obviously) complain. We cannot just
      # escape blindly, as this would make a mess of already-escaped
      # locations, so we only do it if the URI.parse fails
      loc = resp['location']
      escaped = false
      debug "redirect location: #{loc.inspect}"
      begin
        new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
      rescue
        # Already escaped once: give up and propagate the parse error.
        raise if escaped
        # URI.escape is gone from Ruby 3.0 on; use a parser object when
        # one is available and fall back to URI.escape on old rubies.
        parser = if defined?(URI::RFC2396_PARSER)
                   URI::RFC2396_PARSER
                 elsif defined?(URI::DEFAULT_PARSER)
                   URI::DEFAULT_PARSER
                 end
        loc = parser ? parser.escape(loc) : URI.escape(loc)
        escaped = true
        debug "escaped redirect location: #{loc.inspect}"
        retry
      end
      new_opts = opts.dup
      new_opts[:max_redir] -= 1
      case opts[:method].to_s.downcase.intern
      when :post, :"net::http::post"
        new_opts[:method] = :get
      end
      if resp['set-cookie']
        debug "set cookie request for #{resp['set-cookie']}"
        cookie, cookie_flags = (resp['set-cookie']+'; ').split('; ', 2)
        domain = uri.host
        cookie_flags.scan(/(\S+)=(\S+);/) { |key, val|
          if key.intern == :domain
            domain = val
            break
          end
        }
        debug "cookie domain #{domain} / #{new_loc.host}"
        # Match on whole labels: "ample.com" must not match "example.com".
        cookie_domain = domain.to_s.downcase.sub(/\A\./, '')
        target_host = new_loc.host.to_s.downcase
        if target_host == cookie_domain || target_host.end_with?(".#{cookie_domain}")
          debug "setting cookie"
          # Build a new hash: opts.dup is shallow, and the caller's
          # headers must not be modified.
          new_opts[:headers] = (new_opts[:headers] || {}).merge('Cookie' => cookie)
        else
          debug "cookie is for another domain, ignoring"
        end
      end
      debug "following the redirect to #{new_loc}"
      return get_response(new_loc, new_opts, &block)
    else
      warning ":| redirect w/o location?"
    end
  end
  class << resp
    undef_method :body
    alias :body :cooked_body
  end
  unless resp['content-type']
    debug "No content type, guessing"
    resp['content-type'] =
      case resp['x-rbot-location']
      when /\.xhtml$/i
        'application/xhtml+xml'
      when /\.html?$/i
        'text/html'
      when /\.xml$/i
        'application/xml'
      when /\.(gif|png|jpe?g|jp2|tiff?)$/i
        "image/#{$1.downcase.sub(/^jpg$/,'jpeg').sub(/^tif$/,'tiff')}"
      else
        # NOTE(review): the registered name is 'application/octet-stream';
        # kept as it was in case callers compare against this exact value.
        'application/octetstream'
      end
  end
  if block_given?
    yield(resp)
  else
    # Net::HTTP wants us to read the whole body here
    resp.raw_body
  end
  return resp
end
uri:uri to query (URI object or String)

Simple HEAD request, returns (if possible) response head following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 653
# Simple HEAD request.
#
# uri::     uri to query (URI object or String)
# options:: options for #get_response; :method defaults to :head
#
# Returns the response object produced by #get_response whatever its
# status, or +nil+ when a StandardError was raised (it is logged through
# +error+). Interrupt, SystemExit and other non-standard exceptions are
# left to propagate.
#
# The optional block is handed to #get_response.
def head(uri, options = {}, &block) # :yields: resp
  opts = {:method => :head}.merge(options)
  begin
    return get_response(uri, opts, &block)
  rescue StandardError => e
    error e
  end
  return nil
end
uri:uri to query (URI object or String)
data:body of the POST

Simple POST request, returns (if possible) response following redirs and caching if requested, yielding the response(s) to the optional block. See get_response for details on the supported options

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 673
# Simple POST request.
#
# uri::     uri to query (URI object or String)
# data::    body of the POST
# options:: options for #get_response; by default the request uses
#           :method => :post, :body => data and :cache => false
#
# Returns the response object when the final response is a 200 OK or a
# 201 Created. Any other status, or a StandardError raised while
# performing the request, is logged through +error+ and +nil+ is
# returned. Interrupt, SystemExit and other non-standard exceptions are
# left to propagate.
#
# The optional block is handed to #get_response.
def post(uri, data, options = {}, &block) # :yields: resp
  opts = {:method => :post, :body => data, :cache => false}.merge(options)
  begin
    resp = get_response(uri, opts, &block)
    raise 'http error' unless Net::HTTPOK === resp || Net::HTTPCreated === resp
    return resp
  rescue StandardError => e
    error e
  end
  return nil
end

This method checks if a proxy is required to access uri, by looking at the values of config values +http.proxy_include+ and +http.proxy_exclude+.

Each of these config values, if set, should be a Regexp the server name and IP address should be checked against.

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 318
# Check whether a proxy is required to access +uri+, by looking at the
# config values 'http.proxy_exclude' and 'http.proxy_include'.
#
# uri:: URI object whose host is checked
#
# Both config values are read as lists of regular expression sources.
# They are matched against the host name of +uri+ and against every
# address Resolv returns for it; when resolution fails a warning is
# logged and only the host name is checked.
#
# Returns true when both lists are empty. Otherwise the answer starts as
# true, becomes false when any exclude pattern matches, and goes back to
# true when any include pattern matches (include wins over exclude).
def proxy_required(uri)
  excluded = @bot.config["http.proxy_exclude"]
  included = @bot.config["http.proxy_include"]
  return true if excluded.empty? && included.empty?

  candidates = [uri.host]
  begin
    candidates.concat Resolv.getaddresses(uri.host)
  rescue StandardError
    warning "couldn't resolve host #{uri.host}"
  end

  # All patterns are compiled before matching, so an invalid pattern is
  # reported even when an earlier one would already have matched.
  any_match = lambda do |patterns|
    patterns.map { |src| Regexp.new(src) }.any? do |re|
      candidates.any? { |item| re.match(item) }
    end
  end

  use_proxy = true
  use_proxy = false if !excluded.empty? && any_match.call(excluded)
  use_proxy = true if !included.empty? && any_match.call(included)

  debug "using proxy for uri #{uri}?: #{use_proxy}"
  return use_proxy
end

[Source]

     # File lib/rbot/core/utils/httputil.rb, line 698
# Drop from the cache every entry that has gone unused for longer than
# 'http.expire_time', or that was first used longer ago than
# 'http.max_cache_time'. Both config values are multiplied by 60 before
# being compared with a Time difference (so they are presumably
# expressed in minutes -- confirm against the config definitions).
#
# The cache size is logged before and after the sweep; a StandardError
# raised while sweeping is logged through +error+ and not propagated.
def remove_stale_cache
  debug "Removing stale cache"
  now = Time.new
  idle_limit = @bot.config['http.expire_time'] * 60
  age_limit = @bot.config['http.max_cache_time'] * 60
  debug "#{@cache.size} pages before"
  begin
    @cache.reject! do |_key, entry|
      # Idle for too long? Then the age check is not needed at all.
      next true if now - entry.last_used > idle_limit
      now - entry.first_used > age_limit
    end
  rescue => e
    error "Failed to remove stale cache: #{e.pretty_inspect}"
  end
  debug "#{@cache.size} pages after"
end

[Validate]