Class | ::HTTPResponse |
In: |
lib/rbot/core/utils/httputil.rb
|
Parent: | Object |
body | -> | raw_body |
no_cache | [RW] |
# File lib/rbot/core/utils/httputil.rb, line 32
#
# Collect the candidate character sets for a response body.
#
# str:: text to scan for an XML or HTML charset declaration
#       (defaults to raw_body)
#
# Returns nil when the Content-Type (assumed 'text/html' if the header is
# missing) is neither text/* nor some flavour of (X)HTML/XML. Otherwise
# returns an Array of unique charset names in the order they were found:
# the 'latin1' fallback, the charset parameter of the Content-Type header,
# then the charset declared inside the document (XML declaration,
# <meta http-equiv="Content-Type" ...> or HTML5 <meta charset=...>).
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['latin1'] # should be in config

  if ctype.match(/charset=["']?([^\s"']+)["']?/i)
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  # Scan a binary view of the text where String#b exists: the body may be
  # tagged with an encoding it does not actually conform to, and matching a
  # regexp against such a string may raise ArgumentError.
  scan = str.respond_to?(:b) ? str.b : str

  case scan
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # case-insensitive like every other pattern here (CHARSET=... is legal)
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  when /<meta\s+charset\s*=\s*["']?([^\s"'>\/;]+)/i
    # HTML5 short form: <meta charset="utf-8">
    charsets << $1
    debug "html charset #{charsets.last} added from meta"
  end
  return charsets.uniq
end
# File lib/rbot/core/utils/httputil.rb, line 57
#
# Convert a body to UTF-8 using the charsets suggested by body_charset.
#
# str:: the (already decompressed) body text
#
# The candidate charsets are tried last-to-first. For each one the
# conversion is attempted on the whole string and then with 1 to 5 trailing
# bytes dropped (presumably to get past a multibyte character cut in half
# by a partial read). The first conversion that succeeds is returned.
#
# Returns str itself when it is nil or empty, when body_charset returns nil,
# or when every attempt fails.
#
# NOTE(review): Iconv is not part of the standard library on Ruby >= 2.0;
# there this needs the iconv gem. Without it every attempt raises NameError,
# which the bare rescue below swallows, so the body comes back unconverted.
def body_to_utf(str)
  # Nothing to convert. Without this guard a nil body would go through
  # every charset/offset combination, failing each time inside the rescue.
  return str if str.nil? || str.empty?

  charsets = self.body_charset(str) or return str

  charsets.reverse_each do |charset|
    # XXX: this one is really ugly, but i don't know how to make it better
    #  -jsn

    0.upto(5) do |off|
      begin
        debug "trying #{charset} / offset #{off}"
        return Iconv.iconv('utf-8//ignore',
                           charset,
                           str.slice(0 .. (-1 - off))).first
      rescue
        # deliberate best-effort: fall through to the next offset/charset
        debug "conversion failed for #{charset} / offset #{off}"
      end
    end
  end
  return str
end
# File lib/rbot/core/utils/httputil.rb, line 126
#
# Return the response body in "cooked" form: raw_body passed through
# decompress_body (undoing the Content-Encoding) and then through
# body_to_utf (charset conversion).
def cooked_body
  decoded = decompress_body(raw_body)
  body_to_utf(decoded)
end
# File lib/rbot/core/utils/httputil.rb, line 78
#
# Undo the Content-Encoding that was applied to a response body.
#
# str:: the raw body, possibly only a prefix of it (partial read)
#
# Returns the decoded body. For gzip, a stream that cannot be read
# completely yields whatever could be decompressed before the error.
#
# Raises Zlib::Error when a deflate body can be inflated neither as a raw
# stream nor as a zlib-wrapped one, and RuntimeError for a Content-Encoding
# this method does not know about.
#
# Side effect: when the server put a charset into Content-Encoding, that
# charset is appended to the Content-Type header (once).
def decompress_body(str)
  encoding = self['content-encoding']
  case encoding
  when nil, /\Aidentity\z/i
    # no header, or the explicit "no transformation" token
    return str
  when /gzip/ # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # If we can't unpack the whole stream (e.g. because we're doing a
      # partial read), salvage as much as possible
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      # String.new (unlike a "" literal) is binary on Ruby 1.9+, so
      # appending an Integer adds that single byte instead of a multi-byte
      # code point for values >= 128.
      ret = String.new
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          ret << byte
        }
      rescue
        # expected: the reader raises once it runs out of input; keep what
        # was collected so far
      end
      return ret
    end
  when 'deflate'
    debug "inflating body"
    # Servers disagree on what "deflate" means. Try a raw stream first
    # (-MAX_WBITS stops zlib from looking for a zlib header), then fall
    # back to a zlib-wrapped stream.
    first_error = nil
    [-Zlib::MAX_WBITS, Zlib::MAX_WBITS].each do |wbits|
      inflater = Zlib::Inflate.new(wbits)
      begin
        return inflater.inflate(str)
      rescue Zlib::Error => e
        debug "inflating with window bits #{wbits} failed (#{e})"
        first_error ||= e
      ensure
        # release the zlib stream right away instead of waiting for GC
        inflater.close
      end
    end
    raise first_error
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    old_hsh = self.to_hash
    ctype = self['content-type'] || 'text/html'
    # Only touch the header when it has no charset yet: this method runs
    # once per chunk during partial reads, and body_charset only looks at
    # the first charset= parameter anyway.
    unless ctype =~ /charset=/i
      self['content-type'] = ctype + "; charset=" + encoding.downcase
      warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    end
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{encoding}"
  end
end
Read chunks from the body until we have at least size bytes, yielding the partial text at each chunk. Return the partial body.
# File lib/rbot/core/utils/httputil.rb, line 132
#
# Read chunks from the body until at least +size+ have been collected,
# yielding the cooked (decompressed and charset-converted) text gathered so
# far after every chunk. Returns the cooked partial body.
#
# size:: stop once the collected raw data is at least this long; nil or a
#        value <= 0 reads the whole body
#
# If @read is set, the complete body is used (and yielded once) instead of
# reading chunks; otherwise no_cache is switched on before reading.
def partial_body(size=0, &block)
  # decompress first, then convert the charset
  cook = lambda { |raw| body_to_utf(decompress_body(raw)) }

  buffer = String.new

  if @read
    debug "using body() as partial"
    buffer = body
    yield cook.call(buffer) if block_given?
  else
    debug "disabling cache"
    self.no_cache = true
    read_body do |chunk|
      buffer << chunk
      yield cook.call(buffer) if block_given?
      enough = size && size > 0 && buffer.length >= size
      break if enough
    end
  end

  cook.call(buffer)
end