Package cherrypy :: Package lib :: Module encoding
[hide private]
[frames | no frames]

Source Code for Module cherrypy.lib.encoding

  1  import struct 
  2  import time 
  3   
  4  import cherrypy 
  5  from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr 
  6  from cherrypy.lib import file_generator 
  7  from cherrypy.lib import is_closable_iterator 
  8  from cherrypy.lib import set_vary_header 
  9   
 10   
def decode(encoding=None, default_encoding='utf-8'):
    """Configure which charsets are tried when decoding the request entity.

    Each argument may be a single charset name or a list of names.

    encoding
        When not None, the request body's ``attempt_charsets`` is replaced
        by the given value(s), so only those charsets are attempted (even
        if a different charset is named in the Content-Type request
        header).

    default_encoding
        Consulted only when 'encoding' is None. When truthy, the given
        value(s) are appended to the existing ``attempt_charsets``.

    """
    def _listify(charsets):
        # An existing list is passed through untouched; anything else is
        # wrapped in a one-element list.
        if isinstance(charsets, list):
            return charsets
        return [charsets]

    entity = cherrypy.request.body
    if encoding is not None:
        entity.attempt_charsets = _listify(encoding)
        return
    if default_encoding:
        entity.attempt_charsets = (
            entity.attempt_charsets + _listify(default_encoding))
36
class UTF8StreamEncoder:
    """Iterator wrapper that emits unicode chunks as UTF-8 encoded bytes.

    Chunks that are not ``unicodestr`` instances are passed through
    unchanged. Attribute lookups that fail on the wrapper itself are
    forwarded to the wrapped iterator, except for names starting with a
    double underscore.
    """

    def __init__(self, iterator):
        self._iterator = iterator

    def __iter__(self):
        return self

    def next(self):
        # Python 2 spelling of the iterator protocol; defers to __next__.
        return self.__next__()

    def __next__(self):
        chunk = next(self._iterator)
        if not isinstance(chunk, unicodestr):
            return chunk
        return chunk.encode('utf-8')

    def close(self):
        """Close the wrapped iterator when it reports itself closable."""
        source = self._iterator
        if not is_closable_iterator(source):
            return
        source.close()

    def __getattr__(self, attr):
        # Only reached when normal lookup on the wrapper has failed.
        if not attr.startswith('__'):
            return getattr(self._iterator, attr)
        raise AttributeError(self, attr)
61 62
class ResponseEncoder:
    """Callable that takes over ``request.handler`` and encodes its output.

    On construction (when the request already has a handler) the instance
    stores the current handler as ``oldhandler`` and installs itself in its
    place. When called, it runs the old handler, normalises the returned
    body and, depending on the response Content-Type, encodes unicode
    chunks using a charset negotiated from the Accept-Charset header.
    """

    # Charset tried when the client states no preference or accepts "*".
    default_encoding = 'utf-8'
    # Template for the HTTP 500 message raised when the default fails.
    failmsg = "Response body could not be encoded with %r."
    # When not None, the only charset that will be attempted.
    encoding = None
    # Error-handling scheme passed to ``unicode.encode``.
    errors = 'strict'
    # When True, only "text/*" Content-Types are encoded.
    text_only = True
    # When False, __call__ performs no charset negotiation at all.
    add_charset = True
    # When True, each decision is logged under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        """Apply keyword overrides and hook into the current request.

        Every keyword argument is set as an instance attribute, shadowing
        the class-level defaults above.
        """
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        Use a generator wrapper, and just pray it works as the stream is
        being written out.

        Returns False if 'encoding' was already attempted, True otherwise.
        Encoding errors surface only later, while the wrapped body is being
        iterated.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns True and replaces ``self.body`` with a list of encoded
        chunks on success. Returns False if 'encoding' was already
        attempted, is unknown, or cannot represent the body; in that case
        ``self.body`` still holds every original chunk so another charset
        can be tried.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        # Materialise one-shot iterators (e.g. a generator returned by the
        # handler) before encoding. Iterating them directly would leave
        # them partly or fully consumed after a failed attempt, and the
        # next fallback charset would then see a truncated body.
        if not isinstance(self.body, list):
            self.body = list(self.body)

        encoded = []
        for chunk in self.body:
            if isinstance(chunk, unicodestr):
                try:
                    chunk = chunk.encode(encoding, self.errors)
                except (LookupError, UnicodeError):
                    return False
            encoded.append(chunk)
        self.body = encoded
        return True

    def find_acceptable_charset(self):
        """Encode the body with the first workable charset and return it.

        Candidates come from ``self.encoding`` when it is set, otherwise
        from the request's Accept-Charset header (falling back to
        ``default_encoding`` and ISO-8859-1 as described inline).

        Raises cherrypy.HTTPError(500) when the client sent no
        Accept-Charset and the default charset fails, and
        cherrypy.HTTPError(406) when no candidate succeeded.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' %
                         response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' %
                             encoding, 'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' %
                                 encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg %
                                             self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        _charsets = ", ".join(sorted(self.attempted_charsets))
        msg += " We tried these charsets: %s." % (_charsets,)
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the replaced handler and return its (possibly encoded) body.

        The handler's return value is normalised to an iterable of chunks.
        Charset negotiation runs only when the response has a Content-Type
        header, ``add_charset`` is true, and (if ``text_only``) the type
        starts with "text/"; the chosen charset is then written into the
        Content-Type header.
        """
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h)
                         for h in ct], 'TOOLS.ENCODE')
        if ct and self.add_charset:
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log(
                            'Content-Type %s starts with "text/"' % ct,
                            'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s '
                                     'does not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only',
                                 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.debug:
                    cherrypy.log('Setting Content-Type %s' % ct,
                                 'TOOLS.ENCODE')
                response.headers["Content-Type"] = str(ct)

        return self.body
267 268 # GZIP 269 270
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    This is a generator. 'body' is an iterable of byte chunks and
    'compress_level' is passed to ``zlib.compressobj``. The yielded pieces,
    concatenated, form one gzip member: a 10-byte header, the raw deflate
    stream, then the CRC32 and the uncompressed size of the input (both
    truncated to 32 bits, little-endian). Individual yielded chunks may be
    empty, because ``zobj.compress`` returns whatever zlib has ready.
    """
    import zlib

    # Truncates a value to the unsigned 32-bit range used by gzip fields.
    uint32_mask = 0xFFFFFFFF

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield b'\x1f\x8b'       # ID1 and ID2: gzip marker
    yield b'\x08'           # CM: compression method
    yield b'\x00'           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & uint32_mask)
    yield b'\x02'           # XFL: max compression, slowest algo
    yield b'\xff'           # OS: unknown

    crc = zlib.crc32(b"")
    size = 0
    # Negative wbits selects a raw deflate stream (no zlib header/trailer),
    # since the gzip framing is written by hand above and below.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", crc & uint32_mask)
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & uint32_mask)
299 300
def decompress(body):
    """Return the decompressed contents of the gzip-format bytes 'body'.

    Any error raised by ``gzip.GzipFile.read`` for malformed input is
    propagated to the caller; the GzipFile is closed either way.
    """
    import gzip

    zfile = gzip.GzipFile(mode='rb', fileobj=BytesIO(body))
    try:
        return zfile.read()
    finally:
        # Close even when read() raises on malformed data.
        zfile.close()
311 312
def _mime_type_allowed(ct, mime_types):
    """Return True if content type 'ct' matches an entry in 'mime_types'.

    An entry matches when it equals 'ct' exactly, when it has the form
    ``type/*`` with the same type, or when it has the form ``type/*+suffix``
    and 'ct' has the same type and the same text after its last '+'.
    """
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # Split at most once so that a malformed value containing extra
    # separators cannot make the tuple unpacking raise ValueError.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            ct_right = ct_sub_type.rsplit('+', 1)[1]
            left, right = sub_type.rsplit('+', 1)
            if left == '*' and ct_right == right:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'],
         debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    When the Accept-Encoding header is present but none of its entries is
    'identity', 'gzip' or 'x-gzip', the response is set to a 406 error.

    The 'mime_types' default is a shared list object; it is only read here
    and must not be mutated.

    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Drop any parameters (";charset=...") and surrounding whitespace so the
    # bare media type can be compared against mime_types.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # If the list of provided mime-types contains tokens
            # such as 'text/*' or 'application/*+xml',
            # we go through them and find the most appropriate one
            # based on the given content-type.
            # The pattern matching is only caring about the most
            # common cases, as stated above, and doesn't support
            # for extra parameters.
            if not _mime_type_allowed(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='TOOLS.GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()
422