venv/lib/python2.7/site-packages/requests/utils.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400

# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.

"""

import cgi
import codecs
import collections
import io
import os
import platform
import re
import sys
import socket
import struct
import warnings

from . import __version__
from . import certs
from .compat import parse_http_list as _parse_list_header
from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
                     builtin_str, getproxies, proxy_bypass, urlunparse,
                     basestring)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import InvalidURL

_hush_pyflakes = (RequestsCookieJar,)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()


def dict_to_sequence(d):
    """Returns the items of a mapping, or the object itself unchanged if
    it has no items() method."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    """Returns the best available length for *o*: len(), a .len
    attribute, the file size via fileno()/fstat, or len(getvalue())."""
    if hasattr(o, '__len__'):
        return len(o)

    if hasattr(o, 'len'):
        return o.len

    if hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            return os.fstat(fileno).st_size

    if hasattr(o, 'getvalue'):
        # e.g. BytesIO, cStringIO.StringIO
        return len(o.getvalue())
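
# A minimal usage sketch for super_len (illustrative; not part of the
# original module):
#
#     >>> import io
#     >>> super_len(io.BytesIO(b'hello'))
#     5
#     >>> super_len([1, 2, 3])
#     3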


def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests auth tuple (login, password) for a given url
    from the user's netrc file, if one can be found."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{0}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See http://bugs.python.org/issue20164 &
                # https://github.com/kennethreitz/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc
        host = ri.netloc.split(':')[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading
            # the file, we'll just skip netrc auth unless explicitly asked
            # to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass
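
# An illustrative sketch of how get_netrc_auth resolves credentials
# (hypothetical ~/.netrc contents; not from the original source):
#
#     # ~/.netrc
#     # machine example.com login alice password s3cret
#
#     >>> get_netrc_auth('http://example.com/resource')
#     ('alice', 's3cret')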


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)

def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: need more than 1 value to unpack
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples.
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, collections.Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It works like :func:`parse_set_header`, except that items may
    appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    :param value: the header value to unquote.
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
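
# A minimal sketch of unquote_header_value's behaviour (illustrative;
# not part of the original module):
#
#     >>> unquote_header_value('"token \\"quoted\\""')
#     'token "quoted"'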


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """

    cj2 = cookiejar_from_dict(cookie_dict)
    cj.update(cj2)
    return cj


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'
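
# A minimal sketch of get_encoding_from_headers (illustrative; not part
# of the original module):
#
#     >>> get_encoding_from_headers({'content-type': 'text/html; charset=utf-8'})
#     'utf-8'
#     >>> get_encoding_from_headers({'content-type': 'text/plain'})
#     'ISO-8859-1'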


def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length
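
# A minimal sketch of iter_slices (illustrative; not part of the
# original module):
#
#     >>> list(iter_slices('abcdef', 4))
#     ['abcd', 'ef']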


def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. the charset from the content-type header
    2. falling back to decoding with undecodable characters replaced

    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
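
# A minimal sketch of the unquote/quote cycle (illustrative; not part
# of the original module): '%62' is an unreserved 'b', so it is
# unquoted, while the space is an illegal character and gets quoted.
#
#     >>> requote_uri('http://example.com/a%62c d')
#     'http://example.com/abc%20d'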


def address_in_network(ip, net):
    """
    This function allows you to check if an IP address belongs to a
    network subnet.
    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """
    Converts mask from /xx format to xxx.xxx.xxx.xxx
    Example: if mask is 24 function returns 255.255.255.0
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))
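
# A minimal sketch of the two helpers above (illustrative; not part of
# the original module):
#
#     >>> dotted_netmask(24)
#     '255.255.255.0'
#     >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#     True
#     >>> address_in_network('192.168.1.1', '192.168.100.0/24')
#     False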


def is_ipv4_address(string_ip):
    """Returns True if string_ip parses as an IPv4 address."""
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """Very simple check of the cidr format in no_proxy variable"""
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True
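
# A minimal sketch of is_valid_cidr (illustrative; not part of the
# original module):
#
#     >>> is_valid_cidr('192.168.1.0/24')
#     True
#     >>> is_valid_cidr('192.168.1.0')
#     False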


def should_bypass_proxies(url):
    """
    Returns whether we should bypass proxies or not.
    """
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy = get_proxy('no_proxy')
    netloc = urlparse(url).netloc

    if no_proxy:
        # We need to check whether we match the end of the netloc, both
        # with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        ip = netloc.split(':')[0]
        if is_ipv4_address(ip):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(ip, proxy_ip):
                        return True
        else:
            for host in no_proxy:
                if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                    # The URL does match something in no_proxy, so we don't
                    # want to apply the proxies on this URL.
                    return True

    # If the system proxy settings indicate that this URL should be bypassed,
    # don't proxy.
    # The proxy_bypass function is incredibly buggy on OS X in early versions
    # of Python 2.6, so allow this call to fail. Only catch the specific
    # exceptions we've seen, though: this call failing in other ways can reveal
    # legitimate problems.
    try:
        bypass = proxy_bypass(netloc)
    except (TypeError, socket.gaierror):
        bypass = False

    if bypass:
        return True

    return False
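
# A minimal sketch of no_proxy matching (illustrative; not part of the
# original module):
#
#     >>> import os
#     >>> os.environ['no_proxy'] = 'localhost,127.0.0.1'
#     >>> should_bypass_proxies('http://localhost:5000/status')
#     True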


def get_environ_proxies(url):
    """Return a dict of environment proxies."""
    if should_bypass_proxies(url):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The URL of the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    proxy = proxies.get(urlparts.scheme + '://' + urlparts.hostname)
    if proxy is None:
        proxy = proxies.get(urlparts.scheme)
    return proxy
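
# A minimal sketch of proxy selection, with host-specific entries
# winning over scheme-wide ones (illustrative; not part of the
# original module):
#
#     >>> proxies = {'http': 'http://proxy.internal:3128',
#     ...            'http://example.com': 'http://proxy.special:3128'}
#     >>> select_proxy('http://example.com/path', proxies)
#     'http://proxy.special:3128'
#     >>> select_proxy('http://other.org/', proxies)
#     'http://proxy.internal:3128'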


def default_user_agent(name="python-requests"):
    """Return a string representing the default user agent."""
    return '%s/%s' % (name, __version__)


def default_headers():
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    """

    links = []

    replace_chars = " '\""

    for val in re.split(", *<", value):
        try:
            url, params = val.split(";", 1)
        except ValueError:
            url, params = val, ''

        link = {}

        link["url"] = url.strip("<> '\"")

        for param in params.split(";"):
            try:
                key, value = param.split("=")
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links
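
# A minimal sketch of parse_header_links (illustrative; not part of the
# original module); the result is a list with one dict per link:
#
#     >>> parse_header_links('<http://example.com/page2>; rel="next"')
#     [{'url': 'http://example.com/page2', 'rel': 'next'}]
#
# (dict key order may differ across Python versions)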


# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
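
# A minimal sketch of the null-counting heuristic (illustrative; not
# part of the original module): ASCII JSON in UTF-16-LE has nulls in
# the 2nd and 4th byte positions.
#
#     >>> guess_json_utf(u'{"k": 1}'.encode('utf-16-le'))
#     'utf-16-le'
#     >>> guess_json_utf(b'{"k": 1}')
#     'utf-8'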


def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument."""
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))
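
# A minimal sketch of prepend_scheme_if_needed (illustrative; not part
# of the original module):
#
#     >>> prepend_scheme_if_needed('example.com/pub', 'http')
#     'http://example.com/pub'
#     >>> prepend_scheme_if_needed('https://example.com/pub', 'http')
#     'https://example.com/pub'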


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple
    of username, password."""
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth
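
# A minimal sketch of get_auth_from_url, including percent-decoding
# (illustrative; not part of the original module):
#
#     >>> get_auth_from_url('http://user:p%40ss@example.com/')
#     ('user', 'p@ss')
#     >>> get_auth_from_url('http://example.com/')
#     ('', '')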


def to_native_string(string, encoding='ascii'):
    """
    Given a string object, regardless of type, returns a representation of that
    string in the native string type, encoding and decoding where necessary.
    This assumes ASCII unless told otherwise.
    """
    out = None

    if isinstance(string, builtin_str):
        out = string
    else:
        if is_py2:
            out = string.encode(encoding)
        else:
            out = string.decode(encoding)

    return out


def urldefragauth(url):
    """
    Given a url, remove the fragment and the authentication part.
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))
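
# A minimal sketch of urldefragauth (illustrative; not part of the
# original module):
#
#     >>> urldefragauth('http://user:pass@example.com/path#frag')
#     'http://example.com/path'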