comparison: venv/lib/python2.7/site-packages/pip/download.py @ 0:d67268158946 (draft)
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b

author: bcclaywell
date:   Mon, 12 Oct 2015 17:43:33 -0400
comparing -1:000000000000 with 0:d67268158946
from __future__ import absolute_import

import cgi
import email.utils
import hashlib
import getpass
import json
import logging
import mimetypes
import os
import platform
import re
import shutil
import sys
import tempfile

from pip._vendor.six.moves.urllib import parse as urllib_parse
from pip._vendor.six.moves.urllib import request as urllib_request

import pip

from pip.exceptions import InstallationError, HashMismatch
from pip.models import PyPI
from pip.utils import (splitext, rmtree, format_size, display_path,
                       backup_dir, ask_path_exists, unpack_file,
                       call_subprocess)
from pip.utils.filesystem import check_path_owner
from pip.utils.logging import indent_log
from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner
from pip.locations import write_delete_marker_file
from pip.vcs import vcs
from pip._vendor import requests, six
from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pip._vendor.requests.models import Response
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.requests.packages import urllib3
from pip._vendor.cachecontrol import CacheControlAdapter
from pip._vendor.cachecontrol.caches import FileCache
from pip._vendor.lockfile import LockError
from pip._vendor.six.moves import xmlrpc_client


__all__ = ['get_file_content',
           'is_url', 'url_to_path', 'path_to_url',
           'is_archive_file', 'unpack_vcs_link',
           'unpack_file_url', 'is_vcs_url', 'is_file_url',
           'unpack_http_url', 'unpack_url']


logger = logging.getLogger(__name__)


def user_agent():
    """
    Return a string representing the user agent.
    """
    data = {
        "installer": {"name": "pip", "version": pip.__version__},
        "python": platform.python_version(),
        "implementation": {
            "name": platform.python_implementation(),
        },
    }

    if data["implementation"]["name"] == 'CPython':
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'PyPy':
        if sys.pypy_version_info.releaselevel == 'final':
            pypy_version_info = sys.pypy_version_info[:3]
        else:
            pypy_version_info = sys.pypy_version_info
        data["implementation"]["version"] = ".".join(
            [str(x) for x in pypy_version_info]
        )
    elif data["implementation"]["name"] == 'Jython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'IronPython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()

    if sys.platform.startswith("linux"):
        distro = dict(filter(
            lambda x: x[1],
            zip(["name", "version", "id"], platform.linux_distribution()),
        ))
        libc = dict(filter(
            lambda x: x[1],
            zip(["lib", "version"], platform.libc_ver()),
        ))
        if libc:
            distro["libc"] = libc
        if distro:
            data["distro"] = distro

    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
        data["distro"] = {"name": "OS X", "version": platform.mac_ver()[0]}

    if platform.system():
        data.setdefault("system", {})["name"] = platform.system()

    if platform.release():
        data.setdefault("system", {})["release"] = platform.release()

    if platform.machine():
        data["cpu"] = platform.machine()

    return "{data[installer][name]}/{data[installer][version]} {json}".format(
        data=data,
        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
    )

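# Illustrative note (not part of upstream pip): the string returned by
# user_agent() above is the installer name/version followed by a JSON blob of
# the collected fields, roughly of the form (values hypothetical):
#
#   pip/7.0.0 {"cpu":"x86_64","implementation":{"name":"CPython","version":"2.7.10"},...}
#
# Which keys appear depends on the platform the call runs on.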

class MultiDomainBasicAuth(AuthBase):

    def __init__(self, prompting=True):
        self.prompting = prompting
        self.passwords = {}

    def __call__(self, req):
        parsed = urllib_parse.urlparse(req.url)

        # Get the netloc without any embedded credentials
        netloc = parsed.netloc.rsplit("@", 1)[-1]

        # Set the url of the request to the url without any credentials
        req.url = urllib_parse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])

        # Use any stored credentials that we have for this netloc
        username, password = self.passwords.get(netloc, (None, None))

        # Extract credentials embedded in the url if we have none stored
        if username is None:
            username, password = self.parse_credentials(parsed.netloc)

        if username or password:
            # Store the username and password
            self.passwords[netloc] = (username, password)

            # Send the basic auth with this request
            req = HTTPBasicAuth(username or "", password or "")(req)

        # Attach a hook to handle 401 responses
        req.register_hook("response", self.handle_401)

        return req

    def handle_401(self, resp, **kwargs):
        # We only care about 401 responses, anything else we want to just
        # pass through the actual response
        if resp.status_code != 401:
            return resp

        # We are not able to prompt the user, so simply return the response
        if not self.prompting:
            return resp

        parsed = urllib_parse.urlparse(resp.url)

        # Prompt the user for a new username and password
        username = six.moves.input("User for %s: " % parsed.netloc)
        password = getpass.getpass("Password: ")

        # Store the new username and password to use for future requests
        if username or password:
            self.passwords[parsed.netloc] = (username, password)

        # Consume content and release the original connection to allow our new
        # request to reuse the same one.
        resp.content
        resp.raw.release_conn()

        # Add our new username and password to the request
        req = HTTPBasicAuth(username or "", password or "")(resp.request)

        # Send our new request
        new_resp = resp.connection.send(req, **kwargs)
        new_resp.history.append(resp)

        return new_resp

    def parse_credentials(self, netloc):
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                return userinfo.split(":", 1)
            return userinfo, None
        return None, None

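# Illustrative sketch (not part of upstream pip): credentials embedded in an
# index URL are stripped from the request and cached per netloc by the class
# above. Values here are hypothetical:
#
#   auth = MultiDomainBasicAuth()
#   auth.parse_credentials("user:secret@pypi.example.org")
#   # -> ['user', 'secret']; __call__ then stores them under
#   # "pypi.example.org" and re-sends them via HTTPBasicAuth, while
#   # handle_401 prompts interactively if the server still rejects them.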

class LocalFSAdapter(BaseAdapter):

    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
             proxies=None):
        pathname = url_to_path(request.url)

        resp = Response()
        resp.status_code = 200
        resp.url = request.url

        try:
            stats = os.stat(pathname)
        except OSError as exc:
            resp.status_code = 404
            resp.raw = exc
        else:
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
            resp.headers = CaseInsensitiveDict({
                "Content-Type": content_type,
                "Content-Length": stats.st_size,
                "Last-Modified": modified,
            })

            resp.raw = open(pathname, "rb")
            resp.close = resp.raw.close

        return resp

    def close(self):
        pass


class SafeFileCache(FileCache):
    """
    A file based cache which is safe to use even when the target directory may
    not be accessible or writable.
    """

    def __init__(self, *args, **kwargs):
        super(SafeFileCache, self).__init__(*args, **kwargs)

        # Check to ensure that the directory containing our cache directory
        # is owned by the user currently executing pip. If it does not exist
        # we will check the parent directory until we find one that does exist.
        # If it is not owned by the user executing pip then we will disable
        # the cache and log a warning.
        if not check_path_owner(self.directory):
            logger.warning(
                "The directory '%s' or its parent directory is not owned by "
                "the current user and the cache has been disabled. Please "
                "check the permissions and owner of that directory. If "
                "executing pip with sudo, you may want sudo's -H flag.",
                self.directory,
            )

            # Set our directory to None to disable the Cache
            self.directory = None

    def get(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).get(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we just skip caching and process the request as if
            # caching wasn't enabled.
            pass

    def set(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).set(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we just skip caching and process the request as if
            # caching wasn't enabled.
            pass

    def delete(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).delete(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we just skip caching and process the request as if
            # caching wasn't enabled.
            pass


class InsecureHTTPAdapter(HTTPAdapter):

    def cert_verify(self, conn, url, verify, cert):
        conn.cert_reqs = 'CERT_NONE'
        conn.ca_certs = None


class PipSession(requests.Session):

    timeout = None

    def __init__(self, *args, **kwargs):
        retries = kwargs.pop("retries", 0)
        cache = kwargs.pop("cache", None)
        insecure_hosts = kwargs.pop("insecure_hosts", [])

        super(PipSession, self).__init__(*args, **kwargs)

        # Attach our User Agent to the request
        self.headers["User-Agent"] = user_agent()

        # Attach our Authentication handler to the session
        self.auth = MultiDomainBasicAuth()

        # Create our urllib3.Retry instance which will allow us to customize
        # how we handle retries.
        retries = urllib3.Retry(
            # Set the total number of retries that a particular request can
            # have.
            total=retries,

            # A 503 error from PyPI typically means that the Fastly -> Origin
            # connection got interrupted in some way. A 503 error in general
            # is typically considered a transient error so we'll go ahead and
            # retry it.
            status_forcelist=[503],

            # Add a small amount of back off between failed requests in
            # order to prevent hammering the service.
            backoff_factor=0.25,
        )

        # We want to _only_ cache responses on securely fetched origins. We do
        # this because we can't validate the response of an insecurely fetched
        # origin, and we don't want someone to be able to poison the cache and
        # require manual eviction from the cache to fix it.
        if cache:
            secure_adapter = CacheControlAdapter(
                cache=SafeFileCache(cache),
                max_retries=retries,
            )
        else:
            secure_adapter = HTTPAdapter(max_retries=retries)

        # Our Insecure HTTPAdapter disables HTTPS validation. It does not
        # support caching (see above) so we'll use it for all http:// URLs as
        # well as any https:// host that we've marked as ignoring TLS errors
        # for.
        insecure_adapter = InsecureHTTPAdapter(max_retries=retries)

        self.mount("https://", secure_adapter)
        self.mount("http://", insecure_adapter)

        # Enable file:// urls
        self.mount("file://", LocalFSAdapter())

        # We want to use a non-validating adapter for any requests which are
        # deemed insecure.
        for host in insecure_hosts:
            self.mount("https://{0}/".format(host), insecure_adapter)

    def request(self, method, url, *args, **kwargs):
        # Allow setting a default timeout on a session
        kwargs.setdefault("timeout", self.timeout)

        # Dispatch the actual request
        return super(PipSession, self).request(method, url, *args, **kwargs)


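# Illustrative sketch (not part of upstream pip): how callers might construct
# the session defined above (paths and hosts hypothetical):
#
#   session = PipSession(
#       retries=3,                                 # total urllib3.Retry count
#       cache="/tmp/pip-http-cache",               # enables CacheControlAdapter/SafeFileCache
#       insecure_hosts=["pypi.internal.example"],  # https:// hosts exempt from TLS checks
#   )
#   session.timeout = 30                           # default timeout for session.request()
#   resp = session.get("https://pypi.python.org/simple/")
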
def get_file_content(url, comes_from=None, session=None):
    """Gets the content of a file; it may be a filename, file: URL, or
    http: URL. Returns (location, content). Content is unicode."""
    if session is None:
        raise TypeError(
            "get_file_content() missing 1 required keyword argument: 'session'"
        )

    match = _scheme_re.search(url)
    if match:
        scheme = match.group(1).lower()
        if (scheme == 'file' and comes_from and
                comes_from.startswith('http')):
            raise InstallationError(
                'Requirements file %s references URL %s, which is local'
                % (comes_from, url))
        if scheme == 'file':
            path = url.split(':', 1)[1]
            path = path.replace('\\', '/')
            match = _url_slash_drive_re.match(path)
            if match:
                path = match.group(1) + ':' + path.split('|', 1)[1]
            path = urllib_parse.unquote(path)
            if path.startswith('/'):
                path = '/' + path.lstrip('/')
            url = path
        else:
            # FIXME: catch some errors
            resp = session.get(url)
            resp.raise_for_status()

            if six.PY3:
                return resp.url, resp.text
            else:
                return resp.url, resp.content
    try:
        with open(url) as f:
            content = f.read()
    except IOError as exc:
        raise InstallationError(
            'Could not open requirements file: %s' % str(exc)
        )
    return url, content


_scheme_re = re.compile(r'^(http|https|file):', re.I)
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)


def is_url(name):
    """Returns true if the name looks like a URL"""
    if ':' not in name:
        return False
    scheme = name.split(':', 1)[0].lower()
    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes


def url_to_path(url):
    """
    Convert a file: URL to a path.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not %r)" % url)

    _, netloc, path, _, _ = urllib_parse.urlsplit(url)

    # if we have a UNC path, prepend UNC share notation
    if netloc:
        netloc = '\\\\' + netloc

    path = urllib_request.url2pathname(netloc + path)
    return path


def path_to_url(path):
    """
    Convert a path to a file: URL. The path will be made absolute and have
    quoted path parts.
    """
    path = os.path.normpath(os.path.abspath(path))
    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
    return url


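# Illustrative examples (not part of upstream pip); exact results depend on
# the platform's url2pathname/pathname2url:
#
#   path_to_url('/tmp/pip-build/foo-1.0.tar.gz')
#   # -> 'file:///tmp/pip-build/foo-1.0.tar.gz'   (POSIX)
#   url_to_path('file:///tmp/pip-build/foo-1.0.tar.gz')
#   # -> '/tmp/pip-build/foo-1.0.tar.gz'
#   url_to_path('file://server/share/foo.whl')
#   # -> a UNC-style path on Windows, since the netloc is prefixed with '\\\\'
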
def is_archive_file(name):
    """Return True if `name` is considered an archive file."""
    archives = (
        '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.whl'
    )
    ext = splitext(name)[1].lower()
    if ext in archives:
        return True
    return False


def unpack_vcs_link(link, location, only_download=False):
    vcs_backend = _get_used_vcs_backend(link)
    if only_download:
        vcs_backend.export(location)
    else:
        vcs_backend.unpack(location)


def _get_used_vcs_backend(link):
    for backend in vcs.backends:
        if link.scheme in backend.schemes:
            vcs_backend = backend(link.url)
            return vcs_backend


def is_vcs_url(link):
    return bool(_get_used_vcs_backend(link))


def is_file_url(link):
    return link.url.lower().startswith('file:')


def _check_hash(download_hash, link):
    if download_hash.digest_size != hashlib.new(link.hash_name).digest_size:
        logger.critical(
            "Hash digest size of the package %d (%s) doesn't match the "
            "expected hash name %s!",
            download_hash.digest_size, link, link.hash_name,
        )
        raise HashMismatch('Hash name mismatch for package %s' % link)
    if download_hash.hexdigest() != link.hash:
        logger.critical(
            "Hash of the package %s (%s) doesn't match the expected hash %s!",
            link, download_hash.hexdigest(), link.hash,
        )
        raise HashMismatch(
            'Bad %s hash for package %s' % (link.hash_name, link)
        )


def _get_hash_from_file(target_file, link):
    try:
        download_hash = hashlib.new(link.hash_name)
    except (ValueError, TypeError):
        logger.warning(
            "Unsupported hash name %s for package %s", link.hash_name, link,
        )
        return None

    with open(target_file, 'rb') as fp:
        while True:
            chunk = fp.read(4096)
            if not chunk:
                break
            download_hash.update(chunk)
    return download_hash


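# Illustrative sketch (not part of upstream pip): a link whose URL carries a
# fragment such as "#md5=abc123" exposes link.hash_name == 'md5' and
# link.hash == 'abc123'. The two helpers above would then be used roughly as:
#
#   download_hash = _get_hash_from_file('/tmp/foo-1.0.tar.gz', link)
#   if download_hash is not None:
#       _check_hash(download_hash, link)   # raises HashMismatch on a bad digest
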
def _progress_indicator(iterable, *args, **kwargs):
    return iterable


def _download_url(resp, link, content_file):
    download_hash = None
    if link.hash and link.hash_name:
        try:
            download_hash = hashlib.new(link.hash_name)
        except ValueError:
            logger.warning(
                "Unsupported hash name %s for package %s",
                link.hash_name, link,
            )

    try:
        total_length = int(resp.headers['content-length'])
    except (ValueError, KeyError, TypeError):
        total_length = 0

    cached_resp = getattr(resp, "from_cache", False)

    if logger.getEffectiveLevel() > logging.INFO:
        show_progress = False
    elif cached_resp:
        show_progress = False
    elif total_length > (40 * 1000):
        show_progress = True
    elif not total_length:
        show_progress = True
    else:
        show_progress = False

    show_url = link.show_url

    def resp_read(chunk_size):
        try:
            # Special case for urllib3.
            for chunk in resp.raw.stream(
                    chunk_size,
                    # We use decode_content=False here because we do not
                    # want urllib3 to mess with the raw bytes we get
                    # from the server. If we decompress inside of
                    # urllib3 then we cannot verify the checksum
                    # because the checksum will be of the compressed
                    # file. This breakage will only occur if the
                    # server adds a Content-Encoding header, which
                    # depends on how the server was configured:
                    # - Some servers will notice that the file isn't a
                    #   compressible file and will leave the file alone
                    #   and with an empty Content-Encoding
                    # - Some servers will notice that the file is
                    #   already compressed and will leave the file
                    #   alone and will add a Content-Encoding: gzip
                    #   header
                    # - Some servers won't notice anything at all and
                    #   will take a file that's already been compressed
                    #   and compress it again and set the
                    #   Content-Encoding: gzip header
                    #
                    # By setting this not to decode automatically we
                    # hope to eliminate problems with the second case.
                    decode_content=False):
                yield chunk
        except AttributeError:
            # Standard file-like object.
            while True:
                chunk = resp.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    progress_indicator = _progress_indicator

    if link.netloc == PyPI.netloc:
        url = show_url
    else:
        url = link.url_without_fragment

    if show_progress:  # We don't show progress on cached responses
        if total_length:
            logger.info(
                "Downloading %s (%s)", url, format_size(total_length),
            )
            progress_indicator = DownloadProgressBar(
                max=total_length,
            ).iter
        else:
            logger.info("Downloading %s", url)
            progress_indicator = DownloadProgressSpinner().iter
    elif cached_resp:
        logger.info("Using cached %s", url)
    else:
        logger.info("Downloading %s", url)

    logger.debug('Downloading from URL %s', link)

    for chunk in progress_indicator(resp_read(4096), 4096):
        if download_hash is not None:
            download_hash.update(chunk)
        content_file.write(chunk)
    if link.hash and link.hash_name:
        _check_hash(download_hash, link)
    return download_hash


def _copy_file(filename, location, content_type, link):
    copy = True
    download_location = os.path.join(location, link.filename)
    if os.path.exists(download_location):
        response = ask_path_exists(
            'The file %s exists. (i)gnore, (w)ipe, (b)ackup ' %
            display_path(download_location), ('i', 'w', 'b'))
        if response == 'i':
            copy = False
        elif response == 'w':
            logger.warning('Deleting %s', display_path(download_location))
            os.remove(download_location)
        elif response == 'b':
            dest_file = backup_dir(download_location)
            logger.warning(
                'Backing up %s to %s',
                display_path(download_location),
                display_path(dest_file),
            )
            shutil.move(download_location, dest_file)
    if copy:
        shutil.copy(filename, download_location)
        logger.info('Saved %s', display_path(download_location))


def unpack_http_url(link, location, download_dir=None, session=None):
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'"
        )

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir)

    if already_downloaded_path:
        from_path = already_downloaded_path
        content_type = mimetypes.guess_type(from_path)[0]
    else:
        # let's download to a tmp dir
        from_path, content_type = _download_http_url(link, session, temp_dir)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, content_type, link)

    if not already_downloaded_path:
        os.unlink(from_path)
    rmtree(temp_dir)


def unpack_file_url(link, location, download_dir=None):
    """Unpack link into location.
    If download_dir is provided and link points to a file, make a copy
    of the link file inside download_dir."""

    link_path = url_to_path(link.url_without_fragment)

    # If it's a url to a local directory
    if os.path.isdir(link_path):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        if download_dir:
            logger.info('Link is a directory, ignoring download_dir')
        return

    # if link has a hash, let's confirm it matches
    if link.hash:
        link_path_hash = _get_hash_from_file(link_path, link)
        _check_hash(link_path_hash, link)

    # If a download dir is specified, is the file already there and valid?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir)

    if already_downloaded_path:
        from_path = already_downloaded_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, content_type, link)


def _copy_dist_from_dir(link_path, location):
    """Copy distribution files in `link_path` to `location`.

    Invoked when user requests to install a local directory. E.g.:

        pip install .
        pip install ~/dev/git-repos/python-prompt-toolkit

    """

    # Note: This is currently VERY SLOW if you have a lot of data in the
    # directory, because it copies everything with `shutil.copytree`.
    # What it should really do is build an sdist and install that.
    # See https://github.com/pypa/pip/issues/2195

    if os.path.isdir(location):
        rmtree(location)

    # build an sdist
    setup_py = 'setup.py'
    sdist_args = [sys.executable]
    sdist_args.append('-c')
    sdist_args.append(
        "import setuptools, tokenize;__file__=%r;"
        "exec(compile(getattr(tokenize, 'open', open)(__file__).read()"
        ".replace('\\r\\n', '\\n'), __file__, 'exec'))" % setup_py)
    sdist_args.append('sdist')
    sdist_args += ['--dist-dir', location]
    logger.info('Running setup.py sdist for %s', link_path)

    with indent_log():
        call_subprocess(sdist_args, cwd=link_path, show_stdout=False)

    # unpack sdist into `location`
    sdist = os.path.join(location, os.listdir(location)[0])
    logger.info('Unpacking sdist %s into %s', sdist, location)
    unpack_file(sdist, location, content_type=None, link=None)


class PipXmlrpcTransport(xmlrpc_client.Transport):
    """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
    object.
    """
    def __init__(self, index_url, session, use_datetime=False):
        xmlrpc_client.Transport.__init__(self, use_datetime)
        index_parts = urllib_parse.urlparse(index_url)
        self._scheme = index_parts.scheme
        self._session = session

    def request(self, host, handler, request_body, verbose=False):
        parts = (self._scheme, host, handler, None, None, None)
        url = urllib_parse.urlunparse(parts)
        try:
            headers = {'Content-Type': 'text/xml'}
            response = self._session.post(url, data=request_body,
                                          headers=headers, stream=True)
            response.raise_for_status()
            self.verbose = verbose
            return self.parse_response(response.raw)
        except requests.HTTPError as exc:
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code, url,
            )
            raise


def unpack_url(link, location, download_dir=None,
               only_download=False, session=None):
    """Unpack link.
    If link is a VCS link:
      if only_download, export into download_dir and ignore location
      else unpack into location
    for other types of link:
      - unpack into location
      - if download_dir, copy the file into download_dir
      - if only_download, mark location for deletion
    """
    # non-editable vcs urls
    if is_vcs_url(link):
        unpack_vcs_link(link, location, only_download)

    # file urls
    elif is_file_url(link):
        unpack_file_url(link, location, download_dir)
        if only_download:
            write_delete_marker_file(location)

    # http urls
    else:
        if session is None:
            session = PipSession()

        unpack_http_url(
            link,
            location,
            download_dir,
            session,
        )
        if only_download:
            write_delete_marker_file(location)


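# Illustrative sketch (not part of upstream pip): unpack_url() dispatches on
# the link type as documented above. A hypothetical HTTP case might look like
# this (pip.index.Link and the URL below are assumptions, not defined here):
#
#   session = PipSession()
#   unpack_url(
#       Link('https://pypi.example.org/packages/foo-1.0.tar.gz'),
#       location='/tmp/pip-build/foo',
#       download_dir='/tmp/pip-downloads',
#       session=session,
#   )
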
def _download_http_url(link, session, temp_dir):
    """Download link url into temp_dir using provided session"""
    target_url = link.url.split('#', 1)[0]
    try:
        resp = session.get(
            target_url,
            # We use Accept-Encoding: identity here because requests
            # defaults to accepting compressed responses. This breaks in
            # a variety of ways depending on how the server is configured.
            # - Some servers will notice that the file isn't a compressible
            #   file and will leave the file alone and with an empty
            #   Content-Encoding
            # - Some servers will notice that the file is already
            #   compressed and will leave the file alone and will add a
            #   Content-Encoding: gzip header
            # - Some servers won't notice anything at all and will take
            #   a file that's already been compressed and compress it again
            #   and set the Content-Encoding: gzip header
            # By setting this to request only the identity encoding we're
            # hoping to eliminate the third case. Hopefully there does not
            # exist a server which when given a file will notice it is
            # already compressed and that you're not asking for a
            # compressed file and will then decompress it before sending
            # because if that's the case I don't think it'll ever be
            # possible to make this work.
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        resp.raise_for_status()
    except requests.HTTPError as exc:
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link,
        )
        raise

    content_type = resp.headers.get('content-type', '')
    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        type, params = cgi.parse_header(content_disposition)
        # We use ``or`` here because we don't want to use an "empty" value
        # from the filename param.
        filename = params.get('filename') or filename
    ext = splitext(filename)[1]
    if not ext:
        ext = mimetypes.guess_extension(content_type)
        if ext:
            filename += ext
    if not ext and link.url != resp.url:
        ext = os.path.splitext(resp.url)[1]
        if ext:
            filename += ext
    file_path = os.path.join(temp_dir, filename)
    with open(file_path, 'wb') as content_file:
        _download_url(resp, link, content_file)
    return file_path, content_type


def _check_download_dir(link, download_dir):
    """Check download_dir for a previously downloaded file with the correct hash.
    If a correct file is found, return its path; otherwise return None.
    """
    download_path = os.path.join(download_dir, link.filename)
    if os.path.exists(download_path):
        # If already downloaded, does its hash match?
        logger.info('File was already downloaded %s', download_path)
        if link.hash:
            download_hash = _get_hash_from_file(download_path, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warning(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.',
                    download_path
                )
                os.unlink(download_path)
                return None
        return download_path
    return None