Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/site-packages/pip/index.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 """Routines related to PyPI, indexes""" | |
2 from __future__ import absolute_import | |
3 | |
4 import logging | |
5 import cgi | |
6 import sys | |
7 import os | |
8 import re | |
9 import mimetypes | |
10 import posixpath | |
11 import warnings | |
12 | |
13 from pip._vendor.six.moves.urllib import parse as urllib_parse | |
14 from pip._vendor.six.moves.urllib import request as urllib_request | |
15 | |
16 from pip.compat import ipaddress | |
17 from pip.utils import ( | |
18 Inf, cached_property, normalize_name, splitext, normalize_path) | |
19 from pip.utils.deprecation import RemovedInPip7Warning, RemovedInPip8Warning | |
20 from pip.utils.logging import indent_log | |
21 from pip.exceptions import ( | |
22 DistributionNotFound, BestVersionAlreadyInstalled, InvalidWheelFilename, | |
23 UnsupportedWheel, | |
24 ) | |
25 from pip.download import url_to_path, path_to_url | |
26 from pip.models import PyPI | |
27 from pip.wheel import Wheel, wheel_ext | |
28 from pip.pep425tags import supported_tags, supported_tags_noarch, get_platform | |
29 from pip.req.req_requirement import InstallationCandidate | |
30 from pip._vendor import html5lib, requests, pkg_resources, six | |
31 from pip._vendor.packaging.version import parse as parse_version | |
32 from pip._vendor.requests.exceptions import SSLError | |
33 | |
34 | |
35 __all__ = ['PackageFinder'] | |
36 | |
37 | |
38 # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC) | |
39 SECURE_ORIGINS = [ | |
40 # protocol, hostname, port | |
41 ("https", "*", "*"), | |
42 ("*", "localhost", "*"), | |
43 ("*", "127.0.0.0/8", "*"), | |
44 ("*", "::1/128", "*"), | |
45 ("file", "*", None), | |
46 ] | |
47 | |
48 | |
49 logger = logging.getLogger(__name__) | |
50 | |
51 | |
class PackageFinder(object):
    """This finds packages.

    This is meant to match easy_install's technique for looking for
    packages, by reading pages and looking for appropriate links.

    Candidate links come from three sources: the configured index URLs,
    the ``--find-links`` locations, and (deprecated) dependency links.
    """
58 | |
    def __init__(self, find_links, index_urls,
                 use_wheel=True, allow_external=(), allow_unverified=(),
                 allow_all_external=False, allow_all_prereleases=False,
                 trusted_hosts=None, process_dependency_links=False,
                 session=None):
        """Create a PackageFinder.

        ``session`` is required (a requests-style session used for all
        network access); the remaining arguments mirror the corresponding
        pip command-line options.

        Raises TypeError if ``session`` is not supplied.
        """
        if session is None:
            raise TypeError(
                "PackageFinder() missing 1 required keyword argument: "
                "'session'"
            )

        # Build find_links. If an argument starts with ~, it may be
        # a local file relative to a home directory. So try normalizing
        # it and if it exists, use the normalized version.
        # This is deliberately conservative - it might be fine just to
        # blindly normalize anything starting with a ~...
        self.find_links = []
        for link in find_links:
            if link.startswith('~'):
                new_link = normalize_path(link)
                if os.path.exists(new_link):
                    link = new_link
            self.find_links.append(link)

        self.index_urls = index_urls
        # Populated later via add_dependency_links().
        self.dependency_links = []

        # These are boring links that have already been logged somehow:
        self.logged_links = set()

        self.use_wheel = use_wheel

        # Do we allow (safe and verifiable) externally hosted files?
        self.allow_external = set(normalize_name(n) for n in allow_external)

        # Which names are allowed to install insecure and unverifiable files?
        self.allow_unverified = set(
            normalize_name(n) for n in allow_unverified
        )

        # Anything that is allowed unverified is also allowed external
        self.allow_external |= self.allow_unverified

        # Do we allow all (safe and verifiable) externally hosted files?
        self.allow_all_external = allow_all_external

        # Domains that we won't emit warnings for when not using HTTPS
        self.secure_origins = [
            ("*", host, "*")
            for host in (trusted_hosts if trusted_hosts else [])
        ]

        # Stores if we ignored any external links so that we can instruct
        # end users how to install them if no distributions are available
        self.need_warn_external = False

        # Stores if we ignored any unsafe links so that we can instruct
        # end users how to install them if no distributions are available
        self.need_warn_unverified = False

        # Do we want to allow _all_ pre-releases?
        self.allow_all_prereleases = allow_all_prereleases

        # Do we process dependency links?
        self.process_dependency_links = process_dependency_links

        # The Session we'll use to make requests
        self.session = session
127 | |
128 def add_dependency_links(self, links): | |
129 # # FIXME: this shouldn't be global list this, it should only | |
130 # # apply to requirements of the package that specifies the | |
131 # # dependency_links value | |
132 # # FIXME: also, we should track comes_from (i.e., use Link) | |
133 if self.process_dependency_links: | |
134 warnings.warn( | |
135 "Dependency Links processing has been deprecated and will be " | |
136 "removed in a future release.", | |
137 RemovedInPip7Warning, | |
138 ) | |
139 self.dependency_links.extend(links) | |
140 | |
    def _sort_locations(self, locations):
        """
        Sort locations into "files" (archives) and "urls", and return
        a pair of lists (files,urls)
        """
        files = []
        urls = []

        # puts the url for the given file path into the appropriate list
        def sort_path(path):
            url = path_to_url(path)
            # Anything the mimetypes registry thinks is HTML is treated as
            # an index page, everything else as a candidate archive.
            if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
                urls.append(url)
            else:
                files.append(url)

        for url in locations:

            is_local_path = os.path.exists(url)
            is_file_url = url.startswith('file:')
            is_find_link = url in self.find_links

            if is_local_path or is_file_url:
                if is_local_path:
                    path = url
                else:
                    path = url_to_path(url)
                # A --find-links directory is expanded to its contents and
                # each entry is classified individually.
                if is_find_link and os.path.isdir(path):
                    path = os.path.realpath(path)
                    for item in os.listdir(path):
                        sort_path(os.path.join(path, item))
                elif is_file_url and os.path.isdir(path):
                    # A file: URL to a directory acts like an index page.
                    urls.append(url)
                elif os.path.isfile(path):
                    sort_path(path)
            else:
                # Non-local locations are assumed to be index URLs.
                urls.append(url)

        return files, urls
180 | |
    def _candidate_sort_key(self, candidate):
        """
        Function used to generate link sort key for link tuples.
        The greater the return value, the more preferred it is.
        If not finding wheels, then sorted by version only.
        If finding wheels, then the sort order is by version, then:
          1. existing installs
          2. wheels ordered via Wheel.support_index_min()
          3. source archives
        Note: it was considered to embed this logic into the Link
              comparison operators, but then different sdist links
              with the same version, would have to be considered equal
        """
        if self.use_wheel:
            support_num = len(supported_tags)
            if candidate.location == INSTALLED_VERSION:
                # An already-installed distribution beats any download.
                pri = 1
            elif candidate.location.is_wheel:
                # can raise InvalidWheelFilename
                wheel = Wheel(candidate.location.filename)
                if not wheel.supported():
                    raise UnsupportedWheel(
                        "%s is not a supported wheel for this platform. It "
                        "can't be sorted." % wheel.filename
                    )
                # A smaller support index means a more specific tag match,
                # so negate it to make "better" compare as larger.
                pri = -(wheel.support_index_min())
            else:  # sdist
                # Sdists rank below every supported wheel.
                pri = -(support_num)
            return (candidate.version, pri)
        else:
            return candidate.version
212 | |
213 def _sort_versions(self, applicable_versions): | |
214 """ | |
215 Bring the latest version (and wheels) to the front, but maintain the | |
216 existing ordering as secondary. See the docstring for `_link_sort_key` | |
217 for details. This function is isolated for easier unit testing. | |
218 """ | |
219 return sorted( | |
220 applicable_versions, | |
221 key=self._candidate_sort_key, | |
222 reverse=True | |
223 ) | |
224 | |
    def _validate_secure_origin(self, logger, location):
        """Warn when *location* is not served from a secure origin.

        Secure origins are HTTPS, loopback addresses, file: URLs, and any
        host the user passed via --trusted-host (self.secure_origins).
        Emits a logger warning plus a deprecation warning; does not block.
        """
        # Determine if this url used a secure transport mechanism
        parsed = urllib_parse.urlparse(str(location))
        origin = (parsed.scheme, parsed.hostname, parsed.port)

        # Determine if our origin is a secure origin by looking through our
        # hardcoded list of secure origins, as well as any additional ones
        # configured on this PackageFinder instance.
        for secure_origin in (SECURE_ORIGINS + self.secure_origins):
            # Check to see if the protocol matches
            if origin[0] != secure_origin[0] and secure_origin[0] != "*":
                continue

            try:
                # We need to do this decode dance to ensure that we have a
                # unicode object, even on Python 2.x.
                addr = ipaddress.ip_address(
                    origin[1]
                    if (
                        isinstance(origin[1], six.text_type) or
                        origin[1] is None
                    )
                    else origin[1].decode("utf8")
                )
                network = ipaddress.ip_network(
                    secure_origin[1]
                    if isinstance(secure_origin[1], six.text_type)
                    else secure_origin[1].decode("utf8")
                )
            except ValueError:
                # We don't have both a valid address or a valid network, so
                # we'll check this origin against hostnames.
                if origin[1] != secure_origin[1] and secure_origin[1] != "*":
                    continue
            else:
                # We have a valid address and network, so see if the address
                # is contained within the network.
                if addr not in network:
                    continue

            # Check to see if the port matches
            if (origin[2] != secure_origin[2] and
                    secure_origin[2] != "*" and
                    secure_origin[2] is not None):
                continue

            # If we've gotten here, then this origin matches the current
            # secure origin and we should break out of the loop and continue
            # on.
            break
        else:
            # If the loop successfully completed without a break, that means
            # that the origin we are testing is not a secure origin.
            logger.warning(
                "This repository located at %s is not a trusted host, if "
                "this repository is available via HTTPS it is recommend to "
                "use HTTPS instead, otherwise you may silence this warning "
                "with '--trusted-host %s'.",
                parsed.hostname,
                parsed.hostname,
            )

            warnings.warn(
                "Implicitly allowing locations which are not hosted at a "
                "secure origin is deprecated and will require the use of "
                "--trusted-host in the future.",
                RemovedInPip7Warning,
            )
293 | |
    def _get_index_urls_locations(self, project_name):
        """Returns the locations found via self.index_urls

        Checks the url_name on the main (first in the list) index and
        use this url_name to produce all locations
        """

        def mkurl_pypi_url(url):
            loc = posixpath.join(url, project_url_name)
            # For maximum compatibility with easy_install, ensure the path
            # ends in a trailing slash. Although this isn't in the spec
            # (and PyPI can handle it without the slash) some other index
            # implementations might break if they relied on easy_install's
            # behavior.
            if not loc.endswith('/'):
                loc = loc + '/'
            return loc

        # URL-quote the lowercased project name for use as a path segment.
        project_url_name = urllib_parse.quote(project_name.lower())

        if self.index_urls:
            # Check that we have the url_name correctly spelled:

            # Only check main index if index URL is given
            main_index_url = Link(
                mkurl_pypi_url(self.index_urls[0]),
                trusted=True,
            )

            page = self._get_page(main_index_url)
            if page is None and PyPI.netloc not in str(main_index_url):
                warnings.warn(
                    "Failed to find %r at %s. It is suggested to upgrade "
                    "your index to support normalized names as the name in "
                    "/simple/{name}." % (project_name, main_index_url),
                    RemovedInPip8Warning,
                )

            # Fall back to whatever spelling the index itself uses
            # (case-insensitive match), keeping ours if nothing better.
            project_url_name = self._find_url_name(
                Link(self.index_urls[0], trusted=True),
                project_url_name,
            ) or project_url_name

        if project_url_name is not None:
            return [mkurl_pypi_url(url) for url in self.index_urls]
        return []
340 | |
    def _find_all_versions(self, project_name):
        """Find all available versions for project_name

        This checks index_urls, find_links and dependency_links
        All versions found are returned

        See _link_package_versions for details on which files are accepted
        """
        index_locations = self._get_index_urls_locations(project_name)
        file_locations, url_locations = self._sort_locations(index_locations)
        fl_file_loc, fl_url_loc = self._sort_locations(self.find_links)
        file_locations.extend(fl_file_loc)
        url_locations.extend(fl_url_loc)

        _flocations, _ulocations = self._sort_locations(self.dependency_links)
        file_locations.extend(_flocations)

        # We trust every url that the user has given us whether it was given
        # via --index-url or --find-links
        locations = [Link(url, trusted=True) for url in url_locations]

        # We explicitly do not trust links that came from dependency_links
        locations.extend([Link(url) for url in _ulocations])

        logger.debug('%d location(s) to search for versions of %s:',
                     len(locations), project_name)
        for location in locations:
            logger.debug('* %s', location)
            # Warn (but do not block) on non-HTTPS, non-trusted origins.
            self._validate_secure_origin(logger, location)

        find_links_versions = list(self._package_versions(
            # We trust every directly linked archive in find_links
            (Link(url, '-f', trusted=True) for url in self.find_links),
            project_name.lower()
        ))

        page_versions = []
        for page in self._get_pages(locations, project_name):
            logger.debug('Analyzing links from page %s', page.url)
            with indent_log():
                page_versions.extend(
                    self._package_versions(page.links, project_name.lower())
                )

        dependency_versions = list(self._package_versions(
            (Link(url) for url in self.dependency_links), project_name.lower()
        ))
        if dependency_versions:
            logger.debug(
                'dependency_links found: %s',
                ', '.join([
                    version.location.url for version in dependency_versions
                ])
            )

        file_versions = list(
            self._package_versions(
                (Link(url) for url in file_locations),
                project_name.lower()
            )
        )
        if file_versions:
            file_versions.sort(reverse=True)
            logger.debug(
                'Local files found: %s',
                ', '.join([
                    url_to_path(candidate.location.url)
                    for candidate in file_versions
                ])
            )

        # This is an intentional priority ordering
        return (
            file_versions + find_links_versions + page_versions +
            dependency_versions
        )
417 | |
418 def find_requirement(self, req, upgrade): | |
419 """Try to find an InstallationCandidate for req | |
420 | |
421 Expects req, an InstallRequirement and upgrade, a boolean | |
422 Returns an InstallationCandidate or None | |
423 May raise DistributionNotFound or BestVersionAlreadyInstalled | |
424 """ | |
425 all_versions = self._find_all_versions(req.name) | |
426 # Filter out anything which doesn't match our specifier | |
427 | |
428 _versions = set( | |
429 req.specifier.filter( | |
430 [x.version for x in all_versions], | |
431 prereleases=( | |
432 self.allow_all_prereleases | |
433 if self.allow_all_prereleases else None | |
434 ), | |
435 ) | |
436 ) | |
437 applicable_versions = [ | |
438 x for x in all_versions if x.version in _versions | |
439 ] | |
440 | |
441 if req.satisfied_by is not None: | |
442 # Finally add our existing versions to the front of our versions. | |
443 applicable_versions.insert( | |
444 0, | |
445 InstallationCandidate( | |
446 req.name, | |
447 req.satisfied_by.version, | |
448 INSTALLED_VERSION, | |
449 ) | |
450 ) | |
451 existing_applicable = True | |
452 else: | |
453 existing_applicable = False | |
454 | |
455 applicable_versions = self._sort_versions(applicable_versions) | |
456 | |
457 if not upgrade and existing_applicable: | |
458 if applicable_versions[0].location is INSTALLED_VERSION: | |
459 logger.debug( | |
460 'Existing installed version (%s) is most up-to-date and ' | |
461 'satisfies requirement', | |
462 req.satisfied_by.version, | |
463 ) | |
464 else: | |
465 logger.debug( | |
466 'Existing installed version (%s) satisfies requirement ' | |
467 '(most up-to-date version is %s)', | |
468 req.satisfied_by.version, | |
469 applicable_versions[0][2], | |
470 ) | |
471 return None | |
472 | |
473 if not applicable_versions: | |
474 logger.critical( | |
475 'Could not find a version that satisfies the requirement %s ' | |
476 '(from versions: %s)', | |
477 req, | |
478 ', '.join( | |
479 sorted( | |
480 set(str(i.version) for i in all_versions), | |
481 key=parse_version, | |
482 ) | |
483 ) | |
484 ) | |
485 | |
486 if self.need_warn_external: | |
487 logger.warning( | |
488 "Some externally hosted files were ignored as access to " | |
489 "them may be unreliable (use --allow-external %s to " | |
490 "allow).", | |
491 req.name, | |
492 ) | |
493 | |
494 if self.need_warn_unverified: | |
495 logger.warning( | |
496 "Some insecure and unverifiable files were ignored" | |
497 " (use --allow-unverified %s to allow).", | |
498 req.name, | |
499 ) | |
500 | |
501 raise DistributionNotFound( | |
502 'No matching distribution found for %s' % req | |
503 ) | |
504 | |
505 if applicable_versions[0].location is INSTALLED_VERSION: | |
506 # We have an existing version, and its the best version | |
507 logger.debug( | |
508 'Installed version (%s) is most up-to-date (past versions: ' | |
509 '%s)', | |
510 req.satisfied_by.version, | |
511 ', '.join(str(i.version) for i in applicable_versions[1:]) or | |
512 "none", | |
513 ) | |
514 raise BestVersionAlreadyInstalled | |
515 | |
516 if len(applicable_versions) > 1: | |
517 logger.debug( | |
518 'Using version %s (newest of versions: %s)', | |
519 applicable_versions[0].version, | |
520 ', '.join(str(i.version) for i in applicable_versions) | |
521 ) | |
522 | |
523 selected_version = applicable_versions[0].location | |
524 | |
525 if (selected_version.verifiable is not None and not | |
526 selected_version.verifiable): | |
527 logger.warning( | |
528 "%s is potentially insecure and unverifiable.", req.name, | |
529 ) | |
530 | |
531 if selected_version._deprecated_regex: | |
532 warnings.warn( | |
533 "%s discovered using a deprecated method of parsing, in the " | |
534 "future it will no longer be discovered." % req.name, | |
535 RemovedInPip7Warning, | |
536 ) | |
537 | |
538 return selected_version | |
539 | |
540 def _find_url_name(self, index_url, url_name): | |
541 """ | |
542 Finds the true URL name of a package, when the given name isn't quite | |
543 correct. | |
544 This is usually used to implement case-insensitivity. | |
545 """ | |
546 if not index_url.url.endswith('/'): | |
547 # Vaguely part of the PyPI API... weird but true. | |
548 # FIXME: bad to modify this? | |
549 index_url.url += '/' | |
550 page = self._get_page(index_url) | |
551 if page is None: | |
552 logger.critical('Cannot fetch index base URL %s', index_url) | |
553 return | |
554 norm_name = normalize_name(url_name) | |
555 for link in page.links: | |
556 base = posixpath.basename(link.path.rstrip('/')) | |
557 if norm_name == normalize_name(base): | |
558 logger.debug( | |
559 'Real name of requirement %s is %s', url_name, base, | |
560 ) | |
561 return base | |
562 return None | |
563 | |
    def _get_pages(self, locations, project_name):
        """
        Yields (page, page_url) from the given locations, skipping
        locations that have errors, and adding download/homepage links
        """
        # Breadth-first crawl: rel links found on fetched pages are appended
        # to the queue, with `seen` preventing revisits.
        all_locations = list(locations)
        seen = set()
        normalized = normalize_name(project_name)

        while all_locations:
            location = all_locations.pop(0)
            if location in seen:
                continue
            seen.add(location)

            page = self._get_page(location)
            if page is None:
                continue

            yield page

            for link in page.rel_links():

                if (normalized not in self.allow_external and not
                        self.allow_all_external):
                    self.need_warn_external = True
                    logger.debug(
                        "Not searching %s for files because external "
                        "urls are disallowed.",
                        link,
                    )
                    continue

                if (link.trusted is not None and not
                        link.trusted and
                        normalized not in self.allow_unverified):
                    logger.debug(
                        "Not searching %s for urls, it is an "
                        "untrusted link and cannot produce safe or "
                        "verifiable files.",
                        link,
                    )
                    self.need_warn_unverified = True
                    continue

                all_locations.append(link)
610 | |
    # Matches the "#egg=<name>" fragment some links carry to name a project.
    _egg_fragment_re = re.compile(r'#egg=([^&]*)')
    # Splits a "<name>-<version>" stem out of an archive filename.
    _egg_info_re = re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)
    # Trailing "-pyX.Y" marker in a version string, e.g. "1.0-py2.7".
    _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')
614 | |
615 def _sort_links(self, links): | |
616 """ | |
617 Returns elements of links in order, non-egg links first, egg links | |
618 second, while eliminating duplicates | |
619 """ | |
620 eggs, no_eggs = [], [] | |
621 seen = set() | |
622 for link in links: | |
623 if link not in seen: | |
624 seen.add(link) | |
625 if link.egg_fragment: | |
626 eggs.append(link) | |
627 else: | |
628 no_eggs.append(link) | |
629 return no_eggs + eggs | |
630 | |
631 def _package_versions(self, links, search_name): | |
632 for link in self._sort_links(links): | |
633 v = self._link_package_versions(link, search_name) | |
634 if v is not None: | |
635 yield v | |
636 | |
637 def _known_extensions(self): | |
638 extensions = ('.tar.gz', '.tar.bz2', '.tar', '.tgz', '.zip') | |
639 if self.use_wheel: | |
640 return extensions + (wheel_ext,) | |
641 return extensions | |
642 | |
    def _link_package_versions(self, link, search_name):
        """Return an InstallationCandidate or None

        Applies every acceptance filter in order: file extension, platform,
        wheel validity/compatibility, project name match, external/unverified
        hosting policy, and embedded -pyX.Y version tags. Rejections are
        logged (once per link, via self.logged_links) and return None.
        """
        platform = get_platform()

        version = None
        if link.egg_fragment:
            # A "#egg=" fragment names the project directly.
            egg_info = link.egg_fragment
        else:
            egg_info, ext = link.splitext()
            if not ext:
                if link not in self.logged_links:
                    logger.debug('Skipping link %s; not a file', link)
                    self.logged_links.add(link)
                return
            if egg_info.endswith('.tar'):
                # Special double-extension case:
                egg_info = egg_info[:-4]
                ext = '.tar' + ext
            if ext not in self._known_extensions():
                if link not in self.logged_links:
                    logger.debug(
                        'Skipping link %s; unknown archive format: %s',
                        link,
                        ext,
                    )
                    self.logged_links.add(link)
                return
            if "macosx10" in link.path and ext == '.zip':
                if link not in self.logged_links:
                    logger.debug('Skipping link %s; macosx10 one', link)
                    self.logged_links.add(link)
                return
            if ext == wheel_ext:
                try:
                    wheel = Wheel(link.filename)
                except InvalidWheelFilename:
                    logger.debug(
                        'Skipping %s because the wheel filename is invalid',
                        link
                    )
                    return
                if (pkg_resources.safe_name(wheel.name).lower() !=
                        pkg_resources.safe_name(search_name).lower()):
                    logger.debug(
                        'Skipping link %s; wrong project name (not %s)',
                        link,
                        search_name,
                    )
                    return
                if not wheel.supported():
                    logger.debug(
                        'Skipping %s because it is not compatible with this '
                        'Python',
                        link,
                    )
                    return
                # This is a dirty hack to prevent installing Binary Wheels from
                # PyPI unless it is a Windows or Mac Binary Wheel. This is
                # paired with a change to PyPI disabling uploads for the
                # same. Once we have a mechanism for enabling support for
                # binary wheels on linux that deals with the inherent problems
                # of binary distribution this can be removed.
                comes_from = getattr(link, "comes_from", None)
                if (
                        (
                            not platform.startswith('win') and not
                            platform.startswith('macosx') and not
                            platform == 'cli'
                        ) and
                        comes_from is not None and
                        urllib_parse.urlparse(
                            comes_from.url
                        ).netloc.endswith(PyPI.netloc)):
                    if not wheel.supported(tags=supported_tags_noarch):
                        logger.debug(
                            "Skipping %s because it is a pypi-hosted binary "
                            "Wheel on an unsupported platform",
                            link,
                        )
                        return
                version = wheel.version

        if not version:
            # Fall back to parsing "<name>-<version>" from the filename stem.
            version = self._egg_info_matches(egg_info, search_name, link)
        if version is None:
            logger.debug(
                'Skipping link %s; wrong project name (not %s)',
                link,
                search_name,
            )
            return

        if (link.internal is not None and not
                link.internal and not
                normalize_name(search_name).lower()
                in self.allow_external and not
                self.allow_all_external):
            # We have a link that we are sure is external, so we should skip
            # it unless we are allowing externals
            logger.debug("Skipping %s because it is externally hosted.", link)
            self.need_warn_external = True
            return

        if (link.verifiable is not None and not
                link.verifiable and not
                (normalize_name(search_name).lower()
                 in self.allow_unverified)):
            # We have a link that we are sure we cannot verify its integrity,
            # so we should skip it unless we are allowing unsafe installs
            # for this requirement.
            logger.debug(
                "Skipping %s because it is an insecure and unverifiable file.",
                link,
            )
            self.need_warn_unverified = True
            return

        match = self._py_version_re.search(version)
        if match:
            # Strip a trailing "-pyX.Y" marker and reject other interpreters.
            version = version[:match.start()]
            py_version = match.group(1)
            if py_version != sys.version[:3]:
                logger.debug(
                    'Skipping %s because Python version is incorrect', link
                )
                return
        logger.debug('Found link %s, version: %s', link, version)

        return InstallationCandidate(search_name, version, link)
772 | |
773 def _egg_info_matches(self, egg_info, search_name, link): | |
774 match = self._egg_info_re.search(egg_info) | |
775 if not match: | |
776 logger.debug('Could not parse version from link: %s', link) | |
777 return None | |
778 name = match.group(0).lower() | |
779 # To match the "safe" name that pkg_resources creates: | |
780 name = name.replace('_', '-') | |
781 # project name and version must be separated by a dash | |
782 look_for = search_name.lower() + "-" | |
783 if name.startswith(look_for): | |
784 return match.group(0)[len(look_for):] | |
785 else: | |
786 return None | |
787 | |
    def _get_page(self, link):
        # Thin fetch hook (overridable in tests/subclasses): delegates to
        # HTMLPage.get_page using this finder's session.
        return HTMLPage.get_page(link, session=self.session)
790 | |
791 | |
class HTMLPage(object):
    """Represents one page, along with its URL"""

    # FIXME: these regexes are horrible hacks:
    # Matches a "<th>home page" header in legacy PyPI project pages.
    _homepage_re = re.compile(b'<th>\\s*home\\s*page', re.I)
    # Matches a "<th>download url" header in legacy PyPI project pages.
    _download_re = re.compile(b'<th>\\s*download\\s+url', re.I)
    # Extracts an href value, whether double-quoted, single-quoted, or bare.
    _href_re = re.compile(
        b'href=(?:"([^"]*)"|\'([^\']*)\'|([^>\\s\\n]*))',
        re.I | re.S
    )
802 | |
    def __init__(self, content, url, headers=None, trusted=None):
        """Parse *content* (raw HTML bytes fetched from *url*).

        ``headers`` is consulted only for a charset hint; ``trusted``
        records whether links found here may be followed without warnings.
        """
        # Determine if we have any encoding information in our headers
        encoding = None
        if headers and "Content-Type" in headers:
            content_type, params = cgi.parse_header(headers["Content-Type"])

            if "charset" in params:
                encoding = params['charset']

        self.content = content
        self.parsed = html5lib.parse(
            self.content,
            encoding=encoding,
            namespaceHTMLElements=False,
        )
        self.url = url
        self.headers = headers
        self.trusted = trusted
821 | |
    def __str__(self):
        # Pages stringify to their URL (used in log messages).
        return self.url
824 | |
    @classmethod
    def get_page(cls, link, skip_archives=True, session=None):
        """Fetch *link* and return an HTMLPage, or None on any failure.

        VCS URLs and (when ``skip_archives``) archive-typed URLs are skipped.
        Network errors are logged via _handle_fail rather than raised.
        ``session`` is required.
        """
        if session is None:
            raise TypeError(
                "get_page() missing 1 required keyword argument: 'session'"
            )

        url = link.url
        # Drop any fragment (e.g. #egg=...) before fetching.
        url = url.split('#', 1)[0]

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %s URL %s', scheme, link)
                return None

        try:
            if skip_archives:
                filename = link.filename
                for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                    if filename.endswith(bad_ext):
                        # HEAD-probe: only fetch if it really serves HTML.
                        content_type = cls._get_content_type(
                            url, session=session,
                        )
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: %s',
                                link,
                                content_type,
                            )
                            return

            logger.debug('Getting page %s', url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = \
                urllib_parse.urlparse(url)
            if (scheme == 'file' and
                    os.path.isdir(urllib_request.url2pathname(path))):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith('/'):
                    url += '/'
                url = urllib_parse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s', url)

            resp = session.get(
                url,
                headers={
                    "Accept": "text/html",
                    "Cache-Control": "max-age=600",
                },
            )
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            # something that looks like an archive. However that is not a
            # requirement of an url. Unless we issue a HEAD request on every
            # url we cannot know ahead of time for sure if something is HTML
            # or not. However we can check after we've downloaded it.
            content_type = resp.headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug(
                    'Skipping page %s because of Content-Type: %s',
                    link,
                    content_type,
                )
                return

            inst = cls(
                resp.content, resp.url, resp.headers,
                trusted=link.trusted,
            )
        except requests.HTTPError as exc:
            # 404s are routine (missing project pages) so log them quieter.
            level = 2 if exc.response.status_code == 404 else 1
            cls._handle_fail(link, exc, url, level=level)
        except requests.ConnectionError as exc:
            cls._handle_fail(link, "connection error: %s" % exc, url)
        except requests.Timeout:
            cls._handle_fail(link, "timed out", url)
        except SSLError as exc:
            reason = ("There was a problem confirming the ssl certificate: "
                      "%s" % exc)
            cls._handle_fail(link, reason, url, level=2, meth=logger.info)
        else:
            return inst
914 | |
915 @staticmethod | |
916 def _handle_fail(link, reason, url, level=1, meth=None): | |
917 if meth is None: | |
918 meth = logger.debug | |
919 | |
920 meth("Could not fetch URL %s: %s - skipping", link, reason) | |
921 | |
922 @staticmethod | |
923 def _get_content_type(url, session): | |
924 """Get the Content-Type of the given url, using a HEAD request""" | |
925 scheme, netloc, path, query, fragment = urllib_parse.urlsplit(url) | |
926 if scheme not in ('http', 'https'): | |
927 # FIXME: some warning or something? | |
928 # assertion error? | |
929 return '' | |
930 | |
931 resp = session.head(url, allow_redirects=True) | |
932 resp.raise_for_status() | |
933 | |
934 return resp.headers.get("Content-Type", "") | |
935 | |
936 @cached_property | |
937 def api_version(self): | |
938 metas = [ | |
939 x for x in self.parsed.findall(".//meta") | |
940 if x.get("name", "").lower() == "api-version" | |
941 ] | |
942 if metas: | |
943 try: | |
944 return int(metas[0].get("value", None)) | |
945 except (TypeError, ValueError): | |
946 pass | |
947 | |
948 return None | |
949 | |
950 @cached_property | |
951 def base_url(self): | |
952 bases = [ | |
953 x for x in self.parsed.findall(".//base") | |
954 if x.get("href") is not None | |
955 ] | |
956 if bases and bases[0].get("href"): | |
957 return bases[0].get("href") | |
958 else: | |
959 return self.url | |
960 | |
961 @property | |
962 def links(self): | |
963 """Yields all links in the page""" | |
964 for anchor in self.parsed.findall(".//a"): | |
965 if anchor.get("href"): | |
966 href = anchor.get("href") | |
967 url = self.clean_link( | |
968 urllib_parse.urljoin(self.base_url, href) | |
969 ) | |
970 | |
971 # Determine if this link is internal. If that distinction | |
972 # doesn't make sense in this context, then we don't make | |
973 # any distinction. | |
974 internal = None | |
975 if self.api_version and self.api_version >= 2: | |
976 # Only api_versions >= 2 have a distinction between | |
977 # external and internal links | |
978 internal = bool( | |
979 anchor.get("rel") and | |
980 "internal" in anchor.get("rel").split() | |
981 ) | |
982 | |
983 yield Link(url, self, internal=internal) | |
984 | |
985 def rel_links(self): | |
986 for url in self.explicit_rel_links(): | |
987 yield url | |
988 for url in self.scraped_rel_links(): | |
989 yield url | |
990 | |
991 def explicit_rel_links(self, rels=('homepage', 'download')): | |
992 """Yields all links with the given relations""" | |
993 rels = set(rels) | |
994 | |
995 for anchor in self.parsed.findall(".//a"): | |
996 if anchor.get("rel") and anchor.get("href"): | |
997 found_rels = set(anchor.get("rel").split()) | |
998 # Determine the intersection between what rels were found and | |
999 # what rels were being looked for | |
1000 if found_rels & rels: | |
1001 href = anchor.get("href") | |
1002 url = self.clean_link( | |
1003 urllib_parse.urljoin(self.base_url, href) | |
1004 ) | |
1005 yield Link(url, self, trusted=False) | |
1006 | |
1007 def scraped_rel_links(self): | |
1008 # Can we get rid of this horrible horrible method? | |
1009 for regex in (self._homepage_re, self._download_re): | |
1010 match = regex.search(self.content) | |
1011 if not match: | |
1012 continue | |
1013 href_match = self._href_re.search(self.content, pos=match.end()) | |
1014 if not href_match: | |
1015 continue | |
1016 url = ( | |
1017 href_match.group(1) or | |
1018 href_match.group(2) or | |
1019 href_match.group(3) | |
1020 ) | |
1021 if not url: | |
1022 continue | |
1023 try: | |
1024 url = url.decode("ascii") | |
1025 except UnicodeDecodeError: | |
1026 continue | |
1027 url = self.clean_link(urllib_parse.urljoin(self.base_url, url)) | |
1028 yield Link(url, self, trusted=False, _deprecated_regex=True) | |
1029 | |
1030 _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) | |
1031 | |
1032 def clean_link(self, url): | |
1033 """Makes sure a link is fully encoded. That is, if a ' ' shows up in | |
1034 the link, it will be rewritten to %20 (while not over-quoting | |
1035 % or other characters).""" | |
1036 return self._clean_re.sub( | |
1037 lambda match: '%%%2x' % ord(match.group(0)), url) | |
1038 | |
1039 | |
class Link(object):
    """A link to a file or page discovered while searching an index.

    Wraps a URL together with its provenance (``comes_from``) and the
    trust metadata used to decide whether a download is verifiable.
    Links order and compare by their URL string.
    """

    def __init__(self, url, comes_from=None, internal=None, trusted=None,
                 _deprecated_regex=False):

        # url can be a UNC windows share
        if url != Inf and url.startswith('\\\\'):
            url = path_to_url(url)

        self.url = url
        self.comes_from = comes_from
        self.internal = internal
        self.trusted = trusted
        self._deprecated_regex = _deprecated_regex

    def __str__(self):
        if self.comes_from:
            return '%s (from %s)' % (self.url, self.comes_from)
        return str(self.url)

    def __repr__(self):
        return '<Link %s>' % self

    # Equality and ordering delegate entirely to the URL string.

    def __eq__(self, other):
        if isinstance(other, Link):
            return self.url == other.url
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, Link):
            return self.url != other.url
        return NotImplemented

    def __lt__(self, other):
        if isinstance(other, Link):
            return self.url < other.url
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, Link):
            return self.url <= other.url
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, Link):
            return self.url > other.url
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, Link):
            return self.url >= other.url
        return NotImplemented

    def __hash__(self):
        return hash(self.url)

    @property
    def filename(self):
        """Best-effort file name: the last path segment, falling back to
        the network location when the path is empty."""
        split = urllib_parse.urlsplit(self.url)
        name = posixpath.basename(split.path.rstrip('/')) or split.netloc
        name = urllib_parse.unquote(name)
        assert name, ('URL %r produced no filename' % self.url)
        return name

    @property
    def scheme(self):
        """URL scheme, e.g. 'https' or 'file'."""
        return urllib_parse.urlsplit(self.url).scheme

    @property
    def netloc(self):
        """Network location (host[:port]) portion of the URL."""
        return urllib_parse.urlsplit(self.url).netloc

    @property
    def path(self):
        """Unquoted path portion of the URL."""
        return urllib_parse.unquote(urllib_parse.urlsplit(self.url).path)

    def splitext(self):
        """Split the final path segment into (root, extension)."""
        return splitext(posixpath.basename(self.path.rstrip('/')))

    @property
    def ext(self):
        """File extension of the final path segment, including the dot."""
        return self.splitext()[1]

    @property
    def url_without_fragment(self):
        """The URL with any #fragment stripped."""
        split = urllib_parse.urlsplit(self.url)
        return urllib_parse.urlunsplit(
            (split.scheme, split.netloc, split.path, split.query, None))

    _egg_fragment_re = re.compile(r'#egg=([^&]*)')

    @property
    def egg_fragment(self):
        """Value of the #egg= fragment, or None when absent."""
        match = self._egg_fragment_re.search(self.url)
        return match.group(1) if match else None

    _hash_re = re.compile(
        r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
    )

    @property
    def hash(self):
        """Hex digest embedded in the URL (e.g. md5=...), or None."""
        match = self._hash_re.search(self.url)
        return match.group(2) if match else None

    @property
    def hash_name(self):
        """Name of the hash algorithm embedded in the URL, or None."""
        match = self._hash_re.search(self.url)
        return match.group(1) if match else None

    @property
    def show_url(self):
        """Short display form: the basename without query or fragment."""
        base = self.url.split('#', 1)[0].split('?', 1)[0]
        return posixpath.basename(base)

    @property
    def verifiable(self):
        """
        Returns True if this link can be verified after download, False if it
        cannot, and None if we cannot determine.
        """
        trusted = self.trusted or getattr(self.comes_from, "trusted", None)
        if trusted is None:
            # No trust information at all; we cannot determine either way.
            return None
        if not trusted:
            # This link came from an untrusted source and we cannot trust it
            return False

        # Trusted source: it *may* be verifiable, but only when the page
        # is operating under API version 2 or later.
        try:
            api_version = int(getattr(self.comes_from, "api_version", None))
        except (ValueError, TypeError):
            api_version = None

        if api_version is None or api_version <= 1:
            # Trusted, but not under API version 2, so no safety claims.
            return None

        # Under the new API a hash makes the download verifiable.
        return bool(self.hash)

    @property
    def is_wheel(self):
        """True when the link's extension marks it as a wheel file."""
        return self.ext == wheel_ext
1198 | |
1199 | |
# Sentinel "link" standing in for the already-installed version of a
# requirement.  Using Inf as the url makes it sort higher than any real
# candidate link.
INSTALLED_VERSION = Link(Inf)