Andreas Wacknitz
2024-04-06 90f5a5f4cb5346a35bbaa69499b36171279d7783
commit | author | age
de89cf 1 #!/usr/bin/python3.9
3a319e 2 #
NJ 3 # CDDL HEADER START
4 #
5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
8 #
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
13 #
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
8beffa 22 # Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
3a319e 23 #
NJ 24 #
fb10ba 25 # userland-fetch - a file download utility
3a319e 26 #
NJ 27 #  A simple program similar to wget(1), but handles local file copy, ignores
28 #  directories, and verifies file hashes.
29 #
30
f8b2e5 31 import errno
3a319e 32 import os
NJ 33 import sys
6ddf48 34 import shutil
8beffa 35 import json
f8b2e5 36 import subprocess
37 import re
38 import gzip
39 import bz2
222cf2 40 from urllib.parse import urlparse
fb10ba 41 from urllib.request import urlopen
222cf2 42 from urllib.error import HTTPError,URLError
fb10ba 43 from urllib.request import Request
222cf2 44 from pathlib import Path
0e8406 45 import hashlib
fb10ba 46 from http.client import BadStatusLine
3a319e 47
222cf2 48 # EXIT CODES:
D 49 # 1 - unspecified error
50 # 2 - download uses insecure protocol
51 # 3 - was unable to find a suitable download
52 # 4 - need-hash specified but no hash was found
53
54 # NEW PARAMETERS:
55 # -n/--need-hash:       Set this to tell userland-fetch to fail if it cannot find a
56 #                       correct hash. This also causes userland-fetch to search for
57 #                       and download hash files, if they are not already present in
58 #                       HASH_DIR. If --hash is provided this effectively does nothing.
59 #
60 # -N/--need-sig:        Set this to tell userland-fetch to require a signature. This
61 #                       also causes userland-fetch to search for signature files. If
62 #                       the signature fails then the download is considered corrupted,
63 #                       and will be deleted unless --keep is set.
64 #                       This means that if the signature can't be checked, the file
65 #                       WILL be deleted!
66 #
67 # -c/--clobber-hash:    Set this to tell userland-fetch to clobber old hash files.
68 #                       userland-fetch will replace hash files in HASH_DIR with their
69 #                       remote counterparts.
70 #
71
72 # convert environment variables to global python variables
def prep_envvars():
    """Convert environment variables into module-level configuration globals.

    Globals written:
      DEFAULT_HASH_ALGO           algorithm assumed when none can be derived
                                  (default "sha256")
      DEFAULT_HASH_FILES          hash-file names to look for
      HASH_DIR                    real path of $HASH_DIR, or None when it is
                                  unset or empty (the caller then picks a
                                  default after parsing the command line)
      SECURE_PROTOCOLS            "UNCHECKED" plus the configured schemes
      SIGNATURE_EXTENSIONS        extensions probed for detached signatures
      ALLOW_UNVERIFIED_DOWNLOADS  True only when the variable is exactly 'yes'

    List-valued variables are space separated; empty items are dropped.
    """
    global DEFAULT_HASH_ALGO, DEFAULT_HASH_FILES, HASH_DIR
    global SECURE_PROTOCOLS, SIGNATURE_EXTENSIONS, ALLOW_UNVERIFIED_DOWNLOADS

    def env_list(name, default):
        # An unset variable yields the default; a set one is split on
        # single spaces with empty items removed.
        value = os.environ.get(name)
        if value is None:
            return list(default)
        return [item for item in value.split(" ") if item]

    # This algorithm is used if it cannot be found in the filename
    DEFAULT_HASH_ALGO = os.getenv("DEFAULT_HASH_ALGO", "sha256")
    DEFAULT_HASH_FILES = env_list("DEFAULT_HASH_FILES",
                                  ["SHA256SUMS", "sha256sums.txt"])

    # An empty HASH_DIR is treated like an unset one: realpath("") would
    # silently resolve to the current directory.
    hash_dir = os.environ.get("HASH_DIR")
    HASH_DIR = os.path.realpath(hash_dir) if hash_dir else None

    SECURE_PROTOCOLS = ["UNCHECKED"] + env_list("SECURE_PROTOCOLS", ["https"])
    SIGNATURE_EXTENSIONS = env_list("SIGNATURE_EXTENSIONS", ["sig", "asc"])
    ALLOW_UNVERIFIED_DOWNLOADS = \
        os.environ.get("ALLOW_UNVERIFIED_DOWNLOADS") == 'yes'
108
# URL schemes (as reported by urlparse) that are handled as local paths
# versus those that are fetched with download().
LOCAL_SCHEMES = [None, 'file','']
REMOTE_SCHEMES = ['https','http','ftp']
52916a 111
def printIOError(e, txt):
    """Print an I/O error in the form "message (code)".

    e   -- the exception (or a legacy (code, message) pair)
    txt -- context describing what was being attempted

    Python 3 exceptions cannot be unpacked like tuples, so the error number
    and text are read from the 'errno' and 'strerror' attributes.  When
    those are not available the exception's string form is printed instead.
    """
    print("I/O Error: " + txt + ": ")
    if isinstance(e, (tuple, list)) and len(e) == 2:
        code, message = e
    else:
        code = getattr(e, "errno", None)
        message = getattr(e, "strerror", None)

    if code is not None and message is not None:
        print(str(message) + " (" + str(code) + ")")
    else:
        print(str(e))
52916a 120
# TODO: refactor this so there aren't any global variables
# Output and exit status of the most recent gpg2 run, kept so that the
# caller can report them somewhere tidy.
VALIDATE_ERROR = ""
VALIDATE_CODE = -1


def validate_signature(path, signature):
    """Given paths to a file and a detached PGP signature, verify that
    the signature is valid for the file.

    Returns True when gpg2 exits with status 0.  Returns False when the
    repository root cannot be determined with git, when gpg2 is not
    installed, or when gpg2 reports a failure; in the last case gpg2's
    combined output is stored in VALIDATE_ERROR.  VALIDATE_CODE always
    receives gpg2's exit status once it has run.  Other OSErrors raised
    while starting gpg2 are propagated."""
    global VALIDATE_CODE, VALIDATE_ERROR

    # Find the root of the repo so that we can point GnuPG at the right
    # configuration and keyring.  run() drains the pipe while waiting, so a
    # child that fills the pipe buffer cannot block forever.
    toplevel = subprocess.run(["git", "rev-parse", "--show-toplevel"],
                              stdout=subprocess.PIPE,
                              universal_newlines=True)
    if toplevel.returncode != 0:
        return False
    gpgdir = os.path.join(toplevel.stdout.strip(), "tools", ".gnupg")

    # Skip the permissions warning: none of the information here is private,
    # so not having to worry about getting git keeping the directory
    # unreadable is just simplest.
    try:
        gpg = subprocess.run(["gpg2", "--verify",
                              "--no-permission-warning", "--homedir", gpgdir,
                              signature, path],
                             stdin=subprocess.DEVNULL,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             universal_newlines=True)
    except OSError as e:
        # If the executable simply couldn't be found, just skip the
        # validation.
        if e.errno == errno.ENOENT:
            return False
        raise

    VALIDATE_CODE = gpg.returncode
    if gpg.returncode != 0:
        # Only keep GnuPG's output when there was a problem.
        VALIDATE_ERROR = gpg.stdout
        return False
    return True
165
166
def validate(file, hash):
    """Compute the digest of a binary file-like object.

    'hash' is an "algorithm:value" string; only the algorithm part is used
    here.  When it cannot be split into exactly two parts, or names sha1,
    DEFAULT_HASH_ALGO is used instead.

    Returns "algorithm:hexdigest" on success, "algorithm:" when reading
    fails, and False when hashlib does not know the algorithm.  Comparing
    the result against the expected hash is left to the caller."""

    try:
        algorithm, _ = hash.split(':')
    except (AttributeError, TypeError, ValueError):
        algorithm = DEFAULT_HASH_ALGO

    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = DEFAULT_HASH_ALGO

    try:
        m = hashlib.new(algorithm)
    except ValueError:
        print("Unable to generate hashlib instance for",algorithm)
        return False

    try:
        # Hash in bounded chunks so large archives are never held in
        # memory in one piece.
        while True:
            block = file.read(1024 * 1024)
            if not block:
                break
            m.update(block)
        return "%s:%s" % (algorithm, m.hexdigest())
    except IOError as err:
        print(str(err), end=' ')
    except EOFError as err:
        print(str(err), end=' ')

    return "%s:" % (algorithm)
52916a 196
0e8406 197
def validate_container(filename, hash):
    """Given a file path and a hash string, compute the digest of the file
    as stored on disk.

    Returns whatever validate() returns for the opened file, or False when
    the file cannot be opened (the error is printed)."""

    try:
        file = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False
    # Close the handle as soon as hashing is done.
    with file:
        return validate(file, hash)
0e8406 208
NJ 209
def validate_payload(filename, hash):
    """Given a file path and a hash string, compute the digest of the
    payload (uncompressed content) of the file.

    Files ending in .bz, .bz2, .tbz or .tbz2 are read through bz2, files
    ending in .gz or .tgz through gzip (case-insensitive).  Returns False
    for any other name or when the archive cannot be opened; otherwise
    returns whatever validate() returns for the decompressed stream."""

    expr_bz = re.compile(r'.+\.(bz2?|tbz2?)$', re.IGNORECASE)
    expr_gz = re.compile(r'.+\.(gz|tgz)$', re.IGNORECASE)

    if expr_bz.match(filename):
        opener = bz2.BZ2File
    elif expr_gz.match(filename):
        opener = gzip.GzipFile
    else:
        return False

    try:
        file = opener(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False
    # Close the archive as soon as hashing is done.
    with file:
        return validate(file, hash)
0e8406 234
3a319e 235
def download(url, filename=None, user_agent_arg=None, quiet=None,allow_partial=True):
    """Download the content at the given URL to the given filename
    (defaulting to the basename of the URL if not given).

    A HEAD request is made first to learn the content length and whether
    the server advertises byte ranges; without range support partial
    downloads are disabled.  Data is written to 'filename' + ".part", which
    is renamed to 'filename' once the transfer loop ends.  Up to three
    attempts are made: a transfer that ends short of the advertised length,
    or a URLError after some bytes were received, triggers another attempt
    (resuming with a Range header when partial downloads are allowed).  An
    HTTPError ends the attempts immediately.

    url            -- URL to fetch
    filename       -- destination path
    user_agent_arg -- value for the User-Agent header, if not None
    quiet          -- when true, progress and error messages are suppressed
    allow_partial  -- append to an existing .part file and resume

    Returns the name of the file to which the content was downloaded, or
    None when an I/O error, an unparsable status line or a keyboard
    interrupt ended the download."""
    retval = None
    try:
        req = Request(url,method="HEAD")
        if user_agent_arg is not None:
            req.add_header("User-Agent", user_agent_arg)
        if filename is None:
            filename = req.get_full_url().split('/')[-1]
        retry = 1
        dl = 0
        with urlopen(req) as head:
            headers = head.headers
            if 'transfer-encoding' in headers and headers['transfer-encoding'] == 'chunked':
                length = 0
                if not quiet:
                    print("length unknown (streamed/chunked)")
            else:
                try:
                    length = int(headers['content-length'])
                    if not quiet:
                        print("length %i bytes" % (length))
                except (KeyError,ValueError,TypeError):
                    length = 0
                    if not quiet:
                        print("length unknown")
            if 'accept-ranges' not in headers or headers['accept-ranges'] != 'bytes':
                if not quiet:
                    print("No partial download support from server")
                allow_partial = False
        req.method = "GET"
        partname = filename + ".part"
        while retry <= 3:
            with open(partname,"ab" if allow_partial else "wb") as o:
                try:
                    # seek to end of the file if applicable
                    if allow_partial:
                        o.seek(0,2)
                    dl = o.tell()
                    if not quiet:
                        print("(Attempt %i of 3%s)..." % (retry,"; %i bytes done"%(dl) if dl else ""),end=" ")
                    if dl > 0:
                        # resume right after the bytes we already have
                        req.add_header("Range","bytes=%i-"%(dl))
                    with urlopen(req) as i:
                        # copy in chunks to keep memory usage down
                        src = i.read(65536)
                        while len(src) > 0:
                            o.write(src)
                            src = i.read(65536)
                    if length > 0 and o.tell() < length:
                        if not quiet:
                            print("Download of %s stopped abruptly." % (str(url)))
                        retry += 1
                    else:
                        # finished: leave the retry loop
                        retry = 4
                # HTTPError is a subclass of URLError, so it has to be
                # handled first to be reachable.
                except HTTPError as e:
                    if not quiet:
                        print("Error downloading %s: %s" % (str(url),str(e)))
                    retry = 4
                except URLError as e:
                    if not quiet:
                        print("Error downloading %s at %i bytes: %s" % (str(url),dl,str(e)))
                    # if we haven't downloaded any bytes since the last URLError, cancel the download.
                    # The Range header for the next attempt is set at the
                    # top of the loop from the reopened file's size.
                    if o.tell() > dl:
                        retry += 1
                    else:
                        retry = 4
        # return the name of the file that we downloaded the data to.
        os.rename(partname,filename)
        retval = filename
    except IOError as e:
        if not quiet:
            printIOError(e, "Can't open url " + url)
    except BadStatusLine as e:
        if not quiet:
            print("Can't open url %s: server answered with code which we couldn't understand " % (url))
    except KeyboardInterrupt:
        print("Cancelling download...")

    return retval
52916a 320
3a319e 321
def download_paths(search, filenames, url):
    """Returns a list of URLs where the files in 'filenames' might be found,
    using 'url', 'search', $DOWNLOAD_SEARCH_PATH and
    $DOWNLOAD_FALLBACK_PATH as places to look.

    search    -- list of base locations; it is not modified
    filenames -- a single file name, a list of names, or None
    url       -- the command line URL (may be None); a pypi: URL is
                 resolved through pypi_url() using the last file name, and
                 is dropped when it cannot be resolved or no file name is
                 available

    If 'filenames' is None, then the list will simply contain 'url'.
    Entries with a local scheme are returned before remote ones; the
    relative order inside each group is preserved."""

    urls = list()
    if isinstance(filenames, str):
        filenames = [filenames]

    if filenames is not None:
        # Work on a copy: the caller reuses its search list for later calls.
        search = list(search or [])
        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            search += [base for base in tmp.split(' ') if base]
        for filename in filenames:
            file = os.path.basename(filename)

            urls += [base + '/' + file for base in search]

            # filename should always be first
            if filename in urls:
                urls.remove(filename)
            urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        if urlparse(url).scheme == "pypi":
            if filenames:
                url = pypi_url(url, os.path.basename(filenames[-1]))
            else:
                # without an expected file name no archive can be chosen
                url = None
        if url is not None and url not in urls:
            urls.append(url)

    # last resort path
    if filenames is not None:
        tmp = os.getenv('DOWNLOAD_FALLBACK_PATH')
        if tmp:
            fallback = [base for base in tmp.split(' ') if base]
            for filename in filenames:
                file = os.path.basename(filename)
                urls += [base + '/' + file for base in fallback]

    local_urls = list()
    remote_urls = list()
    # sort entries by local first, then remote:
    for entry in urls:
        if urlparse(entry).scheme in LOCAL_SCHEMES:
            local_urls.append(entry)
        else:
            remote_urls.append(entry)
    return local_urls + remote_urls
58825e 373
52916a 374
def pypi_url(url, filename):
    """Given a pypi: URL, return the real URL for that component/version.

    The pypi scheme has a host (with an empty host defaulting to
    pypi.python.org), and a path that should be of the form
    "component==version".  Other specs could be supported, but == is the
    only thing that makes sense in this context.

    The filename argument is the name of the expected file to download, so
    that when pypi gives us multiple archives to choose from, we can pick
    the right one.

    Returns the archive URL whose last path component equals 'filename',
    or None (after printing a message) when the URL is malformed, the JSON
    index cannot be fetched, the version is unknown, or no archive matches.
    """

    parse_result = urlparse(url)
    host = parse_result.netloc
    path = parse_result.path

    # We have to use ==; anything fancier would require pkg_resources, but
    # really that's the only thing that makes sense in this context.
    spec = re.match("/(.*)==(.*)$", path)
    if spec is None:
        print("PyPI URLs must be of the form 'pypi:///component==version'")
        return None
    name, version = spec.groups()

    if not host:
        jsurl = "https://pypi.python.org/pypi/%s/json" % name
    else:
        jsurl = "https://%s/pypi/%s/json" % (host, name)

    try:
        # The context manager closes the connection once the body is read.
        with urlopen(jsurl, data=None) as f:
            content = f.read().decode("utf-8")
    except HTTPError as e:
        if e.getcode() == 404:
            print("Unknown component '%s'" % name)
        else:
            printIOError(e, "Can't open PyPI JSON url %s" % jsurl)
        return None
    except IOError as e:
        printIOError(e, "Can't open PyPI JSON url %s" % jsurl)
        return None
    js = json.loads(content)
    try:
        verblock = js["releases"][version]
    except KeyError:
        print("Unknown version '%s'" % version)
        return None

    urls = [ d["url"] for d in verblock ]
    for archiveurl in urls:
        if archiveurl.endswith("/%s" % filename):
            return archiveurl

    if urls:
        print("None of the following URLs delivers '%s':" % filename)
        print("  " + "\n  ".join(urls))
    else:
        print("Couldn't find any suitable URLs")
    return None
3a319e 436
def download_from_paths(search_list, file_arg, url, link_arg, quiet=False,get_signature=False,download_dir=None):
    """Attempts to download a file from a number of possible locations.
    Generates (name, source) pairs where 'name' is the path at which the
    file ended up on the local filesystem (None when this source did not
    provide it) and 'source' is the location that was tried.  This is a
    generator because while a download might be successful, the signature
    or hash may not validate, and the caller may want to try again from the
    next location.

    search_list   -- base locations handed to download_paths()
    file_arg      -- destination file name, or a list of candidate names
    url           -- command line URL, tried after the search locations
    link_arg      -- when True and the source is local and not already
                     where it should be, make a symlink to the source
                     rather than copying it
    quiet         -- False prints progress; 2 prints compact table cells;
                     other true values print nothing
    get_signature -- also try to fetch a detached signature next to each
                     remote download, using SIGNATURE_EXTENSIONS
    download_dir  -- directory for remote downloads when 'file_arg' is a
                     list (defaults to the current directory)"""

    single_target = isinstance(file_arg, str)

    for source in download_paths(search_list, file_arg, url):
        if not quiet:
            print("Source %s..." % source, end=' ')
        elif quiet == 2:
            if len(source) > 53:
                p = source[:24] + ' ... ' + source[-24:]
            else:
                p = source
            print("      {:54s}".format(p), end='')

        parse_result = urlparse(source)
        scheme = parse_result.scheme
        path = parse_result.path
        # Reset for every source so a result from a previous location (or
        # an unsupported scheme) can never leak into this one.
        name = None

        if scheme in LOCAL_SCHEMES:
            names = [file_arg] if single_target else file_arg
            notfound = False
            for n in names:
                # don't rename stuff - there shouldn't be a file list here anyway
                if os.path.basename(n) != os.path.basename(source):
                    continue
                if os.path.exists(path) is False:
                    notfound = True
                    if not quiet:
                        print("not found, skipping file copy")
                    elif quiet == 2:
                        print("{:10s}".format("-"))
                    break
                elif n and n != path:
                    if link_arg is False:
                        if not quiet:
                            print("\n    copying...")
                        shutil.copy2(path, n)
                    else:
                        if not quiet:
                            print("\n    linking...")
                        os.symlink(path, n)
                    # the file is now in place; report it to the caller
                    name = n
                    break
                else:
                    name = n
                    if not quiet:
                        print("cached")
                    elif quiet == 2:
                        print("{:10s}".format("cached"),end="")
                    break
            if notfound:
                continue
        elif scheme in REMOTE_SCHEMES:
            if not quiet:
                print("\n    downloading...", end=' ')
            # Keyword arguments: the third positional parameter of
            # download() is the user agent, not 'quiet'.
            partial_ok = (scheme != 'ftp')
            if single_target:
                name = download(source, file_arg, quiet=quiet, allow_partial=partial_ok)
            else:
                if not download_dir:
                    download_dir = os.curdir
                name = download(source, os.path.join(download_dir,os.path.basename(source)),
                                quiet=quiet, allow_partial=partial_ok)
            if get_signature and name:
                for ext in SIGNATURE_EXTENSIONS:
                    sig = download(source+"."+ext, name+"."+ext, quiet=quiet, allow_partial=partial_ok)
                    if sig:
                        break
            if name is None:
                if not quiet:
                    print("failed")
                elif quiet == 2:
                    print("{:10s}".format("-"))
                continue
            else:
                if not quiet:
                    print("ok")
                elif quiet == 2:
                    print("{:10s}".format("fetched"),end="")
        yield (name,source)
f8b2e5 524
525
def find_hash_in_file(filename,hash_file):
    """Look up the hash recorded for 'filename' in 'hash_file'.

    'hash_file' is opened relative to HASH_DIR and is expected to contain
    lines of the form "<hex digest> [*][dir/]<name>"; only the basename of
    'filename' is compared, literally.

    The algorithm is derived from the hash file's name: for
    "<filename>.<algo>[sum[s]]" it is the extension without the "sum(s)"
    suffix, for "<algo>sum[s][.txt]" it is the leading part, and
    DEFAULT_HASH_ALGO otherwise.

    Returns "algorithm:hexdigest" for the first matching line, or None when
    no line matches."""
    # Escape the name: characters such as '.' and '+' are common in archive
    # names and must not be interpreted as regex operators.
    line_re = re.compile(r'([0-9a-fA-F]+)( [ \*](.*/)?)('
                         + re.escape(os.path.basename(filename)) + r'$)')
    stem, _, extension = hash_file.rpartition('.')
    by_name = re.match(r"(^[a-z0-9]+)(sums?(.txt)?$)",hash_file.lower())

    if stem == filename:
        # "<filename>.sha256sum(s)" or plain "<filename>.sha256"
        by_ext = (re.match(r'([a-zA-Z0-9]+)(sums?)', extension)
                  or re.match(r'([a-zA-Z0-9]+)$', extension))
        algo = by_ext.group(1) if by_ext else DEFAULT_HASH_ALGO
    elif by_name:
        algo = by_name.group(1)
    else:
        algo = DEFAULT_HASH_ALGO

    with open(os.path.join(HASH_DIR,hash_file),"r") as file:
        for line in file:
            found = line_re.match(line)
            if found is not None:
                return "%s:%s" % (algo,found.group(1))
    return None
546
def find_hash_in_hash_dir(filename):
    """Search every hash file in HASH_DIR for a hash of 'filename'.

    Files are examined in sorted order; files whose extension is one of
    SIGNATURE_EXTENSIONS are skipped.

    Returns (hash, hash_file) for the first hit and (None, None) when
    nothing is found, HASH_DIR is unset or missing, or HASH_DIR or one of
    its entries has the wrong type (a message is printed in that case).
    The result is always a pair so callers can unpack it."""
    try:
        if not HASH_DIR or not os.path.exists(HASH_DIR):
            return None, None
        for hash_file in sorted(os.listdir(HASH_DIR)):
            # detached signatures live next to the hash files; skip them
            extension = os.path.splitext(hash_file)[1].lstrip('.')
            if extension in SIGNATURE_EXTENSIONS:
                continue
            hash_value = find_hash_in_file(filename,hash_file)
            if hash_value:
                return hash_value, hash_file
    except NotADirectoryError:
        print(HASH_DIR,"should be a directory containing hashfiles in the",DEFAULT_HASH_ALGO+"sum","format.")
    except IsADirectoryError:
        print(hash_file,"should be a file containing hashes, not a directory.")
    return None, None
def usage():
    """Print the command line synopsis and return 1, the exit status for an
    unspecified error."""
    print("Usage: %s [-a|--user-agent (user-agent)] [-f|--file (file)] [-l|--link] " \
        "[-k|--keep] [-h|--hash (hash)] [-n|--need-hash] [-c|--clobber-hash] " \
        "[-s|--search (search-dir)] " \
        "[-g|--get-hashes] [-G|--get-sigs] " \
        "[-S|--sigurl (signature-url)] [-N|--need-sig] -u|--url (url)" % (sys.argv[0].split('/')[-1]))
    return 1
52916a 572
3a319e 573
def main():
    """Parse the command line and exit with realmain()'s return value.

    When the options cannot be parsed, the getopt error and the usage text
    are printed and the process exits with the status returned by usage(),
    so callers see a failure instead of status 0."""
    import getopt
    sys.stdout.flush()
    try:
        opts, args = getopt.getopt(sys.argv[1:], "a:f:h:lks:u:GgNnc",
                                   ["file=", "link", "keep", "hash=", "search=", "url=","get-sigs","get-hashes",
                                    "sigurl=", "user-agent=", "need-sig", "need-hash","clobber-hash"])
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(usage())
    sys.exit(realmain(opts, args))
3a319e 585
222cf2 586 def realmain(opts, args):
D 587     prep_envvars()
52916a 588     user_agent_arg = None
589     file_arg = None
590     link_arg = False
591     keep_arg = False
592     hash_arg = None
593     url_arg = None
f8b2e5 594     sig_arg = None
291e6d 595     need_sig = False
222cf2 596     get_signature = False
D 597     need_hash = False
598     get_hash = False
599     clobber_hash = False
52916a 600     search_list = list()
222cf2 601     retval = 1
3a319e 602
52916a 603     for opt, arg in opts:
604         if opt in ["-a", "--user-agent"]:
605             user_agent_arg = arg
606         elif opt in ["-f", "--file"]:
607             file_arg = arg
608         elif opt in ["-l", "--link"]:
609             link_arg = True
610         elif opt in ["-k", "--keep"]:
611             keep_arg = True
612         elif opt in ["-h", "--hash"]:
613             hash_arg = arg
222cf2 614         elif opt in ["-n", "--need-hash"]:
D 615             need_hash = True
616         elif opt in ["-g", "--get-hashes"]:
617             get_hash = True
52916a 618         elif opt in ["-s", "--search"]:
619             search_list.append(arg)
f8b2e5 620         elif opt in ["-S", "--sigurl"]:
621             sig_arg = arg
52916a 622         elif opt in ["-u", "--url"]:
623             url_arg = arg
291e6d 624         elif opt in ["-N", "--need-sig"]:
D 625             need_sig = True
222cf2 626         elif opt in ["-G", "--get-sigs"]:
D 627             get_signature = True
628         elif opt in ["-c", "--clobber-hash"]:
629             clobber_hash = True
52916a 630         else:
631             assert False, "unknown option"
3a319e 632
f8b2e5 633     if url_arg is None:
222cf2 634         if clobber_hash and len(search_list) == 0:
D 635             print("WARN: -c/--clobber-hash is meaningless without --search or --url. Ignoring.")
636             clobber_hash = False
637         if file_arg is None:
638             usage()
639         scheme = "UNCHECKED"
640     else:
641         parse_result = urlparse(url_arg)
642         scheme = parse_result.scheme
643         path = parse_result.path
644         if file_arg is None:
645             file_arg = os.path.realpath(os.path.join(os.curdir,os.path.basename(path)))
3a319e 646
222cf2 647     file_arg = os.path.realpath(file_arg)
D 648     filename = os.path.basename(file_arg)
649     global HASH_DIR
650     if not HASH_DIR:
651         HASH_DIR = os.path.realpath(os.path.join(os.path.dirname(file_arg),"hashes"))
652     valid_sig = False
3a319e 653
222cf2 654     if clobber_hash or get_hash:
D 655         print("Hash directory: %s [clobbering: %s]" % (HASH_DIR,str(clobber_hash)))
656         if clobber_hash:
657             HASH_DIR_ORIG = HASH_DIR
658             HASH_DIR = HASH_DIR + ".tmp"
659         try:
660             os.mkdir(HASH_DIR)
661         except FileNotFoundError:
662             print("Refusing to create %s recursively - is HASH_DIR set correctly?" % (HASH_DIR))
663             return 1
664         except FileExistsError:
665             pass
666
667         # We need to account for the following possibilities for hash files:
668         # 1: .asc with embedded checksums (1 file, needs PGP stripping)
669         # TODO: ^
670         # 2: .asc or .sig, detached from hash file (2 files)
671         # 3: checksums without signature (need a secure protocol)
672
673         print("Sourcing hash files... ",end="")
674         search_hash_files = DEFAULT_HASH_FILES + [
675           filename + '.' + DEFAULT_HASH_ALGO,
676           filename + '.' + DEFAULT_HASH_ALGO + 'sum',
677           filename + '.' + DEFAULT_HASH_ALGO + 'sums'
678         ]
679         hash_file = None
680         print("\n      {:54s}{:10s}{:10s}".format("URL","LOCALITY","HAS HASH"))
681         if url_arg:
682             if not search_list:
683                 search_list = []
684             search_list.append(os.path.dirname(url_arg))
685         search_hash_files = [ os.path.join(HASH_DIR,x) for x in search_hash_files ]
686         for hashname, hashurl in download_from_paths(search_list, search_hash_files , None, link_arg, quiet=2,get_signature=True,download_dir=HASH_DIR):
687             scheme = urlparse(hashurl).scheme
688             safe = scheme in SECURE_PROTOCOLS or scheme in LOCAL_SCHEMES
689             valid_sig = False
690             for sigext in SIGNATURE_EXTENSIONS:
691                 signame = hashname + "." + sigext
692                 if os.path.exists(signame):
693                     valid_sig = validate_signature(hashname,signame)
694             if not valid_sig and (not safe or need_sig):
695                 print("denied (hashfile download did not meet security criteria)")
696                 os.remove(hashname)
697                 Path(signame).unlink(missing_ok=True)
698                 continue
699             hash_arg = find_hash_in_file(filename, hashname)
700             if not hash_arg:
701                 print("no")
52916a 702             else:
222cf2 703                 print("YES")
D 704                 hash_file = hashname
705                 break
706         if hash_file is not None:
707             print("INFO: hash found for",filename,"in",hash_file)
3a319e 708
222cf2 709         if clobber_hash:
D 710             for file in os.listdir(HASH_DIR):
711                 orig_file = None
712                 new_file = None
713                 try:
714                     orig_file = os.path.join(HASH_DIR_ORIG,os.path.basename(file))
715                     new_file = os.path.join(HASH_DIR,file)
716                     os.rename(new_file,orig_file)
717                 except IsADirectoryError as e:
718                     print("ERROR: moving hashfiles to HASH_DIR failed: %s" % (str(e)))
719                 except OSError as e:
720                     print("OSError: %s (%s -> %s)" %(str(e),new_file,orig_file))
721             try:
722                 os.rmdir(HASH_DIR)
723             except OSError as e:
724                 print("Couldn't remove %s: %s" % (HASH_DIR,str(e)))
725             HASH_DIR = HASH_DIR_ORIG
726     elif hash_arg == None:
727         hash_arg, hash_file = find_hash_in_hash_dir(filename)
728         if hash_file is not None:
729             print("INFO: hash found for",filename,"in",hash_file)
730     else:
731         print("INFO: not using any hashes in %s for" % (HASH_DIR),filename,"(overridden with --hash)")
f8b2e5 732
222cf2 733
D 734     
735     if (hash_arg is None or hash_arg == 'none') and need_hash:
736         print("-n/--need-hash and no hash found. Exiting.")
737         return 4
738     if ALLOW_UNVERIFIED_DOWNLOADS:
739         print("WARN: ALLOW_UNVERIFIED_DOWNLOADS set.")
740
741     if sig_arg:
742         if get_signature:
743             print("INFO: not searching with -g (--sigurl provided)")
744         get_signature=False
745     for name, url in download_from_paths(search_list, file_arg, url_arg, link_arg,get_signature=get_signature):
746         scheme = urlparse(url).scheme
747         if not name:
748             print("    was not downloaded")
749             continue
750         print("    validating signature...", end=' ')
751         if valid_sig:
752             print("hashfile had valid signature")
52916a 753         else:
222cf2 754             sig_file = None
D 755             if sig_arg:
756                 if sig_arg == 'none':
757                     print("skipping (--sigurl none)")
758                 else:
759                     print("using %s..." % sig_arg,end=' ')
760                     if urlparse(sig_arg).scheme in REMOTE_SCHEMES:
761                         sig_file = download(sig_arg,filename=os.path.join(os.path.dirname(name),os.path.basename(sig_arg)),quiet=True,allow_partial=False)
762             else:
763                 if get_signature:
764                     print("checking remote signature...",end=' ')
765                 else:
766                     print("checking local signature...",end=' ')
767             errors = []
768             if not sig_file:
769                 for ext in SIGNATURE_EXTENSIONS:
770                     if os.path.exists(name+'.'+ext):
771                         sig_file = name+'.'+ext
772                         break
773             if sig_file:
774                 if validate_signature(name, sig_file):
775                     print("VALID")
776                     valid_sig = True
777                 else:
778                     print("invalid")
779                     errors.append((sig_file,VALIDATE_CODE,VALIDATE_ERROR))
780                     break
781             else:
782                 print("not found")
783
784         if not valid_sig:
785             print("    signature validation failed\n")
786             bad_signature = False
787             for error in errors:
788                 print("---%s output(exit code %d):\n%s---" % error)
789                 if error[1] == 1:
790                     bad_signature = True
791             if need_sig:
792                 if not keep_arg or bad_signature:
793                     print("WARN: Deleting corrupt file.")
794                     try:
795                         os.remove(name)
796                     except OSError:
797                         pass
798                 print("-N/--need-sig is set. This download cannot be used.")
799                 retval = 3
800                 continue
801         elif not need_hash:
802             retval = 0
803             break
804         print("    validating hash...", end=' ')
805         if hash_arg and hash_arg != 'none':
806             realhash = validate_container(name, hash_arg)
807         else:
808             realhash = "skipped calculation (--hash none)"
809             hash_arg = None
810
811         if realhash == hash_arg:
812             print("ok")
813             retval = 0
814         else:
815             if hash_arg and hash_arg != 'none':
816                 payloadhash = validate_payload(name, hash_arg)
817             else:
818                 payloadhash = "skipped calculation (--hash none)"
52916a 819             if payloadhash == hash_arg:
fb10ba 820                 print("ok")
222cf2 821                 retval = 0
f8b2e5 822             else:
222cf2 823                 if not hash_arg or hash_arg == 'none':
D 824                     scheme = urlparse(url).scheme
825                     if not ALLOW_UNVERIFIED_DOWNLOADS:
826                         print("ERROR: Cannot validate download (no hash or signature).")
827                         if keep_arg == False:
828                             try:
829                                 print("\nWARN: Removing the downloaded file")
830                                 os.remove(name)
831                             except OSError:
832                                 pass
833                         retval = 3
834                         continue
835                     elif scheme not in SECURE_PROTOCOLS and scheme not in LOCAL_SCHEMES:
836                         print("ERROR: This download uses an insecure protocol: '%s'." % (str(scheme),))
837                         if keep_arg == False:
838                             try:
839                                 print("\nWARN: Removing the downloaded file")
840                                 os.remove(name)
841                             except OSError:
842                                 pass
843                         retval = 2
844                         continue
845                     print("ignoring errors")
846                     retval = 0
f8b2e5 847                 else:
222cf2 848                     print("invalid hash!")
D 849                     print("    expected: %s" % hash_arg)
850                     print("    actual:   %s" % realhash)
851                     print("    payload:  %s" % payloadhash)
3a319e 852
222cf2 853                     if valid_sig:
D 854                         if need_hash:
855                             print("-n/--need-hash is set. This download cannot be used.")
856                             if keep_arg == False:
857                                 try:
858                                     print("\nWARN: Removing the downloaded file")
859                                     os.remove(name)
860                                 except OSError:
861                                     pass
862                             retval = 3
863                             continue
52916a 864
222cf2 865                         # If the signature validated, then we assume
D 866                         # that the expected hash is just a typo.
eefa3b 867
222cf2 868                         # An invalid hash shouldn't cause us to remove
D 869                         # the target file if the signature was valid.
870                         # Also, if the developer is in progress of upgrading
871                         # some package version or introduces a new one, and
872                         # explicitly ran "gmake fetch", keep the downloaded
873                         # file (Makefile is not in position to have a valid
874                         # checksum entry just yet) so it does not have to be
875                         # downloaded twice.
876                         retval = 0
eefa3b 877                     else:
222cf2 878                         print("ERROR: This download failed to validate.")
D 879                         if keep_arg == False:
880                             try:
881                                 print("\nWARN: Removing the corrupt downloaded file")
882                                 os.remove(name)
883                             except OSError:
884                                 pass
885                         retval = 3
886                         continue
887         if retval == 0:
888             break
889     return retval
f8b2e5 890
if __name__ == "__main__":
    # main() returns the process exit status: 0 on success, or one of the
    # non-zero codes listed under "EXIT CODES" at the top of this file
    # (e.g. 2 for an insecure protocol, 3 when no usable download was
    # found, 4 when --need-hash is set but no hash is available).
    # Hand that value to sys.exit() so the invoking shell or make rule
    # sees it; a bare main() call discards it and the script exits 0
    # even when the download failed.
    sys.exit(main())