rpmbuilder/get_sources.py

   1 #!/usr/bin/env python
   2 # Copyright 2019 Nokia
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #     http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 # See the License for the specific language governing permissions and
  14 # limitations under the License.
  15
  16 import os
  17 import re
  18 import shutil
  19 import hashlib
  20 import lxml.html
  21 import urllib3
  22
  23
# Shared urllib3 pool manager; get_url() and download() issue all requests through it.
HTTP = urllib3.PoolManager()
  25
  26 def debug(log):
  27     print(log)
  28
  29 def verbose(log):
  30     print(log)
  31
  32 def filter_dot(lst):
  33     return filter(lambda path: path[0] != '.', lst)
  34
  35 def get_url(url, file_hash):
  36     debug("http get {}".format(url))
  37     request = HTTP.request('GET', url)
  38     dom = lxml.html.fromstring(request.data)
  39     for link in filter_dot(dom.xpath('//a/@href')):
  40         path = '{}/{}'.format(url, link)
  41         debug("http get {}".format(path))
  42         request = HTTP.request('GET', path)
  43         dom = lxml.html.fromstring(request.data)
  44         if file_hash in dom.xpath('//a/@href'):
  45             return '{}{}'.format(path, file_hash)
  46
  47 def get_repo_name(path):
  48     regex = re.compile(r'^\.([^.]*).metadata$')
  49     meta = list(filter(regex.match, os.listdir(path)))
  50     if len(meta) == 0:
  51         return None
  52     if len(meta) != 1:
  53         raise Exception('Multiple metadata files: {}'.format(", ".join(meta)))
  54     repo_name = regex.search(meta[0]).group(1)
  55     debug("repo name is {}".format(repo_name))
  56     return repo_name
  57
  58 def parse_metadatafile(path, repo_name):
  59     result = {}
  60     filename = "{}/.{}.metadata".format(path, repo_name)
  61     debug("metadata file: {}".format(filename))
  62     with open(filename) as metadata:
  63         for line in metadata:
  64             items = line.split()
  65             result[items[1]] = items[0]
  66             debug('found {}: {}'.format(items[1], items[0]))
  67     return result
  68
  69 def get_hash(filename, hashfunc):
  70     with open(filename, 'rb', buffering=0) as contents:
  71         for buffer in iter(lambda: contents.read(128*1024), b''):
  72             hashfunc.update(buffer)
  73     digest = hashfunc.hexdigest()
  74     debug("digest is {}".format(digest))
  75     return digest
  76
  77 def check_file(filename, checksum):
  78     debug("checking {} {}".format(filename, checksum))
  79     hashmap = {
  80         32  : hashlib.md5(),
  81         40  : hashlib.sha1(),
  82         64  : hashlib.sha256(),
  83         128 : hashlib.sha512()
  84     }
  85     if len(checksum) not in hashmap:
  86         raise Exception('Checksum lenght unsupported: {}'.format(checksum))
  87     if get_hash(filename, hashmap[len(checksum)]) != checksum:
  88         raise Exception("Checksum doesn't match: {} {}".format(filename, checksum))
  89     debug("checksum ok")
  90
  91 def download(url, destination, checksum):
  92     tmpfile = "{}.tmp".format(destination)
  93     try:
  94         debug("downloading {} to {}".format(url, tmpfile))
  95         with HTTP.request('GET', url, preload_content=False) as resp, open(tmpfile, 'wb') as out_file:
  96             shutil.copyfileobj(resp, out_file)
  97         check_file(tmpfile, checksum)
  98         debug("renaming {} to {}".format(tmpfile, destination))
  99         os.rename(tmpfile, destination)
 100     finally:
 101         try:
 102             os.remove(tmpfile)
 103             debug("removed {}".format(tmpfile))
 104         except OSError:
 105             pass
 106
 107 def get_sources(path, sources_list, logger):
 108     if logger:
 109         global debug
 110         global verbose
 111         debug = logger.debug
 112         verbose = logger.info
 113
 114     repo = get_repo_name(path)
 115     if not repo:
 116         verbose('no metadata file in "{}".'.format(path))
 117         return
 118
 119     for k, v in parse_metadatafile(path, repo).items():
 120         filename = os.path.join(path, k)
 121         try:
 122             check_file(filename, v)
 123         except:
 124             found = False
 125             for sources in sources_list:
 126                 repo_root = "{}/{}".format(sources, repo)
 127                 url = get_url(repo_root, v)
 128                 if url:
 129                     debug("retrieving {} to {}".format(url, filename))
 130                     download(url, filename, v)
 131                     verbose('retrieved "{}"'.format(k))
 132                     found = True
 133                     break
 134             if not found:
 135                 raise Exception('File "{}" not found'.format(v))