#!/usr/bin/env python
# Copyright 2019 Nokia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Fetch and verify source files listed in a repo '.<name>.metadata' file.

Each metadata line is '<checksum> <filename>'. Files that are missing or
fail their checksum are retrieved from the mirror roots in *sources_list*
by scraping HTML directory listings.
"""

import hashlib
import os
import re
import shutil

import lxml.html
import urllib3

HTTP = urllib3.PoolManager()


def debug(log):
    """Emit a debug message; rebound to logger.debug by get_sources()."""
    print(log)


def verbose(log):
    """Emit an info message; rebound to logger.info by get_sources()."""
    print(log)


def filter_dot(lst):
    """Drop entries starting with '.' (hidden files, parent-dir links)."""
    return filter(lambda path: path[0] != '.', lst)


def get_url(url, file_hash):
    """Search the directory listing at *url*, one level deep, for *file_hash*.

    Fetches *url*, follows each non-dot link, and looks for a link named
    exactly *file_hash* in the sub-listing.

    Returns:
        The full URL of the file, or None if no subdirectory contains it.
    """
    debug("http get {}".format(url))
    request = HTTP.request('GET', url)
    dom = lxml.html.fromstring(request.data)
    for link in filter_dot(dom.xpath('//a/@href')):
        path = '{}/{}'.format(url, link)
        debug("http get {}".format(path))
        request = HTTP.request('GET', path)
        dom = lxml.html.fromstring(request.data)
        if file_hash in dom.xpath('//a/@href'):
            # NOTE(review): assumes *link* ends with '/' so that
            # path + file_hash forms a valid URL — confirm against mirrors.
            return '{}{}'.format(path, file_hash)
    return None


def get_repo_name(path):
    """Return the repo name encoded in the '.<name>.metadata' file in *path*.

    Returns:
        The '<name>' part, or None when no metadata file exists.

    Raises:
        Exception: if more than one metadata file is present.
    """
    # Fix: the '.' before 'metadata' is escaped; the original pattern
    # ('.metadata$') accepted any character in that position.
    regex = re.compile(r'^\.([^.]*)\.metadata$')
    meta = list(filter(regex.match, os.listdir(path)))
    if not meta:
        return None
    if len(meta) != 1:
        raise Exception('Multiple metadata files: {}'.format(", ".join(meta)))
    repo_name = regex.search(meta[0]).group(1)
    debug("repo name is {}".format(repo_name))
    return repo_name


def parse_metadatafile(path, repo_name):
    """Parse '<path>/.<repo_name>.metadata' into a {filename: checksum} dict.

    Each non-empty line must contain '<checksum> <filename>' separated by
    whitespace.
    """
    result = {}
    filename = "{}/.{}.metadata".format(path, repo_name)
    debug("metadata file: {}".format(filename))
    with open(filename) as metadata:
        for line in metadata:
            items = line.split()
            result[items[1]] = items[0]
            debug('found {}: {}'.format(items[1], items[0]))
    return result


def get_hash(filename, hashfunc):
    """Return the hex digest of *filename*, fed through *hashfunc* in chunks."""
    with open(filename, 'rb', buffering=0) as contents:
        # Read in 128 KiB chunks so large files are hashed without
        # loading them fully into memory.
        for buffer in iter(lambda: contents.read(128 * 1024), b''):
            hashfunc.update(buffer)
    digest = hashfunc.hexdigest()
    debug("digest is {}".format(digest))
    return digest


def check_file(filename, checksum):
    """Verify *filename* against *checksum*.

    The hash algorithm is inferred from the checksum's hex-digit length
    (md5/sha1/sha256/sha512).

    Raises:
        Exception: on an unsupported checksum length or a digest mismatch.
    """
    debug("checking {} {}".format(filename, checksum))
    hashmap = {
        32: hashlib.md5(),
        40: hashlib.sha1(),
        64: hashlib.sha256(),
        128: hashlib.sha512(),
    }
    if len(checksum) not in hashmap:
        # Fix: corrected typo 'lenght' -> 'length' in the error message.
        raise Exception('Checksum length unsupported: {}'.format(checksum))
    if get_hash(filename, hashmap[len(checksum)]) != checksum:
        raise Exception("Checksum doesn't match: {} {}".format(filename,
                                                              checksum))
    debug("checksum ok")


def download(url, destination, checksum):
    """Download *url* to *destination* atomically, verifying *checksum*.

    The payload is streamed to '<destination>.tmp' first and renamed into
    place only after the checksum verifies; the temporary file is removed
    on any failure.
    """
    tmpfile = "{}.tmp".format(destination)
    try:
        debug("downloading {} to {}".format(url, tmpfile))
        with HTTP.request('GET', url, preload_content=False) as resp, \
                open(tmpfile, 'wb') as out_file:
            shutil.copyfileobj(resp, out_file)
        check_file(tmpfile, checksum)
        debug("renaming {} to {}".format(tmpfile, destination))
        os.rename(tmpfile, destination)
    finally:
        try:
            # After a successful rename the tmpfile is gone and this is a
            # harmless no-op; on failure it cleans up the partial download.
            os.remove(tmpfile)
            debug("removed {}".format(tmpfile))
        except OSError:
            pass


def get_sources(path, sources_list, logger):
    """Ensure every file listed in *path*'s metadata exists and verifies.

    Missing or corrupt files are fetched from the mirror roots in
    *sources_list*. When *logger* is given, module-level debug/verbose
    output is redirected to logger.debug / logger.info.

    Raises:
        Exception: when a required file cannot be found on any mirror.
    """
    if logger:
        global debug
        global verbose
        debug = logger.debug
        verbose = logger.info
    repo = get_repo_name(path)
    if not repo:
        verbose('no metadata file in "{}".'.format(path))
        return
    for k, v in parse_metadatafile(path, repo).items():
        filename = os.path.join(path, k)
        try:
            check_file(filename, v)
        # Fix: narrowed the original bare 'except:' so KeyboardInterrupt /
        # SystemExit are no longer swallowed; a missing file or checksum
        # mismatch still falls through to the mirror lookup below.
        except Exception:
            found = False
            for sources in sources_list:
                repo_root = "{}/{}".format(sources, repo)
                url = get_url(repo_root, v)
                if url:
                    debug("retrieving {} to {}".format(url, filename))
                    download(url, filename, v)
                    verbose('retrieved "{}"'.format(k))
                    found = True
                    break
            if not found:
                raise Exception('File "{}" not found'.format(v))