X-Git-Url: https://gerrit.akraino.org/r/gitweb?p=ta%2Frpmbuilder.git;a=blobdiff_plain;f=rpmbuilder%2Fget_sources.py;fp=rpmbuilder%2Fget_sources.py;h=3fc986f621c35a260fcfdad8389bc72b3a8f2b68;hp=0000000000000000000000000000000000000000;hb=876631a959303430aafc0be7897b086ee9b921fe;hpb=d8468e0423a9af0d3fd5bf30d45ebe18ba8b1801 diff --git a/rpmbuilder/get_sources.py b/rpmbuilder/get_sources.py new file mode 100644 index 0000000..3fc986f --- /dev/null +++ b/rpmbuilder/get_sources.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# Copyright 2019 Nokia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import re
import shutil
import hashlib
import lxml.html
import urllib3


# Shared connection pool used by every HTTP request in this module.
HTTP = urllib3.PoolManager()


def debug(log):
    """Emit a debug message.

    Prints to stdout by default; get_sources() rebinds this name to
    ``logger.debug`` when it is given a logger.
    """
    print(log)


def verbose(log):
    """Emit an informational message.

    Prints to stdout by default; get_sources() rebinds this name to
    ``logger.info`` when it is given a logger.
    """
    print(log)


def filter_dot(lst):
    """Return the entries of *lst* that are non-empty and do not start with '.'.

    Empty entries are dropped as well: they carry no path component and
    indexing their first character would raise IndexError.
    """
    return filter(lambda path: path and not path.startswith('.'), lst)


def get_url(url, file_hash):
    """Search the index page at *url* for a file named *file_hash*.

    Every link on the page at *url* (except dot-links) is fetched in turn and
    its own links are searched for *file_hash*.
    NOTE(review): this presumably expects a two-level HTML directory listing
    (root -> sub-directory -> files named by checksum); confirm against the
    source servers.

    Returns the URL of the first match, or None when nothing matches.
    """
    debug("http get {}".format(url))
    response = HTTP.request('GET', url)
    dom = lxml.html.fromstring(response.data)
    for link in filter_dot(dom.xpath('//a/@href')):
        path = '{}/{}'.format(url, link)
        debug("http get {}".format(path))
        response = HTTP.request('GET', path)
        dom = lxml.html.fromstring(response.data)
        if file_hash in dom.xpath('//a/@href'):
            # Join with exactly one slash so the result is valid whether or
            # not the directory link carried a trailing '/'.
            return '{}/{}'.format(path.rstrip('/'), file_hash)
    return None


def get_repo_name(path):
    """Derive the repository name from the ``.<name>.metadata`` file in *path*.

    Returns None when the directory holds no such file.
    Raises Exception when more than one metadata file is present.
    """
    # The dot before "metadata" is escaped so that only real
    # ".<name>.metadata" files match.
    regex = re.compile(r'^\.([^.]*)\.metadata$')
    meta = [name for name in os.listdir(path) if regex.match(name)]
    if not meta:
        return None
    if len(meta) != 1:
        raise Exception('Multiple metadata files: {}'.format(", ".join(meta)))
    repo_name = regex.match(meta[0]).group(1)
    debug("repo name is {}".format(repo_name))
    return repo_name


def parse_metadatafile(path, repo_name):
    """Parse ``<path>/.<repo_name>.metadata`` into a dict.

    Each non-blank line is split on whitespace; the first token is stored as
    the value and the second token as the key (get_sources() treats the key
    as a file name relative to *path* and the value as its checksum).

    Raises Exception when a non-blank line has fewer than two tokens.
    """
    result = {}
    filename = "{}/.{}.metadata".format(path, repo_name)
    debug("metadata file: {}".format(filename))
    with open(filename) as metadata:
        for line in metadata:
            items = line.split()
            if not items:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            if len(items) < 2:
                raise Exception('Malformed metadata line in {}: {}'.format(
                    filename, line.rstrip()))
            result[items[1]] = items[0]
            debug('found {}: {}'.format(items[1], items[0]))
    return result


def get_hash(filename, hashfunc):
    """Feed the contents of *filename* into *hashfunc* and return the hex digest.

    *hashfunc* is a hashlib-style object (``update``/``hexdigest``); the file
    is read unbuffered in 128 KiB chunks so large files are never held in
    memory at once.
    """
    with open(filename, 'rb', buffering=0) as contents:
        for chunk in iter(lambda: contents.read(128 * 1024), b''):
            hashfunc.update(chunk)
    digest = hashfunc.hexdigest()
    debug("digest is {}".format(digest))
    return digest


def check_file(filename, checksum):
    """Verify that *filename* hashes to *checksum*.

    The algorithm is chosen from the length of the hex checksum:
    32 -> md5, 40 -> sha1, 64 -> sha256, 128 -> sha512.

    Raises Exception for an unsupported checksum length or a mismatch; errors
    from opening or reading the file propagate unchanged.
    """
    debug("checking {} {}".format(filename, checksum))
    # Map to constructors, not instances: only the algorithm actually needed
    # is instantiated, so an algorithm that is unavailable on this platform
    # cannot break verification with a different one.
    hashmap = {
        32: hashlib.md5,
        40: hashlib.sha1,
        64: hashlib.sha256,
        128: hashlib.sha512,
    }
    if len(checksum) not in hashmap:
        raise Exception('Checksum length unsupported: {}'.format(checksum))
    # hexdigest() is lowercase; compare case-insensitively so an upper-case
    # checksum in the metadata file is not reported as a mismatch.
    if get_hash(filename, hashmap[len(checksum)]()) != checksum.lower():
        raise Exception("Checksum doesn't match: {} {}".format(filename, checksum))
    debug("checksum ok")


def download(url, destination, checksum):
    """Download *url* to *destination*, verifying *checksum* first.

    The body is streamed to ``<destination>.tmp``, verified with check_file()
    and only then renamed over *destination*, so a failed or corrupt download
    never replaces an existing file.

    Raises Exception on a non-2xx HTTP status or a checksum failure.
    """
    tmpfile = "{}.tmp".format(destination)
    try:
        debug("downloading {} to {}".format(url, tmpfile))
        with HTTP.request('GET', url, preload_content=False) as resp:
            # Fail early with the real cause instead of saving an error page
            # and reporting a checksum mismatch.
            if not 200 <= resp.status < 300:
                raise Exception('Download failed: {} returned HTTP status {}'.format(
                    url, resp.status))
            with open(tmpfile, 'wb') as out_file:
                shutil.copyfileobj(resp, out_file)
        check_file(tmpfile, checksum)
        debug("renaming {} to {}".format(tmpfile, destination))
        os.rename(tmpfile, destination)
    finally:
        # Best-effort cleanup: after a successful rename the temp file no
        # longer exists, so a failing remove is expected and ignored.
        try:
            os.remove(tmpfile)
            debug("removed {}".format(tmpfile))
        except OSError:
            pass


def get_sources(path, sources_list, logger=None):
    """Ensure every file listed in the metadata file of *path* is present and valid.

    Each listed file is checked against its checksum; a file that is missing
    or fails the check is looked up under ``<source>/<repo>`` for each entry
    of *sources_list*, in order, and downloaded from the first that has it.

    When *logger* is given, the module-level debug()/verbose() functions are
    rebound to ``logger.debug``/``logger.info`` for the rest of the process.

    Returns None. Raises Exception when a needed file is found on none of the
    sources; download and verification errors propagate.
    """
    global debug
    global verbose
    if logger:
        debug = logger.debug
        verbose = logger.info

    repo = get_repo_name(path)
    if not repo:
        verbose('no metadata file in "{}".'.format(path))
        return

    for k, v in parse_metadatafile(path, repo).items():
        filename = os.path.join(path, k)
        try:
            check_file(filename, v)
        except Exception as err:
            # Missing file or bad checksum: fall through to fetching it.
            # Only Exception is caught so Ctrl-C / SystemExit still abort.
            debug('local check of "{}" failed: {}'.format(filename, err))
            for sources in sources_list:
                repo_root = "{}/{}".format(sources, repo)
                url = get_url(repo_root, v)
                if url:
                    debug("retrieving {} to {}".format(url, filename))
                    download(url, filename, v)
                    verbose('retrieved "{}"'.format(k))
                    break
            else:
                # Loop ended without a break: no source had the file.
                raise Exception('File "{}" not found'.format(v))