Initial commit

[ta/rpmbuilder.git] / rpmbuilder / get_sources.py
diff --git a/rpmbuilder/get_sources.py b/rpmbuilder/get_sources.py

new file mode 100644 (file)

index 0000000..3fc986f
--- /dev/null
+++ b/rpmbuilder/get_sources.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# Copyright 2019 Nokia
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import shutil
+import hashlib
+import lxml.html
+import urllib3
+
+
+# Module-wide urllib3 pool manager; get_url() and download() issue all of
+# their HTTP requests through this single instance.
+HTTP = urllib3.PoolManager()
+
+def debug(log):
+    """Emit a debug-level message.
+
+    Prints ``log`` to standard output by default. get_sources() rebinds this
+    module-level name to ``logger.debug`` when it is given a logger.
+    """
+    print(log)
+
+def verbose(log):
+    """Emit an informational message.
+
+    Prints ``log`` to standard output by default. get_sources() rebinds this
+    module-level name to ``logger.info`` when it is given a logger.
+    """
+    print(log)
+
+def filter_dot(lst):
+    """Return the entries of ``lst`` that do not start with a dot.
+
+    Used to drop links such as ``.`` / ``..`` / hidden entries from a list of
+    hrefs. ``str.startswith`` is used instead of indexing ``path[0]`` so that
+    an empty string is kept rather than raising IndexError.
+
+    :param lst: iterable of strings
+    :returns: list of the strings whose first character is not ``.``
+    """
+    return [path for path in lst if not path.startswith('.')]
+
+def get_url(url, file_hash):
+    """Search the pages linked from ``url`` for a link equal to ``file_hash``.
+
+    The page at ``url`` is fetched and parsed as HTML. Every ``<a href>`` on
+    it that does not start with a dot is fetched in turn (as
+    ``<url>/<href>``) and its own hrefs are inspected.
+
+    :param url: address of the top-level index page
+    :param file_hash: href value to look for on the second-level pages
+    :returns: ``<url>/<href><file_hash>`` for the first second-level page
+              that lists ``file_hash``, or None when no page lists it
+    """
+    debug("http get {}".format(url))
+    index_page = lxml.html.fromstring(HTTP.request('GET', url).data)
+    for link in filter_dot(index_page.xpath('//a/@href')):
+        path = '{}/{}'.format(url, link)
+        debug("http get {}".format(path))
+        listing = lxml.html.fromstring(HTTP.request('GET', path).data)
+        hrefs = listing.xpath('//a/@href')
+        if file_hash in hrefs:
+            # NOTE(review): no separator is inserted here, so this presumably
+            # relies on the href ending with "/" -- confirm against the server.
+            return '{}{}'.format(path, file_hash)
+    return None
+
+def get_repo_name(path):
+    """Derive the repository name from the ``.<name>.metadata`` file in ``path``.
+
+    :param path: directory to scan with ``os.listdir``
+    :returns: the ``<name>`` part of the single ``.<name>.metadata`` entry,
+              or None when the directory has no such entry
+    :raises Exception: when more than one metadata file is present
+    """
+    # The dot before "metadata" is escaped so that it only matches a literal
+    # "." (an unescaped "." would also accept names like ".fooXmetadata").
+    regex = re.compile(r'^\.([^.]*)\.metadata$')
+    # Keep the match objects so each name is only run through the regex once.
+    matches = [found for found in map(regex.match, os.listdir(path)) if found]
+    if not matches:
+        return None
+    if len(matches) != 1:
+        names = ", ".join(found.group(0) for found in matches)
+        raise Exception('Multiple metadata files: {}'.format(names))
+    repo_name = matches[0].group(1)
+    debug("repo name is {}".format(repo_name))
+    return repo_name
+
+def parse_metadatafile(path, repo_name):
+    """Read ``<path>/.<repo_name>.metadata`` into a dictionary.
+
+    Each non-blank line is split on whitespace; the first field is stored as
+    the value and the second field as the key (the caller treats these as
+    checksum and file name respectively).
+
+    :param path: directory containing the metadata file
+    :param repo_name: name used to build the ``.<repo_name>.metadata`` filename
+    :returns: dict mapping second field -> first field for every parsed line
+    :raises IndexError: when a non-blank line has fewer than two fields
+    """
+    result = {}
+    filename = "{}/.{}.metadata".format(path, repo_name)
+    debug("metadata file: {}".format(filename))
+    with open(filename) as metadata:
+        for line in metadata:
+            items = line.split()
+            if not items:
+                # Blank (or whitespace-only) line, e.g. a trailing newline at
+                # the end of the file: nothing to record.
+                continue
+            result[items[1]] = items[0]
+            debug('found {}: {}'.format(items[1], items[0]))
+    return result
+
+def get_hash(filename, hashfunc):
+    """Feed the contents of ``filename`` into ``hashfunc`` and return its digest.
+
+    The file is opened unbuffered in binary mode and read in 128 KiB pieces,
+    so it is never loaded into memory as a whole.
+
+    :param filename: path of the file to hash
+    :param hashfunc: hash object exposing ``update()`` and ``hexdigest()``;
+                     it is updated in place
+    :returns: ``hashfunc.hexdigest()`` after all data has been fed in
+    """
+    chunk_size = 128*1024
+    with open(filename, 'rb', buffering=0) as stream:
+        while True:
+            chunk = stream.read(chunk_size)
+            if chunk == b'':
+                break
+            hashfunc.update(chunk)
+    digest = hashfunc.hexdigest()
+    debug("digest is {}".format(digest))
+    return digest
+
+def check_file(filename, checksum):
+    """Verify that ``filename`` hashes to ``checksum``.
+
+    The hash algorithm is chosen from the length of the hex checksum:
+    32 -> md5, 40 -> sha1, 64 -> sha256, 128 -> sha512.
+
+    :param filename: path of the file to verify
+    :param checksum: expected hex digest; compared case-insensitively
+    :returns: None when the checksum matches
+    :raises Exception: when the checksum length maps to no known algorithm,
+                       or when the computed digest differs from ``checksum``
+
+    Errors from opening or reading the file (raised inside get_hash)
+    propagate unchanged.
+    """
+    debug("checking {} {}".format(filename, checksum))
+    # Map to the constructors, not to instances: only the algorithm that is
+    # actually needed gets instantiated, so an unrelated algorithm that cannot
+    # be constructed on this interpreter does not break the check.
+    constructors = {
+        32: hashlib.md5,
+        40: hashlib.sha1,
+        64: hashlib.sha256,
+        128: hashlib.sha512,
+    }
+    constructor = constructors.get(len(checksum))
+    if constructor is None:
+        raise Exception('Checksum length unsupported: {}'.format(checksum))
+    # hexdigest() yields lower-case hex, so normalise the expected value to
+    # accept metadata files that store upper-case digests.
+    if get_hash(filename, constructor()) != checksum.lower():
+        raise Exception("Checksum doesn't match: {} {}".format(filename, checksum))
+    debug("checksum ok")
+
+def download(url, destination, checksum):
+    """Fetch ``url`` into ``destination`` after verifying its checksum.
+
+    The response body is streamed into ``<destination>.tmp``, checked with
+    check_file(), and only then renamed to ``destination``. Whatever happens,
+    a leftover ``.tmp`` file is removed on the way out.
+
+    :param url: address to download
+    :param destination: final path of the downloaded file
+    :param checksum: expected hex digest of the downloaded data
+    :raises Exception: propagated from check_file() on a checksum problem;
+                       request and file errors propagate unchanged
+    """
+    staging = "{}.tmp".format(destination)
+    try:
+        debug("downloading {} to {}".format(url, staging))
+        with HTTP.request('GET', url, preload_content=False) as response:
+            with open(staging, 'wb') as sink:
+                shutil.copyfileobj(response, sink)
+        check_file(staging, checksum)
+        debug("renaming {} to {}".format(staging, destination))
+        os.rename(staging, destination)
+    finally:
+        # After a successful rename the staging file no longer exists, so the
+        # removal fails with OSError and is deliberately ignored.
+        try:
+            os.remove(staging)
+        except OSError:
+            pass
+        else:
+            debug("removed {}".format(staging))
+
+def get_sources(path, sources_list, logger):
+    """Make sure every file listed in the metadata file of ``path`` is present.
+
+    Each entry of ``<path>/.<repo>.metadata`` is verified with check_file().
+    When verification fails for any reason (missing file, wrong checksum,
+    ...), the file is looked up under ``<source>/<repo>`` for each source in
+    ``sources_list`` in order and downloaded from the first one that has it.
+
+    :param path: directory holding the metadata file and the source files
+    :param sources_list: iterable of base URLs to search
+    :param logger: object with ``debug`` and ``info`` methods; when truthy the
+                   module-level ``debug``/``verbose`` functions are rebound to
+                   them (this persists after the call). Pass a falsy value to
+                   keep the current ones.
+    :returns: None
+    :raises Exception: when a file cannot be found in any source; errors from
+                       the lookup or download propagate unchanged
+    """
+    if logger:
+        global debug
+        global verbose
+        debug = logger.debug
+        verbose = logger.info
+
+    repo = get_repo_name(path)
+    if not repo:
+        verbose('no metadata file in "{}".'.format(path))
+        return
+
+    for name, checksum in parse_metadatafile(path, repo).items():
+        filename = os.path.join(path, name)
+        try:
+            check_file(filename, checksum)
+        except Exception:
+            # Any verification failure means the local copy is unusable.
+            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
+            # SystemExit are not mistaken for a missing file.
+            for sources in sources_list:
+                repo_root = "{}/{}".format(sources, repo)
+                url = get_url(repo_root, checksum)
+                if url:
+                    debug("retrieving {} to {}".format(url, filename))
+                    download(url, filename, checksum)
+                    verbose('retrieved "{}"'.format(name))
+                    break
+            else:
+                # Loop finished without a break: no source had the file.
+                raise Exception('File "{}" not found'.format(checksum))