4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
# Module-wide urllib3 pool manager; get_url() and download() issue their GET
# requests through it.
24 HTTP = urllib3.PoolManager()
33 return filter(lambda path: path[0] != '.', lst)
def _list_hrefs(url):
    """Fetch *url* and return the ``href`` of every ``<a>`` element in the page.

    A response with an HTTP error status (>= 400) yields an empty list, so an
    error page is never mistaken for a directory listing.
    """
    debug("http get {}".format(url))
    response = HTTP.request('GET', url)
    if response.status >= 400:
        debug("http get {} failed with status {}".format(url, response.status))
        return []
    return lxml.html.fromstring(response.data).xpath('//a/@href')


def get_url(url, file_hash):
    """Locate *file_hash* one directory level below the listing at *url*.

    The page at *url* is fetched, every link kept by ``filter_dot`` is fetched
    in turn, and the first sub-listing that links to *file_hash* wins.

    Args:
        url: base URL of an HTML directory listing.
        file_hash: link text to look for inside each sub-listing.

    Returns:
        The URL of the matching file, or None when no sub-listing links to it.
    """
    # Strip trailing slashes only when joining; the listing itself is fetched
    # from the URL exactly as given.
    base = url.rstrip('/')
    for link in filter_dot(_list_hrefs(url)):
        path = '{}/{}'.format(base, link)
        if file_hash in _list_hrefs(path):
            # Directory links usually end with '/', but do not rely on it:
            # normalise so the result is always "<dir>/<file_hash>".
            return '{}/{}'.format(path.rstrip('/'), file_hash)
    return None
# Derive the repository name from the ".<name>.metadata" entry found in `path`.
#
# path: directory whose entries are listed with os.listdir().
# Raises Exception('Multiple metadata files: ...') - presumably when more than
# one entry matches the pattern; TODO confirm the guarding condition.
# NOTE(review): presumably returns `repo_name`, and the no-match case is
# presumably handled before meta[0] is read - TODO confirm both.
47 def get_repo_name(path):
# Group 1 captures the text between the leading dot and the suffix.
# NOTE(review): the "." before "metadata" is unescaped, so it matches any
# single character, not only a literal dot.
48 regex = re.compile(r'^\.([^.]*).metadata$')
49 meta = list(filter(regex.match, os.listdir(path)))
53 raise Exception('Multiple metadata files: {}'.format(", ".join(meta)))
54 repo_name = regex.search(meta[0]).group(1)
55 debug("repo name is {}".format(repo_name))
# Read "<path>/.<repo_name>.metadata" and collect its entries into `result`.
#
# Each entry is stored as result[items[1]] = items[0]. get_sources() iterates
# the return value with .items(), joining each key onto `path` as a file name
# and passing each value to check_file() as the expected checksum.
# NOTE(review): `items` is presumably the split fields of one line of the
# file, and `result` is presumably what gets returned - TODO confirm.
58 def parse_metadatafile(path, repo_name):
60 filename = "{}/.{}.metadata".format(path, repo_name)
61 debug("metadata file: {}".format(filename))
62 with open(filename) as metadata:
65 result[items[1]] = items[0]
66 debug('found {}: {}'.format(items[1], items[0]))
# Feed the contents of `filename` into `hashfunc` and compute the hex digest.
#
# filename: path of the file to hash; opened unbuffered in binary mode.
# hashfunc: hash object providing update() and hexdigest(); check_file()
#           passes hashlib objects. It is updated in place.
# check_file() compares this function's result with the expected checksum, so
# `digest` is presumably returned after the debug call - TODO confirm.
69 def get_hash(filename, hashfunc):
70 with open(filename, 'rb', buffering=0) as contents:
# Read 128 KiB at a time until read() returns b'', so the file is hashed
# chunk by chunk instead of being loaded whole.
71 for buffer in iter(lambda: contents.read(128*1024), b''):
72 hashfunc.update(buffer)
73 digest = hashfunc.hexdigest()
74 debug("digest is {}".format(digest))
# Verify that the file `filename` hashes to `checksum`.
#
# The hash object is picked from `hashmap` by len(checksum); the table maps at
# least 64 to SHA-256 and 128 to SHA-512 (the lengths of their hex digests).
# Raises Exception when the checksum length has no entry in `hashmap`, or when
# the digest computed by get_hash() differs from `checksum`.
77 def check_file(filename, checksum):
78 debug("checking {} {}".format(filename, checksum))
82 64 : hashlib.sha256(),
83 128 : hashlib.sha512()
85 if len(checksum) not in hashmap:
# NOTE(review): "lenght" in the message below is a typo for "length".
86 raise Exception('Checksum lenght unsupported: {}'.format(checksum))
87 if get_hash(filename, hashmap[len(checksum)]) != checksum:
88 raise Exception("Checksum doesn't match: {} {}".format(filename, checksum))
# Download `url` to `destination`, verifying `checksum` before the file is put
# in place.
#
# The body is written to "<destination>.tmp", checked with check_file(), and
# only then renamed to `destination`.
# NOTE(review): the "removed ..." debug message implies the temporary file is
# deleted on some path, presumably when the download or check fails - TODO
# confirm where the removal happens and whether the error is re-raised.
91 def download(url, destination, checksum):
92 tmpfile = "{}.tmp".format(destination)
94 debug("downloading {} to {}".format(url, tmpfile))
# preload_content=False leaves the body unread so copyfileobj can stream it
# to disk.
95 with HTTP.request('GET', url, preload_content=False) as resp, open(tmpfile, 'wb') as out_file:
96 shutil.copyfileobj(resp, out_file)
97 check_file(tmpfile, checksum)
98 debug("renaming {} to {}".format(tmpfile, destination))
99 os.rename(tmpfile, destination)
103 debug("removed {}".format(tmpfile))
# Check every file listed in the metadata of `path`, fetching files from the
# locations in `sources_list`.
#
# path:         directory holding the metadata file and the files it lists.
# sources_list: iterable of base URLs; each is probed as "<source>/<repo>".
# logger:       object whose info() method receives the progress messages.
# Raises Exception('File "..." not found') - presumably once no entry of
# `sources_list` provided the file; TODO confirm.
# NOTE(review): what triggers the download attempts (presumably a failing
# check_file() on the local file) and what happens after the "no metadata
# file" message cannot be established from the statements below - TODO confirm.
107 def get_sources(path, sources_list, logger):
112 verbose = logger.info
114 repo = get_repo_name(path)
116 verbose('no metadata file in "{}".'.format(path))
# k is a file name relative to `path`, v the checksum passed to check_file().
119 for k, v in parse_metadatafile(path, repo).items():
120 filename = os.path.join(path, k)
122 check_file(filename, v)
125 for sources in sources_list:
126 repo_root = "{}/{}".format(sources, repo)
# The remote file is looked up by its checksum `v`, not by its name `k`.
127 url = get_url(repo_root, v)
129 debug("retrieving {} to {}".format(url, filename))
130 download(url, filename, v)
131 verbose('retrieved "{}"'.format(k))
# NOTE(review): this message reports the checksum `v`, not the file name `k`.
135 raise Exception('File "{}" not found'.format(v))