4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
16 # pylint: disable=too-many-instance-attributes,too-many-arguments
24 from pprint import pformat
28 from tools.rpm import RpmInfoParser
29 from tools.utils import apply_jenkins_auth
30 from tools.yum import Yum, YumInfoParser
31 from tools.repository import RepositoryConfig
32 from tools.log import set_logging
33 from tools.io import read_from, write_to, read_json
34 from tools.convert import to_json, CsvConverter
36 LOCAL_REPOS = ['localrepo', 'anaconda']
39 class RpmDataBuilder(object):
40 def __init__(self, build_config, yum_info_installed, rpm_info_installed,
41 crypto_info_installed, boms, remote=False):
43 self.yum_info_installed = yum_info_installed
44 self.rpm_info_installed = rpm_info_installed
45 self.crypto_info_installed = json.loads(crypto_info_installed)
47 logging.debug('BOMS: {}'.format(pformat(self.boms)))
48 self.repoconfig = RepositoryConfig(build_config)
49 self.installed_rpms = None
53 self.installed_rpms = self.read_installed_rpms()
54 srpms = set([rpm['Source RPM'] for rpm in self.installed_rpms])
55 logging.info('Installed RPMs:{} SRPMs:{}'.format(len(self.installed_rpms), len(srpms)))
56 self.repos = self._read_configured_repos()
57 logging.info('Configured repos: {}'.format(len(self.repos)))
58 available_rpms = self._read_available_rpms(self.repos)
59 logging.info('Found {} available RPMs in binary repos'.format(
60 len([rpm for repo_rpms in available_rpms.values() for rpm in repo_rpms])))
61 for i_rpm in self.installed_rpms:
62 i_rpm_repo_name = self._get_rpm_available_in(i_rpm, available_rpms)
63 i_rpm['Repo data'] = self._get_repo(i_rpm_repo_name)
64 i_rpm['Obsoletes'] = self._resolve_obsoletes(i_rpm)
65 i_rpm['Crypto capable'] = self._resolve_ecc(str(i_rpm))
66 i_rpm['BOM'] = self._resolve_bom(i_rpm)
67 self._log_repo_rpm_statistics()
68 self._log_rpm_statistics()
69 return self.installed_rpms
72 def _resolve_obsoletes(rpm):
73 if 'Obsoletes' not in rpm:
75 elif rpm['Obsoletes'] == '(none)':
77 return rpm['Obsoletes']
79 def _resolve_ecc(self, rpm):
80 for item in self.crypto_info_installed:
81 if item['name'] == rpm:
85 def _resolve_bom(self, rpm):
86 bom_content = self.boms.get(str(rpm))
87 if bom_content is None:
89 self._validate_bom(str(rpm), bom_content)
90 return bom_content['bom']
93 def _validate_bom(rpm_name, bom_content):
95 if 'bom' not in bom_content:
96 raise Exception('BOM base object "bom" missing')
97 bom = bom_content['bom']
99 for key in ['name', 'version', 'source-url', 'foss']:
100 if key not in material:
101 raise Exception('Key "{}" not found in BOM'.format(key))
102 if material['foss'].lower() not in ['yes', 'no', 'modified']:
103 raise Exception('BOM foss value not valid')
104 missing_crypto_count = len([material for material in bom if
105 'crypto-capable' not in material])
106 if missing_crypto_count != 0:
108 'crypto-capable missing from %s materials in RPM %s',
109 missing_crypto_count, rpm_name)
110 except Exception as e:
111 correct_format = {'bom': [
112 {'name': '<component-name>',
113 'version': '<component-version>',
114 'source-url': '<source-url>',
115 'foss': '<yes/no/modified>',
116 'crypto-capable': '<true/false (OPTIONAL)>'}]}
117 msg_fmt = 'BOM for {rpm} is not correct format. {error}:\n{correct_format}'
118 raise Exception(msg_fmt.format(rpm=rpm_name,
120 correct_format=pformat(correct_format)))
122 def _get_repo(self, name):
124 if r['name'] == name:
126 raise Exception('No repository found with name: {}'.format(name))
128 def read_installed_rpms(self):
130 yum_rpms = YumInfoParser().parse_installed(self.yum_info_installed)
131 rpm_rpms = RpmInfoParser().parse_multiple(self.rpm_info_installed)
132 self._validate_rpm_lists_identical(yum_rpms, rpm_rpms)
133 yum_rpms_dict = {rpm['Name']: rpm for rpm in yum_rpms}
134 for rpm_data in rpm_rpms:
135 yum_data = yum_rpms_dict[rpm_data['Name']]
136 combined_data = self._combine_rpm_data(rpm_data, yum_data)
137 installed_rpms.append(combined_data)
138 logging.debug('One parsed RPM data as example:\n{}'.format(pformat(installed_rpms[0])))
139 return installed_rpms
141 def _combine_rpm_data(self, rpm_data, yum_data):
142 combined_data = copy.deepcopy(rpm_data)
143 fields_known_to_differ = ['Description', # May contain deffering newline and indentation
144 'Size'] # Bytes in RPM, humanreadable in yum
145 yum2rpm_field_name_map = {'Arch': 'Architecture'}
146 for yum_key in yum_data:
147 if yum_key in yum2rpm_field_name_map:
148 rpm_key = yum2rpm_field_name_map[yum_key]
151 if rpm_key in combined_data:
152 yum_comparable_rpm_string = self._rpm_info_str_to_yum_info_str(
153 combined_data[rpm_key])
154 if yum_comparable_rpm_string != yum_data[yum_key]:
155 if rpm_key in fields_known_to_differ:
158 'RPM data in "{}" not match in rpm "{}" vs yum "{}" for package {}'.format(
160 repr(combined_data[rpm_key]),
161 repr(yum_data[yum_key]),
164 combined_data[rpm_key] = yum_data[yum_key]
168 def _rpm_info_str_to_yum_info_str(string):
171 except (UnicodeEncodeError, UnicodeDecodeError):
172 return re.sub(r'[^\x00-\x7F]+', '?', string)
173 except Exception as e:
174 logging.error('{}: for string {}'.format(str(e), repr(string)))
179 def _validate_rpm_lists_identical(yum_rpms, rpm_rpms):
180 yum_rpms_dict = {rpm['Name']: rpm for rpm in yum_rpms}
181 rpm_rpms_dict = {rpm['Name']: rpm for rpm in rpm_rpms}
182 if len(yum_rpms) != len(rpm_rpms):
184 'Given RPM lists are unequal: yum RPM count {} != rpm RPM count {}'.format(
185 len(yum_rpms), len(rpm_rpms)))
186 assert sorted(yum_rpms_dict.keys()) == sorted(rpm_rpms_dict.keys())
187 for name in yum_rpms_dict.keys():
188 if not yum_rpms_dict[name].is_same_package_as(rpm_rpms_dict[name]):
190 'Packages are not same: yum {} != rpm {}'.format(yum_rpms_dict[name],
191 rpm_rpms_dict[name]))
193 def _read_configured_repos(self):
194 repos = self.repoconfig.read_sections(
195 ['baseimage-repositories', 'repositories'])
196 if 'BUILD_URL' in os.environ:
197 repos.append(self.repoconfig.get_localrepo(remote=True))
199 repos.append(self.repoconfig.get_localrepo(remote=False))
200 logging.debug('Configured repos: {}'.format(pformat(repos)))
203 def _read_available_rpms(self, repos):
204 Yum.clean_and_remove_cache()
208 if name == 'localrepo':
210 url = self.repoconfig.get_localrepo(remote=True)['baseurl']
211 yum.add_repo(name, apply_jenkins_auth(url))
213 url = self.repoconfig.get_localrepo(remote=False)['baseurl']
214 yum.add_repo(name, url)
216 yum.add_repo(name, repo['baseurl'])
217 yum_available_output = yum.read_all_packages()
218 available_rpms = YumInfoParser().parse_available(yum_available_output)
220 for rpm in available_rpms:
221 repo = rpm.get('Repo')
222 if repo not in rpms_per_repo:
223 rpms_per_repo[repo] = []
224 rpms_per_repo[repo].append(rpm)
227 def _log_repo_rpm_statistics(self):
228 logging.info('--- RPM repo statistics ---')
229 for repo in self.repos:
231 repo_url = repo['baseurl']
232 if name in [r['name'] for r in self._get_nonerepos()]:
233 expected_from_repo = None
235 expected_from_repo = name
236 repo_installed_rpm_count = len([rpm for rpm in self.installed_rpms if
237 rpm['Repo data']['baseurl'] == repo_url and rpm.get(
238 'From repo') == expected_from_repo])
240 'RPMs installed from repo "{}": {}'.format(name, repo_installed_rpm_count))
241 if repo_installed_rpm_count is 0:
243 'Repository configured but no RPMs installed: {}={}'.format(name, repo_url))
245 return self.installed_rpms
247 def _log_rpm_statistics(self):
248 def _get_count(func):
249 return len([rpm for rpm in self.installed_rpms if func(rpm)])
251 logging.info('----- RPMs per type -----')
252 logging.info(' => Total: %s', len(self.installed_rpms))
253 logging.info('----- RPMs per attribute -----')
254 logging.info(' * Crypto capable: %s', _get_count(lambda rpm: rpm['Crypto capable']))
255 logging.info(' * Complex (BOM): %s', _get_count(lambda rpm: rpm['BOM']))
257 def _get_rpm_available_in(self, rpm, available_rpms):
258 if 'From repo' in rpm.keys():
259 if rpm['From repo'] in LOCAL_REPOS:
261 available_repo_rpms = available_rpms[rpm['From repo']]
262 for a_rpm in available_repo_rpms:
263 if self._is_same_rpm(a_rpm, rpm):
264 return rpm['From repo']
265 rpms_in_matching_repo = [str(a_rpm) for a_rpm in available_repo_rpms]
266 rpms_with_matching_name = [str(a_rpm) for a_rpm in available_repo_rpms if
267 rpm['Name'] == a_rpm['Name']]
268 if len(rpms_in_matching_repo) <= 1000:
270 'Available RPMs in {}: {}'.format(rpm['From repo'], rpms_in_matching_repo))
271 error_str = 'RPM "{}" is not available in configured repo: {}, ' \
272 'RPMs with correct name: {}'.format(str(rpm), rpm['From repo'],
273 rpms_with_matching_name)
274 raise Exception(error_str)
276 none_repos = self._get_nonerepos()
277 for repo in [r['name'] for r in none_repos]:
278 for a_rpm in available_rpms[repo]:
279 if self._is_same_rpm(a_rpm, rpm):
281 msg = 'RPM "{}" is not available in any configured "none*" repos: {}'.format(
282 rpm['Name'], none_repos)
285 def _get_nonerepos(self):
286 return [repo for repo in self.repos if re.match(r'^none\d+$', repo['name'])]
289 def _is_same_rpm(rpm1, rpm2):
290 return rpm1['Name'] == rpm2['Name'] and \
291 rpm1['Version'] == rpm2['Version'] and \
292 rpm1['Release'] == rpm2['Release'] and \
293 rpm1['Arch'] == rpm2['Architecture']
297 p = argparse.ArgumentParser(
298 description='Generate package info',
299 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
300 p.add_argument('--verbose', '-v', action='store_true',
301 help='More verbose logging')
302 p.add_argument('--yum-info-path', required=True,
303 help='"yum info all" output as file')
304 p.add_argument('--rpm-info-path', required=True,
305 help='"rpm -qai" output as file')
306 p.add_argument('--crypto-info-path',
307 help='Dir from where to find ECC file')
308 p.add_argument('--boms-path',
309 help='Dir from where to find RPM bill of material files')
310 p.add_argument('--output-rpmlist',
311 help='output as rpm list like "rpm-qa"')
312 p.add_argument('--output-json',
313 help='output json file path')
314 p.add_argument('--output-csv',
315 help='output csv file path')
316 p.add_argument('--output-ms-csv',
317 help='output Microsoft Excel compatible csv file path')
318 p.add_argument('--build-config-path', required=True,
319 help='Build configuration ini path')
320 p.add_argument('--remote', action='store_true',
321 help='Read localrepo from remote defined by BUILD_URL, '
322 'otherwise use localrepo from WORKSPACE')
323 args = p.parse_args(args)
327 def read_files(boms_dir):
329 for f in os.listdir(boms_dir):
330 boms[f] = read_json(boms_dir + '/' + f)
334 def main(input_args):
335 args = parse(input_args)
337 set_logging(debug=True, timestamps=True)
339 set_logging(debug=False)
340 rpmdata = RpmDataBuilder(args.build_config_path,
341 read_from(args.yum_info_path),
342 read_from(args.rpm_info_path),
343 read_from(args.crypto_info_path),
344 read_files(args.boms_path),
345 remote=args.remote).run()
346 if args.output_rpmlist:
347 write_to(args.output_rpmlist, '\n'.join(sorted([str(rpm) for rpm in rpmdata])))
349 write_to(args.output_json, to_json(rpmdata))
350 csv = CsvConverter(rpmdata, preferred_field_order=['Name', 'Version', 'Release',
351 'License', 'Vendor', 'From repo',
354 write_to(args.output_csv, str(csv))
355 if args.output_ms_csv:
356 write_to(args.output_ms_csv,
357 csv.convert_to_ms_excel(text_fields=['Version', 'Size', 'Release']))
358 if not args.output_json and not args.output_csv:
362 if __name__ == "__main__":