Add initial code
[ta/build-tools.git] / tools / script / ci_build_diff.py
1 #!/usr/bin/env python
2 # Copyright 2019 Nokia
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 #     http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16 import logging
17 import sys
18 import argparse
19 from operator import itemgetter
20 from pprint import pformat
21
22 from tools.convert import CsvConverter
23 from tools.io import read_json, write_to, write_json
24 from tools.log import set_logging
25
26
class SwComponent(dict):
    """Dictionary describing one software component of a build.

    Behaves exactly like a ``dict`` and adds read-only shortcuts for the
    keys this script works with.  'Name', 'Version' and 'Source RPM' are
    read by index, so a missing key raises ``KeyError``; 'Release' is
    optional and reads as ``None`` when absent.
    """

    @property
    def name(self):
        """Value stored under the 'Name' key."""
        return self['Name']

    @property
    def version(self):
        """Value stored under the 'Version' key."""
        return self['Version']

    @property
    def foss_id(self):
        """Tuple ``(name, version, release)`` identifying the component.

        The release element is ``None`` when there is no 'Release' key.
        """
        release = self.get('Release')
        return (self.name, self.version, release)

    def __str__(self):
        """Render the component as ``name:version(source rpm)``."""
        # Keys are looked up in the same order as they appear in the output.
        fields = (self.name, self.version, self['Source RPM'])
        return '{}:{}({})'.format(*fields)
45
46
class BuildDiffReader(object):
    """Compute the component differences between two builds and export them.

    A build is a list of component mappings.  Each mapping is indexed by
    'Name', 'Version' and 'Source RPM'; 'Release' and 'License' are read
    with ``get`` and may be absent.

    Attributes set by :meth:`read`:
        changes: dict with the keys 'counts', 'added', 'removed' and
            'changed' as returned by :meth:`read_build_diff`.
        summary: dict with 'input' (component counts of both builds) and
            'output' (string rendering of ``changes``).
    """

    def __init__(self):
        self.changes = {}
        self.summary = {}

    @staticmethod
    def get_component_names(data):
        """Return the 'Name' of every component in ``data``, in input order.

        Duplicates are kept.
        """
        return [component['Name'] for component in data]

    @staticmethod
    def get_components(name, components):
        """Return the components called ``name`` as sorted ``SwComponent``s.

        The result is ordered by ('Name', 'Version', 'Source RPM').
        """
        matching = [SwComponent(component) for component in components
                    if component['Name'] == name]
        return sorted(matching, key=itemgetter('Name', 'Version', 'Source RPM'))

    @staticmethod
    def _group_by_name(components):
        """Group ``components`` by 'Name' in a single pass.

        Returns a dict mapping each name to what ``get_components`` would
        return for it, without rescanning the whole list once per name.
        """
        groups = {}
        for component in components:
            groups.setdefault(component['Name'], []).append(SwComponent(component))
        sort_key = itemgetter('Name', 'Version', 'Source RPM')
        return {name: sorted(group, key=sort_key) for name, group in groups.items()}

    @staticmethod
    def _sorted_foss_ids(components):
        """Return the components' ``foss_id`` tuples in a canonical order.

        ``None`` fields (e.g. a missing 'Release') sort before any other
        value, so ids with and without a release can be ordered without
        comparing ``None`` to a string.
        """
        def none_first(foss_id):
            return tuple((field is not None, field) for field in foss_id)

        return sorted((component.foss_id for component in components), key=none_first)

    def read(self, json_old, json_new):
        """Load both builds with ``read_json`` and fill ``changes``/``summary``.

        Args:
            json_old: path of the components json of the old build.
            json_new: path of the components json of the new build.
        """
        old_build = read_json(json_old)
        new_build = read_json(json_new)
        self.summary['input'] = {'from': len(old_build), 'to': len(new_build)}
        self.changes = self.read_build_diff(old_build, new_build)
        self.summary['output'] = self._generate_summary(self.changes)

    @staticmethod
    def _generate_summary(changes):
        """Return ``changes`` with every component replaced by ``str(component)``.

        The 'counts' entry is passed through unchanged.
        """
        def as_text(components):
            return [str(component) for component in components]

        return {
            'counts': changes['counts'],
            'added': {name: as_text(group)
                      for name, group in changes['added'].items()},
            'removed': {name: as_text(group)
                        for name, group in changes['removed'].items()},
            'changed': {name: {'old': as_text(change['old']),
                               'new': as_text(change['new'])}
                        for name, change in changes['changed'].items()},
        }

    def read_build_diff(self, old_build, new_build):
        """Compare two builds by component name.

        A name only in ``new_build`` is 'added', a name only in
        ``old_build`` is 'removed', and a name in both is 'changed' when
        the sets of ``foss_id`` tuples (with multiplicity) differ.

        Returns:
            dict with 'counts' (number of added/changed/removed names),
            'added' and 'removed' (name -> list of ``SwComponent``) and
            'changed' (name -> {'old': [...], 'new': [...]}).
        """
        old_names = self.get_component_names(old_build)
        logging.debug('Old names: %s', old_names)
        new_names = self.get_component_names(new_build)
        logging.debug('New names: %s', new_names)

        # Group each build once instead of filtering the full list per name.
        old_by_name = self._group_by_name(old_build)
        new_by_name = self._group_by_name(new_build)
        old_set = set(old_by_name)
        new_set = set(new_by_name)

        added = {name: new_by_name[name] for name in new_set - old_set}
        self._mark('added', [c for group in added.values() for c in group])
        removed = {name: old_by_name[name] for name in old_set - new_set}
        self._mark('removed', [c for group in removed.values() for c in group])

        changed = {}
        for name in old_set & new_set:
            old_components = old_by_name[name]
            new_components = new_by_name[name]
            if self._sorted_foss_ids(old_components) != \
                    self._sorted_foss_ids(new_components):
                changed[name] = {'old': old_components, 'new': new_components}
                self._mark('changed old', old_components)
                self._mark('changed new', new_components)

        return dict(counts=dict(added=len(added),
                                changed=len(changed),
                                removed=len(removed)),
                    added=added,
                    removed=removed,
                    changed=changed)

    @staticmethod
    def _mark(title, components):
        """Debug-log the ``foss_id`` of ``components`` under a '[MARK]' tag.

        The tag is prepended only when ``title`` does not already start
        with it, so the message never carries the tag twice.
        """
        tag = '[MARK]'
        if not title.startswith(tag):
            title = '{} {}'.format(tag, title)
        logging.debug('%s: %s', title,
                      pformat([component.foss_id for component in components]))

    @staticmethod
    def _component_cells(prefix, components):
        """Return the three ``<prefix>_*`` csv cells for ``components``.

        Each cell holds one line per component.
        """
        return {
            prefix + '_components': '\n'.join([str(c) for c in components]),
            prefix + '_srpms': '\n'.join([c['Source RPM'] for c in components]),
            prefix + '_licenses': '\n'.join(
                [c.get('License', 'Unknown') for c in components]),
        }

    @staticmethod
    def _get_csv_cells(name, old_components, new_components):
        """Build one csv row for component ``name``.

        The row always has 'name'.  The 'old_*' cells are present only
        when ``old_components`` is non-empty, likewise the 'new_*' cells.
        """
        cells = dict(name=name)
        if old_components:
            cells.update(BuildDiffReader._component_cells('old', old_components))
        if new_components:
            cells.update(BuildDiffReader._component_cells('new', new_components))
        return cells

    def write_csv(self, path):
        """Write ``self.changes`` to ``path`` as csv, one row per name.

        Rows are sorted by name; the text comes from
        ``CsvConverter.convert_to_ms_excel``.
        """
        rows = [self._get_csv_cells(name, [], components)
                for name, components in self.changes['added'].items()]
        rows.extend(self._get_csv_cells(name, components, [])
                    for name, components in self.changes['removed'].items())
        rows.extend(self._get_csv_cells(name, change['old'], change['new'])
                    for name, change in self.changes['changed'].items())

        csv = CsvConverter(sorted(rows, key=itemgetter('name')),
                           preferred_field_order=['name',
                                                  'old_components', 'old_srpms', 'old_licenses',
                                                  'new_components', 'new_srpms', 'new_licenses'],
                           escape_newlines=False)
        write_to(path, csv.convert_to_ms_excel())
149
150
def parse(args):
    """Parse the command-line arguments of the build diff tool.

    Args:
        args: list of argument strings, without the program name.

    Returns:
        The ``argparse`` namespace with ``verbose``, ``components_json_1``,
        ``components_json_2``, ``output_json`` and ``output_csv``.
    """
    cli = argparse.ArgumentParser(description='Outputs RPM changes between two CI builds')
    cli.add_argument('--verbose', '-v', action='store_true',
                     help='More verbose logging')
    # Both positionals share the same help text; the first is the build
    # compared from, the second the build compared to.
    for positional in ('components_json_1', 'components_json_2'):
        cli.add_argument(positional,
                         help='Components json file path (CI build artifact)')
    cli.add_argument('--output-json', help='output to json file')
    cli.add_argument('--output-csv', help='output to $MS csv file')
    return cli.parse_args(args)
164
165
def main(input_args):
    """Diff two builds, log the summary and write the requested outputs.

    Args:
        input_args: list of command-line argument strings, without the
            program name.
    """
    options = parse(input_args)
    set_logging(debug=options.verbose)

    reader = BuildDiffReader()
    reader.read(options.components_json_1, options.components_json_2)
    logging.info('----- SUMMARY ------\n{}'.format(pformat(reader.summary)))

    # Each output is written only when its path option was given.
    json_path = options.output_json
    if json_path:
        write_json(json_path, reader.changes)
    csv_path = options.output_csv
    if csv_path:
        reader.write_csv(csv_path)
176
177
# Script entry point: hand the command-line arguments (program name
# stripped) to main() when the file is executed directly.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)