Pin pip to 20.3.3 and disable tmpfs in DIB
[ta/build-tools.git] / tools / convert.py
1 # Copyright 2019 Nokia
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import json
16 import re
17
18
def to_json(data):
    """Serialize ``data`` to a human-readable JSON string.

    Keys are emitted in sorted order and nested levels are indented by four
    spaces, so the output is stable across runs for the same input.

    :param data: any JSON-serializable object
    :return: the JSON text
    """
    dump_options = {'sort_keys': True, 'indent': 4}
    return json.dumps(data, **dump_options)
21
22
class CsvConverter(object):
    """Convert a list of dicts into CSV text.

    The input is turned into an intermediate table (``csv_data``) eagerly in
    the constructor: the first row holds the column headers and every further
    row holds the string form of one input dict. Rendering the table to CSV
    text happens on demand in :meth:`convert` / :meth:`convert_to_ms_excel`.

    :param data: list of dicts; the union of all keys forms the columns
    :param preferred_field_order: optional list of field names to place first,
                                  in the given order; names that do not occur
                                  in the data are ignored, all remaining
                                  fields follow in sorted order
    :param escape_newlines: forwarded to the formatter when rendering
    :raises Exception: if ``data`` is not a list, or its first element is not
                       a dict
    """

    # ``unicode`` on Python 2, ``str`` on Python 3. Looking the type up this
    # way avoids naming the Python 2-only ``unicode`` builtin, which raises
    # NameError on Python 3.
    _text_type = type(u'')

    def __init__(self, data, preferred_field_order=None, escape_newlines=True):
        self.data = data
        self.preferred_field_order = preferred_field_order
        self.escape_newlines = escape_newlines
        self.csv_data = None
        self._convert()

    def __str__(self):
        return self.convert()

    def convert(self):
        """Render the converted data as plain CSV text.

        :return: the CSV text produced by ``CsvFormatter``
        """
        return self._render(CsvFormatter(self.csv_data))

    def convert_to_ms_excel(self, text_fields=None):
        """
        CSV that Microsoft Excel can read well.

        :param text_fields: list of columns to mark as text
                            NOTE: must not be used for fields that can contain comma(,) or
                            semicolon(;) as field will be split from these
        :return: the CSV text produced by ``CsvMSFormatter``
        """
        return self._render(CsvMSFormatter(self.csv_data, text_fields=text_fields))

    @classmethod
    def _stringify(cls, field):
        """Return the cell text for a single field value.

        Lists and dicts are serialized as JSON with sorted keys. Everything
        else goes through ``str()``, so a missing value (``None``) becomes the
        literal text ``None``.
        """
        if isinstance(field, (list, dict)):
            return json.dumps(field, sort_keys=True)
        if isinstance(field, cls._text_type) and not isinstance(field, str):
            # Only reachable on Python 2, where unicode is distinct from str:
            # encode to a UTF-8 byte string as the original code did.
            return field.encode('utf-8')
        return str(field)

    def _convert(self):
        """Build ``self.csv_data`` (header row plus data rows) from ``self.data``."""
        if not isinstance(self.data, list):
            raise Exception('Input data given is NOT a list')
        if not self.data:
            self.csv_data = []
            return
        if not isinstance(self.data[0], dict):
            raise Exception('First data element is NOT a dict')

        # Union of the keys of all rows; order is fixed below.
        possible_fields = list({key for row in self.data for key in row.keys()})

        headers = []
        if self.preferred_field_order is not None:
            for preferred_field in self.preferred_field_order:
                if preferred_field in possible_fields:
                    headers.append(preferred_field)
                    possible_fields.remove(preferred_field)
        # Fields without a preferred position follow in sorted order.
        headers += sorted(possible_fields)

        self.csv_data = [headers]
        for obj in self.data:
            # obj.get() yields None for keys this row does not have.
            self.csv_data.append(
                [self._stringify(obj.get(header)) for header in headers])

    def _render(self, formatter):
        """Run ``formatter`` with this converter's newline-escaping setting."""
        return formatter.format(self.escape_newlines)
80
81
class CsvFormatter(object):
    """Render a table (list of rows, each a list of strings) as CSV text.

    Every field is wrapped in double quotes, fields are joined with commas
    and records are joined with CRLF. The three ``_*_formatter`` hooks can be
    overridden by subclasses to adjust the output at field, record or file
    level.

    :param csv_data: list of records, each a list of string fields
    """

    def __init__(self, csv_data):
        self.csv_data = csv_data

    def format(self, escape_newlines=True):
        """Return the CSV text for ``self.csv_data``.

        :param escape_newlines: when true, newline characters inside fields
                                are written as the two characters ``\\n``
        :return: the rendered CSV text
        """
        lines = []
        for row in self.csv_data:
            cells = [self._field_formatter(cell, escape_newlines) for cell in row]
            lines.append(','.join(self._record_formatter(cells)))
        return '\r\n'.join(self._file_formatter(lines))

    @staticmethod
    def _file_formatter(_file):
        """Hook for whole-file adjustments; the base class returns the lines as is."""
        return _file

    @staticmethod
    def _record_formatter(record):
        """Wrap every field of ``record`` in double quotes."""
        return list(map('"{}"'.format, record))

    @staticmethod
    def _field_formatter(field, escape_newlines):
        """Double any embedded quotes and optionally escape newlines in ``field``."""
        escaped = field.replace('"', '""')
        return escaped.replace('\n', '\\n') if escape_newlines else escaped
110
111
class CsvMSFormatter(CsvFormatter):
    """CSV formatter whose output is tailored for Microsoft Excel.

    Compared to the base formatter it:

    * prepends a ``sep=,`` line to the file,
    * prefixes the quoted value of every column listed in ``text_fields``
      with ``=`` (header row included) to mark it as text,
    * shortens fields longer than ``max_cell_size`` to their head and tail
      joined by ``...``,
    * prefixes fields that start with ``-`` but are not a negative integer
      with a backslash (presumably so Excel does not treat them as a
      formula -- confirm).

    :param csv_data: list of records; the first record is the header row
    :param text_fields: optional list of header names to mark as text
    """

    # Longest field emitted untouched; presumably chosen to stay below
    # Excel's per-cell character limit -- confirm.
    max_cell_size = 32000

    def __init__(self, csv_data, text_fields=None):
        super(CsvMSFormatter, self).__init__(csv_data)
        self.text_fields = text_fields

    def _file_formatter(self, _file):
        """Prepend the ``sep=,`` separator line to the rendered records."""
        return ['sep=,'] + super(CsvMSFormatter, self)._file_formatter(_file)

    def _record_formatter(self, record):
        """Quote the fields and mark the ``text_fields`` columns with ``=``."""
        record = super(CsvMSFormatter, self)._record_formatter(record)
        if not self.text_fields:
            return record
        # Columns are identified by the header found at the same position.
        headings = self.csv_data[0]
        return [
            '=' + field if headings[index] in self.text_fields else field
            for index, field in enumerate(record)
        ]

    def _field_formatter(self, field, escape_newlines):
        """Escape ``field`` like the base class, then apply the Excel tweaks."""
        field = super(CsvMSFormatter, self)._field_formatter(field, escape_newlines)
        if len(field) > self.max_cell_size:
            # Floor division keeps the slice bounds integers on Python 3 and
            # gives the same values as the former ``/`` did on Python 2.
            head = self.max_cell_size // 2
            tail = -self.max_cell_size // 2
            field = field[:head] + "..." + field[tail:]
        # NOTE: '.' does not match a newline, so a multi-line field starting
        # with '-' is left unescaped when newlines are not escaped.
        if not re.match(r'^-\d+$', field) and re.match(r'^-.*$', field):
            return r'\{}'.format(field)
        return field