3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
20 return json.dumps(data, sort_keys=True, indent=4)
23 class CsvConverter(object):
24 def __init__(self, data, preferred_field_order=None, escape_newlines=True):
26 self.preferred_field_order = preferred_field_order
27 self.escape_newlines = escape_newlines
35 return self._render(CsvFormatter(self.csv_data))
def convert_to_ms_excel(self, text_fields=None):
    """
    Render the prepared data as CSV that Microsoft Excel can read well.

    :param text_fields: list of columns to mark as text.
        NOTE: must not be used for fields that can contain a comma (,) or
        a semicolon (;), because the field would be split at those
        characters.
    :return: the result of rendering ``self.csv_data`` with a
        ``CsvMSFormatter``
    """
    ms_formatter = CsvMSFormatter(self.csv_data, text_fields=text_fields)
    return self._render(ms_formatter)
49 if not isinstance(self.data, list):
50 raise Exception('Input data given is NOT a list')
54 if not isinstance(self.data[0], dict):
55 raise Exception('First data element is NOT a dict')
57 possible_fields = list(set([key for i in self.data for key in i.keys()]))
58 if self.preferred_field_order is not None:
59 for preferred_field in self.preferred_field_order:
60 if preferred_field in possible_fields:
61 headers.append(preferred_field)
62 possible_fields.remove(preferred_field)
63 headers += sorted(possible_fields)
64 self.csv_data = [headers]
67 for header in headers:
68 field = obj.get(header)
69 if isinstance(field, (list, dict)):
70 x = json.dumps(field, sort_keys=True)
71 elif isinstance(field, unicode):
72 x = field.encode('utf-8')
76 self.csv_data.append(row_data)
78 def _render(self, formatter):
79 return formatter.format(self.escape_newlines)
class CsvFormatter(object):
    """Render a list of records as comma-separated text.

    Every field is wrapped in double quotes, embedded double quotes are
    doubled, fields are joined with commas and records with CRLF.

    Subclasses customise the output by overriding the three hooks
    ``_field_formatter``, ``_record_formatter`` and ``_file_formatter``.
    """

    def __init__(self, csv_data):
        """Store the records to render.

        :param csv_data: sequence of records, each a sequence of string
            fields
        """
        self.csv_data = csv_data

    def format(self, escape_newlines=True):
        """Return the stored records as one CSV string.

        :param escape_newlines: when true, carriage returns and line feeds
            inside a field are written as the two-character sequences
            backslash-r and backslash-n instead of raw line breaks
        :return: the CSV text; records are separated by CRLF and there is
            no trailing line break
        """
        lines = []
        for record in self.csv_data:
            fields = [self._field_formatter(field, escape_newlines)
                      for field in record]
            lines.append(','.join(self._record_formatter(fields)))
        return '\r\n'.join(self._file_formatter(lines))

    @staticmethod
    def _file_formatter(_file):
        """Hook applied to the list of rendered lines; identity here."""
        return _file

    @staticmethod
    def _record_formatter(record):
        """Wrap every (already escaped) field of ``record`` in quotes."""
        return ['"{}"'.format(i) for i in record]

    @staticmethod
    def _field_formatter(field, escape_newlines):
        """Escape one field so it is safe inside a quoted CSV cell.

        :param field: the field text
        :param escape_newlines: whether to replace line-break characters
            with literal escape sequences
        :return: the escaped text (not yet quoted)
        """
        out = field.replace('"', '""')
        if escape_newlines:
            # Escape CR as well as LF: a CRLF break inside a field would
            # otherwise leave a raw carriage return in the cell.
            out = out.replace('\r', '\\r').replace('\n', '\\n')
        return out
class CsvMSFormatter(CsvFormatter):
    """CSV variant aimed at Microsoft Excel.

    On top of the base formatting it:

    * puts a ``sep=,`` line in front of the records,
    * prefixes cells of the columns named in ``text_fields`` with ``=`` so
      the quoted value is marked as text,
    * shortens over-long fields by cutting out their middle, and
    * prefixes fields that start with ``-`` (other than plain negative
      integers) with a backslash.
    """

    # Fields longer than this many characters are shortened.
    # NOTE(review): presumably chosen to stay below Excel's per-cell
    # character limit -- confirm.
    max_cell_size = 32000

    def __init__(self, csv_data, text_fields=None):
        """
        :param csv_data: sequence of records; the first record supplies the
            column headings that ``text_fields`` is matched against
        :param text_fields: optional collection of headings whose cells are
            marked as text
        """
        super(CsvMSFormatter, self).__init__(csv_data)
        self.text_fields = text_fields

    def _file_formatter(self, _file):
        """Prepend the ``sep=,`` line to the rendered lines."""
        return ['sep=,'] + super(CsvMSFormatter, self)._file_formatter(_file)

    def _record_formatter(self, record):
        """Quote the fields, then mark the ``text_fields`` columns as text.

        :param record: escaped fields of one record, in column order
        :return: list of quoted fields, ``=``-prefixed where the column
            heading is listed in ``text_fields``
        """
        record = super(CsvMSFormatter, self)._record_formatter(record)
        if not self.text_fields:
            # Nothing to mark (None or empty): leave the record untouched.
            return record
        headings = self.csv_data[0]
        return ['=' + field if headings[index] in self.text_fields else field
                for index, field in enumerate(record)]

    def _field_formatter(self, field, escape_newlines):
        """Escape one field, shorten it if over-long and guard leading dashes.

        :param field: the field text
        :param escape_newlines: forwarded to the base implementation
        :return: the escaped (not yet quoted) field
        """
        field = super(CsvMSFormatter, self)._field_formatter(field, escape_newlines)
        if len(field) > self.max_cell_size:
            # Floor division keeps the slice bounds integral on Python 3;
            # with "/" they become floats and slicing raises TypeError.
            # NOTE(review): the cut is made after quotes were doubled, so
            # it can land between the two characters of a doubled quote.
            field = field[:self.max_cell_size // 2] + "..." + field[-self.max_cell_size // 2:]
        if not re.match(r'^-\d+$', field) and re.match(r'^-.*$', field):
            # Starts with "-" but is not a plain negative integer.
            # NOTE(review): presumably keeps Excel from reading the cell as
            # an expression -- confirm.
            return r'\{}'.format(field)
        return field