gerbv  2.6A
csv.c
Go to the documentation of this file.
1 /* csv - read write comma separated value format
2  * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
3  *
4  * The MIT License
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 /* We (Juergen Haas and Tomasz Motylewski) execute our rights given above
26  * to distribute and sublicence this file (csv.c) and csv.h, csv_defines.h
27  * under General Public Licence version 2 or any later version.
28  *
29  * This file is derived from libmba : A library of generic C modules
30  * http://www.ioplex.com/~miallen/libmba/dl/libmba-0.8.9.tar.gz
31  */
32 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif /* HAVE_CONFIG_H */
41 
42 
43 #include <stdlib.h>
44 #include <string.h>
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <errno.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 
51 #include "common.h"
52 #include "gerbv.h"
53 #include "csv.h"
54 #include "csv_defines.h"
55 #define ST_START 1
56 #define ST_COLLECT 2
57 #define ST_TAILSPACE 3
58 #define ST_END_QUOTE 4
59 #define istspace iswspace
60 
61 
/* Byte-oriented input source: either a stdio stream or an in-memory buffer. */
struct sinput {
    FILE *in;          /* stream to read from; when NULL, src/sn are used instead */
    const char *src;   /* next unread byte of the in-memory source */
    size_t sn;         /* bytes still available at src */
    size_t count;      /* characters handed out so far */
};
68 
69 
/* Wide-character input source backed by an in-memory buffer. */
struct winput {
    const wchar_t *src;   /* next unread wide character */
    size_t sn;            /* wide characters still available at src */
    size_t count;         /* characters handed out so far */
};
75 
76 
77 static int
78 snextch(struct sinput *in)
79 {
80  int ch;
81 
82  if (in->in) {
83  if ((ch = fgetc(in->in)) == EOF) {
84  if (ferror(in->in)) {
85  GERB_MESSAGE("errno:%d", errno);
86  return -1;
87  }
88  return 0;
89  }
90  } else {
91  if (in->sn == 0) {
92  return 0;
93  }
94  ch = (unsigned char) *(in->src)++;
95  in->sn--;
96  }
97  in->count++;
98 
99  return ch;
100 }/* snextch */
101 
102 
103 static int
104 wnextch(struct winput *in)
105 {
106  int ch;
107 
108  if (in->sn == 0) {
109  return 0;
110  }
111  ch = *(in->src)++;
112  in->sn--;
113  in->count++;
114 
115  return ch;
116 }/* wnextch */
117 
118 static int
119 csv_parse_str(struct sinput *in, char *buf, size_t bn, char *row[], int rn, int sep, int flags)
120 {
121  int trim, quotes, ch, state, r, j, t, inquotes;
122 
123  trim = flags & CSV_TRIM;
124  quotes = flags & CSV_QUOTES;
125  state = ST_START;
126  inquotes = 0;
127  ch = r = j = t = 0;
128 
129  memset(row, 0, sizeof(char *) * rn);
130 
131  while (rn && bn && (ch = snextch(in)) > 0) {
132  switch (state) {
133  case ST_START:
134  if (ch != '\n' && ch != sep && isspace(ch)) {
135  if (!trim) {
136  buf[j++] = ch; bn--;
137  t = j;
138  }
139  break;
140  } else if (quotes && ch == '"') {
141  j = t = 0;
142  state = ST_COLLECT;
143  inquotes = 1;
144  break;
145  }
146  state = ST_COLLECT;
147  case ST_COLLECT:
148  if (inquotes) {
149  if (ch == '"') {
150  state = ST_END_QUOTE;
151  break;
152  }
153  } else if (ch == sep || ch == '\n') {
154  row[r++] = buf; rn--;
155  buf[t] = '\0'; bn--;
156  buf += t + 1;
157  j = t = 0;
158 
159  state = ST_START;
160  inquotes = 0;
161  if (ch == '\n') {
162  rn = 0;
163  }
164  break;
165  } else if (quotes && ch == '"') {
166  errno = EILSEQ;
167  GERB_MESSAGE(_("%d: unexpected quote in element"),errno);
168  return -1;
169  }
170  buf[j++] = ch; bn--;
171  if (!trim || isspace(ch) == 0) {
172  t = j;
173  }
174  break;
175  case ST_TAILSPACE:
176  case ST_END_QUOTE:
177  if (ch == sep || ch == '\n') {
178  row[r++] = buf; rn--;
179  buf[j] = '\0'; bn--;
180  buf += j + 1;
181  j = t = 0;
182  state = ST_START;
183  inquotes = 0;
184  if (ch == '\n') {
185  rn = 0;
186  }
187  break;
188  } else if (quotes && ch == '"' && state != ST_TAILSPACE) {
189  buf[j++] = '"'; bn--; /* nope, just an escaped quote */
190  t = j;
191  state = ST_COLLECT;
192  break;
193  } else if (isspace(ch)) {
194  state = ST_TAILSPACE;
195  break;
196  }
197  errno = EILSEQ;
198  GERB_MESSAGE(_("%d: bad end quote in element"), errno);
199  return -1;
200  }
201  }
202  if (ch <= 0) {
203  /* treat EOF as EOL, so the last record is accepted even when
204  \n is not present. Some users parse strings, not lines */
205  if(state == ST_TAILSPACE || state == ST_END_QUOTE
206  || (state == ST_COLLECT && ! inquotes)) {
207  row[r++] = buf; rn--;
208  buf[j] = '\0'; bn--;
209  buf += j + 1;
210  inquotes = 0;
211  rn = 0;
212  } else {
213  // AMSG("");
214  return -1;
215  }
216  }
217  if (bn == 0) {
218  errno = E2BIG;
219  GERB_MESSAGE("E2BIG %d ", errno);
220  return -1;
221  }
222  if (rn) {
223  if (inquotes) {
224  errno = EILSEQ;
225  GERB_MESSAGE("EILSEQ %d ", errno);
226  return -1;
227  }
228  row[r] = buf;
229  buf[t] = '\0';
230  }
231  // return error if we can't read the minimum number of fields
232  if (r < 4) {
233  return -1;
234  }
235  return in->count;
236 }/* csv_parse_str */
237 
238 
239 static int
240 csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags)
241 {
242  int trim, quotes, state, r, j, t, inquotes;
243  wint_t ch;
244 
245  trim = flags & CSV_TRIM;
246  quotes = flags & CSV_QUOTES;
247  state = ST_START;
248  inquotes = 0;
249  ch = r = j = t = 0;
250 
251  memset(row, 0, sizeof(wchar_t *) * rn);
252 
253  while (rn && bn && (ch = wnextch(in)) > 0) {
254  switch (state) {
255  case ST_START:
256  if (ch != L'\n' && ch != sep && iswspace(ch)) {
257  if (!trim) {
258  buf[j++] = ch; bn--;
259  t = j;
260  }
261  break;
262  } else if (quotes && ch == L'"') {
263  j = t = 0;
264  state = ST_COLLECT;
265  inquotes = 1;
266  break;
267  }
268  state = ST_COLLECT;
269  case ST_COLLECT:
270  if (inquotes) {
271  if (ch == L'"') {
272  state = ST_END_QUOTE;
273  break;
274  }
275  } else if (ch == sep || ch == L'\n') {
276  row[r++] = buf; rn--;
277  buf[t] = L'\0'; bn--;
278  buf += t + 1;
279  j = t = 0;
280  state = ST_START;
281  inquotes = 0;
282  if (ch == L'\n') {
283  rn = 0;
284  }
285  break;
286  } else if (quotes && ch == L'"') {
287  errno = EILSEQ;
288  GERB_MESSAGE(_("%d: unexpected quote in element"), errno);
289  return -1;
290  }
291  buf[j++] = ch; bn--;
292  if (!trim || iswspace(ch) == 0) {
293  t = j;
294  }
295  break;
296  case ST_TAILSPACE:
297  case ST_END_QUOTE:
298  if (ch == sep || ch == L'\n') {
299  row[r++] = buf; rn--;
300  buf[j] = L'\0'; bn--;
301  buf += j + 1;
302  j = t = 0;
303  state = ST_START;
304  inquotes = 0;
305  if (ch == L'\n') {
306  rn = 0;
307  }
308  break;
309  } else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
310  buf[j++] = L'"'; bn--; /* nope, just an escaped quote */
311  t = j;
312  state = ST_COLLECT;
313  break;
314  } else if (iswspace(ch)) {
315  state = ST_TAILSPACE;
316  break;
317  }
318  errno = EILSEQ;
319  GERB_MESSAGE(_("%d: bad end quote in element "), errno);
320  return -1;
321  }
322  }
323  if (ch <= 0) {
324  /* treat EOF as EOL, so the last record is accepted even when
325  \n is not present. Some users parse strings, not lines */
326  if(state == ST_TAILSPACE || state == ST_END_QUOTE
327  || (state == ST_COLLECT && ! inquotes)) {
328  row[r++] = buf; rn--;
329  buf[j] = L'\0'; bn--;
330  buf += j + 1;
331  inquotes = 0;
332  rn = 0;
333  } else {
334  // AMSG("");
335  return -1;
336  }
337  }
338  if (bn == 0) {
339  errno = E2BIG;
340  GERB_MESSAGE("%d", errno);
341  return -1;
342  }
343  if (rn) {
344  if (inquotes) {
345  errno = EILSEQ;
346  GERB_MESSAGE("%d", errno);
347  return -1;
348  }
349  row[r] = buf;
350  buf[t] = L'\0';
351  }
352 
353  return in->count;
354 }/*csv_row_parse_wcs*/
355 
356 
357 int
358 csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim)
359 {
360  struct winput input;
361  input.src = src;
362  input.sn = sn;
363  input.count = 0;
364  return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
365 }/*csv_row_parse_wcs*/
366 
367 
368 int
369 csv_row_parse_str(const char *src, size_t sn, char *buf, size_t bn, char *row[], int rn, int sep, int trim)
370 {
371  struct sinput input;
372  input.in = NULL;
373  input.src = src;
374  input.sn = sn;
375  input.count = 0;
376  return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
377 }/*csv_row_parse_str*/
378 
379 
380 int
381 csv_row_fread(FILE *in, char *buf, size_t bn, char *row[], int numcols, int sep, int trim)
382 {
383  struct sinput input;
384  input.in = in;
385  input.count = 0;
386  return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
387 }/*csv_row_fread*/
388