csv

csv reading library
git clone git://bvnf.space/csv.git
Log | Files | Refs

libcsv.c (3646B)


      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "csv.h"

/* MAX_LINE is used both as the size in bytes of the scratch buffer that
 * collects one field while parsing, and as the number of header pointers
 * allocated by csv_create.
 */
#define MAX_LINE 2048

struct csv *
csv_create(void) {
	struct csv *c;
	c = malloc(sizeof c + sizeof(char *) + sizeof(float *));
	if (c == NULL)
		return NULL;

	c->cols = 0;
	c->rows = 0;

	char **h = malloc(sizeof (char *) * MAX_LINE);
	if (h == NULL)
		return NULL;
	c->headers = h;

	float **d = malloc(sizeof (float *));
	if (d == NULL)
		return NULL;
	c->data = d;

	return c;
}

/* csv_destroy releases everything owned by c: each header string, each data
 * row, the header and data pointer arrays, and c itself.
 * Passing NULL is a no-op.
 */
void
csv_destroy(struct csv *c) {
	int i;

	if (c == NULL)
		return;

	if (c->headers != NULL) {
		for (i = 0; i < c->cols; i++)
			free(c->headers[i]);
		/* the pointer array itself is a separate allocation */
		free(c->headers);
	}
	if (c->data != NULL) {
		for (i = 0; i < c->rows; i++)
			free(c->data[i]);
		free(c->data);
	}
	free(c);
}

/* read_header reads each header field name from the first line of f into
 * header[0..n-1] and returns n, the number of columns.
 * Each header[i] is a separately malloc'd, NUL-terminated string that the
 * caller must free. header must have room for at least MAX_LINE pointers.
 * The line may end with '\n' or with end-of-file; an empty file yields 0.
 * Field names longer than MAX_LINE - 1 bytes are truncated with a warning.
 * -1 is returned on error (allocation failure or more than MAX_LINE fields),
 * in which case every string allocated so far has been freed.
 */
static int
read_header(FILE *f, char **header) {
	int ncols = 0;
	size_t len = 0;     /* bytes collected for the current field */
	int truncated = 0;  /* already warned about the current field? */
	int c;
	char *field = malloc(MAX_LINE);

	if (field == NULL) {
		perror("malloc");
		return -1;
	}

	for (;;) {
		c = fgetc(f);
		if (c != ',' && c != '\n' && c != EOF) {
			/* keep one byte spare for the terminator */
			if (len < (size_t)MAX_LINE - 1) {
				field[len++] = (char)c;
			} else if (!truncated) {
				fprintf(stderr, "warning: header field %d truncated to %d characters\n",
				    ncols + 1, MAX_LINE - 1);
				truncated = 1;
			}
			continue;
		}

		/* end-of-file before anything was read: there is no header */
		if (c == EOF && len == 0 && ncols == 0)
			break;

		if (ncols >= MAX_LINE) {
			fprintf(stderr, "error: more than %d header fields\n", MAX_LINE);
			goto fail;
		}

		/* len + 1: strlen-style lengths do not count the terminator */
		header[ncols] = malloc(len + 1);
		if (header[ncols] == NULL) {
			perror("malloc");
			goto fail;
		}
		memcpy(header[ncols], field, len);
		header[ncols][len] = '\0';
		ncols++;
		len = 0;
		truncated = 0;

		/* a newline or end-of-file terminates the header line */
		if (c != ',')
			break;
	}

	free(field);
	return ncols;

fail:
	while (ncols > 0)
		free(header[--ncols]);
	free(field);
	return -1;
}

/* read_data reads all the comma-separated values from f into *datap,
 * and returns the number of rows.
 * On entry *datap must point to an array with room for at least one row
 * pointer; it is grown with realloc as rows are read and *datap is updated
 * to the (possibly moved) array. Each row (*datap)[0..nrows-1] is a
 * separately malloc'd array of ncols floats that the caller must free.
 * Empty cells and cells missing from a short row are stored as 0; cells
 * beyond ncols are discarded. A warning is printed to stderr in each case.
 * A final row that ends at end-of-file without a newline is still counted.
 * -1 is returned on error; all rows are freed, and *datap still points to a
 * valid (but row-less) pointer array that the caller owns.
 */
static int
read_data(FILE *f, float ***datap, int ncols) {
	float **data = *datap;
	float **tmp;
	int nrows = 0;      /* completed rows; also the index of the row being filled */
	int nalloc = 0;     /* row buffers currently allocated: data[0..nalloc-1] */
	int x = 0;          /* column of the cell being read */
	size_t len = 0;     /* bytes collected for the current cell */
	int truncated = 0;  /* already warned about the current cell? */
	size_t width;
	char *cell;
	int c;

	if (ncols < 0) {
		fprintf(stderr, "error: invalid column count %d\n", ncols);
		return -1;
	}
	/* never request a zero-byte row: malloc(0) may legitimately return NULL */
	width = ncols > 0 ? (size_t)ncols : 1;

	cell = malloc(MAX_LINE);
	if (cell == NULL) {
		perror("malloc");
		return -1;
	}
	data[0] = malloc(sizeof **data * width);
	if (data[0] == NULL) {
		perror("malloc");
		free(cell);
		return -1;
	}
	nalloc = 1;

	for (;;) {
		c = fgetc(f);
		if (c != ',' && c != '\n' && c != EOF) {
			/* keep one byte spare for the terminator */
			if (len < (size_t)MAX_LINE - 1) {
				cell[len++] = (char)c;
			} else if (!truncated) {
				fprintf(stderr, "warning: cell at row %d, col %d truncated\n",
				    nrows + 1, x + 1);
				truncated = 1;
			}
			continue;
		}

		/* end-of-file with no partial row pending: we are done */
		if (c == EOF && len == 0 && x == 0)
			break;

		/* a cell just ended; check bounds BEFORE storing it */
		if (x >= ncols) {
			/* extra cells in the row. eat up the rest of the row to
			 * avoid writing into unallocated memory */
			fprintf(stderr, "warning: extra cells in row %d\n", nrows + 1);
			while (c != '\n' && c != EOF)
				c = fgetc(f);
		} else if (len == 0) {
			/* nothing in this column */
			fprintf(stderr, "warning: no value in cell at row %d, col %d\n",
			    nrows + 1, x + 1);
			data[nrows][x++] = 0.;
		} else {
			cell[len] = '\0';
			data[nrows][x++] = (float)atof(cell);
		}
		len = 0;
		truncated = 0;

		if (c == ',')
			continue;

		/* end of row (newline, or end-of-file after a partial row) */
		if (x < ncols) {
			fprintf(stderr, "warning: %d cells missing from row %d\n",
			    ncols - x, nrows + 1);
			while (x < ncols)
				data[nrows][x++] = 0.;
		}
		nrows++;
		x = 0;
		if (c == EOF)
			break;

		/* make room for, and allocate, the next row */
		tmp = realloc(data, sizeof *data * ((size_t)nrows + 1));
		if (tmp == NULL) {
			perror("malloc");
			goto fail;
		}
		data = tmp;
		data[nrows] = malloc(sizeof **data * width);
		if (data[nrows] == NULL) {
			perror("malloc");
			goto fail;
		}
		nalloc = nrows + 1;
	}

	/* a row buffer is allocated ahead of each line; drop the unused one */
	if (nalloc > nrows) {
		free(data[nrows]);
		data[nrows] = NULL;
	}
	free(cell);
	*datap = data;
	return nrows;

fail:
	for (int i = 0; i < nalloc; i++)
		free(data[i]);
	free(cell);
	/* realloc may have moved the array; don't leave the caller a stale pointer */
	*datap = data;
	return -1;
}

int
csv_read_file(FILE *f, struct csv *c, char hdr) {
	int ch;
	int ncols = 0;
	if (hdr) {
		/* read header */
		ncols = read_header(f, c->headers);
	} else {
		/* scan the first line to get the value of ncols */
		while ((ch = fgetc(f)) != EOF) {
			if (ch == ',') ncols++;
			if (ch == '\n') {
				ncols++;
				break;
			}
		}
		/* ensure we can properly scan the first line when we come to it */
		rewind(f);
	}
	c->cols = ncols;

	/* read data */
	int nrows = read_data(f, &c->data, ncols);
	fclose(f);
	if (nrows == -1) {
		csv_destroy(c);
		return 1;
	}
	c->rows = nrows;
	return 0;
}