freq

count frequencies of keys in text
git clone git://bvnf.space/freq.git
Log | Files | Refs | README

freq.c (2945B)


      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Sentinel assigned to `sep` by main() before option parsing.  While `sep`
 * still holds it, is_sep() classifies separators with isspace() instead of
 * comparing against a single delimiter character.
 */
enum {
	SEP_DEFAULT = -1,
};

/*
 * Forward declaration; the definition follows main().
 * Reads `f` line by line and records the `field`-th field of each line;
 * a `field` of 0 records the whole line.
 */
void freq(FILE *f, int field);

/* One distinct key together with the number of times it has been recorded. */
struct count {
	/* heap-allocated copy of the key (made with strdup in update_record) */
	char *name;
	/* occurrence count; set to 1 when the record is created */
	int freq;
};

/* Array of records, one per distinct key; grown with realloc in update_record. */
struct count *cs;
/* Number of entries currently stored in cs. */
size_t cs_len = 0;
/*
 * Field delimiter chosen with -d, or SEP_DEFAULT when none was given.
 * NOTE(review): SEP_DEFAULT is -1, so what this plain char actually holds
 * after that assignment depends on the platform's char signedness.
 */
char sep;

/*
 * Linear search of the record array for a key equal to `name`.
 * Returns the position of the matching record in cs, or (size_t)-1 when no
 * record carries that name.
 */
size_t
get_index(char *name) {
	size_t pos = 0;

	while (pos < cs_len) {
		if (!strcmp(name, cs[pos].name))
			return pos;
		pos++;
	}
	return (size_t)-1;
}

/*
 * Record one occurrence of `name`.
 *
 * If a record for `name` already exists its count is incremented; otherwise
 * the array is grown by one entry holding a private copy of `name` with a
 * count of 1.  Counts can only ever increase.
 *
 * Allocation failure (of the array or of the key copy) is fatal: a message
 * is printed with perror() and the process exits with status 1.
 */
void
update_record(char *name) {
	/* keep the index as size_t: get_index reports "absent" as (size_t)-1 */
	size_t i = get_index(name);
	void *grown;
	char *copy;

	if (i != (size_t)-1) {
		/* add 1 to freq */
		cs[i].freq++;
		return;
	}

	/* create record */
	grown = realloc(cs, (cs_len + 1) * sizeof *cs);
	if (grown == NULL) {
		perror("realloc");
		exit(1);
	}
	cs = grown;

	copy = strdup(name);
	if (copy == NULL) {
		perror("strdup");
		exit(1);
	}
	cs[cs_len].name = copy;
	cs[cs_len].freq = 1;
	/* only publish the new slot once it is fully initialised */
	cs_len++;
}

/*
 * Release every key copy held in the record array, then the array itself.
 */
void
free_freqs(void) {
	size_t remaining = cs_len;

	while (remaining > 0) {
		remaining--;
		free(cs[remaining].name);
	}
	free(cs);
}

/*
 * Write every record to stdout as "<count><TAB><key>", one per line, in the
 * order the keys were first recorded.
 */
void
print_freqs(void) {
	size_t row = 0;

	while (row < cs_len) {
		printf("%d\t%s\n", cs[row].freq, cs[row].name);
		row++;
	}
}

/*
 * Tell whether character `c` separates fields.
 *
 * With no explicit delimiter (sep still holds SEP_DEFAULT) any character
 * for which isspace() is true counts as a separator; otherwise only the
 * delimiter character itself does.  Returns non-zero for a separator and 0
 * otherwise.
 */
int
is_sep(int c) {
	/*
	 * sep is a plain char, so compare against SEP_DEFAULT converted to
	 * char: where char is unsigned the stored value is 255, not -1, and a
	 * direct comparison with the enum constant would never match.
	 */
	if (sep == (char)SEP_DEFAULT) {
		/* <ctype.h> functions require a value representable as unsigned char */
		return isspace((unsigned char)c);
	}
	return (c == sep);
}

int
main (int argc, char **argv) {
	int c, field = 1;
	sep = SEP_DEFAULT;
	while ((c = getopt(argc, argv, ":d:k:")) != -1) {
		switch (c) {
			case 'd':
				sep = optarg[0];
				break;
			case 'k':
				field = atoi(optarg);
				break;
			case '?':
				fprintf(stderr, "usage: %s [-d delim] [-k fieldnum] [file...]\n", argv[0]);
				return 1;
		}
	}
	argc -= optind;
	argv += optind - 1;

	if (argc == 0)
		freq(stdin, field);
	else while (*++argv) {
		FILE *f = stdin;
		if (strcmp(*argv, "-") != 0) {
			f = fopen(*argv, "r");
			if (f == NULL) {
				perror(*argv);
				continue;
			}
		}
		freq(f, field);
		if (f != stdin)
			fclose(f);
	}

	print_freqs();
	free_freqs();

	return 0;
}

/*
 * Read `f` line by line and record one key per line with update_record().
 *
 * f      stream to read; it is not closed here.
 * field  1-based number of the separator-delimited field to use as the key,
 *        or 0 to use the whole line (without its trailing newline).
 *
 * Runs of separators count as a single separator and leading separators are
 * skipped.  A line that has fewer than `field` fields, or whose requested
 * field is empty (including blank and separator-only lines), is skipped with
 * a warning on stderr.  Line numbers in warnings restart at 1 for each call.
 */
void
freq(FILE *f, int field) {
	char *field_start, *line = NULL;
	size_t linelen = 0;
	ssize_t n, i;
	int cur_field, in_word, linenum;
	linenum = 0;

	while ((n = getline(&line, &linelen, f)) != -1) {
		if (n > 0 && line[n-1] == '\n') {
			line[n-1] = '\0'; /* remove the '\n' */
			n--;
		}
		linenum++;
		if (field == 0) {
			/* field 0: the whole line is the key */
			update_record(line);
			continue;
		}
		/* NULL means "not currently inside any field" */
		field_start = NULL;
		in_word = 0;
		cur_field = 1;
		i = 0;
		/* eat up separators at front, never stepping past the line's end */
		while (i < n && is_sep(line[i]))
			i++;
		for (; i < n; i++) {
			if (is_sep(line[i])) {
				if (!in_word)
					continue; /* still inside a run of separators */
				if (cur_field == field) {
					/* terminate the wanted field in place */
					line[i] = '\0';
					break;
				}
				field_start = NULL;
				cur_field++;
				in_word = 0;
			} else if (!in_word) {
				field_start = line + i;
				in_word = 1;
			}
		}
		if (cur_field != field) {
			/* didn't find the specified field */
			fprintf(stderr, "freq: warning: field %d not found on line %d\n", field, linenum);
			continue;
		}
		if (field_start == NULL) {
			/* reached the field's position but no character belongs to it */
			fprintf(stderr, "freq: warning: empty field %d on line %d\n", field, linenum);
			continue;
		}
		update_record(field_start);
	}
	/* getline returns -1 for both EOF and failure; report real read errors */
	if (ferror(f))
		perror("getline");
	free(line);
}