/*********************************************************************** * uniq.c * Removes adjacent duplicate lines from a file. * notes * #define DEBUG to produce debugging code. * * Copyright (c) David R Tribble, May85. * 1.0 05-06-85 drt. * 2.0 08-13-87 drt. Reworked program. Added hash comparison for * speed. */ static char prog[] = "uniq"; #include #include #ifdef DEBUG #define D #else #define D if (0) #endif #define LINESZ 4096 /* max line buffer size */ #define strequal(a,b) (! strcmp(a, b)) /*********************************************************************** * getlin * returns * Hash value or EOF. * * 1.0 05-06-85 drt. * 2.0 08-13-87 drt. Reworked program. Added hash comparison for * speed. */ static int getlin (in, buf) FILE * in; char * buf; { int cnt; int c; int hash; char * b0; b0 = buf; hash = 0; cnt = 1; for (;;) { c = getc(in); if (c == EOF) break; *buf = c; ++buf; ++cnt; hash += c; if (cnt >= LINESZ) break; if (c == '\n' /** || c == '\f' **/) break; /* note: this won't handle NULs very well */ } *buf = '\0'; if (hash == EOF) hash = EOF + 1; D fprintf(stderr, "getlin(): %04X \"%s\"\n", hash, b0); if (cnt > 1) return hash; return EOF; } /*********************************************************************** * uniq * returns * void. * * 1.0 05-06-85 drt. * 2.0 08-13-87 drt. Reworked program. Added hash comparison for * speed. */ static char buf1 [LINESZ+1]; static char buf2 [LINESZ+1]; void uniq (in, out) FILE * in; FILE * out; { int hash1; int hash2; long linec, dupc, outc; linec = dupc = outc = 0; hash1 = getlin(in, buf1); while ((hash2 = getlin(in, buf2)) != EOF) { ++linec; if (hash1 != hash2 || !strequal(buf1, buf2)) { D fprintf(stderr, "uniq(): lines differ, %04X %04X\n", hash1, hash2); fputs(buf1, out); ++outc; strcpy(buf1, buf2); hash1 = hash2; } else { ++dupc; D fprintf(stderr, "uniq(): lines match, %04X\n", hash1); } } fputs(buf1, out); fprintf(stderr, "%lu in, %lu out (%lu duplicates)\n", linec, outc, dupc); } /*********************************************************************** * main * * 1.0 05-06-85 drt. * 2.0 08-13-87 drt. Reworked program. Added hash comparison for * speed. */ int main (argc, argv) int argc; char ** argv; { if (argc > 1 || isatty(fileno(stdin))) goto usage; uniq(stdin, stdout); return 0; usage: fprintf(stderr, "Remove adjacent duplicate lines.\n\n"); fprintf(stderr, "usage:\t%s