/* This software implements an efficient string matching method proposed by Aho and Corasick (CACM 1975) */ #include #include "AC.h" static void Encode_initial(), read_pat(), construct_goto(), enter(), failure(); /* read_pat - read all the patterns and their scores */ void read_pats(char *file) { FILE *pat_file, *ckopen(); char *p, buf[1000], *strtok(), *strsave(), *strchr(); int prefix_len; int cur_state = 0; pat_file = ckopen(file, "r"); while (fgets(buf, sizeof buf, pat_file) != NULL) { p = strchr(buf, '#'); if (p != NULL) *p = '\0'; /* parse the pattern's name */ p = strtok(buf, TOKSTR); if (p == NULL) continue; /* skip blank or comment lines */ if (P > MAXPAT) fatalf("Cannot handle more than %d recognition patterns.", MAXPAT); pat_name[P] = strsave(p); p = strtok(NULL, TOKSTR); if (p == NULL) fatalf("Incorrect pattern line: %s\n", buf); pat[P] = strsave(p); pat_len[P] = strlen(p); total_pat_length += pat_len[P]; p = strtok(NULL, TOKSTR); if (p == NULL) fatalf("Incorrect pattern line: %s\n", buf); pat_score[P] = atoi(p); if (++P >= MAXPAT) fatal("Too many patterns."); } (void) fclose(pat_file); } /* construct_goto - construction of the goto function */ void construct_goto() { int i, j; char *ckalloc(); gf = (int **) ckalloc((total_pat_length+1)*sizeof(int *)); gf[0] = (int *) ckalloc((total_pat_length+1)*ALPHABET_SIZE*sizeof(int)); for (i=1; i<=total_pat_length; ++i) gf[i] = gf[i-1] + ALPHABET_SIZE; s_output = (SOUT **) ckalloc((total_pat_length+1)*sizeof(SOUT *)); for (i=0; i<=total_pat_length; ++i) { for (j=0; jpat_no = pat_no; q->link = NULL; } /* failure - Construction of the failure function */ void failure() { char *ckalloc(); int j, state, r, s; typedef struct QUEUE { int state; struct QUEUE *link; } QUEUE; QUEUE *head, *tail, *qp; SOUT *sp, *tp; fs = (int *) ckalloc((newstate+1)*sizeof(int)); head = tail = NULL; for (j=0; jlink = (QUEUE *) ckalloc(sizeof(QUEUE)); tail = tail->link; } else { head = tail = (QUEUE *) ckalloc(sizeof(QUEUE)); } tail->state = s; tail->link = NULL; fs[s] = 0; } while (head) { r = head->state; qp = head; head = head->link; free(qp); for (j=0; jlink = (QUEUE *) ckalloc(sizeof(QUEUE)); tail = tail->link; if (head == NULL) head = tail; tail->state = s; tail->link = NULL; state = fs[r]; while (gf[state][j] == FAIL) state = fs[state]; fs[s] = gf[state][j]; if (s_output[fs[s]]) { /* add them to s_output[s] */ sp = s_output[fs[s]]; while (sp) { tp = (SOUT *) ckalloc(sizeof(SOUT)); tp->pat_no = sp->pat_no; tp->link = s_output[s]; s_output[s] = tp; sp = sp->link; } } } } } /* Construct - Construct an automaton for a given pattern file */ void Construct(char *pat_file) { int i; read_pats(pat_file); /* for (i=0; ilink) printf(" %d", sp->pat_no); printf("\n"); } } }