/*
 * Rewrites external references in a set of HTML files.
 *
 * Inputs (all relative to the current directory):
 *   compare\dir.txt        directory listing; file names are taken from
 *                          fixed columns (see read_dir)
 *   compare\links.txt      tab-separated "reference <TAB> state" lines,
 *                          preceded by one header line (see read_states)
 *   compare\redirects.txt  tab-separated "reference <TAB> target" lines
 *                          (see read_redirects)
 *
 * For every listed file with an .html/.htm name, the HREF of <A> tags and
 * the SRC of <IMG> tags are mapped through the rules; when anything changed
 * the result is written to converted\<name>.
 */

/* NOTE(review): the header names were missing from the original #include
   lines; this is the set of headers the code below actually needs. */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <io.h>         /* access() */
#ifndef R_OK
#define R_OK 4
#endif
#else
#include <unistd.h>     /* access(), R_OK */
#endif

typedef short bool;
#define TRUE  (bool)1
#define FALSE (bool)0

typedef unsigned char byte;

/* malloc() that never returns NULL: reports the failure and terminates. */
static void *checked_malloc(size_t size)
{
    void *p = malloc(size);

    if (p == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return p;
}

#define ALLOC(type)     (type *)checked_malloc(sizeof(type))
#define SALLOC(s)       (char *)checked_malloc(strlen(s)+1)
#define NALLOC(type,n)  (type *)checked_malloc((n)*sizeof(type))
/* Assigns a freshly allocated copy of string S to D (D is overwritten,
   not freed). */
#define STRCPY(D,S)     do { (D) = SALLOC(S); strcpy((D), (S)); } while (0)

/************ lclint macros ************/

#define streq(A,B)      (strcmp(A,B) == 0)
#define strneq(A,B,C)   (strncmp(A,B,C) == 0)
#define memeq(A,B,C)    (memcmp(A,B,C) == 0)

/*********** debug macros *********/

#define DYN_DEBUG

#ifdef DEBUG

#define DEBUG_PRINT(X)  printf X
#define DEBUG_P(X)  printf(X)
#define DEBUG_P1(X,A1)  printf(X,A1)
#define DEBUG_P2(X,A1,A2)  printf(X,A1,A2)
#define DEBUG_P3(X,A1,A2,A3)  printf(X,A1,A2,A3)
#define DEBUG_P4(X,A1,A2,A3,A4)  printf(X,A1,A2,A3,A4)
#define DEBUG_P5(X,A1,A2,A3,A4,A5)  printf(X,A1,A2,A3,A4,A5)
#define BREAK { int i = i/0; } /* sneaky trick to remain in debugger */

#else
#ifdef DYN_DEBUG

bool option_debug = FALSE;

/* Wrapped in do/while(0) so the hidden `if' cannot capture a following
   `else' at the point of use. */
#define DEBUG_PRINT(X)  do { if (option_debug) printf X; } while (0)
#define DEBUG_P(X)  do { if (option_debug) printf(X); } while (0)
#define DEBUG_P1(X,A1)  do { if (option_debug) printf(X,A1); } while (0)
#define DEBUG_P2(X,A1,A2)  do { if (option_debug) printf(X,A1,A2); } while (0)
#define DEBUG_P3(X,A1,A2,A3)  do { if (option_debug) printf(X,A1,A2,A3); } while (0)
#define DEBUG_P4(X,A1,A2,A3,A4)  do { if (option_debug) printf(X,A1,A2,A3,A4); } while (0)
#define DEBUG_P5(X,A1,A2,A3,A4,A5)  do { if (option_debug) printf(X,A1,A2,A3,A4,A5); } while (0)
#define BREAK { int i = i/0; } /* sneaky trick to remain in debugger */

#else

#define DEBUG_PRINT(X)
#define DEBUG_P(X)
#define DEBUG_P1(X,A1)
#define DEBUG_P2(X,A1,A2)
#define DEBUG_P3(X,A1,A2,A3)
#define DEBUG_P4(X,A1,A2,A3,A4)
#define DEBUG_P5(X,A1,A2,A3,A4,A5)
#define BREAK { printf("\nInternal error, please report\n"); abort(); }

#endif
#endif

#define DEBUG_GN(X) /* fputc(X, fout) */

#define DO_DEBUG_PRINT(X)  printf X
#define DO_DEBUG_P(X)  printf(X)
#define DO_DEBUG_P1(X,A1)  printf(X,A1)
#define DO_DEBUG_P2(X,A1,A2)  printf(X,A1,A2)
#define DO_DEBUG_P3(X,A1,A2,A3)  printf(X,A1,A2,A3)
#define DO_DEBUG_P4(X,A1,A2,A3,A4)  printf(X,A1,A2,A3,A4)

/* sizes of buffers used during reading of HTML files: */
#define MAX_SF   80   /* max size of name of source HTML file name */
#define MAX_DF  600   /* max size of name of reference HTML file name */
#define MAX_HC  100   /* max size of HTML commands */
#define MAX_TAG_NAME 29   /* max number of characters kept of a tag name */
#define MAX_AC 1000   /* size of the buffer holding a rewritten <A>/<IMG> tag */
#define MAX_RULES 10000   /* capacity of the rules table */

char df_buffer[MAX_DF+1];

/* URL of document */
char *document_URL = NULL,
     *server_URL = NULL,  /* server part of document_URL (without last '/'). */
     *file_URL = NULL;    /* file part of document_URL (starting with '/'). */


/* Returns TRUE if `name' starts with one of the recognised URL schemes.
   strncmp() is used so that nothing beyond the terminator of a short
   name is read. */
bool is_URL(char *name)
{
    return    strneq(name, "news:", 5)
           || strneq(name, "http:", 5)
           || strneq(name, "file:", 5)
           || strneq(name, "ftp:", 4)
           || strneq(name, "wais:", 5)
           || strneq(name, "gopher:", 7)
           || strneq(name, "mailto:", 7)
           || strneq(name, "telnet:", 7);
}


/* Returns TRUE if `name' ends in ".html" or ".htm" (case-sensitive).
   Names shorter than the suffix are rejected without looking in front
   of the start of the string. */
bool is_html(char *name)
{
    size_t len = strlen(name);

    return    (len >= 5 && streq(name + len - 5, ".html"))
           || (len >= 4 && streq(name + len - 4, ".htm"));
}


/* Normalizes, in place, the file name `file' appearing in HTML file
   `origin', with the following steps:
   1. If `file' is empty, assign `origin' to `file'.
      Else if `file' is not an URL and does not start with '/' then
      glue it together with the directories in `origin', cancelling
      leading "./" and "../" components.
   2. If a document URL is set and `file' starts with "../" or "/",
      glue it together with the document URL / server URL.
   3. If `file' starts with the document URL, remove that prefix.
   4. "." becomes the empty name; if the result is neither an URL nor
      an .html file and <file>/index.html is readable, use that.

   Returns FALSE when ".." climbs above the root of `origin' or when a
   result would not be shorter than MAX_DF characters; TRUE otherwise.
   The length checks assume `file' has room for MAX_DF + 1 characters.
   Uses the global df_buffer as scratch space. */
bool norm_URL(char *origin, char *file)
{
    /* assume that origin: ['/'](<name> '/')*<name> */

    DEBUG_PRINT(("norm_URL(%s, %s) %s %s\n", origin, file, server_URL, file_URL));

    /* Step 1: */

    if (file[0] == '\0') {
        /* if file is empty, use origin: */
        if (strlen(origin) < MAX_DF)
            strcpy(file, origin);
        else
            return FALSE;
    } else if (file[0] != '/' && !is_URL(file)) {
        /* file is relative: glue it together with directories in origin */
        int i = (int)strlen(origin);
        char *s = file;

        DEBUG_PRINT(("glue %s with %s", origin, file));

        /* remove file-name from origin: */
        while (i > 0 && origin[i - 1] != '/')
            i--;

        DEBUG_PRINT((" : %s + %s\n", origin, file));

        /* cancel last directory in origin with '../': */
        while (   i > 1 && origin[i - 1] == '/' && s[0] == '.'
               && (   (s[1] == '.' && (s[2] == '/' || s[2] == '\0'))
                   || s[1] == '/' || s[1] == '\0')) {
            if (s[1] == '/') {
                s += 2;                         /* "./"  */
            } else if (s[1] == '\0') {
                s++;                            /* "."   */
            } else {
                s += s[2] == '/' ? 3 : 2;       /* "../" or ".." */
                do
                    i--;
                while (i > 0 && origin[i - 1] != '/');
            }
        }

        if (   i == 1 && origin[0] == '/' && s[0] == '.' && s[1] == '.'
            /* && s[2] == '/'*/) {
            return FALSE;
        } else if (i == 0) {
            /* s points into file, so the regions overlap: memmove */
            memmove(file, s, strlen(s) + 1);
        } else if ((size_t)i + strlen(s) < MAX_DF) {
            memcpy(df_buffer, origin, (size_t)i);
            strcpy(df_buffer + i, s);
            strcpy(file, df_buffer);
        } else {
            return FALSE;
        }
    }

    DEBUG_PRINT(("After step 1: %s\n", file));

    /* Step 2: */
    if (   document_URL != NULL
        && file[0] == '.' && file[1] == '.' && file[2] == '/') {
        int i = (int)strlen(file_URL) - 1;
        char *s = file;

        DEBUG_PRINT(("glue %s + %s\n", file_URL, file));

        /* assume that file_URL is of the form: '/' (<name> '/')* */
        while (i > 0 && s[0] == '.' && s[1] == '.' && s[2] == '/') {
            s += 3;
            i--;
            while (i > 0 && file_URL[i] != '/')
                i--;
            DEBUG_PRINT(("replace %s with %s\n", s, file_URL + i));
        }

        i += (int)strlen(server_URL);
        if ((size_t)i + 1 + strlen(s) < MAX_DF) {
            memcpy(df_buffer, document_URL, (size_t)i + 1);
            strcpy(df_buffer + i + 1, s);
            strcpy(file, df_buffer);
        } else {
            return FALSE;
        }
    } else if (document_URL != NULL && file[0] == '/') {
        /* if file starts with '/' add server_URL: */
        if (strlen(file) + strlen(server_URL) < MAX_DF) {
            strcpy(df_buffer, server_URL);
            strcat(df_buffer, file);
            strcpy(file, df_buffer);
        } else {
            return FALSE;
        }
    }

    DEBUG_PRINT(("After step 2: %s\n", file));

    /* Step 3: */

    /* if URL starts with document URL, remove it (overlapping move): */
    if (document_URL != NULL) {
        size_t doc_len = strlen(document_URL);

        if (strneq(file, document_URL, doc_len))
            memmove(file, file + doc_len, strlen(file + doc_len) + 1);
    }

    DEBUG_PRINT(("After step 3: %s\n", file));

    /* Step 4: */

    if (streq(file, "."))
        file[0] = '\0';

    /* if not URL and not .html, add index.html, when file exists */
    if (!is_URL(file) && !is_html(file) && strlen(file) + 12 < MAX_DF) {
        strcpy(df_buffer, file);
        if (df_buffer[0] != '\0' && df_buffer[strlen(df_buffer)-1] != '/')
            strcat(df_buffer, "/");
        strcat(df_buffer, "index.html");
        DEBUG_PRINT(("Try: %s\n", df_buffer));
        if (access(df_buffer, R_OK) == 0)
            strcpy(file, df_buffer);
    }

    DEBUG_PRINT(("After step 4: %s\n", file));

    return TRUE;
}


/* Records `URL' as the document URL:
     document_URL  copy of URL, with a '/' appended when it does not end
                   in one
     server_URL    the "<scheme>://<host>" part of URL
     file_URL      the part of document_URL starting at the first '/'
                   after the host, or "/" when there is none
   If URL has no "<scheme>://" prefix an error is printed on stderr and
   document_URL is set to NULL (file_URL is left untouched).
   `URL' itself is not modified. */
void accept_root_URL(char *URL)
{
    size_t url_len = strlen(URL);
    size_t i;

    document_URL = NALLOC(char, url_len + 2);
    strcpy(document_URL, URL);
    /* url_len == 0 is tested first so that index -1 is never read */
    if (url_len == 0 || document_URL[url_len - 1] != '/') {
        document_URL[url_len] = '/';
        document_URL[url_len + 1] = '\0';
    }

    server_URL = NULL;

    for (i = 0; URL[i] != '\0' && URL[i] != ':'; i++)
        ;
    if (URL[i] != '\0' && URL[i+1] == '/' && URL[i+2] == '/') {
        i += 3;
        while (URL[i] != '\0' && URL[i] != '/')
            i++;
        if (URL[i] == '/')
            file_URL = document_URL + i;
        else
            file_URL = "/";

        /* copy the first i characters instead of cutting the caller's
           string short */
        server_URL = NALLOC(char, i + 1);
        memcpy(server_URL, URL, i);
        server_URL[i] = '\0';
    }

    if (server_URL == NULL) {
        fprintf(stderr, "%%html: illegal URL `%s'\n", document_URL);
        free(document_URL);
        document_URL = NULL;
    }
}


/* A rewrite rule for one external reference. */
typedef struct {
    char *from;     /* reference as it appears in the HTML files */
    char *to;       /* replacement reference, or NULL when there is none */
    char *state;    /* link state; "ok" means the link works */
} rule_t;

rule_t *rules[MAX_RULES];
int nrrules = 0;


/* Appends `ch' to buf[*len] while keeping one position free for the
   terminator; a character that does not fit is dropped and, when
   `truncated' is not NULL, *truncated is set to TRUE. */
static void tag_putc(char *buf, int *len, int cap, bool *truncated, char ch)
{
    if (*len < cap - 1)
        buf[(*len)++] = ch;
    else if (truncated != NULL)
        *truncated = TRUE;
}

/* Appends `text' between double quotes using tag_putc(). */
static void tag_put_quoted(char *buf, int *len, int cap, bool *truncated,
                           const char *text)
{
    tag_putc(buf, len, cap, truncated, '"');
    while (*text != '\0')
        tag_putc(buf, len, cap, truncated, *text++);
    tag_putc(buf, len, cap, truncated, '"');
}

/* Implementation of skip_spaces() with a capacity for `command'. */
static void skip_spaces_core(FILE *fin, char *r_ch, char *command, int *r_i,
                             int cap, bool *truncated)
{
    char ch = *r_ch;
    int i = *r_i;

    /* skip spaces */
    while (!feof(fin) && (ch == ' ' || ch == '\n')) {
        tag_putc(command, &i, cap, truncated, ch);
        ch = (char)fgetc(fin);
    }

    /* process comments */
    while (!feof(fin) && ch == '-') {
        tag_putc(command, &i, cap, truncated, ch);
        ch = (char)fgetc(fin);
        if (ch != '-')
            break;

        /* inside "--": copy up to and including the next "--" */
        while (!feof(fin)) {
            char prev = ch;

            tag_putc(command, &i, cap, truncated, ch);
            ch = (char)fgetc(fin);
            if (ch == '-' && prev == '-') {
                tag_putc(command, &i, cap, truncated, ch);
                ch = (char)fgetc(fin);
                break;
            }
        }

        /* skip spaces */
        while (!feof(fin) && (ch == ' ' || ch == '\n')) {
            tag_putc(command, &i, cap, truncated, ch);
            ch = (char)fgetc(fin);
        }
    }

    *r_ch = ch;
    *r_i = i;
}

void skip_spaces(FILE *fin, char *r_ch, char *command, int *r_i);

/* Copies blanks, newlines and "-- ... --" comments from `fin' to
   command[*r_i...], starting with the look-ahead character *r_ch.
   On return *r_ch holds the first character that was not copied and
   *r_i the new length.  The interface carries no buffer size: the
   caller must guarantee that `command' is large enough. */
void skip_spaces(FILE *fin, char *r_ch, char *command, int *r_i)
{
    skip_spaces_core(fin, r_ch, command, r_i, INT_MAX, NULL);
}


/* Returns a newly allocated string with the reference that should
   replace `attr_val' (the caller frees it):
   - a leading "Broken.html?<state>|" wrapper is stripped to find the
     underlying reference;
   - if a rule exists for that reference and its state is "ok", the
     result is the rule's target, or the bare reference when the rule
     has no target;
   - if a rule exists with another state, the result is
     "Broken.html?<state>|<reference>";
   - without a rule, the result is a copy of `attr_val'. */
static char *map_reference(const char *attr_val)
{
    static const char broken[] = "Broken.html?";
    const char *from = attr_val;
    char *result;
    int j;

    if (strneq(from, broken, sizeof broken - 1)) {
        const char *s = from + (sizeof broken - 1);
        const char *t = strchr(s, '|');

        from = t != NULL ? t + 1 : s;
    }

    for (j = 0; j < nrrules; j++) {
        if (!streq(rules[j]->from, from))
            continue;

        if (streq(rules[j]->state, "ok")) {
            const char *target = rules[j]->to != NULL ? rules[j]->to : from;

            STRCPY(result, target);
        } else {
            /* sized exactly, so sprintf cannot overflow */
            result = NALLOC(char, (sizeof broken - 1) + strlen(rules[j]->state)
                                  + 1 + strlen(from) + 1);
            sprintf(result, "Broken.html?%s|%s", rules[j]->state, from);
        }
        return result;
    }

    STRCPY(result, attr_val);
    return result;
}


/* Handles an <A ...> or <IMG ...> tag whose name (`command', upper case)
   has already been read; `ch' is the look-ahead character.
   The attributes are copied; the value of NAME (for A) and of HREF (for
   A) or SRC (for IMG) is re-emitted between double quotes, HREF/SRC
   after mapping it through map_reference() when it is an URL or a
   "Broken.html?" reference.  A tag without any of these attributes is
   not written at all; for A, *forget_a_close is then set so that the
   matching </A> is dropped too.
   Returns the new look-ahead character. */
static char rewrite_ref_tag(char *fn, FILE *fin, FILE *fout,
                            const char *command, char ch,
                            bool *forget_a_close, bool *file_has_changed)
{
    static const char broken[] = "Broken.html?";
    bool is_anchor = streq(command, "A");
    char a_command[MAX_AC];
    int a_i;
    char attr_name[MAX_HC + 1];
    char attr_val[MAX_DF + 1];
    bool empty = TRUE;
    bool truncated = FALSE;

    *forget_a_close = FALSE;

    strcpy(a_command, command);
    a_i = (int)strlen(a_command);

    while (!feof(fin) && ch != '>') {
        bool found_is = FALSE;
        int name_len = 0;
        int val_len = 0;
        int attr_val_i;

        skip_spaces_core(fin, &ch, a_command, &a_i, MAX_AC, &truncated);

        if (feof(fin) || ch == '>')
            break;

        /* scan attribute in attr_name */
        while (   !feof(fin) && ch != '>' && ch != '=' && ch != ' '
               && ch != '\n' && ch != '\t') {
            if (name_len < MAX_HC)
                attr_name[name_len++] = (char)tolower((unsigned char)ch);
            tag_putc(a_command, &a_i, MAX_AC, &truncated, ch);
            ch = (char)fgetc(fin);
        }
        attr_name[name_len] = '\0';

        /* skip = and spaces */
        while (   !feof(fin)
               && (ch == ' ' || ch == '\n' || ch == '=' || ch == '\t')) {
            if (ch == '=')
                found_is = TRUE;
            tag_putc(a_command, &a_i, MAX_AC, &truncated, ch);
            ch = (char)fgetc(fin);
        }

        /* position in a_command where the value starts */
        attr_val_i = a_i;

        /* scan value into attr_val */
        if (found_is) {
            bool is_quoted = ch == '"';

            if (is_quoted) {
                tag_putc(a_command, &a_i, MAX_AC, &truncated, ch);
                ch = (char)fgetc(fin);
            }
            while (   !feof(fin) && ch != '>' && ch != '"'
                   && (is_quoted || ch != ' ')) {
                if (val_len < MAX_DF)
                    attr_val[val_len++] = ch;
                tag_putc(a_command, &a_i, MAX_AC, &truncated, ch);
                ch = (char)fgetc(fin);
            }
            if (ch == '"') {
                tag_putc(a_command, &a_i, MAX_AC, &truncated, ch);
                ch = (char)fgetc(fin);
            }
        }
        attr_val[val_len] = '\0';

        if (is_anchor && streq(attr_name, "name")) {
            /* re-emit the value, always quoted */
            a_i = attr_val_i;
            tag_put_quoted(a_command, &a_i, MAX_AC, &truncated, attr_val);
            empty = FALSE;
        } else if (streq(attr_name, is_anchor ? "href" : "src")) {
            a_i = attr_val_i;
            if (   is_URL(attr_val)
                || strneq(attr_val, broken, sizeof broken - 1)) {
                char *new_attr_val = map_reference(attr_val);

                if (!streq(attr_val, new_attr_val)) {
                    printf("In file %s changed %s into %s\n",
                           fn, attr_val, new_attr_val);
                    *file_has_changed = TRUE;
                }
                tag_put_quoted(a_command, &a_i, MAX_AC, &truncated,
                               new_attr_val);
                free(new_attr_val);
            } else {
                tag_put_quoted(a_command, &a_i, MAX_AC, &truncated, attr_val);
            }
            empty = FALSE;
        }
    }
    a_command[a_i] = '\0';

    if (truncated)
        fprintf(stderr, "In file %s a tag longer than %d characters was truncated\n",
                fn, MAX_AC - 1);

    if (!empty)
        fprintf(fout, "<%s>", a_command);
    else if (is_anchor)
        *forget_a_close = TRUE;

    if (ch == '>')
        ch = (char)fgetc(fin);

    return ch;
}


/* Copies the HTML text of `fin' to `fout', rewriting the external
   references of <A> and <IMG> tags according to the rules table
   (see rewrite_ref_tag).  `fn' is only used in messages.
   Tag names are written in upper case; all other tags are copied
   otherwise unchanged.
   Returns TRUE if at least one reference was changed. */
bool update_extref(char *fn, FILE *fin, FILE *fout)
{
    bool forget_a_close = FALSE;
    bool file_has_changed = FALSE;
    char ch = (char)fgetc(fin);

    while (!feof(fin)) {
        char command[MAX_TAG_NAME + 1];
        int i = 0;

        if (ch != '<') {
            fputc(ch, fout);
            ch = (char)fgetc(fin);
            continue;
        }

        /* Read the tag name.  Reading stops when `command' is full; the
           remaining characters are then copied by the generic branch
           below. */
        ch = (char)fgetc(fin);
        while (   !feof(fin) && i < MAX_TAG_NAME
               && (isalpha((unsigned char)ch) || ch == '/')) {
            command[i++] = (char)toupper((unsigned char)ch);
            ch = (char)fgetc(fin);
        }
        command[i] = '\0';

        if (forget_a_close && streq(command, "/A")) {
            /* drop the </A> belonging to an <A> that was not written */
            while (!feof(fin) && ch != '>')
                ch = (char)fgetc(fin);
            if (ch == '>')
                ch = (char)fgetc(fin);
            forget_a_close = FALSE;
        } else if (streq(command, "A") || streq(command, "IMG")) {
            ch = rewrite_ref_tag(fn, fin, fout, command, ch,
                                 &forget_a_close, &file_has_changed);
        } else {
            /* any other tag: copy up to the closing '>' */
            fprintf(fout, "<%s", command);
            while (!feof(fin) && ch != '>') {
                fputc(ch, fout);
                ch = (char)fgetc(fin);
            }
            fputc('>', fout);
            if (ch == '>')
                ch = (char)fgetc(fin);
        }
    }

    return file_has_changed;
}


/* Information of a file: */
typedef struct file_T {
    struct file_T *next;
    char *name;
} file_t;

/* List of all files: */
file_t *the_files = NULL;


/* Case-insensitive strcmp(); portable stand-in for the non-standard
   stricmp(). */
static int compare_names_nocase(const char *a, const char *b)
{
    for (;; a++, b++) {
        int ca = tolower((unsigned char)*a);
        int cb = tolower((unsigned char)*b);

        if (ca != cb)
            return ca < cb ? -1 : 1;
        if (ca == '\0')
            return 0;
    }
}


/* Returns pointer to HTML file record with the name `file' (compared
   without regard to case).  If such a record did not exist in the list,
   it is added, keeping the list sorted alphabetically on the file name. */
file_t *find_file(char *file)
{
    file_t **p_file = &the_files;
    int cmp = -1;

    while (   *p_file != NULL
           && (cmp = compare_names_nocase((*p_file)->name, file)) < 0)
        p_file = &(*p_file)->next;

    if (*p_file == NULL || cmp != 0) {
        file_t *n = ALLOC(file_t);

        DEBUG_PRINT(("FILEADDED\n"));

        n->next = *p_file;
        STRCPY(n->name, file);
        *p_file = n;
    }
    return *p_file;
}


/* Fills the list of files from the directory listing compare\dir.txt.
   Only lines of at least 45 characters with a blank in column 16 are
   used; the file name is the text from column 44 onwards.
   Does nothing when the listing cannot be opened. */
void read_dir(void)
{
    FILE *f;
    char fn[400];

    /*fprintf(stderr, "Scan directory\n");
    system("dir \\www >\\www\\compare\\dir.txt");
    fprintf(stderr, "Ready\n");*/

    f = fopen("compare\\dir.txt", "r");
    if (f == NULL)
        return;

    while (fgets(fn, sizeof fn, f) != NULL) {
        size_t len = strlen(fn);

        if (len > 0 && fn[len - 1] == '\n')
            fn[--len] = '\0';

        /* length first: fn[16] is only meaningful on long enough lines */
        if (len >= 45 && fn[16] == ' ')
            find_file(fn + 44);
    }
    fclose(f);
}


/* Reads the link states from compare\links.txt into the rules table.
   The first line is skipped; every following line is
   "<reference> TAB <state> [TAB ...]".  Lines without a TAB are
   ignored.  Each rule starts without a target (to == NULL). */
void read_states(void)
{
    FILE *f = fopen("compare\\links.txt", "r");
    char buffer[10000];

    if (f == NULL) {
        printf("file compare\\links.txt is missing\n");
        return;
    }

    /* skip the header line */
    if (fgets(buffer, sizeof buffer, f) != NULL) {
        while (fgets(buffer, sizeof buffer, f) != NULL && buffer[0] != '\0') {
            char *tab = strchr(buffer, '\t');
            char *state;
            rule_t *rule;

            if (tab == NULL)
                continue;
            if (nrrules >= MAX_RULES) {
                fprintf(stderr, "compare\\links.txt: more than %d rules, rest ignored\n",
                        MAX_RULES);
                break;
            }

            *tab = '\0';
            state = tab + 1;
            /* the state ends at the next TAB or at the end of the line */
            state[strcspn(state, "\t\n")] = '\0';

            rule = ALLOC(rule_t);
            STRCPY(rule->from, buffer);
            STRCPY(rule->state, state);
            rule->to = NULL;
            rules[nrrules++] = rule;
        }
    }
    fclose(f);
}


/* Applies the redirects of compare\redirects.txt to the rules table.
   Every line is "<reference> TAB <target>".  For the rule with that
   reference: when the target starts with '#' and the rule's state is
   "ok", the text after '#' becomes the new state; otherwise the target
   becomes the rule's replacement.  References without a rule are
   reported; lines without a TAB are ignored. */
void read_redirects(void)
{
    FILE *f = fopen("compare\\redirects.txt", "r");
    char buffer[10000];

    if (f == NULL) {
        printf("File compare\\redirects.txt is missing\n");
        return;
    }

    while (fgets(buffer, sizeof buffer, f) != NULL) {
        char *tab = strchr(buffer, '\t');
        char *from, *to;
        int i;

        if (tab == NULL)
            continue;

        *tab = '\0';
        from = buffer;
        to = tab + 1;
        to[strcspn(to, "\n")] = '\0';

        for (i = 0; i < nrrules; i++) {
            if (!streq(rules[i]->from, from))
                continue;

            if (*to == '#' && streq(rules[i]->state, "ok")) {
                free(rules[i]->state);
                STRCPY(rules[i]->state, to + 1);
            } else {
                free(rules[i]->to);     /* NULL when not set before */
                STRCPY(rules[i]->to, to);
            }
            break;
        }
        if (i == nrrules)
            printf("redirect %s not found\n", from);
    }
    fclose(f);
}


/* Copies the contents of file `src' to file `dst'. */
static void copy_file(const char *src, const char *dst)
{
    FILE *in = fopen(src, "r");
    FILE *out = fopen(dst, "w");

    if (in != NULL && out != NULL) {
        int c;

        while ((c = fgetc(in)) != EOF)
            fputc(c, out);
    } else {
        printf("Failed to open %s\n", in == NULL ? src : dst);
    }

    if (in != NULL)
        fclose(in);
    if (out != NULL)
        fclose(out);
}


int main(void)
{
    static const char temp_name[] = "compare\\tempconverted.html";
    file_t *file;

    read_dir();
    read_states();
    read_redirects();

    for (file = the_files; file != NULL; file = file->next) {
        FILE *f, *g;
        bool update;

        if (!is_html(file->name))
            continue;

        f = fopen(file->name, "r");
        g = fopen(temp_name, "w");
        if (f == NULL || g == NULL) {
            printf("Failed to open %s\n", file->name);
            if (f != NULL)
                fclose(f);
            if (g != NULL)
                fclose(g);
            continue;
        }

        update = update_extref(file->name, f, g);
        /* each stream is closed exactly once */
        fclose(f);
        fclose(g);

        if (update) {
            /* sized from the actual name, so long names cannot overflow */
            char *new_name = NALLOC(char, strlen("converted\\")
                                          + strlen(file->name) + 1);

            sprintf(new_name, "converted\\%s", file->name);
            copy_file(temp_name, new_name);
            free(new_name);
        }
    }

    return 0;
}