/* ChkHTML -- Program for checking HTML files Copyright (C) 1995-2020 Frans Faase This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. GNU General Public License: https://www.iwriteiam.nl/GNU.txt */ #define VERSION "2.8 of November 28, 2024." #define WRITTEN_BY "F.J. Faase. https://www.iwriteiam.nl/" /* Status: Has been tested on my documents */ #ifdef WIN32 const char* copy_command = "copy"; #define CRLF "\n" #else const char* copy_command = "cp"; #define CRLF "\r\n" #endif #define USE_SYS_STAT #define DYN_DEBUG #include #include #include #include #include /*#include */ #include #ifdef USE_SYS_STAT #include #include #endif /*********** Basic definitions **************/ typedef char bool; #define TRUE (bool)1 #define FALSE (bool)0 typedef unsigned char byte; typedef unsigned int word; #define ALLOC(type) (type *)malloc(sizeof(type)) #define SALLOC(s) (char *)malloc(strlen(s)+1) #define NALLOC(type,n) (type *)malloc((n)*sizeof(type)) #define STRCPY(D,S) D = SALLOC(S); strcpy(D,S) #define STRNCPY(D,S,N) D = NALLOC(char,N+1); strncpy(D,S,N); D[N] = '\0'; /************ lclint macros ************/ #define streq(A,B) (strcmp(A,B) == 0) #define strieq(A,B) (stricmp(A,B) == 0) #define strneq(A,B,C) (strncmp(A,B,C) == 0) #define memeq(A,B,C) (memcmp(A,B,C) == 0) /*********** debug macros *********/ #ifdef DEBUG #define DEBUG_PRINT(X) printf X #define DEBUG_P(X) printf(X) #define 
DEBUG_P1(X,A1) printf(X,A1) #define DEBUG_P2(X,A1,A2) printf(X,A1,A2) #define DEBUG_P3(X,A1,A2,A3) printf(X,A1,A2,A3) #define DEBUG_P4(X,A1,A2,A3,A4) printf(X,A1,A2,A3,A4) #define DEBUG_P5(X,A1,A2,A3,A4,A5) printf(X,A1,A2,A3,A4,A5) #define BREAK { int i = i/0; } /* sneaky trick to remain in debugger */ #else #ifdef DYN_DEBUG bool option_debug = FALSE; #define DEBUG_PRINT(X) if (option_debug) printf X #define DEBUG_P(X) if (option_debug) printf(X) #define DEBUG_P1(X,A1) if (option_debug) printf(X,A1) #define DEBUG_P2(X,A1,A2) if (option_debug) printf(X,A1,A2) #define DEBUG_P3(X,A1,A2,A3) if (option_debug) printf(X,A1,A2,A3) #define DEBUG_P4(X,A1,A2,A3,A4) if (option_debug) printf(X,A1,A2,A3,A4) #define DEBUG_P5(X,A1,A2,A3,A4,A5) if (option_debug) printf(X,A1,A2,A3,A4,A5) #define BREAK { int i = i/0; } /* sneaky trick to remain in debugger */ #else #define DEBUG_PRINT(X) #define DEBUG_P(X) #define DEBUG_P1(X,A1) #define DEBUG_P2(X,A1,A2) #define DEBUG_P3(X,A1,A2,A3) #define DEBUG_P4(X,A1,A2,A3,A4) #define DEBUG_P5(X,A1,A2,A3,A4,A5) #define BREAK { printf("\nInternal error, please report\n"); abort(); } #endif #endif #define DEBUG_GN(X) /* fputc(X, fout) */ #define DO_DEBUG_PRINT(X) printf X #define DO_DEBUG_P(X) printf(X) #define DO_DEBUG_P1(X,A1) printf(X,A1) #define DO_DEBUG_P2(X,A1,A2) printf(X,A1,A2) #define DO_DEBUG_P3(X,A1,A2,A3) printf(X,A1,A2,A3) #define DO_DEBUG_P4(X,A1,A2,A3,A4) printf(X,A1,A2,A3,A4) /************** prototypes ***************/ char *rel_URL(char *from, char *to); /************** Program options stored in global variables *******/ bool option_info = FALSE, option_warn = FALSE, option_pedantic = FALSE, option_bibliography = FALSE, is_html_fn = FALSE; long nr_int_links, nr_ext_links, nr_broken_ext_links; #define MAX_NR_OTHER_EXT 40 char *other_ext_name[MAX_NR_OTHER_EXT]; long other_ext_size[MAX_NR_OTHER_EXT]; int other_ext_nr[MAX_NR_OTHER_EXT]; int nr_other_ext = 0; char *target_dir = "ftp"; char *target_domain = "www.iwriteiam.nl"; void 
add_other_ext(char *ext, long size) { int i; for (i = 0; i < nr_other_ext; i++) if (strcmp(ext, other_ext_name[i]) == 0) { other_ext_size[i] += size; other_ext_nr[i]++; return; } if (i == MAX_NR_OTHER_EXT) return; STRCPY(other_ext_name[i], ext); other_ext_size[i] = size; other_ext_nr[i] = 1; nr_other_ext++; } int nr_ext_files(char *ext) { int i; for (i = 0; i < nr_other_ext; i++) if (strcmp(ext, other_ext_name[i]) == 0) return other_ext_nr[i]; return 0; } char *with_commas(long nr) { static char result[20]; if (nr > 999999) sprintf(result, "%ld,%03ld,%03ld", nr / 1000000, (nr / 1000)%1000, nr % 1000); else if (nr > 999) sprintf(result, "%ld,%03ld", nr / 1000, nr % 1000); else sprintf(result, "%ld", nr); return result; } long size_ext_files(char *ext) { int i; for (i = 0; i < nr_other_ext; i++) if (strcmp(ext, other_ext_name[i]) == 0) return other_ext_size[i]; return 0; } /********** HTML files and there references **********/ #define NAME_EXISTS 1 #define NAME_REPEATED 2 #define NAME_REFERENCED 4 /* coding for ref_t.status: */ #define S_CORRECT 0 #define S_E_FILE 1 /* HTML file does not exist */ #define S_U_FILE 2 /* HTML file name is an URL */ #define S_E_NAME 3 /* NAME in HTML file does not exist */ typedef struct file_t file_t, *file_p; typedef struct section_t section_t, *section_p; typedef struct section_list_t section_list_t, *section_list_p; typedef struct name_t name_t, *name_p; typedef struct href_t href_t, *href_p; typedef struct tag_t tag_t, *tag_p; typedef struct tag_type_t tag_type_t, *tag_type_p; typedef struct tag_types_t tag_types_t, *tag_types_p; struct name_t { name_p next; char *name; long line; char status; bool ignore_for_tag; }; struct href_t { href_p next; file_p file; char *name; char *title; bool oneway; char type; char error; long line; section_p section; bool in_header; tag_types_p tag_types; }; struct tag_t { tag_p next; section_p section; tag_type_p type; long line; char *prev_text; char *next_text; tag_p prev_tag; tag_p next_tag; bool 
in_index; }; struct tag_type_t { tag_type_p next; char *name; char *title; tag_type_p parent; tag_p last_seen; }; struct tag_types_t { tag_types_p next; tag_type_p type; }; struct section_t { section_p next; file_p file; section_p parent; int level; int line; char *title; name_p names; href_p hrefs; tag_p tags; section_p nested; bool has_text; bool oneway; bool ignoretags; int nr_tag_types; tag_type_p tag_type; }; struct section_list_t { section_list_p next; section_p section; }; /* Information of a file: */ struct file_t { file_p next; char *name; section_p sections; bool exists; /* exists == TRUE implies !is_URL(name) */ char read; long size_local; long date_days; short date_mins; bool on_ftp; char *log_line; bool remove_from_ftp; bool upload_to_ftp; long ftp_size; long fd_sd; long fm_sd; char *contents; file_p parent; file_p children; file_p sibling; file_p next_sibling; file_p prev_sibling; bool special_child; }; tag_type_p tag_indexes[6]; int nr_tag_indexes; section_p cur_section = 0; name_p cur_names = 0; void add_section(file_p file, int line, int level) { section_p new_section; new_section = ALLOC(section_t); new_section->next = 0; new_section->file = file; new_section->parent = 0; new_section->line = line; new_section->level = level; new_section->title = ""; new_section->names = 0; new_section->hrefs = 0; new_section->nested = 0; new_section->has_text = FALSE; new_section->oneway = FALSE; new_section->ignoretags = FALSE; new_section->tags = 0; new_section->nr_tag_types = 0; new_section->tag_type = 0; if (file->sections == 0) { file->sections = new_section; } else { while (cur_section->parent != 0 && cur_section->parent->level >= level) cur_section = cur_section->parent; if (cur_section->level < level) { cur_section->nested = new_section; new_section->parent = cur_section; } else { cur_section->next = new_section; new_section->parent = cur_section->parent; } } cur_section = new_section; } void next_section(section_p *r_section, int *depth) { if (*r_section 
== 0) return; if ((*r_section)->nested != 0) { *r_section = (*r_section)->nested; if (depth) (*depth)++; return; } while ((*r_section) && (*r_section)->next == 0) { *r_section = (*r_section)->parent; if (depth) (*depth)--; } if ((*r_section) != 0) *r_section = (*r_section)->next; } /* Coding for file_t.read: */ #define R_UNREAD 0 #define R_READ 1 /* HTML file read, to check for consistency */ #define R_INCLUDED 2 /* HTML file included in LaTeX output */ #define R_INDIR 4 #define R_DOREAD 8 #define R_SITEMAP 16 /* In site map */ /* List of all files: */ file_p the_files = 0; tag_type_p the_tag_types = 0; /************ Procedures for storing reference information *******/ char *c_top = ":TOP"; char *norm_name(char *name) { return name == 0 ? c_top : name; } bool eq_name(char *a, char *b) { if (a == 0 || a == c_top || *a == '\0') return b == 0 || b == c_top || *b == '\0'; if (b == 0 || b == c_top || *b == '\0') return FALSE; return !strcmp(a, b); } bool is_URL(name) char *name; /* returns TRUE if name is an URL. */ { return memeq(name, "news:", 5) || memeq(name, "http:", 5) || memeq(name, "https:", 6) || memeq(name, "file:", 5) || memeq(name, "ftp:", 4) || memeq(name, "wais:", 5) || memeq(name, "gopher:", 7) || memeq(name, "mailto:", 7) || memeq(name, "telnet:", 7); } bool is_html(name) char *name; { return streq(name + strlen(name) - 5, ".html") || streq(name + strlen(name) - 4, ".htm"); } bool is_txt(name) char *name; { return streq(name + strlen(name) - 4, ".txt"); } bool is_js(name) char *name; { return streq(name + strlen(name) - 3, ".js"); } int stricmp(const char *a, const char *b) { while(*a != '\0' && *b != '\0' && toupper(*a) == toupper(*b)) { a++; b++; } if (toupper(*a) < toupper(*b)) return -1; if (toupper(*a) > toupper(*b)) return 1; return 0; } file_p find_file(char *file) /* Returns pointer to HTML file record with the name 'file'. If such a record did not exist in the list, it is added alphabetically on the file name. 
*/ { file_p *p_file = &the_files; while (*p_file != 0 && stricmp((*p_file)->name, file) < 0) p_file = &(*p_file)->next; if (*p_file == 0 || stricmp((*p_file)->name, file)) { file_p n = ALLOC(file_t); DEBUG_PRINT(("FILEADDED\n")); n->next = *p_file; STRCPY(n->name, file); n->sections = 0; n->exists = FALSE; n->read = R_UNREAD; n->size_local = -1; n->date_days = -1; n->date_mins = -1; n->contents = 0; n->on_ftp = FALSE; n->log_line = 0; n->remove_from_ftp = FALSE; n->ftp_size = -1; n->upload_to_ftp = FALSE; n->fd_sd = 0; n->fm_sd = 0; n->parent = 0; n->children = 0; n->sibling = 0; n->next_sibling = 0; n->prev_sibling = 0; n->special_child = FALSE; *p_file = n; { struct stat file_stat; if (stat(n->name, &file_stat) == 0) { #ifdef WIN32 struct tm *timeinfo = localtime(&file_stat.st_mtime); #else struct tm *timeinfo = gmtime(&file_stat.st_mtime); #endif n->read |= R_INDIR; n->exists = TRUE; n->size_local = file_stat.st_size; n->date_days = (1900+timeinfo->tm_year) * 10000 + (timeinfo->tm_mon+1) * 100 + timeinfo->tm_mday; n->date_mins = timeinfo->tm_hour * 60 + timeinfo->tm_min; if (timeinfo->tm_isdst) { // Windows has incremented all the date/times with one hour for files created before day light saving period if (n->date_days < 20100328) { n->date_mins -= 60; if (n->date_mins < 0) { n->date_mins += 24*60; n->date_days--; } } } } else if (errno != 2) printf("File '%s' stat = %d\n", n->name, errno); } } return *p_file; } file_p find_file_if_exists(char *file) { file_p *p_file = &the_files; while (*p_file != 0 && stricmp((*p_file)->name, file) < 0) p_file = &(*p_file)->next; return 0; } void add_name(name_p *r_name, char *name, long line, bool ignore_for_tag) { while (*r_name != 0) r_name = &(*r_name)->next; (*r_name) = ALLOC(name_t); (*r_name)->next = 0; STRCPY((*r_name)->name, name); (*r_name)->line = line; (*r_name)->status = 0; (*r_name)->ignore_for_tag = ignore_for_tag; } char *title_up = "Up"; char *title_next = "Next"; char *title_prev = "Previous"; bool 
href_is_tag; void add_href(href_p *r_href, char *dest_file, char *href_name, char *title, bool oneway, long line, bool in_header) { while (*r_href != 0) r_href = &(*r_href)->next; (*r_href) = ALLOC(href_t); (*r_href)->next = 0; (*r_href)->file = find_file(dest_file); if (href_name[0] != '\0') { STRCPY((*r_href)->name, href_name); } else (*r_href)->name = 0; if (title[0] != '\0') { if (streq(title, title_up) || streq(title, "Boven")) (*r_href)->title = title_up; else if (streq(title, title_next) || streq(title, "Volgende")) (*r_href)->title = title_next; else if (streq(title, title_prev) || streq(title, "Vorige")) (*r_href)->title = title_prev; else { STRCPY((*r_href)->title, title); } } else (*r_href)->title = 0; (*r_href)->oneway = oneway; (*r_href)->type = ' '; (*r_href)->error = ' '; (*r_href)->line = line; (*r_href)->section = 0; (*r_href)->in_header = in_header; (*r_href)->tag_types = 0; if (href_is_tag) { int i; for (i = nr_tag_indexes-1; i >= 0; i--) { tag_types_p tag_type = ALLOC(tag_types_t); tag_type->next = (*r_href)->tag_types; tag_type->type = tag_indexes[i]; (*r_href)->tag_types = tag_type; } } } tag_type_p find_tag_type(char *tagname) { tag_type_p *p_tag_type = &the_tag_types; for (; *p_tag_type != 0; p_tag_type = &(*p_tag_type)->next) if (strcmp((*p_tag_type)->name, tagname) == 0) return *p_tag_type; (*p_tag_type) = ALLOC(tag_type_t); (*p_tag_type)->next = 0; STRCPY((*p_tag_type)->name, tagname); (*p_tag_type)->title = ""; (*p_tag_type)->parent = 0; (*p_tag_type)->last_seen = 0; return (*p_tag_type); } static char* empty_tag = ""; tag_p find_tag_at_section(section_p section, tag_type_p tag_type, long line) { tag_p *r_tag = §ion->tags; for (; *r_tag != 0; r_tag = &(*r_tag)->next) if ((*r_tag)->type == tag_type) return (*r_tag); (*r_tag) = ALLOC(tag_t); (*r_tag)->next = 0; (*r_tag)->section = section; (*r_tag)->line = line; (*r_tag)->type = tag_type; (*r_tag)->prev_text = empty_tag; (*r_tag)->next_text = empty_tag; (*r_tag)->prev_tag = 0; 
(*r_tag)->next_tag = 0; (*r_tag)->in_index = FALSE; return (*r_tag); } void add_tag_from_script(section_p section, char *tagname, char *prev, char *next, long line) { tag_type_p tag_type; tag_p tag; name_p name = 0; for (; section->parent != 0; section = section->parent) { name_p sec_name; for (sec_name = section->names; sec_name != 0; sec_name = sec_name->next) if (!sec_name->ignore_for_tag) { name = sec_name; break; } if (name != 0) break; } tag_type = find_tag_type(tagname); tag = find_tag_at_section(section, tag_type, line); STRCPY(tag->prev_text, prev); STRCPY(tag->next_text, next); //fprintf(stdout, "Add tag %s to section %s in %s from script '%s' '%s'\n", tag_type->name, name ? name->name : "", section->file->name, prev, next); } void add_tag_from_index(section_p section, tag_type_p tag_type) { tag_p tag = find_tag_at_section(section, tag_type, section->line); tag->in_index = TRUE; //fprintf(stdout, "Add tag %s to section %s in %s from index\n", tag_type->name, section->names ? section->names->name : "", section->file->name); } href_p find_href(href_p href, char *dest_file, char *href_name, char *title, bool oneway, long line, bool in_header) { /*for (; href != 0; href = href->next) if ( href->line == line && href->oneway == oneway && href->in_header == in_header && ( href->file == 0 ? find_file(dest_file) == 0 : streq(href->file->name, dest_file)) && ( href->name == 0 ? 
href_name[0] == '\0' : streq(href->name, href_name))) return href;*/ return 0; } bool name_repeated(section_p section, char *name) { int count; count = 2; for ( ; section; next_section(§ion, 0)) { name_p names; for (names = section->names; names; names = names->next) { if (streq(names->name, name)) { count--; if (count == 0) return TRUE; } } } return FALSE; } section_p section_with_name(section_p section, char *search_name) { for ( ; section; next_section(§ion, 0)) { name_p name; for (name = section->names; name; name = name->next) { if (streq(name->name, search_name)) { name->status |= NAME_REFERENCED; return section; } } } return 0; } typedef struct error_t { char* file_name; int line; char* message; } *error_p; error_p errors[10000]; int nr_errors = 0; void add_error(char* file_name, int line, char* message) { int i; error_p error = ALLOC(struct error_t); STRCPY(error->file_name, file_name); error->line = line; STRCPY(error->message, message); for (i = 0; i < nr_errors; i++) { int cmpfn = strcmp(errors[i]->file_name, file_name); if (cmpfn > 0 || (cmpfn == 0 && errors[i]->line > line)) break; } if (nr_errors >= 9900) return; nr_errors++; for (; i < nr_errors; i++) { error_p next = errors[i]; errors[i] = error; error = next; } } #define ERROR(F,L,M) { add_error(F,L,M); } #define ERROR1(F,L,M,A) { char message[7000]; sprintf(message, M,A); add_error(F,L,message); } #define ERROR2(F,L,M,A,B) { char message[7000]; sprintf(message, M,A,B); add_error(F,L,message); } #define ERROR3(F,L,M,A,B,C) { char message[7000]; sprintf(message, M,A,B,C); add_error(F,L,message); } #define ERROR4(F,L,M,A,B,C,D) { char message[7000]; sprintf(message, M,A,B,C,D); add_error(F,L,message); } void print_errors(FILE* freport) { int i; for (i = 0; i < nr_errors; i++) if (errors[i]->line == 0) fprintf(freport, "%s : %s\n", errors[i]->file_name, errors[i]->message); else fprintf(freport, "%s (%d) : %s\n", errors[i]->file_name, errors[i]->line, errors[i]->message); } bool href_okay; void 
analyze_href(char *html_fn, int ln, char* dest_file) { /* Find the file */ file_p tfile = find_file(dest_file); if (tfile == 0 || !tfile->exists) { if (tfile == 0 || !(tfile->read & R_SITEMAP)) ERROR1(html_fn, ln, "file '%s' does not exist", dest_file) return; } if (!streq(tfile->name, dest_file)) ERROR2(html_fn, ln, "change '%s' into '%s'.", dest_file, tfile->name) } /********** Procedures for making/checking cross references ***********/ int nstrcmp(str1, str2) char *str1, *str2; /* This procedure compares two strings, like strcmp, where the string pointers can be 0. A 0 pointer comes before all other strings. */ { return (str1 == 0) ? (str2 == 0 ? 0 : -1) : (str2 == 0) ? 1 : strcmp(str1, str2); } /************ Sites **************/ typedef struct site_t site_t, *site_p; struct site_t { char* name; int count; site_p next; }; site_p all_sites = 0; int nr_sites = 0; void add_site(name) char *name; { char sitename[201]; char *s; int i = 0; int slashes = 0; for (s = name; *s != '\0' && slashes < 3; s++) { if (i < 200) sitename[i++] = *s; if (*s == '/') slashes++; } sitename[i] = '\0'; { site_p *ref_site = &all_sites; while ((*ref_site) != 0 && strcmp((*ref_site)->name, sitename) < 0) ref_site = &(*ref_site)->next; if ((*ref_site) != 0 && strcmp((*ref_site)->name, sitename) == 0) { (*ref_site)->count++; return; } nr_sites++; { site_p new_site = ALLOC(site_t); STRCPY(new_site->name, sitename); new_site->count = 1; new_site->next = *ref_site; *ref_site = new_site; } } } /************ Scanning buffers **************/ /* sizes of buffers used during reading of HTML files: */ #define MAX_SF 80 /* max size of name of source HTML file name */ #define MAX_DF 600 /* max size of name of reference HTML file name */ #define MAX_N 100 /* max size of NAME */ #define MAX_HT 1000 /* max size of TITLE */ #define MAX_HC 100 /* max size of HTML commands */ #define MAX_AT 1000 /* max size of text inside an anchor */ #define MAX_AV 6000 /* max size of attribute value */ char 
url_argument[MAX_DF]; /* Temporary buffer used during file name manipulations: */ char df_buffer[MAX_DF+1]; /*********** Manipulating URL's *************/ /* URL of document */ char *document_URL = 0, *server_URL = 0, /* server part of document_URL (without last '/'). */ *file_URL = 0; /* file part of document_URL (starting with '/'). */ char *roots[2] = { "http://home.wxs.nl/~kabuki/", "http://home.wxs.nl/~faase009/", }; bool norm_URL(origin, file) char *origin, *file; /* Normalizes the file name 'file' appearing in HTML file 'origin', with the following steps: 1. If 'file' is empty, use assign 'origin' to 'file'. Else if 'file' is not an URL and does not start with '/' then glue it together with directories in 'origin'. 2. If 'file' is not an URL glue it together with document URL. 3. If 'file' starts with document URL, remove it. 4. If not URL and not html, add index.html, when the file exists. 5. Do any link mappings. */ { /* assume that origin: ['/']( '/')* */ int i; char *s; DEBUG_PRINT(("norm_URL(%s, %s) %s %s\n", origin, file, server_URL, file_URL)); /* Step 1: */ /* if file is empty, use origin: */ if (file[0] == '\0') { if (strlen(origin) < MAX_DF) strcpy(file, origin); else return FALSE; } /* if file is not an URL and does not start with '/' then glue it together with directories in origin: */ else if (file[0] != '/' && !is_URL(file)) { int i = strlen(origin); char *s = file; DEBUG_PRINT(("glue %s with %s", origin, file)); /* remove ./ from the start of file name: */ if (s[0] == '.' && s[1] == '/') s += 2; /* remove file-name from origin: */ while (i > 0 && origin[i - 1] != '/') i--; DEBUG_PRINT((" : %s + %s\n", origin, file)); /* cancel last directory in origin with '../': */ while ( i > 1 && origin[i - 1] == '/' && s[0] == '.' && ( (s[1] == '.' && (s[2] == '/' || s[2] == '\0')) || s[1] == '/' || s[1] == '\0')) if (s[1] == '/') s += 2; else if (s[1] == '\0') s++; else { s += s[2] == '/' ? 
3 : 2; do i--; while (i > 0 && origin[i - 1] != '/'); } if ( i == 1 && origin[0] == '/' && s[0] == '.' && s[1] == '.' /* && s[2] == '/'*/) return FALSE; else if (i == 0) strcpy(file, s); else if(i + strlen(s) < MAX_DF) { memcpy(df_buffer, origin, i); strcpy(df_buffer + i, s); strcpy(file, df_buffer); } else return FALSE; } DEBUG_PRINT(("After step 1: %s\n", file)); /* Step 2. */ if ( document_URL != 0 && file[0] == '.' && file[1] == '.' && file[2] == '/') { int i = strlen(file_URL) - 1; char *s = file; DEBUG_PRINT(("glue %s + %s\n", file_URL, file)); /* assume that file_URL is of the form: '/' ( '/')* */ while (i > 0 && s[0] == '.' && s[1] == '.' && s[2] == '/') { s += 3; i--; while (i > 0 && file_URL[i] != '/') i--; DEBUG_PRINT(("replace %s with %s\n", s, file_URL + i)); } i += strlen(server_URL); if (i + 1 + strlen(s) < MAX_DF) { memcpy(df_buffer, document_URL, i + 1); strcpy(df_buffer + i + 1, s); strcpy(file, df_buffer); } else return FALSE; } /* if file starts with '/' add server_URL: */ else if (document_URL != 0 && file[0] == '/') { if (strlen(file) + strlen(server_URL) < MAX_DF) { strcpy(df_buffer, server_URL); strcat(df_buffer, file); strcpy(file, df_buffer); } else return FALSE; } DEBUG_PRINT(("After step 2: %s\n", file)); /* Step 3: */ /* if URL starts with document URL, remove it: */ if ( document_URL != 0 && memeq(file, document_URL, strlen(document_URL))) strcpy(file, file + strlen(document_URL)); for (i = 0; i < 2; i++) if (!strncmp(file, roots[i], strlen(roots[i]))) strcpy(file, file + strlen(roots[i])); DEBUG_PRINT(("After step 3: %s\n", file)); /* Step 4: */ if (streq(file, ".")) file[0] = '\0'; /* if not URL and not .html, add index.html, when file exists */ if (!is_URL(file) && !is_html(file) && strlen(file) + 12 < MAX_DF) { strcpy(df_buffer, file); if (df_buffer[0] != '\0' && df_buffer[strlen(df_buffer)-1] != '/') strcat(df_buffer, "/"); strcat(df_buffer, "index.html"); DEBUG_PRINT(("Try: %s\n", df_buffer)); { file_p df_file = 
find_file_if_exists(df_buffer); if (df_file != 0 && df_file->exists) strcpy(file, df_buffer); } } url_argument[0] = '\0'; s = strstr(file, ".html?"); if (!is_URL(file) && s != 0) { strcpy(url_argument, s+6); s[5] = '\0'; } DEBUG_PRINT(("After step 4: %s\n", file)); if (streq(file, "Broken.html")) nr_broken_ext_links++; else if (is_URL(file)) { if (strcmp(file, "brexrefs.html")) { nr_ext_links++; add_site(file); } } else nr_int_links++; return TRUE; } char *rel_URL(char *from, char *to) /* This function returns the shortest string to go file "to" inside file "from" */ { char *to2 = to; bool go; int d; DEBUG_P2("rel_URL(%s, %s)\n", from, to); go = TRUE; while (go) { char *f = from, *t = to; for (; *f != '\0' && *f != '/' && *f == *t; f++, t++); if (*f == '/' && *t == '/') { from = f + 1; to = t + 1; } else go = FALSE; } d = 0; for (; *from != '\0'; from++) if (*from == '/') d++; df_buffer[0] = '\0'; for (; d > 0; d--) if (strlen(df_buffer) + 3 >= MAX_DF) return to; else strcat(df_buffer, "../"); if (strlen(df_buffer) + strlen(to) >= MAX_DF) return to; else strcat(df_buffer, to); DEBUG_P1("relative: %s\n", df_buffer); if (file_URL != 0 && strlen(file_URL) + strlen(to2) < strlen(df_buffer)) { strcpy(df_buffer, file_URL); strcat(df_buffer, to2); } if (streq(df_buffer, "index.html")) return "."; { int l = strlen(df_buffer); if (l > 11 && streq(df_buffer + l - 11, "/index.html")) df_buffer[l - 11] = '\0'; } DEBUG_P1("return: %s\n", df_buffer); return df_buffer; } /******* Special characters ******/ char *ch_table[] = { /*160*/ "nbsp", "iexp", "cent", "pound", "curren", "yen", "brvar", "sect", "uml", "copy", /*170*/ "ordf", "laquo", "not", "shy", "reg", "macr", "deg", "plusmn", "sup2", "sup3", /*180*/ "acute", "micro", "para", "middot", "cedol", "sup1", "ordm", "raquo", "frac14", "frac12", /*190*/ "frac34", "iquest", "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", /*200*/ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", 
"ETH", "Ntilde", /*210*/ "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", /*220*/ "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", /*230*/ "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", /*240*/ "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", 0, "oslash", "ugrave", /*250*/ "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml", "aring", "Eth", "icirc", "Thorn", "Yuml", "nbsp", "emsp", "ensp", "shy", "pd", "mdash", "ndash", "copy", "reg", "trade", "alpha", "beta", "gamma", "delta", "epsi", "zeta", "eta", "theta", "thetav", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho", "sigma", "tau", "upsi", "phi", "chi", "psi", "omega", "Alpha", "Beta", "Gamma", "Delta", "Epsi", "Zeta", "Eta", "Theta", "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Pi", "Rho", "Sigma", "Tau", "Upsi", "Phi", "Chi", "Psi", "Omega", "amp", "gt", "lt", "quot", "euro", "bull" }; #define NR_CH_TABLE (sizeof(ch_table)/sizeof(char *)) /************* Scanning a HTML file ******************/ #define T_ILL 0 #define T_HTML 1 #define T_HEAD 2 #define T_TITLE 3 #define T_BODY 4 #define T_ADDR 5 #define T_LINK 6 #define T_STYLE 7 #define T_H 10 #define T_VERB 11 #define T_DIR 12 #define T_LIST 13 #define T_DESC 14 #define T_ITEM 15 #define T_DT 16 #define T_DD 17 #define T_P 18 #define T_A 19 #define T_IMG 20 #define T_CHAR 21 #define T_BR 22 #define T_META 23 #define T_SCRIPT 24 #define T_DIV 25 #define T_CANVAS 26 #define NR_TAGS 88 #define TN_H1 5 #define C_NO 0 #define C_YES 1 #define C_OPT 2 struct Codes { char *name; int closing; byte kind; } tags[NR_TAGS] = { #define H_HTML 0 { "html", C_YES, T_HTML }, #define H_HEAD 1 { "head", C_YES, T_HEAD }, #define H_TITLE 2 { "title", C_YES, T_TITLE }, #define H_BODY 3 { "body", C_YES, T_BODY }, #define H_ADDRESS 4 { "address", C_YES, T_ADDR }, #define H_H1 5 { "h1", C_YES, T_H }, #define H_H2 6 { 
"h2", C_YES, T_H }, #define H_H3 7 { "h3", C_YES, T_H }, #define H_H4 8 { "h4", C_YES, T_H }, #define H_H5 9 { "h5", C_YES, T_H }, #define H_H6 10 { "h6", C_YES, T_H }, #define H_P 11 { "p", C_OPT, T_P }, #define H_UL 12 { "ul", C_YES, T_LIST }, #define H_MENU 13 { "menu", C_YES, T_LIST }, #define H_DIR 14 { "dir", C_YES, T_LIST }, #define H_OL 15 { "ol", C_YES, T_LIST }, #define H_LI 16 { "li", C_OPT, T_ITEM }, #define H_LH 17 { "lh", C_OPT, T_ITEM }, #define H_DL 18 { "dl", C_YES, T_DESC }, #define H_DT 19 { "dt", C_OPT, T_DT }, #define H_DD 20 { "dd", C_OPT, T_DD }, #define H_A 21 { "a", C_OPT, T_A }, #define H_Q 22 { "q", C_YES, T_CHAR }, #define H_I 23 { "i", C_YES, T_CHAR }, #define H_EM 24 { "em", C_YES, T_CHAR }, #define H_B 25 { "b", C_YES, T_CHAR }, #define H_STRONG 26 { "strong", C_YES, T_CHAR }, #define H_TT 27 { "tt", C_YES, T_CHAR }, #define H_SAMP 28 { "samp", C_YES, T_CHAR }, #define H_KDB 29 { "kbd", C_YES, T_CHAR }, #define H_VAR 30 { "var", C_YES, T_CHAR }, #define H_DFN 31 { "dfn", C_YES, T_CHAR }, #define H_CODE 32 { "code", C_YES, T_CHAR }, #define H_BLINK 33 { "blink", C_YES, T_CHAR }, #define H_CITE 34 { "cite", C_YES, T_CHAR }, #define H_BLOCKQUOTE 35 { "blockquote", C_YES, T_CHAR }, #define H_BQ 36 { "bq", C_YES, T_CHAR }, #define H_U 37 { "u", C_YES, T_CHAR }, #define H_S 38 { "s", C_YES, T_CHAR }, #define H_SMALL 39 { "small", C_YES, T_CHAR }, #define H_BIG 40 { "big", C_YES, T_CHAR }, #define H_NOTE 41 { "note", C_YES, T_CHAR }, #define H_AU 42 { "au", C_YES, T_CHAR }, #define H_PERSON 43 { "person", C_YES, T_CHAR }, #define H_ACRONYM 44 { "acronym", C_YES, T_CHAR }, #define H_ABBREV 45 { "abbrev", C_YES, T_CHAR }, #define H_CREDIT 46 { "credit", C_YES, T_CHAR }, #define H_INS 47 { "ins", C_YES, T_CHAR }, #define H_DEL 48 { "del", C_YES, T_CHAR }, #define H_PRE 49 { "pre", C_YES, T_VERB }, #define H_XMP 50 { "xmp", C_YES, T_VERB }, #define H_LISTING 51 { "listing", C_YES, T_VERB }, #define H_BR 52 { "br", C_NO, T_DIR }, #define H_HR 53 
{ "hr", C_NO, T_DIR }, #define H_IMG 54 { "img", C_NO, T_IMG }, #define H_ISINDEX 55 { "isindex", C_NO, T_DIR }, #define H_SELECT 56 { "select", C_YES, T_DIR }, #define H_LINK 57 { "link", C_NO, T_LINK }, #define H_CENTER 58 { "center", C_YES, T_CHAR }, #define H_META 59 { "meta", C_NO, T_META }, #define H_TABLE 60 { "table", C_YES, T_DIR }, #define H_TH 61 { "th", C_OPT, T_DIR }, #define H_TR 62 { "tr", C_OPT, T_DIR }, #define H_TD 63 { "td", C_OPT, T_DIR }, #define H_SUP 64 { "sup", C_YES, T_DIR }, #define H_SUB 65 { "sub", C_YES, T_DIR }, #define H_CAPTION 66 { "caption", C_OPT, T_CHAR }, #define H_SCRIPT 67 { "script", C_OPT, T_SCRIPT }, #define H_FONT 68 { "font", C_OPT, T_DIR }, #define H_FORM 69 { "form", C_OPT, T_CHAR }, #define H_INPUT 70 { "input", C_NO, T_CHAR }, #define H_TEXTAREA 71 { "textarea",C_OPT, T_CHAR }, #define H_NOBR 72 { "nobr", C_OPT, T_DIR }, #define H_NOSCRIPT 73 { "noscript", C_YES, T_DIR }, #define H_MARQUEE 74 { "marquee", C_YES, T_DIR }, #define H_STYLE 75 { "style", C_YES, T_STYLE }, #define H_OBJECT 76 { "object", C_YES, T_CHAR }, #define H_PARAM 77 { "param", C_NO, T_CHAR }, #define H_EMBED 78 { "embed", C_YES, T_CHAR }, #define H_IFRAME 79 { "iframe", C_YES, T_CHAR }, #define H_DIV 80 { "div", C_YES, T_DIV }, #define H_CANVAS 81 { "canvas", C_YES, T_CANVAS }, #define H_OPTION 82 { "option", C_YES, T_DIR }, #define H_SPAN 83 { "span", C_YES, T_DIR }, #define H_SVG 84 { "svg", C_YES, T_CHAR }, #define H_PATH 85 { "path", C_YES, T_CHAR }, #define H_RECT 86 { "rect", C_YES, T_CHAR }, #define H_BUTTON 87 { "button", C_YES, T_CHAR }, }; /* Generation state values */ bool in_html, in_head, in_title, in_body, in_header, in_address, in_script; bool active_href; char dest_file[MAX_DF + 1], href_name[MAX_N + 1], href_title[MAX_HT + 1]; bool href_oneway; int h_level; #define MAX_STACK 30 struct { int ln; byte tagnr; byte tagkind; bool closing; } stack[MAX_STACK]; int stack_depth = 0; bool scan_def_string(char *def, int *rp, int *rl, char 
*str) { int p = *rp, l = *rl; l = 0; p++; for (;;) { if (def[p] == '\0') { *rp = p; *rl = l; return FALSE; } else if (def[p] == '"') if (def[p+1] == '"') { if (str != 0) str[l] = '"'; l++; p++; } else { if (str != 0) str[l] = '\0'; p++; *rp = p; *rl = l; return TRUE; } else if (strneq(def + p, "&", 5)) { if (str != 0) str[l] = '&'; l++; p += 5; } else if (strneq(def + p, "<", 4)) { if (str != 0) str[l] = '<'; l++; p += 4; } else if (strneq(def + p, ">", 4)) { if (str != 0) str[l] = '>'; l++; p += 4; } else if (strneq(def + p, "‐", 6)) { if (str != 0) str[l] = '-'; l++; p += 6; } else if (strneq(def + p, "\\nl", 3)) { if (str != 0) str[l] = '\n'; l++; p += 3; } else { if (str != 0) str[l] = def[p]; l++; p++; } } } void latex_open(int tagnr, int ln, char *html_fn, bool closing) { int tagkind = tags[tagnr].kind; DEBUG_P3("latex_open(,tagnr=%d, %s, ln=%d )\n", tagnr, tags[tagnr].name, ln); if (tags[tagnr].closing == C_NO) return; if (stack_depth < MAX_STACK) { DEBUG_P3("push(%d,%d,) : %d\n", ln, tagnr, stack_depth); stack[stack_depth].ln = ln; stack[stack_depth].tagnr = tagnr; stack[stack_depth].tagkind = tagkind; stack[stack_depth].closing = closing; stack_depth++; } else ERROR2(html_fn, ln, "remove <%s>, more than %d nested tags.", tags[tagnr].name, MAX_STACK) DEBUG_P2("latex_open(,tagnr=%d, ln=%d)\n", tagnr, ln); switch (tagnr) { case H_HTML : in_html = TRUE; break; case H_HEAD : in_head = TRUE; break; case H_BODY : in_body = TRUE; break; case H_TITLE : in_title = TRUE; break; case H_ADDRESS : in_address = TRUE; break; } switch (tagkind) { case T_H : in_header = TRUE; break; } } void latex_close(int ln, char *html_fn) { if (stack_depth <= 0) return; stack_depth--; /*DEBUG_P1("latex_close: %s\n", tags[tagnr].name);*/ switch (stack[stack_depth].tagnr) { case H_HTML : in_html = FALSE; break; case H_HEAD : in_head = FALSE; break; case H_BODY : in_body = FALSE; break; case H_TITLE : in_title = FALSE; break; case H_ADDRESS : in_address = FALSE; break; } switch 
/* Scratch buffer for tagname(); overwritten by every call that asks for
   a closing tag. */
char tmp_tagname[30];

/* tagname -- printable name of tag `tagnr`: the plain name, or the name
   preceded by '/' when `closing_tag` is set. The closing form lives in
   the shared tmp_tagname buffer. */
char *tagname(int tagnr, bool closing_tag)
{
    if (!closing_tag)
        return tags[tagnr].name;

    /* bounded, so an unexpectedly long name cannot overrun the buffer */
    snprintf(tmp_tagname, sizeof(tmp_tagname), "/%s", tags[tagnr].name);
    return tmp_tagname;
}


/* remove_tag -- advise removing the given tag; `r` is appended to the
   message as the reason. */
#define REMOVE_TAG(R) remove_tag(tagnr, closing_tag, R, html_fn, ln)
void remove_tag(int tagnr, bool closing_tag, char *r, char *html_fn, int ln)
{
    ERROR2(html_fn, ln, "remove <%s>%s.", tagname(tagnr, closing_tag), r)
}


/* add_html_tag -- advise adding a tag; `r` (may be 0) is the reason.
   A missing closing tag is reported at the line where the innermost
   open tag started and mentions `ln`; a missing opening tag is reported
   at `ln` itself. */
#define ADD_TAG(T,C,R) add_html_tag(T, C, R, html_fn, ln)
void add_html_tag(int tagnr, bool closing_tag, char *r, char *html_fn, int ln)
{
    if (closing_tag)
    {
        /* with an empty stack there is no opening line to point at */
        int open_ln = stack_depth > 0 ? stack[stack_depth-1].ln : ln;

        ERROR4(html_fn, open_ln,
               "add <%s> with <%s> %s in line %d",
               tagname(tagnr, closing_tag), tags[tagnr].name,
               r ? r : "", ln)
    }
    else
        ERROR2(html_fn, ln, "add <%s>%s",
               tagname(tagnr, closing_tag), r ? r : "")
}


/* replace_tag -- advise replacing the given tag by the closing tag of
   the innermost open tag. Callers only use it while the stack is
   non-empty. */
#define REPLACE_TAG() replace_tag(tagnr, closing_tag, html_fn, ln)
void replace_tag(int tagnr, bool closing_tag, char *html_fn, int ln)
{
    /* the format needs a conversion for each of the two arguments */
    ERROR2(html_fn, ln, "replace <%s> by </%s>.",
           tagname(tagnr, closing_tag),
           tags[stack[stack_depth-1].tagnr].name)
}


/* latex_closes -- handle a tag that closes (or implicitly ends) open tags.

   The stack is searched from the top for an entry of kind `tagkind`, or,
   when `tagkind` is T_ILL, for an entry with number `tagnr`. For tags of
   kind T_CHAR a match only counts when it is among the four innermost
   entries. Everything above the match is closed, and an error is
   reported for each such entry that requires a closing tag (C_YES).

   With `till` set the function stops there, leaving the match open.
   Otherwise the match itself is closed (with a "replace" advice when its
   tag number differs from `tagnr`); when nothing matched a "remove"
   advice is given instead. */
void latex_closes(int tagnr, int tagkind, bool closing_tag, bool till,
                  int ln, char *html_fn)
{
    int j;
    bool found = FALSE;

    for (j = stack_depth - 1; j >= 0; j--)
        if (  tagkind != T_ILL
            ? stack[j].tagkind == tagkind
            : stack[j].tagnr == tagnr)
        {
            found = TRUE;
            break;
        }

    if (found && tags[tagnr].kind == T_CHAR)
        found = j > stack_depth - 5;

    if (found)
    {
        while (stack_depth > j + 1)
        {
            if (stack[stack_depth-1].closing == C_YES)
                /* four arguments, so four conversions */
                ERROR4(html_fn, stack[stack_depth-1].ln,
                       "add </%s> with <%s> before <%s> in line %d",
                       tags[stack[stack_depth-1].tagnr].name,
                       tags[stack[stack_depth-1].tagnr].name,
                       tags[tagnr].name, ln);
            latex_close(ln, html_fn);
        }
    }

    if (till)
        return;

    if (found)
    {
        if (stack[stack_depth-1].tagnr != tagnr)
            REPLACE_TAG();
        latex_close(ln, html_fn);
    }
    else if (stack_depth == 0)
        REMOVE_TAG(", no open tag");
    else
    {
        char mess[60];

        snprintf(mess, sizeof(mess), ", does not match <%s>",
                 tags[stack[stack_depth-1].tagnr].name);
        REMOVE_TAG(mess);
    }
}
stack[j].tagkind == tagkind : stack[j].tagnr == tagnr) { found = TRUE; break; } if (found && tags[tagnr].kind == T_CHAR) found = j > stack_depth - 5; if (found) { while (stack_depth > j + 1) { if (stack[stack_depth-1].closing == C_YES) ERROR4(html_fn, stack[stack_depth-1].ln, "add with <%s> before <%s> in line %d", tags[stack[stack_depth-1].tagnr].name, tags[stack[stack_depth-1].tagnr].name, tags[tagnr].name, ln); latex_close(ln, html_fn); } } if (till) return; if (found) { if (stack[stack_depth-1].tagnr != tagnr) REPLACE_TAG(); latex_close(ln, html_fn); } else if (stack_depth == 0) REMOVE_TAG(", no open tag"); else { char mess[60]; sprintf(mess, ", does not match <%s>", tags[stack[stack_depth-1].tagnr].name); REMOVE_TAG(mess); } } bool inside_tag(int tagkind) { int j; for (j = stack_depth - 1; j >= 0; j--) if (stack[j].tagkind == tagkind) return TRUE; else if ( stack[j].tagkind == T_LIST || stack[j].tagkind == T_DESC) return FALSE; return FALSE; } #define OPEN_TAG(K) (tagkind == K && !closing_tag) #define CLOSE_TAG(K) (tagkind == K && closing_tag) #define LATEX_OPEN(T) \ { latex_open(T, ln, html_fn, tags[T].closing); } #define LATEX_OPEN_C(T,C) \ { latex_open(T, ln, html_fn, C); } #define LATEX_CLOSES(T) \ { latex_closes(T, T_ILL, TRUE, FALSE, ln, html_fn); } #define LATEX_CLOSES_K() \ { latex_closes(tagnr, tagkind, closing_tag, FALSE, ln, \ html_fn); } #define LATEX_CLOSES_T(K) \ { latex_closes(tagnr, K, FALSE, TRUE, ln, html_fn); } void add_href(href_p *r_href, char *dest_file, char *href_name, char *title, bool oneway, long line, bool in_header); bool ignore_for_tag; void skip_spaces(char **r_s, char *html_fn, char *r_ch, int *r_ln, file_p in_file) { char ch = *r_ch; int ln = *r_ln; /* skip spaces */ while (ch != '\0' && (ch == ' ' || ch == '\n')) if ((ch = *(*r_s)++) == '\n') ln++; /* process comments */ while (ch != '\0' && ch == '-') { char prev_ch = '\0'; char comment[200000]; int i = 0; if ((ch = *(*r_s)++) == '\n') ln++; if (ch != '-') { ERROR(html_fn, 
ln, "ill comment start") break; } if ((ch = *(*r_s)++) == '\n') ln++; while (ch != '\0') { if (ch == '-' && prev_ch == '-') { i--; if ((ch = *(*r_s)++) == '\n') ln++; break; } if (in_script && ch == '"') { if (i < 199999) comment[i++] = ch; if ((ch = *(*r_s)++) == '\n') ln++; while (ch != '\0' && ch != '"') { if (ch == '\\') { if (i < 199999) comment[i++] = ch; if ((ch = *(*r_s)++) == '\n') ln++; } if (i < 199999) comment[i++] = ch; if ((ch = *(*r_s)++) == '\n') ln++; } } if (ch != '\0') { if (i < 199999) comment[i++] = ch; prev_ch = ch; if ((ch = *(*r_s)++) == '\n') ln++; } } comment[i] = '\0'; if (i > 0 && comment[i-1] == '-') comment[i-1] = '\0'; if (in_script) { /* process script */ char *s = comment; while (isspace(*s) || *s == '\n') s++; if (strncmp(s, "tags()", 6) == 0) { s += 6; //fprintf(stderr, "Found some tags\n"); for(;;) { char tagname[21]; char args[4][51]; int j; while (isspace(*s) || *s == '\n') s++; if (*s == '\0' || *s == '/') break; for (i = 0; *s != '(' && *s != '\0'; s++) if (i < 20) tagname[i++] = *s; tagname[i] = '\0'; if (*s != '(') { ERROR(html_fn, ln, "Incomplete tag"); break; } s++; for (j = 0; j < 4; j++) { int i = 0; if (*s == '"') { s++; for (;*s != '\0' && *s != '"'; s++) if (i < 50) args[j][i++] = *s; if (*s != '"') { ERROR(html_fn, ln, "Incomplete tag: closing quote missing"); break; } s++; while (isspace(*s) || *s == ',') s++; } args[j][i] = '\0'; } while (isspace(*s) || *s == ')' || *s == ';') s++; if (streq(tagname, "tag")) { if (args[1][0] == '\0') { ERROR(html_fn, ln, "Second argument is empty"); } else add_tag_from_script(cur_section, args[1], args[2], args[3], ln); } else add_tag_from_script(cur_section, tagname, args[0], args[1], ln); } } else { char *s = comment; while (*s != '\0') { /* process one line */ for (; *s != '\0' && *s != '\n'; s++) if (s[0] == '/' && s[1] == '/') { s += 2; if (!strncmp(s, "REFBY:", 6)) { int i; for (i = 0, s += 6; *s != '\0' && *s != '\n' && *s != '#'; i++, s++) dest_file[i] = *s; dest_file[i] = 
'\0'; if (*s == '#') s++; for (i = 0; *s != '\n'; i++, s++) href_name[i] = *s; href_name[i] = '\0'; href_title[0] = '\0'; href_oneway = FALSE; if (cur_section) add_href(&cur_section->hrefs, dest_file, href_name, href_title, FALSE, ln, FALSE); } else if (!strncmp(s, "LABEL:", 6)) { int i; for (i = 0, s += 6; *s != '\0' && *s != '\n'; i++, s++) dest_file[i] = *s; dest_file[i] = '\0'; add_name(&cur_names, dest_file, ln, FALSE); if (in_file) { add_section(in_file, ln, 10); cur_section->title = ""; cur_section->names = cur_names; cur_names = 0; } } break; } for (; *s != '\0' && *s != '\n'; s++) ; if (*s == '\n') s++; } } for (s = comment; *s != '\0'; s++) { int ch_val = (*s + 256)%256; if (ch_val >= 160 && ch_val <= 255 && ch_table[ch_val-160] != 0) { ERROR2(html_fn, ln, "Replace character %d by '&%s;'.", ch_val, ch_table[ch_val-160]) } else if (ch_val >= 128) { ERROR1(html_fn, ln, "Illegal character %d.", ch_val) } } } else { if (streq(comment, "ONEWAY") && cur_section) cur_section->oneway = TRUE; else if (streq(comment, "1")) href_oneway = TRUE; else if (streq(comment, "IGNORETAGS") && cur_section) cur_section->ignoretags = TRUE; else if (!strncmp(comment, "REFBY:", 6)) { int i; char *s; for (i = 0, s = comment+6; *s != '\0' && *s != '#'; i++, s++) dest_file[i] = *s; dest_file[i] = '\0'; if (*s == '#') s++; for (i = 0; *s != '\0'; i++, s++) href_name[i] = *s; href_name[i] = '\0'; href_title[0] = '\0'; href_oneway = FALSE; if (cur_section) add_href(&cur_section->hrefs, dest_file, href_name, href_title, FALSE, ln, FALSE); } else if (!strncmp(comment, "LABEL:", 6)) { add_name(&cur_names, comment+6, ln, FALSE); if (in_file) { add_section(in_file, ln, 10); cur_section->title = ""; cur_section->names = cur_names; cur_names = 0; } } else if (!strncmp(comment, "TAGINDEX:", 9)) { char tag_name[101]; int i = 0; char *s = comment+9; for (; *s != '\0'; s++) if (*s == ':') break; else if (i < 100) tag_name[i++] = *s; tag_name[i] = '\0'; tag_type_p tag_type = 
find_tag_type(tag_name); cur_section->nr_tag_types++; cur_section->tag_type = tag_type; tag_indexes[nr_tag_indexes] = tag_type; if (nr_tag_indexes > 0) tag_type->parent = tag_indexes[nr_tag_indexes - 1]; if (*s == ':') { s++; STRCPY(tag_indexes[nr_tag_indexes]->title, s); } nr_tag_indexes++; } else if (streq(comment, "TAGINDEX-OFF")) { if (nr_tag_indexes > 0) nr_tag_indexes--; } else if (streq(comment, "NOTAG")) { href_is_tag = FALSE; } else if (streq(comment, "IGNOREFORTAG")) { ignore_for_tag = TRUE; } else if (!strncmp(comment, "LEVEL=", 6)) { h_level = atoi(comment+6); } } /* skip spaces */ while (ch != '\0' && (ch == ' ' || ch == '\n')) if ((ch = *(*r_s)++) == '\n') ln++; } *r_ch = ch; *r_ln = ln; } bool check_src(char *html_fn, int ln, char *src) { if (!norm_URL(html_fn, src)) { ERROR1(html_fn, ln, "URL '%s' illegal or too long.", src) return FALSE; } if (!is_URL(src)) { file_p src_file = find_file(src) ; src_file->read |= R_DOREAD; if (!src_file->exists) { if (!(src_file->read & R_SITEMAP)) ERROR1(html_fn, ln, "file '%s' does not exist.", src) return FALSE; } else if (strcmp(src_file->name, src)) { ERROR2(html_fn, ln, "change '%s' into '%s'.", src, src_file->name) return FALSE; } else src_file->read |= R_INCLUDED; } return TRUE; } #define AT_ANY 0 #define AT_ALIGN 1 #define AT_COLOR 2 #define AT_NUM 3 #define AT_IMG_ALIGN 4 #define AT_FILE 5 typedef struct { int tagnr; char *attr_name; int type; } valid_comb_t; valid_comb_t valid_comb[] = { { H_BODY, "alink", AT_COLOR }, { H_BODY, "onload", AT_ANY }, { H_BODY, "bgcolor", AT_COLOR }, { H_BODY, "bgproperties", AT_ANY }, { H_BODY, "background", AT_FILE }, { H_P, "title", AT_ANY }, { H_P, "align", AT_ALIGN }, { H_TABLE, "width", AT_NUM }, { H_TABLE, "border", AT_NUM }, { H_TABLE, "cellpadding", AT_NUM }, { H_TABLE, "cellspacing", AT_NUM }, { H_TABLE, "align", AT_ALIGN }, { H_TABLE, "vspace", AT_NUM }, { H_TABLE, "hspace", AT_NUM }, { H_TABLE, "bgcolor", AT_COLOR }, { H_TABLE, "background", AT_FILE }, { H_TR, 
"valign", AT_ANY }, { H_TR, "align", AT_ALIGN }, { H_IMG, "border", AT_NUM }, { H_IMG, "width", AT_NUM }, { H_IMG, "height", AT_NUM }, { H_IMG, "align", AT_IMG_ALIGN }, { H_IMG, "space", AT_NUM }, { H_IMG, "hspace", AT_NUM }, { H_IMG, "vspace", AT_NUM }, { H_IMG, "alt", AT_ANY }, { H_IMG, "nosave", AT_ANY }, { H_IMG, "src", AT_FILE }, { H_FONT, "color", AT_COLOR }, { H_FONT, "size", AT_NUM }, { H_FONT, "face", AT_ANY }, { H_BR, "clear", AT_ANY }, { H_HR, "width", AT_NUM }, { H_HR, "align", AT_ALIGN }, { H_H1, "align", AT_ALIGN }, { H_H2, "align", AT_ALIGN }, { H_H3, "align", AT_ALIGN }, { H_H4, "align", AT_ALIGN }, { H_OL, "type", AT_NUM }, { H_UL, "type", AT_NUM }, { H_FORM, "method", AT_ANY }, { H_FORM, "action", AT_ANY }, { H_FORM, "name", AT_ANY }, { H_FORM, "target", AT_ANY }, { H_INPUT, "type", AT_ANY }, { H_INPUT, "name", AT_ANY }, { H_INPUT, "size", AT_NUM }, { H_INPUT, "maxlength", AT_NUM }, { H_INPUT, "value", AT_ANY }, { H_INPUT, "onclick", AT_ANY }, { H_INPUT, "checked", AT_ANY }, { H_TEXTAREA, "name", AT_ANY }, { H_TEXTAREA, "cols", AT_NUM }, { H_TEXTAREA, "rows", AT_NUM }, { H_TD, "align", AT_ALIGN }, { H_TD, "valign", AT_ANY }, { H_TD, "colspan", AT_ANY }, { H_TD, "rowspan", AT_ANY }, { H_TD, "width", AT_NUM }, { H_TD, "bgcolor", AT_COLOR }, { H_TD, "style", AT_ANY }, { H_TD, "title", AT_ANY }, { H_TH, "align", AT_ALIGN }, { H_TH, "valign", AT_ANY }, { H_TH, "colspan", AT_ANY }, { H_TH, "rowspan", AT_ANY }, { H_TH, "width", AT_NUM }, { H_TH, "bgcolor", AT_COLOR }, { H_TH, "style", AT_ANY }, { H_TH, "title", AT_ANY }, { H_A, "target", AT_ANY }, { H_A, "onmouseover", AT_ANY }, { H_A, "onclick", AT_ANY }, { H_A, "style", AT_ANY }, { H_SCRIPT, "language", AT_ANY }, { H_SCRIPT, "src", AT_FILE }, { H_MARQUEE, "direction", AT_ANY }, { H_MARQUEE, "height", AT_ANY }, { H_MARQUEE, "scrollamount", AT_ANY }, { H_CANVAS, "width", AT_NUM }, { H_CANVAS, "height", AT_NUM }, { H_SPAN, "style", AT_ANY }, { H_SPAN, "title", AT_ANY }, { H_BUTTON, "id", AT_ANY }, { 
H_BUTTON, "onclick", AT_ANY }, }; bool valid_attr(char *html_fn, int ln, char *html_com, int tagnr, char *attr_name, char *attr_value) { size_t i; for (i = 0; i < sizeof(valid_comb)/sizeof(valid_comb[0]); i++) if ( valid_comb[i].tagnr == tagnr && streq(valid_comb[i].attr_name, attr_name)) { bool correct = TRUE; switch (valid_comb[i].type) { case AT_ALIGN: correct = strieq(attr_value, "RIGHT") || strieq(attr_value, "LEFT") || strieq(attr_value, "CENTER"); break; case AT_IMG_ALIGN: correct = strieq(attr_value, "RIGHT") || strieq(attr_value, "LEFT") || strieq(attr_value, "CENTER") || strieq(attr_value, "TOP"); break; case AT_FILE: return check_src(html_fn, ln, attr_value); default: return TRUE; } if (!correct) ERROR3(html_fn, ln, "incorrect <%s .. %s=\"%s\">", html_com, attr_name, attr_value) return correct; } return FALSE; } void print_sections(section_p section) { int depth = 0; for ( ; section; next_section(§ion, &depth)) { printf("%*.*sSection(%d): '%s' #%ld", depth, depth, "", section->level, section->title, (long)section); { name_p names = section->names; if (names) { printf (" with name:"); for ( ; names; names = names->next) printf(" %s", names->name); } } printf("\n"); { href_p hrefs; for (hrefs = section->hrefs; hrefs; hrefs = hrefs->next) printf("%*.*s to: %s %s %c\n", depth, depth, "", hrefs->file->name, hrefs->name, hrefs->type); } } } char *trim(char *s) { int l; char *r; while (*s == ' ') s++; l = strlen(s); while (l > 0 && s[l-1] == ' ') l--; s[l] = '\0'; STRCPY(r,s); return r; } FILE *f_ext_dest = 0; FILE *f_broken_ext_dest = 0; char *get_contents(file_p in_file) { FILE *fin = 0; if (in_file->contents != 0) return in_file->contents; if (in_file->exists) { fin = fopen(in_file->name, "rt"); if (fin == 0) in_file->exists = FALSE; } if (!in_file->exists) { ERROR(in_file->name, 0, "does not exist") return 0; } if (fin != 0); { int fh = fileno(fin); long file_len; file_len = lseek(fh, 0L, SEEK_END); lseek(fh, 0L, SEEK_SET); in_file->contents = 
(char*)malloc(file_len+2); file_len = read(fh, in_file->contents, file_len); in_file->contents[file_len] = '\0'; #ifndef WIN32 // have to remove \r characters { char *s, *r; r = in_file->contents; for (s = in_file->contents; *s != '\0'; s++) if (*s != '\r') *r++ = *s; *r = '\0'; } #endif fclose(fin); } return in_file->contents; } //#define EXPORT_C_SOURCES #ifdef EXPORT_C_SOURCES FILE *f_c_sources; #endif void scan_a_file(char *html_fn, int depth, bool included) { file_p in_file = find_file(html_fn); char html_com[MAX_HC + 1], attr_name[MAX_HC + 1], attr_val[MAX_AV + 1], name[MAX_N + 1], alt[MAX_DF + 1]; char ch; int ln = 1; char *s = 0; char section_title[1000]; word i_st; cur_section = 0; cur_names = 0; nr_tag_indexes = 0; add_section(in_file, 0, 0); cur_section->title = ""; DEBUG_P1("Scan %s\n", html_fn); in_html = FALSE; in_head = FALSE; in_title = FALSE; in_body = FALSE; in_address = FALSE; in_header = FALSE; in_script = FALSE; active_href = FALSE; s = get_contents(in_file); if (s == 0) return; in_file->read |= R_READ; if (included) in_file->read |= R_INCLUDED; if ((ch = *s++) == '\n') ln++; while(ch != '\0') { if (ch == '<') { bool is_comment = FALSE, a_name = FALSE, a_href = FALSE; byte tagnr, tagkind = T_ILL; bool closing_tag = FALSE; alt[0] = '\0'; h_level = -1; href_title[0] = '\0'; ignore_for_tag = FALSE; /* SCAN <...> */ /* skip < and following spaces: */ if ((ch = *s++) == '\n') ln++; while (ch != '\0' && (ch == ' ' || ch == '\n')) if ((ch = *s++) == '\n') ln++; html_com[0] = '\0'; if (ch != '!') { int i; bool too_long; /* scan first word in html_com: */ i = 0; too_long = FALSE; while ( ch != '\0' && ch != '>' && ch != ' ' && ch != '\n' && ch != '\t') { if (i < MAX_HC) html_com[i++] = tolower(ch); else too_long = TRUE; if ((ch = *s++) == '\n') ln++; } html_com[i] = '\0'; if (too_long) ERROR1(html_fn, ln, "HTML tag name too long --- '%s'", html_com) for (tagnr = 0; tagnr < NR_TAGS; tagnr++) if (streq(html_com, tags[tagnr].name)) { tagkind = 
tags[tagnr].kind; DEBUG_P2("found '%s' = %d\n", tags[tagnr].name, tagkind); break; } else if ( html_com[0] == '/' && tags[tagnr].closing != C_NO && streq(html_com + 1, tags[tagnr].name)) { tagkind = tags[tagnr].kind; closing_tag = TRUE; DEBUG_P2("found '/%s' = %d\n", tags[tagnr].name, tagkind); break; } while (ch != '\0' && ch != '>') { bool found_is = FALSE; skip_spaces(&s, html_fn, &ch, &ln, in_file); if (ch == '\0' || ch == '>') break; /* scan attribute in attr_name */ i = 0; too_long = FALSE; while (ch != '\0' && ch != '>' && ch != '=' && ch != ' ' && ch != '\n' && ch != '\t') { if (i < MAX_HC) attr_name[i++] = tolower(ch); else too_long = TRUE; if ((ch = *s++) == '\n') ln++; } attr_name[i] = '\0'; DEBUG_P1("found attribute: '%s'\n", attr_name); if (too_long) ERROR1(html_fn, ln, "HTML attribute '%s' too long.", attr_name) /* skip = and spaces */ while ( ch != '\0' && ( ch == ' ' || ch == '\n' || ch == '=' || ch == '\t')) { if (ch == '=') found_is = TRUE; if ((ch = *s++) == '\n') ln++; } /* scan string into name */ i = 0; if (found_is) { bool is_quoted = ch == '"'; too_long = FALSE; if (is_quoted) if ((ch = *s++) == '\n') ln++; while( ch != '\0' && ch != '>' && ch != '"' && (is_quoted || ch != ' ')) { if (i < MAX_AV) attr_val[i++] = ch; else too_long = TRUE; if ((ch = *s++) == '\n') ln++; } if (is_quoted != (ch == '\"')) ERROR(html_fn, ln, "incorrectly quoted string") if (ch == '\"') if ((ch = *s++) == '\n') ln++; if (too_long) ERROR1(html_fn, ln, "attr value too long name '%s'", attr_val) } attr_val[i] = '\0'; DEBUG_P1("found attribute value: '%s'\n", attr_val); if (OPEN_TAG(T_A) && streq(attr_name, "name")) { if (attr_val[0] == '\0') ERROR(html_fn, ln, "found ") else if (strlen(attr_val) > MAX_N) ERROR1(html_fn, ln, "name too long '%s'", attr_val) else { strcpy(name, attr_val); a_name = TRUE; } } else if (OPEN_TAG(T_A) && streq(attr_name, "href")) { if (attr_val[0] == '\0') ERROR(html_fn, ln, "found ") else { char *av = attr_val; i = 0; too_long = FALSE; for 
(; *av != '\0' && *av != '#' && *av != '?'; av++) if (i < MAX_DF) dest_file[i++] = *av; else too_long = TRUE; if (*av == '?') for (; *av != '\0'; av++) { if (i < MAX_DF) dest_file[i++] = *av; else too_long = TRUE; } dest_file[i] = '\0'; DEBUG_P1("found dest file '%s'\n", dest_file); href_name[0] = '\0'; href_oneway = FALSE; href_is_tag = TRUE; if (too_long) ERROR1(html_fn, ln, "URL too long '%s'", dest_file) else if (*av == '\0') a_href = TRUE; else { i = 0; too_long = FALSE; av++; for (; *av != '\0'; av++) if (i < MAX_N) href_name[i++] = *av; else too_long = TRUE; if (i == 0) ERROR(html_fn, ln, "found empty string after #") href_name[i] = '\0'; DEBUG_P1("found href name '%s'\n", href_name); if (too_long) ERROR1(html_fn, ln, "name too long '%s'", href_name) else a_href = TRUE; } } } else if (OPEN_TAG(T_A) && streq(attr_name, "title")) { if (attr_val[0] == '\0') ERROR(html_fn, ln, "found ") else { strncpy(href_title, attr_val, MAX_HT); href_title[MAX_HT] = '\0'; } } else if ( OPEN_TAG(T_IMG) && streq(attr_name, "alt") && attr_val[0] != '\0') { if (strlen(attr_val) > MAX_DF) ERROR1(html_fn, ln, "alt too long '%s'", attr_val) else strcpy(alt, attr_val); } else if (!valid_attr(html_fn, ln, html_com, tagnr, attr_name, attr_val)) ; else if (option_warn && tagkind != T_ILL) ERROR3(html_fn, ln, "ignored <%s .. %s=\"%s\">", html_com, attr_name, attr_val) } } else /* ch == '!' 
*/ { is_comment = TRUE; if ((ch = *s++) == '\n') ln++; skip_spaces(&s, html_fn, &ch, &ln, in_file); if (ch != '>') { if (option_warn) ERROR(html_fn, ln, "using non-standard comments.") while (ch != '\0' && ch != '>') { if ((ch = *s++) == '\n') ln++; } } } /* skip till > */ while (ch != '\0' && ch != '>') if ((ch = *s++) == '\n') ln++; /* PROCESS references */ if (a_name) add_name(&cur_names, name, ln, ignore_for_tag); if (a_href) { href_okay = TRUE; if (!norm_URL(html_fn, dest_file)) ERROR1(html_fn, ln, "URL '%s' illegal or too long", dest_file) else if (!is_URL(dest_file)) { /* add_ref(html_fn, ln, dest_file, href_name[0] == '\0' ? 0 : href_name); */ /* to add dest_file to list of files: */ file_p file; analyze_href(html_fn, ln, dest_file); file = find_file(dest_file); file->read |= R_DOREAD; if (streq(dest_file, "Broken.html") && strcmp(html_fn, "brexrefs.html")) { char *error_code = 0; char *brfile = url_argument; char *s = strstr(url_argument, "|"); if (s != 0 && !is_URL(url_argument)) { error_code = url_argument; *s = '\0'; brfile = s + 1; } if (f_broken_ext_dest != 0) { fprintf(f_broken_ext_dest, "
  • %s in file %s", brfile, brfile, html_fn, html_fn, ln); if (error_code != 0) fprintf(f_broken_ext_dest, " with error code: %s", error_code); fprintf(f_broken_ext_dest, ".\n"); } nr_broken_ext_links++; if (f_ext_dest != 0) fprintf(f_ext_dest, "%s in %s:%d is Broken
    \n", brfile, brfile, html_fn, ln); } else { if (is_html(dest_file)) { /* -- internal reference */ if (a_name && !in_header && !in_address) { /* Combination of name and href outside header: Introduce fake section */ add_section(in_file, ln, 10); cur_section->title = ""; cur_section->names = cur_names; cur_names = 0; } add_href( in_address ? &in_file->sections->hrefs : &cur_section->hrefs, dest_file, href_name, href_title, href_oneway, ln, in_header||in_address); } #ifdef EXPORT_C_SOURCES else { if (strstr(dest_file, "_c.txt") != 0 || strstr(dest_file, "_cpp.txt") != 0) { char filename[200]; char *s; section_p name_sect = cur_section; while (name_sect != 0 && name_sect->names == 0) name_sect = name_sect->parent; if (name_sect != 0 && name_sect->names != 0) fprintf(f_c_sources, "
  • %s: %s", html_fn, name_sect->names->name, cur_section->parent ? cur_section->parent->title : "xx", cur_section->title); else fprintf(f_c_sources, "
  • %s: %s", html_fn, cur_section->parent ? cur_section->parent->title : "xx", cur_section->title); strcpy(filename, dest_file); s = strstr(filename, "_c.txt"); if (s != 0) strcpy(s, ".c"); s = strstr(filename, "_cpp.txt"); if (s != 0) strcpy(s, ".cpp"); fprintf(f_c_sources, " %s\n", dest_file, filename, file->date_days); } } #endif } } else if (f_ext_dest != 0 && strcmp(html_fn, "brexrefs.html")) fprintf(f_ext_dest, "%s in %s:%d
    \n", dest_file, dest_file, html_fn, ln); } /* PROCESS <...> */ DEBUG_P4("found %s at %d: in_head %d: %d\n", tags[tagnr].name, ln, in_head, tagkind); if (is_comment) /* skip */; else if (tagkind == T_ILL) ERROR1(html_fn, ln, "unknown <%s>.", html_com) else if (OPEN_TAG(T_HTML)) if (in_html) REMOVE_TAG(", tag only inside "); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_HTML)) if (!in_html) REMOVE_TAG(", not inside "); else LATEX_CLOSES(tagnr) else { if (!in_html) { if (option_pedantic) ADD_TAG(H_HTML, FALSE, ", tag requires "); LATEX_OPEN(H_HTML) } if (OPEN_TAG(T_HEAD)) if (in_head) REMOVE_TAG(", tag only outside "); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_HEAD)) if (!in_head) REMOVE_TAG(", not inside "); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_TITLE)) if (in_body) REMOVE_TAG(", tag not inside "); else if (in_title) REMOVE_TAG(", nested "); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_TITLE)) if (in_body) REMOVE_TAG(", tag not inside <body>"); else if (!in_title) REMOVE_TAG(", not inside <title>"); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_STYLE)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_STYLE)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_BODY)) { if (in_body) REMOVE_TAG(", nested <body>"); else if (in_head) REMOVE_TAG(", still inside <head>"); else LATEX_OPEN(tagnr) /* print file name here ??? 
*/ /* \n\\par{\\footnotesize$(File:\\ )$}\\par\n */ } else if (CLOSE_TAG(T_BODY)) if (!in_body) REMOVE_TAG(", not inside <body>"); else if (in_head) REMOVE_TAG(", still inside <head>"); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_LINK) || OPEN_TAG(T_META)) { /* ignore */ } else if (OPEN_TAG(T_SCRIPT)) { LATEX_OPEN(tagnr) in_script = TRUE; } else if (CLOSE_TAG(T_SCRIPT)) { in_script = FALSE; LATEX_CLOSES(tagnr) } else { if (in_head && !a_name) { char mess[60]; sprintf(mess, ", required by <%s>", tags[tagnr].name); ADD_TAG(H_HEAD, TRUE, mess); LATEX_CLOSES(H_HEAD) } if (!in_body && !a_name) { char mess[60]; sprintf(mess, ", required by <%s>", tags[tagnr].name); ADD_TAG(H_BODY, FALSE, mess); LATEX_OPEN(H_BODY) } if (OPEN_TAG(T_DIR)||OPEN_TAG(T_STYLE)||OPEN_TAG(T_DIV)||OPEN_TAG(T_CANVAS)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_DIR)||CLOSE_TAG(T_STYLE)||CLOSE_TAG(T_DIV)||CLOSE_TAG(T_CANVAS)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_ADDR)) if (in_address) REMOVE_TAG(", nested <address>"); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_ADDR)) if (!in_address) REMOVE_TAG(", not inside <address>"); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_H)) if (in_header) { REPLACE_TAG(); LATEX_CLOSES_K() /* lower section level */ section_title[i_st] = '\0'; cur_section->title = trim(section_title); cur_section->names = cur_names; cur_names = 0; } else { int level = 0; switch (tagnr) { case H_H1: level = 1; break; case H_H2: level = 2; break; case H_H3: level = 3; break; case H_H4: level = 4; break; case H_H5: level = 5; break; case H_H6: level = 6; break; } if (0 <= h_level && h_level < 10) level = h_level; add_section(in_file, ln, level); i_st = 0; LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_H)) if (!in_header) REMOVE_TAG(", not inside <h?>"); else { LATEX_CLOSES_K() section_title[i_st] = '\0'; cur_section->title = trim(section_title); cur_section->names = cur_names; cur_names = 0; } else if (OPEN_TAG(T_LIST)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_LIST)) LATEX_CLOSES_K() else if 
(OPEN_TAG(T_ITEM)) if (!inside_tag(T_LIST)) REMOVE_TAG(", not inside listing tag"); else { LATEX_CLOSES_T(T_LIST) LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_ITEM)) LATEX_CLOSES_K() else if (OPEN_TAG(T_DESC)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_DESC)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_DT)) if (!inside_tag(T_DESC)) REMOVE_TAG(", not inside <dl>"); else { LATEX_CLOSES_T(T_DESC) LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_DT)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_DD)) if (!inside_tag(T_DESC)) REMOVE_TAG(", not inside <dl>"); else { LATEX_CLOSES_T(T_DESC) LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_DD)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_A)) { int j; for (j = stack_depth - 1; j >= 0 && stack[j].closing == C_OPT; j--) if (stack[j].tagkind == T_A) { LATEX_CLOSES(H_A); break; } if (a_href && href_okay) { if (active_href) ERROR(html_fn, ln, "nested href") active_href = TRUE; } LATEX_OPEN_C(tagnr, a_href ? C_YES : C_OPT); } else if (CLOSE_TAG(T_A)) { LATEX_CLOSES(tagnr) } else if (OPEN_TAG(T_VERB)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_VERB)) LATEX_CLOSES_K() else if (OPEN_TAG(T_P)) { if (stack[stack_depth-1].tagkind == T_P) LATEX_CLOSES(H_P); if (in_header && option_info) ERROR(html_fn, ln, "<p> ignored in header") LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_P)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_CHAR)) { int j; bool found = FALSE; for (j = stack_depth - 1; j >= 0; j--) if (stack[j].tagnr == tagnr) { found = TRUE; break; } if (found && j == stack_depth - 1) { REPLACE_TAG(); LATEX_CLOSES(tagnr) } else { if (found) ERROR1(html_fn, ln, "nested <%s>", html_com) LATEX_OPEN(tagnr) } } else if (CLOSE_TAG(T_CHAR)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_IMG)) ; else ERROR1(html_fn, ln, "tag %s not processed", tags[tagnr].name) } } /* read > and skip till first non-space */ if (ch != '\0' && ch == '>') { if ((ch = *s++) == '\n') ln++; } } else { int ch_val = (ch + 256)%256; bool skip = FALSE; if (!isspace(ch) && !in_header && !in_head) { if (cur_names != 0) { /* 
Introduce fake section for text before first section */ add_section(in_file, ln, 10); cur_section->title = "<NO TITLE>"; cur_section->names = cur_names; cur_names = 0; } cur_section->has_text = TRUE; } if (in_header && ch != '\n' && i_st < 999) section_title[i_st++] = ch; /* Now analyze character */ if (ch == '\n') skip = TRUE; else if (ch_val >= 160 && ch_val <= 255 && ch_table[ch-160] != 0) { ERROR2(html_fn, ln, "Replace character %d by '&%s;'.", ch_val, ch_table[ch_val-160]) skip = TRUE; } else if (ch == '&') { int i = 0; bool correct = FALSE; char html_ch[10]; int v; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; if (isalpha(ch)) { while (isalpha(ch)||isdigit(ch)) { if (i < 9) html_ch[i++] = ch; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } html_ch[i] = '\0'; for (v = 0; v < NR_CH_TABLE; v++) if ( ch_table[v] != 0 && !strcmp(html_ch, ch_table[v])) { correct = TRUE; break; } if (ch == ';') { if (i < 9) html_ch[i++] = ch; html_ch[i] = '\0'; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } else ERROR1(html_fn, ln, "Place ';' after sequence '&%s'.", html_ch) if (!correct) ERROR1(html_fn, ln, "Unknown sequence '&%s'.", html_ch) } else if (ch == '#') { int code = 0; bool hexcode = FALSE; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; html_ch[i++] = '#'; if (ch == 'x') { hexcode = TRUE; html_ch[i++] = 'x'; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; while (isxdigit(ch)) { if (i < 9) html_ch[i++] = ch; if (isdigit(ch)) code = code * 16 + ch - '0'; else if ('A' <= ch && ch <= 'F') code = code * 16 + ch - 'A' + 10; else if ('a' <= ch && ch <= 'f') code = code * 16 + ch - 'a' + 10; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } } else { while (isdigit(ch)) { if (i < 9) html_ch[i++] = ch; code = code * 10 + ch - '0'; if 
(in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } } html_ch[i] = '\0'; if (ch == ';') { if (i < 9) html_ch[i++] = ch; html_ch[i] = '\0'; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } else ERROR1(html_fn, ln, "Place ';' after sequence '&%s'", html_ch) if ((code >= ' ' && code < 127)) ERROR2(html_fn, ln, "Replace sequence '&%s' by '%c'", html_ch, code) else if (code == 132 || code == 257 || code == 263 || code == 268 || code == 269 || code == -1 || code == 8224) ; // Okay else if (code >= 160 && code <= 255 && ch_table[code-160] != 0) ERROR2(html_fn, ln, "Replace sequence '&%s' by '&%s;'", html_ch, ch_table[code-160]) else if (!hexcode) ERROR1(html_fn, ln, "Unknown sequence '&%s'", html_ch) } else ERROR(html_fn, ln - (ch == '\n'), "Replace '&' by '&'") } else if (ch == '>') { ERROR(html_fn, ln, "Replace '>' by '>'") skip = TRUE; } else if ((ch >= ' ' && ch_val < 127) || ch == '\t') skip = TRUE; else { ERROR1(html_fn, ln, "Unknown character %d (decimal)", ch_val) skip = TRUE; } if (skip) if ((ch = *s++) == '\n') ln++; } } /* In case no header occured, dump label */ while (stack_depth > 0) { if (!option_pedantic || stack[stack_depth-1].tagnr != T_HTML) ERROR1(html_fn, ln, "add </%s>", tags[stack[stack_depth-1].tagnr].name) latex_close(ln, html_fn); } } #define NO_EXPLAIN_REASON int debug_refby = 0; void check_exists_file(char *html_fn) { file_p in_file = find_file(html_fn); //if (in_file->exists) // in_file->exists = file_exists(html_fn); if (!in_file->exists) ERROR(html_fn, 0, "does not exist\n") in_file->read |= R_READ; in_file->read |= R_INCLUDED; } void scan_a_js_file(char *js_fn) { file_p in_file = find_file(js_fn); char *s = 0, ch; int ln = 1; /*printf("Include JS file %s\n", js_fn);*/ s = get_contents(in_file); if (s == 0) return; in_file->read |= R_READ; in_file->read |= R_INCLUDED; if ((ch = *s++) == '\n') ln++; while(ch != '\0') { if (ch == '/' && *s == '/') { do { if ((ch = 
*s++) == '\n') ln++; } while (ch != '\0' && ch != '\n'); } else if (ch == '"' || ch == '\'') { char fn[101]; int i = 0; char quote = ch; if ((ch = *s++) == '\n') ln++; while (ch != quote && ch != '\0' && ch != '\n') { if (ch == '\\') { if ((ch = *s++) == '\n') ln++; if (ch != '\0' && ch != '\n') { if (i < 100) fn[i++] = ch; if ((ch = *s++) == '\n') ln++; } } else { if (i < 100) fn[i++] = ch; if ((ch = *s++) == '\n') ln++; } } if (ch == quote) if ((ch = *s++) == '\n') ln++; fn[i] = '\0'; /*printf("Found string |%s|\n", fn);*/ if ( !strcmp(fn + strlen(fn) - 4, ".jpg") || !strcmp(fn + strlen(fn) - 4, ".gif") || !strcmp(fn + strlen(fn) - 4, ".png")) { file_p img_file = find_file(fn); if (img_file->read & R_INDIR) img_file->read |= R_INCLUDED; else if (!img_file->exists) ERROR1(js_fn, ln, "file '%s' does not exist", fn) } } else { if ((ch = *s++) == '\n') ln++; } } } void accept_root_URL(char *URL) { int strlen_URL = strlen(URL); document_URL = NALLOC(char, strlen_URL + 2); strcpy(document_URL, URL); if (document_URL[strlen_URL - 1] != '/') { document_URL[strlen_URL] = '/'; document_URL[strlen_URL + 1] = '\0'; } server_URL = 0; { int i; for (i = 0; URL[i] != '\0' && URL[i] != ':'; i++); if (URL[i] != '\0' && URL[i+1] == '/' && URL[i+2] == '/') { i += 3; while (URL[i] != '\0' && URL[i] != '/') i++; if (URL[i] == '/') file_URL = document_URL + i; else file_URL = "/"; URL[i] = '\0'; server_URL = SALLOC(URL); strcpy(server_URL, URL); } } if (server_URL == 0) { printf("illegal URL '%s'\n", document_URL); document_URL = 0; } } void scan_not_included_files(bool included) { bool found = TRUE; while (found) { file_p file = the_files; found = FALSE; while (file != 0 && !found) { DEBUG_P4("%s %d %d %d ", file->name, file->exists, !(file->read & R_READ), !is_URL(file->name)); DEBUG_P3("%d %d %d\n", is_html(file->name), file->name[0] != '.', file->name[1] != '.'); if ( file->exists && !(file->read & R_READ) && (file->read & R_DOREAD) && !is_URL(file->name) && (file->name[0] != '.' 
|| file->name[1] != '.')) found = TRUE; else file = file->next; } if (found) { if (is_html(file->name) && !streq(file->name, "brexrefs.html")) scan_a_file(file->name, 0, included); else if ((file->read & R_INCLUDED) && is_js(file->name)) scan_a_js_file(file->name); else if (!is_URL(file->name)) check_exists_file(file->name); } } } void dump_site_map(file_p file, int depth) { int i; file_p child; for (i = 0; i < depth; i++) printf("\t"); printf("%s", file->name); if (file->parent) printf(" p:%s", file->parent->name); if (file->sibling) printf(" s:%s", file->sibling->name); if (file->next_sibling) printf(" ns:%s", file->next_sibling->name); if (file->prev_sibling) printf(" ps:%s", file->prev_sibling->name); printf("\n"); for (child = file->children; child != 0; child = child->sibling) dump_site_map(child, depth+1); } file_p root_files[10]; int nr_root_files = 0; void read_site_map(void) { FILE *f = fopen("SiteMap.html", "rt"); file_p nested[20]; int cur_depth = 0; char fn[400]; bool empty_line = FALSE; int i; for (i = 0; i < 20; i++) nested[i] = 0; if (f==0) return; while (fgets(fn, 399, f)) { char *s = fn; int depth = 0; bool special_child = FALSE; bool par = FALSE; for (; *s == '\t'; s++) depth++; if (strncmp(s, "<LI>", 4) == 0) { s += 4; depth++; } if (strncmp(s, "<P>", 3) == 0) empty_line = TRUE; if (strncmp(s, "<A HREF=\"", 9) == 0) { char *e = s + 9; while (*e != '\"' && *e != '\0') e++; if (strncmp(e, "\"--*-->", 7) == 0) special_child = TRUE; *e = '\0'; s += 9; } else if (strncmp(s, "<!-- ", 5) == 0) { char *e = s + 5; while (*e != ' ' && *e != '\0') e++; *e = '\0'; s += 5; } else s = 0; if (s != 0) { file_p file = 0; file_p prev_sibling = 0; while (depth < cur_depth) nested[cur_depth--] = 0; prev_sibling = nested[cur_depth]; if (empty_line) { nested[cur_depth] = 0; empty_line = FALSE; } cur_depth = depth; file = find_file(s); file->special_child = special_child; if (file->read & R_SITEMAP) printf("site_map.txt : '%s' repeated\n", s); file->read |= R_SITEMAP; 
if (cur_depth == 0) root_files[nr_root_files++] = file; else { file_p parent = nested[cur_depth-1]; if (parent) { file->parent = parent; if (parent->children == 0) parent->children = file; else if (prev_sibling != 0) prev_sibling->sibling = file; } if (nested[cur_depth] != 0) { nested[cur_depth]->next_sibling = file; file->prev_sibling = nested[cur_depth]; } } nested[cur_depth] = file; } } /* printf("***\n"); { int i; for (i = 0; i < nr_root_files; i++) dump_site_map(root_files[i], 0); } printf("***\n"); */ fclose(f); } int month_of(char *s) { static char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; int i; for (i = 0; i < 12; i++) if (strncmp(s, months[i], 3) == 0) return i+1; return 0; } void add_size(long size, char *fn) { int l = strlen(fn); if ( (l > 6 && !strcmp(fn + l - 6, "_c.txt")) || (l > 8 && !strcmp(fn + l - 8, "_cpp.txt"))) { add_other_ext("C",size); } else if (l > 8 && !strcmp(fn + l - 8, "_scr.txt")) { add_other_ext("scr",size); } else if (l > 8 && !strcmp(fn + l - 8, "_pas.txt")) { add_other_ext("Pascal",size); } else if (l > 7 && !strcmp(fn + l - 7, "_py.txt")) { add_other_ext("Python",size); } else { char *ext = fn + l; while (ext > fn && *(ext-1) != '.') ext--; add_other_ext(ext, size); } } void copy_file(FILE *fout, file_p file) { if (file->contents != 0 && is_html(file->name)) { char ofn[200]; FILE *of; sprintf(ofn, "%s/%s", target_dir, file->name); of = fopen(ofn, "wt"); if (of) { char *r = file->contents; bool in_script = FALSE; char *head; if ( (head = strstr(file->contents, "<HEAD>")) != 0 || (head = strstr(file->contents, "<head>")) != 0) { fprintf(of, "<html><head><meta charset=\"UTF-8\"><base href=\"https://%s/%s\">", target_domain, file->name); r = head + 6; } else { printf("Error: Did not find <HEAD> in %s\n", file->name); } while (*r != '\0') { if (*r == '<') { r++; if (*r == '!') { r++; if (*r == '-' && r[1] == '-') { if (in_script) { fputs("<!--", of); r += 2; while (*r != '\0') 
{ if (*r == '-' && r[1] == '-') { fputs("--", of); r += 2; break; } if (*r == '"') { fputc(*r++, of); while (*r != '\0' && *r != '"') { if (*r == '\\') fputc(*r++, of); fputc(*r++, of); } if (*r == '"') fputc(*r++, of); } else fputc(*r++, of); } while (*r != '\0' && *r != '>') fputc(*r++, of); if (*r == '>') fputc(*r++, of); } else { for (r += 2; *r != '\0'; r++) { if (*r == '-' && r[1] == '-') { r += 2; break; } } while (*r != '\0' && *r != '>') r++; if (*r == '>') r++; } } else { fputs("<!", of); while (*r != '\0' && *r != '>') { fputc(*r, of); r++; } if (*r == '>') { fputc('>', of); r++; } } } else { fputc('<', of); bool is_close = *r == '/'; if (is_close) { fputc('/', of); r++; } bool is_script = strncmp(r, "script", 6) == 0 || strncmp(r, "SCRIPT", 6) == 0; while (*r != '\0' && *r != '>') { if (*r == '"') { fputc('"', of); r++; while (*r != '\0' && *r != '"') { fputc(*r, of); r++; } if (*r == '"') { fputc('"', of); r++; } } else if (*r == '-' && r[1] == '-') { for (r += 2; *r != '\0'; r++) if (*r == '-' && r[1] == '-') { r += 2; break; } } else if (*r == '/') { fputc('/', of); r++; is_close = TRUE; } else { fputc(tolower(*r), of); r++; } } if (*r == '>') { fputc('>', of); r++; } if (is_script) in_script = !is_close; } } else { fputc(*r, of); r++; } } fclose(of); return; } } if (is_txt(file->name)) { char ofn[200]; FILE *f, *of; f = fopen(file->name, "rt"); sprintf(ofn, "%s/%s", target_dir, file->name); of = fopen(ofn, "wt"); printf("/* removed tabs */ %p %p\n", f, of); if (f && of) { int col = 0; for (char ch = fgetc(f); !feof(f); ch = fgetc(f)) if (ch == '\t') { fputc(' ', of); col++; while ((col % 4) > 0) { fputc(' ', of); col++; } } else { fputc(ch, of); col++; if (ch == '\r' || ch == '\n') col = 0; } fclose(of); return; } } fprintf(fout, "%s \"%s\" \"%s/%s\"\n", copy_command, file->name, target_dir, file->name); } void compare(bool execute) { FILE *fls; #ifdef WIN32 FILE *fout = fopen("cp2ftp.bat", "wt"); #else FILE *fout = fopen("cp2ftp", "wt"); #endif 
char buffer[500]; time_t timeinsec; struct tm timem; file_p a_file; long oldsize = 0L; long sizecopied = 0L; long newsize = 0L; long removesize = 0L; int i; long oldest_date_in_log = -1; char logfilename[300]; time(&timeinsec); memcpy(&timem, localtime(&timeinsec), sizeof(struct tm)); sprintf(logfilename, "%s/WS_FTP.LOG", target_dir); fls = fopen(logfilename, "rt"); if (fls == 0) printf("Could not open '%s'\n", logfilename); if (fls != 0) { while (fgets(buffer,499, fls)) { file_p file; char *s, *sfn; bool notincluded; //printf("Reading: %s\n", buffer); int bufferlen = strlen(buffer); if (buffer[bufferlen-1] < ' ') buffer[bufferlen-1] = '\0'; sfn = strstr(buffer, "\\ftp\\"); if (sfn != 0) { sfn += 5; s = sfn; while (*s >= ' ' && !(s[0] == ' ' && s[1] == '-' && s[2] == '-' && s[3] == '>')) s++; *s = '\0'; file = find_file(sfn); file->on_ftp = TRUE; if (file->log_line != 0) free(file->log_line); STRCPY(file->log_line, buffer); notincluded = !(file->read & R_INCLUDED) && (file->read & R_INDIR) && file->exists; if (file->size_local == -1 || notincluded) { if (!file->remove_from_ftp) { printf("Could remove %s\n", file->name); file->remove_from_ftp = TRUE; } } else if ( !notincluded ) { long int fyear, fmon, fday, fhour = 23, fmin = 59, fd=0, fm=0; long int ld = file->date_days, lm = file->date_mins; fyear = 2000 + (buffer[1] - '0')*10 + buffer[2] - '0'; fmon = (buffer[4] - '0')*10 + buffer[5] - '0'; fday = (buffer[7] - '0')*10 + buffer[8] - '0'; fhour = (buffer[10] - '0')*10 + buffer[11] - '0'; fmin = (buffer[13] - '0')*10 + buffer[14] - '0'; fd = fyear * 10000 + fmon * 100 + fday; fm = fhour * 60 + fmin; file->ftp_size = -1; file->upload_to_ftp = ld > fd || (ld == fd && lm > fm); //if (file->upload_to_ftp) // printf("%ld %ld:%ld %s %ld.%ld %ld.%ld: %s\n", fd, fhour, fmin, file->name, ld,lm, fd,fm, buffer); if (oldest_date_in_log == -1) oldest_date_in_log = fd; } } } fclose(fls); fls = fopen(logfilename, "wt"); for (a_file = the_files; a_file != 0 ; a_file = 
a_file->next) { if (a_file->log_line != 0) fprintf(fls, "%s\n", a_file->log_line); } fclose(fls); } for (a_file = the_files; a_file != 0 ; a_file = a_file->next) { if (a_file->read & R_INCLUDED && a_file->size_local >= 0) { add_size(a_file->size_local, a_file->name); newsize += a_file->size_local; if ( !a_file->on_ftp// && a_file->date_days > oldest_date_in_log) // oldest_date_in_log == 20090329 || a_file->upload_to_ftp || strcmp(a_file->name, "index.html") == 0) { //if (!a_file->on_ftp) // printf("not-on-ftp "); //if (a_file->date_days > oldest_date_in_log) // printf("%ld > %ld ", a_file->date_days, oldest_date_in_log); //if (a_file->upload_to_ftp) // printf("upload-to-ftp "); if (execute) { printf("Copy %s (%s)\n", a_file->name, !a_file->on_ftp ? "new file" : "updated"); copy_file(fout, a_file); } sizecopied += a_file->size_local; } /*printf("New? %s %ld %d %d %d\n", a_file->name, a_file->size_local, a_file->on_ftp, a_file->read, a_file->exists); if ( a_file->size_local >= 0 && ()) { bool notincluded = !(a_file->read & R_INCLUDED) && (a_file->read & R_INDIR) && a_file->exists; if (!notincluded) { /* printf("copy %s new\n", a_file->name); * / } */ } } printf("\n\nOld size %ld bytes, New size is %ld bytes\n", oldsize, newsize); printf("Have to copy %ld bytes\n", sizecopied); printf("Could remove %ld bytes\n", removesize); printf("Netto new size: %ld bytes\n", newsize - removesize); printf("\n"); #define GENERATE_STATISTICS 1 #if GENERATE_STATISTICS { FILE *f = fopen("index.html", "wt"); file_p fhome_file = find_file("index.html"); char *cont = fhome_file->contents; char linebuf[10000]; char *start_stat = "<!--startstat-->"; char *end_stat = "<!--endstat-->"; if (f != 0 && cont != 0 && *cont != '\0') { int found_start_stat = 0; linebuf[0] = '\0'; for(;*cont != '\0';) { int i; for (i = 0; *cont != '\0' && *cont != '\n'; i++, cont++) linebuf[i] = *cont; linebuf[i] = '\0'; cont++; if (!strcmp(linebuf, start_stat)) { found_start_stat = 1; break; } fprintf(f, "%s"CRLF, 
linebuf); } if (found_start_stat) { fprintf(f, "%s"CRLF, linebuf); fprintf(f, "consists of %d HTML-files", nr_ext_files("html")); fprintf(f, " with a total size of %s characters,"CRLF, with_commas(size_ext_files("html"))); fprintf(f, "having %s internal links ", with_commas(nr_int_links)); fprintf(f, "and %s external links ", with_commas(nr_ext_links + nr_broken_ext_links)); fprintf(f, "to (more than) %s websites."CRLF, with_commas(nr_sites)); if (nr_broken_ext_links != 0) fprintf(f, "(At least %s of the external links are broken.)"CRLF, with_commas(nr_broken_ext_links)); fprintf(f, "Furthermore, it contains %d C/C++ <A HREF=\"Programs.html\">program files</A> ", nr_ext_files("C")); fprintf(f, "with a total size of %s characters,"CRLF, with_commas(size_ext_files("C"))); fprintf(f, " %d <A HREF=\"MySample.html\">MySample</A> scripts ", nr_ext_files("scr")); fprintf(f, "with a total size of %s characters,"CRLF, with_commas(size_ext_files("scr"))); fprintf(f, "%d PASCAL program files ", nr_ext_files("Pascal")); fprintf(f, "with a total size of %s characters."CRLF, with_commas(size_ext_files("Pascal"))); fprintf(f, "and %d Python program files ", nr_ext_files("Python")); fprintf(f, "with a total size of %s characters."CRLF, with_commas(size_ext_files("Python"))); fprintf(f, "There are %d text files with a total size of %s characters."CRLF, nr_ext_files("txt"), with_commas(size_ext_files("txt"))); fprintf(f, "With respect to images, this site containts %d JPEG images"CRLF "(total size %s bytes),"CRLF, nr_ext_files("jpg"), with_commas(size_ext_files("jpg"))); fprintf(f, "%d GIF images (total size %s bytes),"CRLF, nr_ext_files("gif"), with_commas(size_ext_files("gif"))); fprintf(f, "%d PNG images (total size %s bytes), and"CRLF, nr_ext_files("png"), with_commas(size_ext_files("png"))); fprintf(f, "%d BMP images (total size %s bytes)."CRLF, nr_ext_files("bmp"), with_commas(size_ext_files("bmp"))); fprintf(f, "With respect to sounds, it contains %d WAV files"CRLF "with a 
total size of %s bytes and"CRLF, nr_ext_files("wav"), with_commas(size_ext_files("wav"))); fprintf(f, "%d MP3 files with a total size of %s bytes."CRLF, nr_ext_files("mp3"), with_commas(size_ext_files("mp3"))); fprintf(f, "It also contains %d PostScript files (total size %s bytes),"CRLF, nr_ext_files("ps") + nr_ext_files("eps"), with_commas(size_ext_files("ps") + size_ext_files("eps"))); fprintf(f, "%d LaTeX files (total size %s characters),"CRLF, nr_ext_files("tex"), with_commas(size_ext_files("tex"))); fprintf(f, "%d PDF files (total size %s characters),"CRLF, nr_ext_files("pdf"), with_commas(size_ext_files("pdf"))); fprintf(f, "%d zip files (total size %s bytes),"CRLF, nr_ext_files("zip"), with_commas(size_ext_files("zip"))); fprintf(f, "%d gzipped tar files (total size %s bytes),"CRLF, nr_ext_files("tgz"), with_commas(size_ext_files("tgz"))); fprintf(f, "%d <A HREF=\"Go.html\">SGF</A> files with"CRLF "a total size of %s bytes,"CRLF, nr_ext_files("sgf"), with_commas(size_ext_files("sgf"))); fprintf(f, "%d <A HREF=\"GoogleEarth.html\">KML</A> files with"CRLF "a total size of %s bytes,"CRLF, nr_ext_files("kml") + nr_ext_files("kmz"), with_commas(size_ext_files("kml") + size_ext_files("kmz"))); fprintf(f, "%d <A HREF=\"https://git-scm.com/docs/git-bundle\">bundle</A> files with"CRLF "a total size of %s bytes,"CRLF, nr_ext_files("bundle"), with_commas(size_ext_files("bundle"))); fprintf(f, "and %d EXE files with a total size of %s bytes."CRLF, nr_ext_files("exe"), with_commas(size_ext_files("exe"))); fprintf(f, "It also uses %d <A HREF=\"JavaScript.html\">JavaScript</A> files with"CRLF "a total size of %s bytes,"CRLF, nr_ext_files("js"), with_commas(size_ext_files("js"))); fprintf(f, "This leads to a total size of %s bytes."CRLF, with_commas(newsize)); for(;*cont != '\0';) { int i; for (i = 0; *cont != '\0' && *cont != '\n'; i++, cont++) linebuf[i] = *cont; linebuf[i] = '\0'; if (!strcmp(linebuf, end_stat)) break; if (*cont == '\n') cont++; } fprintf(f, "%s", 
linebuf); for (; *cont != '\0'; cont++) if (*cont == '\n') fprintf(f, CRLF); else fprintf(f, "%c", *cont); } fclose(f); } } #endif for (i = 0; i < nr_other_ext; i++) if (other_ext_nr[i] > 1) printf("%ld bytes in %d \"%s\"-files.\n", other_ext_size[i], other_ext_nr[i], other_ext_name[i]); else printf("%ld bytes in one \"%s\"-file.\n", other_ext_size[i], other_ext_name[i]); printf("\n\n"); fclose(fout); fls = fopen("sd.log", "rt"); if (fls != 0) { while (fgets(buffer,499, fls)) { if (strncmp(buffer, "add ", 4) != 0 && strncmp(buffer, "remove ", 7) != 0 && strlen(buffer) > 15) { file_p file; char *s = buffer; bool notincluded; int long fd, fm; for (fd = 0; '0' <= *s && *s <= '9'; s++) fd = 10*fd + *s - 10; if (*s == ' ') s++; for (fm = 0; '0' <= *s && *s <= '9'; s++) fm = 10*fm + *s - 10; if (*s == ' ') s++; file = find_file(s); file->fd_sd = fd; file->fm_sd = fm; /* notincluded = !(file->read & R_INCLUDED) && (file->read & R_INDIR) && file->exists; if (file->size_local == -1 || notincluded) file->remove_from_sd = TRUE; else if ( !notincluded ) { long int ld = file->date_days, lm = file->date_mins; file->upload_to_sd = ld > fd || (ld == fd && lm > fm); } */ } } fclose(fls); } fls = fopen("sd.log", "wt"); if (fls == 0) printf("Error: Cannot open sd.log for writing\n"); else { for (a_file = the_files; a_file != 0 ; a_file = a_file->next) { if (a_file->read & R_INCLUDED && a_file->size_local >= 0) { if ( ( a_file->date_days > a_file->fd_sd || (a_file->date_days == a_file->fd_sd && a_file->date_mins > a_file->fm_sd)) || strcmp(a_file->name, "index.html") == 0) fprintf(fls, "add %s\n", a_file->name); else fprintf(fls, "%ld %ld %s\n", a_file->fd_sd, a_file->fm_sd, a_file->name); } else if (a_file->fd_sd != 0) fprintf(fls, "remove %s\n", a_file->name); } fclose(fls); } } void unused() { FILE *funused = fopen("unused.bat", "wt"); if (funused == 0) { printf("Error: Cannot open unused.bat\n"); return; } file_p file; for (file = the_files; file != 0; file = file->next) { if ( 
(file->read & R_INCLUDED) == 0 && (file->read & R_INDIR) != 0) fprintf(funused, "move \"%s\" \"unused\\%s\"\n", file->name, file->name); } fclose(funused); } bool is_oneway(section_p sect) { for (; sect; sect = sect->parent) if (sect->oneway) return TRUE; return FALSE; } #define NO_SHOWREF void print_file_section(file_p file, section_p section) { if (section->file) printf("%s", section->file->name); if (section->names) printf(" (%ld) ", section->names->line); printf(" : "); /* static section_p cur_section = 0; if (section != cur_section) { if (cur_section != 0 && cur_section->file != file) printf("\n"); printf("In file %s", file->name); if (section->names) printf(", at line %ld", section->names->line); if (section->title[0] != '\0') printf(", in section '%s'", section->title); printf(":\n"); cur_section = section; } */ } void print_section(section_p section) { if (section == 0) printf("[NULL]"); else printf("[%ld %s'%s']", (long)section, section->file && section->file->name ? section->file->name : "", section->title); } char return_path[3000]; char found_path[3000]; int cur_level; void set_level_found(int level) { if (debug_refby > 0) printf("%*.*s #set\n", level*2, level*2, ""); if (level < cur_level) { strcpy(found_path, return_path); cur_level = level; } } void check_section_or_parent(section_p from, section_p to, int level, bool include_parent_files, char *s, int d) { if (debug_refby > 0) { printf("%*.*ss_o_p(%d,%d) ", d*2, d*2, "", level, cur_level); print_section(from); printf("\n"); } if (from == to) { strcpy(s, " = section"); set_level_found(level); return; } if (from->file == to->file) { for (to = to->parent; to; to = to->parent) { strcpy(s, " p"); s += 2; if (++level >= cur_level) return; if (debug_refby > 0) { printf("%*.*s = parent ", d*2, d*2, ""); print_section(to); printf("\n"); } if (from == to) { strcpy(s, " = section"); set_level_found(level); return; } } } else { /* if "from" section is top section of the file, check if the "to" section is a 
child according to the site map. */ if (include_parent_files && from->level == 0) { file_p to_parent = to->file; while (to_parent->special_child) { to_parent = to_parent->parent; if (to_parent == 0) break; strcpy(s, " C"); s += 2; if (++level >= cur_level) return; if (debug_refby > 0) { printf("%*.*s = parent file ", d*2, d*2, ""); print_section(to_parent->sections); printf("\n"); } if (from->file == to_parent) { strcpy(s, " = file"); set_level_found(level); return; } } } } } void search_href_in_subsections(section_p from, section_p to, int level, char *s, int d) { if (debug_refby > 0) { printf("%*.*sh_i_s(%d,%d) ", d*2, d*2, "", level, cur_level); print_section(from); printf("\n"); } if (level >= cur_level) return; { href_p href; for (href = from->hrefs; href; href = href->next) if (href->section != 0 && href->type == ' ') { sprintf(s, " %s#%s", href->section->file->name, href->name ? href->name : ""); check_section_or_parent(href->section, to, level, TRUE, s + strlen(s), d+1); } } /* check all sub section for the same: */ { section_p nested; sprintf(s, " c"); s += 2; for (nested = from->nested; nested; nested = nested->next) search_href_in_subsections(nested, to, level+1, s, d+1); } } void search_href_in_children(file_p file, section_p to, int level, char *s, int d) { file_p child; if (debug_refby > 0) printf("%*.*sh_i_c(%d,%d) %s\n", d*2, d*2, "", level, cur_level, file->name); strcpy(s, " C"); s += 2; for (child = file->children; child != 0; child = child->sibling) if (child->special_child) search_href_in_subsections(child->sections, to, level+3, s, d+1); } #if 0 void search_parents(section_p from, section_p to, int level, int d) { if (debug_refby > 0) { printf("%*.*sp(%d,%d) ", d*2, d*2, "", level, cur_level); print_section(from); printf("\n"); } for (; from; from = from->parent) { if (++level >= cur_level) return; { href_p href; for (href = from->hrefs; href; href = href->next) { if (level >= cur_level) return; if (href->section != 0 && href->type == ' ') 
check_section_or_parent(href->section, to, level, FALSE); } } } } #endif void search_chain(section_p from, section_p to, int level, char *s, int d) { if (debug_refby > 0) { printf("%*.*sc(%d,%d) ", d*2, d*2, "", level, cur_level); print_section(from); printf("\n"); } if (level >= cur_level) return; if (from->level != 0) { href_p href; for (href = from->hrefs; href; href = href->next) if (href->section != 0 && href->type == ' ') { char *r; sprintf(s, " %s#%s", href->section->file->name, href->name ? href->name : ""); r = s + strlen(s); check_section_or_parent(href->section, to, level, FALSE, r, d+1); search_chain(href->section, to, level+2, r, d+1); } } { section_p nested; strcpy(s, " c"); for (nested = from->nested; nested != 0; nested = nested->next) search_chain(nested, to, level+2, s+2, d+1); } if (from->level == 0) { file_p child; strcpy(s, " C"); s += 2; for (child = from->file->children; child != 0; child = child->sibling) if (child->special_child) search_chain(child->sections, to, level+2, s, d+1); } } void set_href_type(href_p href, section_p in_section, file_p in_file) { section_p refered_section = 0; section_p psect; /* Determine the section being linked to: */ if (href->file != 0) { if (href->name != 0) refered_section = section_with_name(href->file->sections, href->name); else refered_section = href->file->sections; } if (refered_section == 0) return; /* -- nothing to analyze */ /* Make a direct reference in the link to the section: */ href->section = refered_section; /* Determine the type of the reference: */ if (refered_section == in_section) { href->type = '='; return; } if (href->title == title_up) { href->type = 'U'; return; } if (href->title == title_next) { href->type = 'R'; return; } if (href->title == title_prev) { href->type = 'L'; return; } for (psect = refered_section->parent; psect != 0; psect = psect->parent) if (psect == in_section) { href->type = 'c'; return; } for (psect = in_section->parent; psect != 0; psect = psect->parent) if (psect 
== refered_section) { href->type = 'p'; return; } if (in_file != refered_section->file) { file_p p_file; for (p_file = refered_section->file->parent; p_file != 0; p_file = p_file->parent) if (p_file == in_file) { href->type = 'C'; return; } for (p_file = in_file->parent; p_file != 0; p_file = p_file->parent) if (p_file == refered_section->file) { href->type = 'P'; return; } } /* What is this ???? */ if (href->name == 0) { file_p parent; for (parent = in_file->parent; parent != 0; parent = parent->parent) if (parent == href->file) return; } } int count_nr_tag_types(section_p section, tag_type_p *ref_tag_type) { int result = section->nr_tag_types; if (result == 1) *ref_tag_type = section->tag_type; section_p nested; for (nested = section->nested; nested != 0; nested = nested->next) if (nested->names == 0) result += count_nr_tag_types(nested, ref_tag_type); return result; } void check_for_oneway(href_p href, section_p in_section, file_p in_file) { if (debug_refby > 0) debug_refby--; cur_level = 10; strcpy(found_path, "NOT FOUND"); search_href_in_subsections(href->section, in_section, 1, return_path, 0); if (href->section->level == 0) search_href_in_children(href->section->file, in_section, 1, return_path, 0); //search_parents(href->section, in_section, 1); search_chain(href->section, in_section, 1, return_path, 0); if (href->oneway) { if (cur_level < 6) ERROR3(in_file->name, href->line, "can remove --1-- from '%s%s%s'", href->file->name, href->name ? "#" : "", href->name ? href->name : "") } else { if (cur_level == 10) { ERROR3(in_file->name, href->line, "could add --1-- to '%s%s%s'", href->file->name, href->name ? "#" : "", href->name ? href->name : ""); if (in_section->parent && !in_section->parent->has_text) { section_p name_sect = in_section->names ? 
in_section : in_section->parent; if (name_sect->names) ERROR4(href->file->name, href->section->line, "could add: <LI><A HREF=\"%s#%s\">%s</A>: %s", in_file->name, name_sect->names->name, in_section->parent->title, in_section->title) else ERROR3(href->file->name, href->section->line, "could add: <LI><A HREF=\"%s\">%s</A>: %s", in_file->name, in_section->parent->title, in_section->title) tag_type_p tag_type = 0; int nr_tag_types = count_nr_tag_types(href->section, &tag_type); if (nr_tag_types == 1) { for (; tag_type != 0; tag_type = tag_type->parent) add_tag_from_index(name_sect, tag_type); } } else { section_p name_sect = in_section; while ( name_sect->names == 0 && name_sect->parent != 0) name_sect = name_sect->parent; if (name_sect->names) ERROR3(href->file->name, href->section->line, "could add: <LI><A HREF=\"%s#%s\">%s</A>", in_file->name, name_sect->names->name, in_section->title) else ERROR2(href->file->name, href->section->line, "could add: <LI><A HREF=\"%s\">%s</A>", in_file->name, in_section->title) } } } } bool link_to_next(file_p n, file_p p, file_p* cn) { while (p != 0 && p->next_sibling == 0) p = p->parent; if (p == 0 || p->next_sibling == 0) return FALSE; p = p->next_sibling; *cn = p; while (p != 0) { if (n == p) return TRUE; p = p->children; } return FALSE; } bool link_to_prev(file_p n, file_p p, file_p* cn) { while (p != 0 && p->prev_sibling == 0) p = p->parent; if (p == 0 || p->prev_sibling == 0) return FALSE; p = p->prev_sibling; *cn = p; while (p != 0) { if (n == p) return TRUE; p = p->children; while (p != 0 && p->next_sibling != 0) p = p->next_sibling; } return FALSE; } bool is_diary_file(file_p file) { char* name = file->name; return name[0] == 'D' /*|| name[0] == 'N')*/ && isdigit(name[1]) && isdigit(name[2]) && isdigit(name[3]) && isdigit(name[4]) && strcmp(name+5, ".html") == 0; } void analyze_all_sections() { file_p file; section_p section; href_p href; name_p name; tag_p tag; #ifdef SHOWREF printf("analyze_all_sections\n"); #endif /* Set 
the link type */ for (file = the_files; file; file = file->next) for (section = file->sections; section; next_section(§ion, 0)) for (href = section->hrefs; href; href = href->next) set_href_type(href, section, file); /* Analyze names and links */ for (file = the_files; file; file = file->next) { #ifdef SHOWREF printf("File: %s\n", file->name); #endif for (section = file->sections; section; next_section(§ion, 0)) { bool is_oneway_section = is_oneway(section); #ifdef SHOWREF printf(" %*.*ssection: %s\n", section->level, section->level, "", section->title); #endif /* For all names */ for (name = section->names; name; name = name->next) { if (name_repeated(file->sections, name->name)) ERROR1(file->name, name->line, "<a name=\"%s\"> not unique.", name->name) if ( strstr(name->name, "#") || strstr(name->name, ".html") || strstr(name->name, ".jpg")) ERROR1(file->name, name->line, "<a name=\"%s\"> looks like link.\n", name->name) } /* For all links */ for (href = section->hrefs; href; href = href->next) { if (href->file == 0) ; /* skip */ else if (!href->file->exists)// || !(href->file->read & R_READ)) ; // ERROR1(file->name, href->line, "file '%s' does not exist", href->file->name) else if (href->section == 0) ERROR2(file->name, href->line, "no <a name=\"%s\"> in file '%s'.", href->name, href->file->name) else if (href->type == 'U') { if (file->parent == 0) ERROR(file->name, href->line, "href not pointing to parent. File does not have parent") else if (href->file != file->parent) ERROR1(file->name, href->line, "href not pointing to parent %s", file->parent->name) } else if (href->type == 'R') { file_p next_file = 0; if (!link_to_next(href->file, file, &next_file)) { if (next_file == 0) ERROR(file->name, href->line, "href not pointing to next. 
File has no next.") else ERROR1(file->name, href->line, "href not pointing to next %s", next_file->name) } } else if (href->type == 'L') { file_p prev_file; if (!link_to_prev(href->file, file, &prev_file)) { if (prev_file == 0) ERROR(file->name, href->line, "href not pointing to previous. File has no previous.") else ERROR1(file->name, href->line, "href not pointing to previous %s", prev_file->name) } } else if (href->type == ' ' && href->section != 0) { if (!is_oneway_section) check_for_oneway(href, section, file); if (is_diary_file(href->section->file)) { tag_types_p tag_type; for (tag_type = href->tag_types; tag_type != 0; tag_type = tag_type->next) add_tag_from_index(href->section, tag_type->type); } #ifdef SHOWREF printf(" %*.*s- '%s' |%c| %s %s.%s\n", section->level, section->level, "", href->title, href->type, href->error == '1' ? "NOTONEWAY" : href->error == '2' ? "ONEWAY" : "", href->section->file->name, href->section->names ? href->section->names->name : ""); fflush(stdout); #endif } } } } #ifdef DUMP for (file = the_files; file; file = file->next) { printf("File: '%s'\n", file->name); for (section = file->sections; section; next_section(§ion, 0)) { bool is_oneway_section = is_oneway(section); printf(" %*.*s%d: section: '%s'\n", section->level, section->level, "", section->line, section->title); /* For all names */ for (name = section->names; name; name = name->next) { printf(" %*.*s- %d: name: '%s'\n", section->level, section->level, "", name->line, name->name); } /* For all links for (href = section->hrefs; href; href = href->next) { //printf(" %*.*s- %d: href: '%s' |%c| %s#%s\n", // section->level, section->level, // href->line, href->title, href->type, // href->section && href->section->file ? href->section->file->name : "???", // href->section && href->section->names ? 
href->section->names->name : ""); } */ } } #endif } void fill_tag_label(char* buf, tag_p tag) { section_p section; //fprintf(stderr, "(\n"); if (tag == 0) { buf[0] = '\0'; return; } section = tag->section; if (section == 0) buf[0] = '\0'; else if (section->file == 0) strcpy(buf, "err1"); else if (section->file->name == 0) strcpy(buf, "err2"); else if (is_diary_file(section->file)) { char *name = section->file->name; sprintf(buf, "%c%c%c%c%s", name[1], name[2], name[3], name[4], section->names ? section->names->name : ""); } else sprintf(buf, "%s#%s", section->file->name, section->names ? section->names->name : ""); } void connect_tags_in_file(file_p file) { section_p section; file_p child; for (section = file->sections; section != 0; next_section(§ion, 0)) { tag_p tag; for (tag = section->tags; tag != 0; tag = tag->next) { if (tag->type->last_seen) { tag->prev_tag = tag->type->last_seen; tag->prev_tag->next_tag = tag; } tag->type->last_seen = tag; } } for (child = file->children; child != 0; child = child->sibling) connect_tags_in_file(child); } void connect_tags() { int i; for (i = 0; i < nr_root_files; i++) connect_tags_in_file(root_files[i]); } void print_tags_script(FILE* f, section_p section) { tag_p tag; fprintf(f, "<SCRIPT><!--\n"); fprintf(f, "tags()\n"); for (tag = section->tags; tag != 0; tag = tag->next) { if (tag->in_index) { char prev[100]; char next[100]; tag_p prev_tag; tag_p next_tag; for (prev_tag = tag->prev_tag; prev_tag != 0 && !prev_tag->in_index; prev_tag = prev_tag->prev_tag) ; for (next_tag = tag->next_tag; next_tag != 0 && !next_tag->in_index; next_tag = next_tag->next_tag) ; fill_tag_label(prev, prev_tag); fill_tag_label(next, next_tag); if (tag->type->title[0] != '\0') fprintf(f, "tag(\"%s\",\"%s\",\"%s\",\"%s\")\n", tag->type->title, tag->type->name, prev, next); else fprintf(f, "%s(\"%s\",\"%s\")\n", tag->type->name, prev, next); } } fprintf(f, "//--></SCRIPT>\n"); } void print_tags_in_file(FILE* f, file_p file) { section_p section; 
file_p child; for (section = file->sections; section != 0; next_section(§ion, 0)) { if (section->parent != 0 && section->parent->ignoretags) section->ignoretags = TRUE; if (section->tags != 0 && !section->ignoretags) { tag_p tag; bool something_in_script = FALSE; for (tag = section->tags; tag != 0; tag = tag->next) { if (tag->prev_text != empty_tag || tag->next_text != empty_tag) { something_in_script = TRUE; break; } } if (something_in_script) { bool is_correct = TRUE; for (tag = section->tags; tag != 0; tag = tag->next) { char prev[100]; char next[100]; tag_p prev_tag; tag_p next_tag; for (prev_tag = tag->prev_tag; prev_tag != 0 && !prev_tag->in_index; prev_tag = prev_tag->prev_tag) ; for (next_tag = tag->next_tag; next_tag != 0 && !next_tag->in_index; next_tag = next_tag->next_tag) ; fill_tag_label(prev, prev_tag); fill_tag_label(next, next_tag); if (!tag->in_index) { is_correct = FALSE; fprintf(stdout, "%s (%ld) : %s not in index\n", file->name, tag->line, tag->type->name); } if (tag->prev_text == empty_tag && tag->next_text == empty_tag) { is_correct = FALSE; fprintf(stdout, "%s (%ld) : %s not in script\n", file->name, tag->line, tag->type->name); } else if (tag->in_index) { if (!streq(tag->prev_text, prev)) { is_correct = FALSE; fprintf(stdout, "%s (%ld) : %s prev '%s' -> '%s'\n", file->name, tag->line, tag->type->name, tag->prev_text, prev); } if (!streq(tag->next_text, next)) { is_correct = FALSE; fprintf(stdout, "%s (%ld) : %s next '%s' -> '%s'\n", file->name, tag->line, tag->type->name, tag->next_text, next); } } } if (!is_correct) print_tags_script(stdout, section); } else { fprintf(stdout, "\n%s (%d) :\n", file->name, section->line); print_tags_script(stdout, section); } } } for (child = file->children; child != 0; child = child->sibling) print_tags_in_file(f, child); } void print_tags(FILE* f) { int i; for (i = 0; i < nr_root_files; i++) print_tags_in_file(f, root_files[i]); printf("\n\n"); } int main(argc, argv) int argc; char **argv; { FILE *fin; 
char *fn = 0, *html_fn, *outfn = 0, *reffn; bool option_scan_not_inc = FALSE, option_cross_ref = FALSE, option_compare = FALSE, option_compare_copy = FALSE, option_export = FALSE; /* global options */ option_info = FALSE; option_warn = FALSE; option_pedantic = FALSE; option_bibliography = FALSE; #ifdef EXPORT_C_SOURCES f_c_sources = fopen("c.html", "wt"); #endif printf("%s: Version %s\nWritten by %s\n\n", "chkhtml", VERSION, WRITTEN_BY); { int i; bool error = FALSE; for (i = 1; i < argc; i++) { if (argv[i][0] == '-') { if (argv[i][1] == 'o') { if (argv[i][2] != '\0') outfn = argv[i] + 2; else if (i + 1 < argc) outfn = argv[++i]; else printf("Argument of -o option missing\n"); } else if (argv[i][1] == 'i' && argv[i][2] == '\0') { option_warn = TRUE; option_info = TRUE; } else if (argv[i][1] == 'w' && argv[i][2] == '\0') option_warn = TRUE; else if (argv[i][1] == 'p' && argv[i][2] == '\0') option_pedantic = TRUE; else if (argv[i][1] == 'e' && argv[i][2] == '\0') option_export = TRUE; else if (argv[i][1] == 's' && argv[i][2] == '\0') { option_scan_not_inc = TRUE; option_compare = TRUE; option_compare_copy = TRUE; } else if (argv[i][1] == 'r') { if (argv[i][2] != '\0') accept_root_URL(argv[i] + 2); else if (i + 1 < argc) accept_root_URL(argv[++i]); else printf("Argument of -r option missing\n"); } else if (argv[i][1] == 't') { if (argv[i][2] != '\0') { STRCPY(target_dir, argv[i] + 2); } else if (i + 1 < argc) { i++; STRCPY(target_dir, argv[i]); } else printf("Argument of -t option missing\n"); } else if (argv[i][1] == 'd') { if (argv[i][2] != '\0') { STRCPY(target_domain, argv[i] + 2); } else if (i + 1 < argc) { i++; STRCPY(target_domain, argv[i]); } else printf("Argument of -d option missing\n"); } else if (argv[i][1] == 'b' && argv[i][2] == '\0') option_bibliography = TRUE; #ifdef DYN_DEBUG else if (argv[i][1] == 'D') option_debug = TRUE; #endif else if (streq(argv[i], "-cr")) option_cross_ref = TRUE; else { printf("Unknown option %s\n", argv[i]); error = TRUE; } } 
else if (fn == 0) fn = argv[i]; else { printf("Too many input filenames\n"); error = TRUE; } } if (fn == 0) { printf("No input filename given\n"); error = TRUE; } else { fin = fopen(fn, "rt"); if (fin == 0) { printf("Error: Cannot open file: '%s'.\n", fn); error = TRUE; } } if (error) { printf("Usages: html2tex [options] <file>\n"); printf("\nOptions:\n"); printf(" -o<FN> : specify output file\n"); printf(" -i : print info\n"); printf(" -w : print warnings (and info)\n"); printf(" -r<URL> : root URL of document\n"); printf(" -b : make bibliography\n"); printf(" -cr : generate cross-reference\n"); printf(" -c : check html file\n"); printf(" -s : scan not included files\n"); printf(" -t<PATH> : specify target directory for file to upload\n"); printf(" -d<PATH> : specify the target domain\n"); #ifdef DYN_DEBUG printf(" -D : print (a lot of) debugging information\n"); #endif return 1; } if (streq(fn + strlen(fn) - 5, ".html")) { is_html_fn = TRUE; html_fn = SALLOC(fn); strcpy(html_fn, fn); fn[strlen(fn) - 5] = '\0'; } } reffn = NALLOC(char, strlen(fn) + 5); strcpy(reffn, fn); strcat(reffn, ".ref"); read_site_map(); if (option_export) { f_ext_dest = fopen("compare/ext_dest.txt", "wt"); if (f_ext_dest == 0) printf("Error: could not open compare/ext_dest.txt for writing\n"); f_broken_ext_dest = fopen("compare/broken_ext_dest.txt", "wt"); if (f_broken_ext_dest == 0) printf("Error: could not open compare/ext_dest.txt for writing\n"); } nr_other_ext = 0; nr_int_links = 0; nr_ext_links = 0; nr_broken_ext_links = 0; scan_a_file(html_fn, 0, TRUE); if (option_scan_not_inc) scan_not_included_files(is_html_fn); { file_p file; for (file = the_files; file != 0; file = file->next) file->read &= ~R_READ; } if (f_ext_dest != 0) fclose(f_ext_dest); if (f_broken_ext_dest != 0) fclose(f_broken_ext_dest); analyze_all_sections(); { file_p file; for (file = the_files; file != 0; file = file->next) { if ((file->read & R_SITEMAP) && !(file->read & R_INCLUDED)) ERROR(file->name, 0, "file not 
included or missing (but mentioed in site map)") if (!(file->read & R_SITEMAP) && (file->read & R_INCLUDED) && is_html(file->name)) ERROR(file->name, 0, "HTML file not in site map") } } print_errors(stdout); { FILE *g = fopen("compare/tags.txt","wt"); connect_tags(); print_tags(g); } DO_DEBUG_PRINT(("ready reading\n")); DEBUG_PRINT(("\n\n\n")); if (option_compare) compare(option_compare_copy); unused(); if (0) { FILE *fsites = fopen("ExtSites.html", "wt"); if (fsites != 0) { site_p site; fprintf(fsites, "<HTML><HEAD>\n<TITLE>Referenced external sites\n\n\n

    Referenced external sites

    \n\n
      \n"); for (site = all_sites; site != 0; site = site->next) { fprintf(fsites, "
    • %s", site->name, site->name); if (site->count > 1) fprintf(fsites, " with %d references", site->count); fprintf(fsites, "\n"); } fprintf(fsites, "
    \n\n


    \n
    \nHome and email address\n
    \n\n"); fclose(fsites); } } if (0) { FILE *fsites = fopen("ExtSites", "wt"); if (fsites != 0) { site_p site; for (site = all_sites; site != 0; site = site->next) { if (strncmp(site->name, "http", 4) == 0) fprintf(fsites, "%s\n", site->name); } fclose(fsites); } } return 0; }