*** ir/irtfiles.c.orig Wed Apr 7 13:39:40 1993 --- ir/irtfiles.c Fri Apr 16 19:13:56 1993 *************** *** 743,749 **** boolean check_for_file_already_indexed; boolean word_position, word_pairs; { ! /* Addes words to the index for a given file. * The function arguments can be NULL which means it would * always answer NULL. * separator_function is called on every line to see if it --- 743,752 ---- boolean check_for_file_already_indexed; boolean word_position, word_pairs; { ! /* Adds words to the index for a given file. ! * "Words" are extracted as strings of alphanumeric chars, whose ! * length is >=3 but charsleftover && line[charsread-1]=='\n') ! the_document_table_entry.number_of_lines++; ! ! /* save word frag at end as leftovers (unless it fills whole block) */ ! for(cut=charsread-1; cut>=0; cut--) ! if(!isascii(line[cut]) || !isalnum(line[cut])) break; ! if(++cut <= 0) charsleftover = 0; ! else { ! for(charsleftover=0, i=cut; iextraheaderweight, header, line, document_id, --- 967,974 ---- sprintf(header, "%s %s", pathname_name(full_path), pathname_directory(full_path, directory)); } ! if(the_document_table_entry.number_of_lines > 0) ! the_document_table_entry.number_of_lines--; /* dont count separator */ /* finish off the last */ finish_document( dataops->extraheaderweight, header, line, document_id, *** ir/ircfiles.c.orig Wed Apr 7 13:37:19 1993 --- ir/ircfiles.c Sun Apr 18 06:34:42 1993 *************** *** 374,448 **** #define pir_data_tab 16 ! /* pir Flat-file format: ! ENTRY CCHU #Type Protein 12345678901234567890123456789012345678901234567890123456789012345678901234567890 .........1.........2.........3.........4.........5.........6.........7.........8 all data starts at tab=17 or further ! ENTRY blah << Start entry, index it ! TITLE blah << Index def line == HEADER line ! ACCESSION blah << Index acc line ! KEYWORDS blah << index keywords ! SOURCE blah << index source ! REFERENCE blah << Index ! SUPERFAMILY blah << Index ! blah << Index ! ANYOTHERS jazz << skipit ! any word starting w/ "#", skipit ! /// << end of entry == entry separator ! ENTRY CCHU #Type Protein ! TITLE Cytochrome c - Human ! DATE #Sequence 30-Sep-1991 #Text 30-Sep-1991 ! PLACEMENT 1.0 1.0 1.0 1.0 1.0 ! SOURCE Homo sapiens #Common-name man ! ACCESSION A31764\ A05676\ A00001 ! REFERENCE ! #Authors Evans M.J., Scarpulla R.C. ! #Journal Proc. Natl. Acad. Sci. U.S.A. (1988) 85:9625-9629 ! #Title The human somatic cytochrome c gene: two classes of ! processed pseudogenes demarcate a period of rapid ! molecular evolution. ! #Reference-number A31764 ! #Accession A31764 ! #Molecule-type DNA ! #Residues 1-105 ! #Cross-reference GB:M22877 ! REFERENCE ! #Authors Matsubara H., Smith E.L. ! #Journal J. Biol. Chem. (1963) 238:2732-2753 ! #Reference-number A05676 ! #Accession A05676 ! #Molecule-type protein ! #Residues 2-28;29-46;47-100;101-105 ! REFERENCE ! #Authors Matsubara H., Smith E.L. ! #Journal J. Biol. Chem. (1962) 237:3575-3576 ! #Reference-number A00001 ! #Comment 66-Leu is found in 10% of the molecules in pooled ! protein. ! GENETIC ! #Introns 57/1 ! SUPERFAMILY #Name cytochrome c ! KEYWORDS acetylation\ electron transport\ heme\ ! mitochondrion\ oxidative phosphorylation\ ! polymorphism\ respiratory chain ! FEATURE ! 2-105 #Protein cytochrome c (experimental) ! \ ! 2 #Modified-site acetylated amino end ! (experimental)\ ! 15,18 #Binding-site heme (covalent)\ ! 19,81 #Binding-site heme iron (axial ligands) ! SUMMARY #Molecular-weight 11749 #Length 105 #Checksum 3247 ! SEQUENCE ! 5 10 15 20 25 30 ! 1 M G D V E K G K K I F I M K C S Q C H T V E K G G K H K T G ! 31 P N L H G L F G R K T G Q A P G Y S Y T A A N K N K G I I W ! 61 G E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E ! 91 E R A D L I A Y L K K A T N E ! /// ! *****/ --- 374,448 ---- #define pir_data_tab 16 ! /* pir Flat-file format (lines are prepended with '|' to keep cpp happy): ! |ENTRY CCHU #Type Protein 12345678901234567890123456789012345678901234567890123456789012345678901234567890 .........1.........2.........3.........4.........5.........6.........7.........8 all data starts at tab=17 or further ! |ENTRY blah << Start entry, index it ! |TITLE blah << Index def line == HEADER line ! |ACCESSION blah << Index acc line ! |KEYWORDS blah << index keywords ! |SOURCE blah << index source ! |REFERENCE blah << Index ! |SUPERFAMILY blah << Index ! | blah << Index ! |ANYOTHERS jazz << skipit ! |any word starting w/ "#", skipit ! |/// << end of entry == entry separator ! |ENTRY CCHU #Type Protein ! |TITLE Cytochrome c - Human ! |DATE #Sequence 30-Sep-1991 #Text 30-Sep-1991 ! |PLACEMENT 1.0 1.0 1.0 1.0 1.0 ! |SOURCE Homo sapiens #Common-name man ! |ACCESSION A31764\ A05676\ A00001 ! |REFERENCE ! | #Authors Evans M.J., Scarpulla R.C. ! | #Journal Proc. Natl. Acad. Sci. U.S.A. (1988) 85:9625-9629 ! | #Title The human somatic cytochrome c gene: two classes of ! | processed pseudogenes demarcate a period of rapid ! | molecular evolution. ! | #Reference-number A31764 ! | #Accession A31764 ! | #Molecule-type DNA ! | #Residues 1-105 ! | #Cross-reference GB:M22877 ! |REFERENCE ! | #Authors Matsubara H., Smith E.L. ! | #Journal J. Biol. Chem. (1963) 238:2732-2753 ! | #Reference-number A05676 ! | #Accession A05676 ! | #Molecule-type protein ! | #Residues 2-28;29-46;47-100;101-105 ! |REFERENCE ! | #Authors Matsubara H., Smith E.L. ! | #Journal J. Biol. Chem. (1962) 237:3575-3576 ! | #Reference-number A00001 ! | #Comment 66-Leu is found in 10% of the molecules in pooled ! | protein. ! |GENETIC ! | #Introns 57/1 ! |SUPERFAMILY #Name cytochrome c ! |KEYWORDS acetylation\ electron transport\ heme\ ! | mitochondrion\ oxidative phosphorylation\ ! | polymorphism\ respiratory chain ! |FEATURE ! | 2-105 #Protein cytochrome c (experimental) ! | \ ! | 2 #Modified-site acetylated amino end ! | (experimental)\ ! | 15,18 #Binding-site heme (covalent)\ ! | 19,81 #Binding-site heme iron (axial ligands) ! |SUMMARY #Molecular-weight 11749 #Length 105 #Checksum 3247 ! |SEQUENCE ! | 5 10 15 20 25 30 ! | 1 M G D V E K G K K I F I M K C S Q C H T V E K G G K H K T G ! | 31 P N L H G L F G R K T G Q A P G Y S Y T A A N K N K G I I W ! | 61 G E D T L M E Y L E N P K K Y I P G T K M I F V G I K K K E ! | 91 E R A D L I A Y L K K A T N E ! |/// ! | *****/ *************** *** 1054,1073 **** * ========================================== */ ! /*------ example ! #Abnormal: see A ! #abnormal abdomen: see a( ) ! #Abnormal abdomen: see A ! # abnormal eye: see mit15 ! #abnormal oocytes: see abo ! #abnormal tergites: see abt ! #abnormal wings: see abw ! #abo: abnormal oocyte ! location: 2-44.0 (mapped with respect to J, 2-41). ! origin: Naturally occurring allele recovered near Rome, ! Italy. ! references: Sandler, Lindsley, Nicoletti, and Trippa, ! ... ----*/ --- 1054,1073 ---- * ========================================== */ ! /*------ example (lines are prepended with '|' to keep cpp happy): ! |#Abnormal: see A ! |#abnormal abdomen: see a( ) ! |#Abnormal abdomen: see A ! |# abnormal eye: see mit15 ! |#abnormal oocytes: see abo ! |#abnormal tergites: see abt ! |#abnormal wings: see abw ! |#abo: abnormal oocyte ! | location: 2-44.0 (mapped with respect to J, 2-41). ! | origin: Naturally occurring allele recovered near Rome, ! | Italy. ! | references: Sandler, Lindsley, Nicoletti, and Trippa, ! | ... ----*/ *************** *** 2537,2544 **** */ - #ifdef NeXT /* only do this if it is on a NeXT */ - /*----------------------- FSA -------------------*/ #define fsa_max_edges 4 #define fsa_error_state (-1) --- 2537,2542 ---- *************** *** 2802,2808 **** return; } - #endif /* def NeXT */ /* ============================== --- 2800,2805 ---- *** ir/irbuild.c.orig Fri Apr 9 14:37:01 1993 --- ir/irbuild.c Sun Apr 18 06:50:56 1993 *************** *** 188,196 **** fprintf(stderr," | refer /* refer format */\n"); fprintf(stderr," | rn /* netnews saved by the [rt]?rn newsreader */\n"); fprintf(stderr," | server /* server structures for the dir of servers */\n"); - #ifdef NeXT fprintf(stderr," | objc /* objective-C .h and .m files */\n"); - #endif /* def NeXT */ fprintf(stderr," | tiff /* tiff files, only indexes the filename */\n"); #ifdef BIO fprintf(stderr," | genbank /* GenBank flatfile format */\n"); --- 188,194 ---- *** ir/sersrch.c.orig Fri Apr 9 10:02:37 1993 --- ir/sersrch.c Sun Apr 18 07:17:49 1993 *************** *** 52,58 **** #include - #define MAXINT (unsigned long)2^(sizeof(long)*8-1) #define VALUE 1000000L /* francois */ #include "stemmer.h" --- 52,57 ---- *************** *** 760,766 **** txt_stream = s_fopen(txt_filename, "rb"); strcpy(prev_txt_filename, txt_filename); } ! else if (0 != strcmp(txt_stream, prev_txt_filename)) { s_fclose(txt_stream); txt_stream = s_fopen(txt_filename, "rb"); strcpy(prev_txt_filename, txt_filename); /* 2/92 GS TLG */ --- 759,765 ---- txt_stream = s_fopen(txt_filename, "rb"); strcpy(prev_txt_filename, txt_filename); } ! else if (0 != strcmp(txt_filename, prev_txt_filename)) { s_fclose(txt_stream); txt_stream = s_fopen(txt_filename, "rb"); strcpy(prev_txt_filename, txt_filename); /* 2/92 GS TLG */ *** ir/Makefile.orig Fri Apr 9 11:29:22 1993 --- ir/Makefile Sun Apr 18 07:33:58 1993 *************** *** 195,200 **** --- 195,202 ---- # for newer BSD that needs to use , add -DBSD43 # for System V add -DSYSV # for XENIX add -M3e -Zi + # for NeXTSTEP add -DNeXT + # # -DSECURE_SERVER for waisserver to setuid to uucp after startup. # # -- dgg additions for biology data *** lib/scandir.c.orig Wed Apr 7 14:17:09 1993 --- lib/scandir.c Sun Apr 18 06:54:02 1993 *************** *** 23,29 **** int scandir(name, list, selector, sorter) ! char *name; struct dirent ***list; int (*selector)(); int (*sorter)(); --- 23,29 ---- int scandir(name, list, selector, sorter) ! const char *name; struct dirent ***list; int (*selector)(); int (*sorter)(); *** Makefile.orig Fri Apr 9 15:18:42 1993 --- Makefile Sun Apr 18 07:40:04 1993 *************** *** 48,56 **** # for other UNIX-like OS's set this to this directory. #TOP:sh = pwd # or fill in the blank for other OS's ! #TOP = ? ! comment-me: ! @echo You must set "\$$(TOP)" to point to the freeWAIS src directory SUPDIR = $(TOP)/ir --- 48,56 ---- # for other UNIX-like OS's set this to this directory. #TOP:sh = pwd # or fill in the blank for other OS's ! TOP = /u/burchard/Pending/freeWAIS ! #comment-me: ! # @echo You must set "\$$(TOP)" to point to the freeWAIS src directory SUPDIR = $(TOP)/ir *************** *** 62,67 **** --- 62,68 ---- # for XENIX add -M3e -Zi # USG for Unix Dirent in lib # for SGIs running IRIX 4.0.1, add -cckr + # for NeXTSTEP add -DNeXT # # For a little better security in the server, add -DSECURE_SERVER # this sets the server user id to -u argument after startup. *************** *** 93,99 **** # -DLIST_STEMS to show stemmed words in server log and indexer output # ! CFLAGS = -g -I$(SUPDIR) -DSECURE_SERVER -DRELEVANCE_FEEDBACK -DUSG -DBOOLEANS -DPARTIALWORD -DLITERAL MFLAGS = -k MAKE = make $(MFLAGS) --- 94,100 ---- # -DLIST_STEMS to show stemmed words in server log and indexer output # ! CFLAGS = -g -I$(SUPDIR) -DNeXT -DSECURE_SERVER -DRELEVANCE_FEEDBACK -DUSG -DBOOLEANS -DPARTIALWORD -DLITERAL MFLAGS = -k MAKE = make $(MFLAGS) *************** *** 122,128 **** # you may have to edit the CFLAGS in the Makefile yourself. x:: ! cd x; (env TOP=$(TOP) CC=$(CC) CFLAGS="$(CFLAGS)" MAKE="$(MAKE)" ./makex.sh) bin:: cd bin; $(MAKE) CC=$(CC) CFLAGS="$(CFLAGS)" TOP=$(TOP) --- 123,131 ---- # you may have to edit the CFLAGS in the Makefile yourself. x:: ! # cd x; (env TOP=$(TOP) CC=$(CC) CFLAGS="$(CFLAGS)" MAKE="$(MAKE)" ./makex.sh) ! cd x; (export TOP CC CFLAGS MAKE; \ ! TOP=$(TOP) CC=$(CC) CFLAGS="$(CFLAGS)" MAKE="$(MAKE)"; ./makex.sh) bin:: cd bin; $(MAKE) CC=$(CC) CFLAGS="$(CFLAGS)" TOP=$(TOP) *** ir/ircfiles.c.orig Wed Apr 7 13:37:19 1993 --- ir/ircfiles.c Tue Apr 27 20:38:10 1993 *************** *** 2710,2716 **** if((wobjc_header_end - wobjc_header)<(MAX_HEADER_LEN-5) && !(strchr(WOBJC_BLANK, *(wobjc_header_end-1)) && strchr(WOBJC_BLANK, input))) ! { *wobjc_header_end+= input; *wobjc_header_end = 0; } } --- 2708,2714 ---- if((wobjc_header_end - wobjc_header)<(MAX_HEADER_LEN-5) && !(strchr(WOBJC_BLANK, *(wobjc_header_end-1)) && strchr(WOBJC_BLANK, input))) ! { *wobjc_header_end++ = input; *wobjc_header_end = 0; } } *************** *** 2725,2734 **** || (wobjc_context==14 && !wobjc_header_end)) wobjc_header_end = wobjc_header; if((wobjc_class_end - wobjc_class_end)<(MAX_HEADER_LEN/2)) ! { *wobjc_class_end+= input; *wobjc_class_end = 0; } if((wobjc_context==13 || wobjc_context==14) && (wobjc_header_end-wobjc_header_end)<(MAX_HEADER_LEN/2)) ! { *wobjc_header_end+= input; *wobjc_header_end = 0; } } } --- 2723,2732 ---- || (wobjc_context==14 && !wobjc_header_end)) wobjc_header_end = wobjc_header; if((wobjc_class_end - wobjc_class_end)<(MAX_HEADER_LEN/2)) ! { *wobjc_class_end++ = input; *wobjc_class_end = 0; } if((wobjc_context==13 || wobjc_context==14) && (wobjc_header_end-wobjc_header_end)<(MAX_HEADER_LEN/2)) ! { *wobjc_header_end++ = input; *wobjc_header_end = 0; } } } *************** *** 2787,2797 **** { char *p; - /* Flush terminal blanks and balance opening '[' if any. */ for(p=wobjc_header+strlen(wobjc_header); p>wobjc_header && strchr(WOBJC_BLANK, *(p-1)); p--); ! if(wobjc_header[0]=='+' || wobjc_header[0]=='-') *p+= ']'; *p = 0; --- 2785,2794 ---- { char *p; /* Flush terminal blanks and balance opening '[' if any. */ for(p=wobjc_header+strlen(wobjc_header); p>wobjc_header && strchr(WOBJC_BLANK, *(p-1)); p--); ! if(wobjc_header[0]=='+' || wobjc_header[0]=='-') *p++ = ']'; *p = 0; *** ir/ircfiles.h.orig Wed Apr 7 13:49:54 1993 --- ir/ircfiles.h Tue Apr 27 21:09:28 1993 *************** *** 187,198 **** void nhyp_header_function _AP((char *line)); void nhyp_finish_header_function _AP((char *header)); ! #ifdef NeXT ! /* objective-C code */ boolean wobjc_separator_function _AP((char *line)); void wobjc_header_function _AP((char *line)); void wobjc_finish_header_function _AP((char *header)); - #endif /* def NeXT */ /* Ziff files */ boolean ziff_separator_function _AP((char *line)); --- 187,196 ---- void nhyp_header_function _AP((char *line)); void nhyp_finish_header_function _AP((char *header)); ! /* Objective-C code */ boolean wobjc_separator_function _AP((char *line)); void wobjc_header_function _AP((char *line)); void wobjc_finish_header_function _AP((char *header)); /* Ziff files */ boolean ziff_separator_function _AP((char *line));