Execute a shell-script to search

Stephen C. Pope (scp@acl.lanl.gov)
Tue, 20 Jul 93 17:35:36 -0600


Nathan> I've taken the standard CERN (2.06) daemon and added an
Nathan> executable shell script ability to it. What it now does, when
Nathan> it is given keywords on a URL like http://host/path/file is:

Nathan> -- expand /path/file using the rules file, giving a filename
Nathan> -- look for filename.search
Nathan> -- if it exists, execute it with the search keywords as arguments
Nathan> -- whatever this returns is passed back to the client verbatim
Nathan> -- if it doesn't exist, fail

Nathan> This has enabled me to put in my home.html file,
Nathan> <ISINDEX>
Nathan> and make a file called home.html.search that contains

Nathan> #!/bin/sh
Nathan> ARGS=`echo "$*" | /bin/sed 's/ /\?/g'`
Nathan> /usr/local/bin/www -source "wais://wais.vuw.ac.nz:90210/this-server?$ARGS"
Nathan> exit 0

Nathan> thus enabling users to search the home-page directly, and get the
Nathan> results of a WAIS search back.

Nathan> Sexy, eh? Diffs follow.

OK, here's (IMHO) a better way to do this. It is better because it takes
the output of the executable and still applies all the stream
processing/filtering/formatting. I also chose to flag things using a
trailing "!" instead of ".search", but this is easily enough changed.

By retaining the stream processing, I can do things like have a
``script.txt'' executable, and have the server stick in the
<PLAINTEXT> for me. As more filter/converters get written into the
daemon, I can take advantage of all those simply by identifying the
nature of the executable's output via the suffix.

I highly recommend having a ``fail *!'' at the top of the rules file,
and then selectively mapping various nodes to ``whatever!'' to prevent
folks from trying to execute arbitrary docs as executables.

One detail: the code as it exists treats the ?* search parameters as not
a part of the node, so the parameters are not normally passed any
deeper than HTRetrieve(). My unsightly hack is to point a global at
them so that they can be accessed deep inside HTLoadFile(). They could
also simply have been passed on down as part of the node address, but
it seemed to me that this violated the intent of the address parameter.

enjoy!

stephen pope
advanced computing lab
scp@acl.lanl.gov

*** Library/Implementation/HTFile.c.orig Wed Jun 30 06:37:10 1993
--- Library/Implementation/HTFile.c Tue Jul 20 15:22:59 1993
***************
*** 669,674 ****
--- 669,675 ----
}


+ char *HTSearchArgs = "";

/* Load a document
** ---------------
***************
*** 690,696 ****
)
{
char * filename;
! HTFormat format;
int fd = -1; /* Unix file descriptor number = INVALID */
char * nodename = 0;
char * newname=0; /* Simplified name of file */
--- 691,697 ----
)
{
char * filename;
! HTFormat format = NULL;
int fd = -1; /* Unix file descriptor number = INVALID */
char * nodename = 0;
char * newname=0; /* Simplified name of file */
***************
*** 703,711 ****
nodename=HTParse(newname, "", PARSE_HOST);
free(newname);

- format = HTFileFormat(filename, &encoding);

-
#ifdef vms
/* Assume that the file is in Unix-style syntax if it contains a '/'
after the leading one @@ */
--- 704,710 ----
***************
*** 1022,1032 ****
#endif
open_file:
{
! FILE * fp = fopen(localname,"r");
! if(TRACE) fprintf (stderr, "HTFile: Opening `%s' gives %p\n",
! localname, (void*)fp);
if (fp) { /* Good! */
! if (HTEditable(localname)) {
HTAtom * put = HTAtom_for("PUT");
HTList * methods = HTAnchor_methods(anchor);
if (HTList_indexOf(methods, put) == (-1)) {
--- 1021,1059 ----
#endif
open_file:
{
! int execute = NO;
! char *cmd;
! FILE *fp;
!
! if (localname[strlen(localname)-1] == '!') {
! localname[strlen(localname)-1] = 0;
! if (access(localname, X_OK) < 0) {
! if (TRACE)
! fprintf (stderr, "HTFile: `%s' not executable\n",
! localname);
! return HTLoadError(sink, 403,
! "Requested executable is not available");
! }
! execute = YES;
! cmd = (char *)malloc(strlen(localname) +
! strlen(HTSearchArgs) + 2);
! if (cmd == NULL) outofmem(__FILE__,"OpenFile");
! strcpy(cmd, localname);
! strcat(cmd, " ");
! strcat(cmd, HTSearchArgs);
! fp = popen(cmd, "r");
! if(TRACE) fprintf (stderr, "HTFile: Executing `%s' gives %p\n",
! cmd, (void*)fp);
! free(cmd);
! }
! else {
! fp = fopen(localname,"r");
! if(TRACE) fprintf (stderr, "HTFile: Opening `%s' gives %p\n",
! localname, (void*)fp);
! }
! if (!format) format = HTFileFormat(localname, &encoding);
if (fp) { /* Good! */
! if (!execute && HTEditable(localname)) {
HTAtom * put = HTAtom_for("PUT");
HTList * methods = HTAnchor_methods(anchor);
if (HTList_indexOf(methods, put) == (-1)) {
***************
*** 1035,1041 ****
}
free(localname);
HTParseFile(format, format_out, anchor, fp, sink);
! fclose(fp);
return HT_LOADED;
} /* If succesfull open */
} /* scope of fp */
--- 1062,1071 ----
}
free(localname);
HTParseFile(format, format_out, anchor, fp, sink);
! if (execute)
! pclose(fp);
! else
! fclose(fp);
return HT_LOADED;
} /* If succesfull open */
} /* scope of fp */
*** Daemon/Implementation/HTRetrieve.c.orig Wed May 26 09:16:34 1993
--- Daemon/Implementation/HTRetrieve.c Tue Jul 20 16:05:39 1993
***************
*** 79,86 ****
PUBLIC void HText_selectAnchor() {}
PUBLIC void * HTMainAnchor = NULL;

-
-
/* Retrieve a document
** -------------------
*/
--- 79,84 ----
***************
*** 95,100 ****
--- 93,99 ----

char * arg2 = 0; /* Simplified argument */
char * keywords=strchr(arg, '?');
+ extern char *HTSearchArgs;

#ifdef NOSEARCH
if (keywords) {
***************
*** 115,120 ****
--- 114,131 ----
"Sorry, this server does not perform searches.");
/* It ought to, using an executable script */
}
+ #else
+ if (keywords) {
+ *keywords++ = 0; /* Chop keywords off */
+ if (!*keywords) keywords = NULL;
+ else {
+ char *p;
+ for (p=keywords; *p; p++) if (*p == '+') *p = ' ';
+ /* Plusses to spaces */
+ HTUnEscape(keywords);
+ }
+ HTSearchArgs = keywords;
+ }
#endif

StrAllocCopy(arg2, arg);
***************
*** 128,133 ****
--- 139,145 ----

HTLoadToStream(arg2, NO, client);
free(arg2);
+ HTSearchArgs = "";
return HT_LOADED;
}