/*  File   : serv.c
    Author : Ozan Yigit
    Updated: 11/20/97
    Defines: Principal built-in macros for PD M4.
*/

/* Modification History:
 *
 * Nov 19 1997 RAOK Added $[offset/count$] (like ..*.. but no commas)
 *      and   $[offset%count$] (like ..@.. but no commas)
 *      and   $"...$"         (send stuff to output).
 *      doincl() disappeared, replaced by incopen() in
 *      misc.c.  dopaste() was revised to use incopen(),
 *      inclose().
 *
 * Nov 14 1997 RAOK Changed all the function headers over.
 *      Added 'const' to stae* and char* where possible.
 *      Renamed __FILE__ to __file__ to match GNU M4 and
 *      made it conditional on +EXTENDED, not -NO__FILE.
 *
 * Nov 13 1997 RAOK translit() was a neat piece of work, but other
 *      versions of m4 now do something different, so
 *      it had to change.  Now there is an argument
 *      saying whether to iterate or not.  Also, the
 *      old version had a bug:  if |from| > |to| > 0,
 *      it would address off the end of to.  Fixed.
 *
 * Nov 12 1997 RAOK Gave built-in macro definitions the form $!mn
 *      instead of ^Dmn^D, which makes it possible to
 *      define built-ins on the command line, and has
 *      the merit of being printable.
 *
 * Nov 11 1997 RAOK Removed $( $, $) from expand(),
 *      removed the lparen, rparen, and comma variables,
 *      removed the changeparen macro and all support code.
 *      This was for a change that was never finished;
 *      just enough of it was there to cause compatibility
 *      problems.  It was the wrong approach anyway; (,)
 *      should be properties of individual macros, not
 *      global variables.
 *      Made ifdef variadic.
 *      Made argv arguments point to stae arrays, not
 *      char* arrays.  This is necessary because that's
 *      what they _are_.
 *
 * Nov  8 1997 RAOK Fixed a mistake in the non-EXTENDED version of
 *      substr() that had gone noticed because I only
 *      tested the EXTENDED version of everything.
 *
 * Nov  7 1997 RAOK Made define( foo, ^Dmn^D)
 *      and  pushdef(foo, ^Dmn^D)
 *      define foo to act as built-in macro mn.
 *      I've been mulling over better ways for _years_,
 *      but this is good enough.  WARNING:  the actual
 *      string used is very likely to change; all that
 *      matters is that it's what defn() gives you for
 *      a built-in.
 *
 * Nov  7 1996 RAOK Added $[...?...$] for checking argument count.
 *
 * Oct 10 1996 RAOK 1.  Fixed the comment that used to mention $(...$)
 *                      to correctly mention $[...$] instead.
 *      2.  Made extended dosub() 8-bit clean.
 *
 * Mar 26 1993 RAOK Made m4trim() 8-bit clean.
 */

#include "mdef.h"
#include "extr.h"
#include "ourlims.h"

#ifdef EXTENDED

static char const *
backnum H4(
    register char const *,t, /* points to beginning of character string */
    register char const *,p, /* points just _after_ optional digit sequence */
    int                 *,n, /* variable to be given the numeric value */
    int                  ,v  /* default value to use if there are no digits */
)
    {
   register int d, s;

   if (p != t && (unsigned)(d = p[-1]-'0') < 10) {
       v = d, s = 1, p--;
       while (p != t && (unsigned)(d = p[-1]-'0') < 10)
     s *= 10, v += d*s, p--;
   }
   *n = v;
   return p;
    }

#endif


#define ucArgv(n) ((unsigned char *)arg(n))

/*  expand(<DS FN A1 ... An>)
        0  1  2      n+1     -- initial indices in arg()
       -1  0  1      n    -- after adjusting argv++, argc--
    This expands a user-defined macro;  FN is the name of the macro, DS
    is its definition string, and A1 ... An are its arguments.
    The following special forms are recognised.  Note that the second-
    to last character of a special form must be a dollar sign or we will
    not recognise it until too late.
   $0         The macro name
   $i    1<=i<=9    Argument i
   $*         All the arguments, separated by commas
   $@         All the arguments, separated by commas
           and quoted.
   $#         The number of arguments

    Our extensions:
   $$         A literal dollar sign
   $<         The current "left quote"
   $>         The current "right quote"
   $\         The current "escape quote"
   ${         The current "start of comment"
   $}         The current "end of comment"
   $|         The current "comment escape"
   $"...$"       Send ... to output stream with no rescan.
   $[offset*count$] Arguments offset+0, ..., offset+count-1
           separated by commas.
   $[offset/count$] Arguments offset+0, ..., offset+count-1
           with no separators at all.
   $[offset@count$] Arguments offset+0, ..., offset+count-1
           separated by commas and quoted.
   $[offset%count$] Arguments offset+0, ..., offset+count-1
           with no separators at all.
   $[offset#count$] The number of arguments that would have
           been processed by ..@... or ...*...
   $[offset?count$] Reports an error if the number of
           arguments is not in the range
           offset+0 .. offset+count-1
           Expands to "" if ok, 1 if bad.

    In these last six forms,
   offset and count are zero or more decimal digits.
   if the offset is empty it defaults to 1.
   if the count  is empty it defaults to $#+1-offset
    This means that $[*$], $[@$], and $[#$] mean the same as $*, $@, $#.
    It also means that for 1<=i<=9, $[i*1$] means the same as $i, but
    that we can access argument i directly even when i > 9.  It also
    provides us with a way of quoting any single argument: $[i@$] is
    argument i quoted with the current quotation characters.
    Note that it is $[...$] to enable parsing in either direction.

    Note:  some existing Makefile-generating scripts rely on $( being
    left untouched.  That was silly, because it was never so defined.
    There is very little I can do about this.
*/
void
expand H2(stae const *,argv, int,argc)
    {
   register char const *t;
   register char const *p;
   register int n;
   char *px = NULL;

#ifdef  DEBUG
   fprintf(stderr, "expand(%s,%d)\n", arg(1), argc);
#endif
   t = arg(0);      /* definition string as a whole */
   argc--;       /* discount definition string (-1th arg) */
   argv++;       /* move the origin to the name */
   for (p = t; *p++; ) {}
   p -= 2;       /* points to last character of definition */
   while (p > t) {     /* if definition is empty, fails at once  */
       if (*--p != ARGFLAG) {
     putback(p[1]);
       } else {
     switch (p[1]) {
         case '#':
        pbnum(argc-1);
        break;
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
        if ((n = p[1]-'0') < argc) pbstr(arg(n));
        break;
         case '*':     /* push all arguments back */
        for (n = argc-1; n > 1; n--) {
            pbstr(arg(n));
            putback(COMMA);
        }
        pbstr(arg(1));
        break;
         case '@':     /* push arguments back quoted */
        for (n = argc-1; n > 1; n--) {
            pbqtd(arg(n));
            putback(COMMA);
        }
        pbqtd(arg(1));
        break;
#ifdef  EXTENDED
         case '$':     /* $$ => $ */
        break;
         case '<':
        pbstr(lquote);
        break;
         case '>':
        pbstr(rquote);
        break;
         case '\\':
        pbstr(vquote);
        break;
         case '{':
        pbstr(scommt);
        break;
         case '}':
        pbstr(ecommt);
        break;
         case '|':
        pbstr(xcommt);
        break;
         case '"':
        if (px == NULL) {
            px = bp;
        } else {
            while (bp != px) putc(*--bp, active);
            px = NULL;
        }
        break;
         case ']':     /* $[number{#,@,*}number$] */
        {
            char flag; /* #,@,* */
            int start; /* first argument number */
            int count; /* number of arguments */

            p = backnum(t, p, &count, -1);
            if (p == t)
           error2("bad $[...$] construct", arg(0));
            flag = *--p;
            if (flag != '*' && flag != '@'
             && flag != '/' && flag != '%'
             && flag != '#' && flag != '?'
            ) {
           error2(
           "$[...c...$]; c should be @, *, ?, or #", t);
            }
            p = backnum(t, p, &start, 1);
            if (p <= t+1 || p[-1] != '[' || p[-2] != ARGFLAG)
           error2("$[...$] must begin with $[", arg(0));
            p -= 2;
            if (count < 0) count = argc-start;
            if (count <= 0) {
           if (flag == '#') pbnum(0);
            } else {
           switch (flag) {
               case '*':
              for (n = start+count-1; n > start; n--) {
                  pbstr(arg(n));
                  putback(COMMA);
              }
              pbstr(arg(start));
              break;
               case '/':
              for (n = start+count-1; n >= start; n--)
                  pbstr(arg(n));
              break;
               case '@':
              for (n = start+count-1; n > start; n--) {
                  pbqtd(arg(n));
                  putback(COMMA);
              }
              pbqtd(arg(start));
              break;
               case '%':
              for (n = start+count-1; n >= start; n--)
                  pbqtd(arg(n));
              break;
               case '#':
              pbnum(count);
              break;
               case '?':
              n = start + count - 1;
              if (start > argc-1 || n < argc-1) {
                  char buffer[60];
                  sprintf(buffer,
                  "should have %d <= $# <= %d\n",
                 start, n);
                  error2(buffer, arg(0));
                  pbnum(1);
              }
              break;
               default:
                   error2("internal error during", arg(0));
                   break;
           }
            }
        }
        break;
#endif
         default:
        putback(p[1]);
        putback(p[0]);
        break;
     }
     p--;
       }
   }
   if (p == t) putback(*p);     /* do last character */
   if (px != NULL) error2("unbalanced $\"", arg(0));
    }


static char nuldefmsg[] = "defining empty name";
static char recdefmsg[] = "macro defined as itself:";

#define is_decimal(c) ((unsigned)(c) - (unsigned)'0' < (unsigned)10)

/*  In November 1997, RAOK finally got around to making
   define(foo, defn(`bar'))
    work even when bar is a built in macro.  The old STATIC flag
    went away.  Now _every_ macro has a definition string, even
    the built-in ones.  A built-in macro definition looks like
    $ ! m n
    where m n is the macro number in decimal.  This is a bit of a kluge,
    but it's reasonably workable.  Assinging such a definition to a macro
    by any means _also_ copies the numeric value to the ->type field.
*/

static int
bim_mac_type H1(register char const *,p)
    {
   return p[0] == ARGFLAG
            && p[1] == BIMCHAR
            && is_decimal(p[2])
            && is_decimal(p[3])
            && p[4] == EOS
       ? (p[2]-'0')*10 + (p[3]-'0')
       : MACRTYPE;
    }


/*  dodefine(Name, Definition)
    install Definition as the only definition of Name in the hash table.
*/
void
dodefine H2(char const *,name, char const *,defn)
    {
   register ndptr p;

   if (*name == EOS) error1(nuldefmsg);
   if (strcmp(name, defn) == 0) error2(recdefmsg, name);
#ifdef  DEBUG
   fprintf(stderr, "define(%s,--)\n", name);
#endif
   if ((p = lookup(name)) == nil) {
       p = addent(name);
   } else
   if (p->defn != null) {
       free(p->defn);
   }
   p->defn = !defn || !*defn ? null : dupstr(defn);
   p->type = bim_mac_type(p->defn);
    }


/*  dopushdef(Name, Definition)
    install Definition as the *first* definition of Name in the hash table,
    but do not remove any existing definitions.  The new definition will
    hide any old ones until a popdef() removes it.
*/
void
dopushdef H2(char const *,name, char const *,defn)
    {
   register ndptr p;

   if (*name == EOS) error1(nuldefmsg);
   if (strcmp(name, defn) == 0) error2(recdefmsg, name);
#ifdef  DEBUG
   fprintf(stderr, "pushdef(%s,--)\n", name);
#endif
   p = addent(name);
   p->defn = !defn || !*defn ? null : dupstr(defn);
   p->type = bim_mac_type(p->defn);
    }


/*  dodefn(Name)
    push back a *quoted* copy of Name's definition.
*/
void
dodefn H1(char const *,name)
    {
   register ndptr p;

   if ((p = lookup(name)) != nil && p->defn != null) pbqtd(p->defn);
    }


static void
dump1 H3(
    char const *,name,
    char const *,defn,
    int         ,type
)
    {
   fprintf(stderr, "define(");
   prqtd(  stderr, name);
        fprintf(stderr, ", ");
   if (type == QUTRTYPE) fprintf(stderr, "{quoter}"); else
   prqtd(  stderr, defn);
   fprintf(stderr, ")dnl\n");
    }

/*  dodump(<? dump>)      dump all definition in the hash table
    dodump(<? dump F1 ... Fn>)  dump the definitions of F1 ... Fn in that
    order.  The requested definitions are written to stderr.  Names which
    have a built-in (numeric) definition do work; $!mn is written.
    However, quoters DON'T work at the moment.
*/
void
dodump H2(stae const *,argv, int,argc)
    {
   register int n;
   ndptr p;

   if (argc > 2) {
       for (n = 2; n < argc; n++)
     if ((p = lookup(arg(n))) != nil)
         dump1(p->name, p->defn, p->type);
   } else {
       for (n = 0; n < HASHSIZE; n++)
     for (p = hashtab[n]; p != nil; p = p->nxtptr)
         dump1(p->name, p->defn, p->type);
   }
    }


/*  doifelse(<? ifelse {x y ifx=y}... [else]>)
         0 1       2 3 4         [2 when we get to it]
*/
void
doifelse H2(stae const *,argv, int,argc)
    {
   for (; argc >= 5; argv += 3, argc -= 3)
       if (strcmp(arg(2), arg(3)) == 0) {
     pbstr(arg(4));
     return;
       }
   if (argc > 2) pbstr(arg(2));
    }


/*  doifdef(<? ifdef {m b}... [else]>)
             0 1      2 3     [2 when we get to it]
    This was made variadic solely to make
   ifdef(`unix',  UNIX case,
              `msdos', MS-DOS case,
              `macos', MacOS case,
              `vms',   VMS case,
              `ibmvm', VM/CMS case,
              `mvs',   MVS case,
                       default case)
    easy.  But it was an obvious extension in any case.
*/
void
doifdef H2(stae const *,argv, int,argc)
    {
   for (; argc >= 4; argv += 2, argc -= 2)
       if (lookup(arg(2)) != nil) {
     pbstr(arg(3));
     return;
       }
   if (argc >= 3) pbstr(arg(2));
    }



/*  dochq(<? changequote [left [right [verbatim]]]>)
      0 1            2     3      4
    change the quote delimiters; to strings of any positive length.
    Empty arguments result in no change for that parameter.
    Missing arguments result in defaults:
   changequote      => ` ' ^V
   changequote(q)      => q q ^V
   changequote(l,r) => l r ^V
   changequote(l,r,v)  => l r v
    There isn't any way of switching the verbatim-quote off,
    but if you make it the same as the right quote it won't
    be able to do anything (we check for R, L, V in that order).
*/
void
dochq H2(stae const *,argv, int,argc)
    {
   if (argc > 2) {
       if (*arg(2)) {
     if (lquote != LQUOTE) free(lquote);
     lquote = dupstr(arg(2));
       }
       if (argc > 3) {
     if (*arg(3)) {
         if (rquote != RQUOTE) free(rquote);
         rquote = dupstr(arg(3));
     }
     if (argc > 4 && *arg(4)) {
         if (vquote != VQUOTE) free(vquote);
         vquote = dupstr(arg(4));
     }
       } else {
     if (rquote != RQUOTE) free(rquote);
     rquote = dupstr(lquote);
       }
   } else {
       if (lquote != LQUOTE) free(lquote);
       lquote = LQUOTE;
       if (rquote != RQUOTE) free(rquote);
       rquote = RQUOTE;
       if (vquote != VQUOTE) free(vquote);
       vquote = VQUOTE;
   }
    }


/*  dochc(<? changecom [left [right [escape]]]>)
           0 1      2     3      4
    change the comment delimiters; to strings of any positive length.
    The setup in V7 m4 and older versions of pdm4 was that if you
    omitted the arguments, the defaults were restored, just like
    omitting the arguments to changequote.  But current (SVr4)
    System V M4 and GNU M4 disable the comment mechanism in this case.
    So we have                     scommt    ecommt   xcommt
   <defaults>       #         \n       <nothing>
   changecom     => <nothing> \n       <nothing>
   changecom(s)     => s         \n       <nothing>
   changecom(s,e)      => s         e        <nothing>
   changecom(s,e,x) => s         e        x
*/
void
dochc H2(stae const *,argv, int,argc)
    {
   if (xcommt != XCOMMT) free(xcommt);
   xcommt = XCOMMT;
   if (argc > 2) {
       if (*arg(2)) {
     if (scommt != SCOMMT && scommt != null) free(scommt);
     scommt = dupstr(arg(2));
       }
       if (argc > 3) {
     if (*arg(3)) {
         if (ecommt != ECOMMT) free(ecommt);
         ecommt = dupstr(arg(3));
     }
     if (argc > 4 && *arg(4)) {
         xcommt = dupstr(arg(4));
     }
       } else {
     if (ecommt != ECOMMT) free(ecommt);
     ecommt = ECOMMT;
       }
   } else {
       if (scommt != SCOMMT && scommt != null) free(scommt);
       scommt = null;
       if (ecommt != ECOMMT) free(ecommt);
       ecommt = ECOMMT;
   }
    }


/*  dodivert - divert the output to a temporary file
*/
void
dodiv H1(int,n)
    {
   if (n < 0 || n >= MAXOUT) n = 0;   /* bitbucket */
   if (outfile[n] == NULL) {
       m4temp[UNIQUE] = '0' + n;
       if ((outfile[n] = fopen(m4temp, "w")) == NULL)
     error2("cannot divert to", m4temp);
   }
   oindex = n;
   active = outfile[n];
    }


/*  doundivert - undivert a specified output, or all
                other outputs, in numerical order.
*/
void
doundiv H2(stae const *,argv, int,argc)
    {
   register int ind;
   register int n;

   if (argc > 2) {
       for (ind = 2; ind < argc; ind++) {
     n = expr(arg(ind));
     if (n > 0 && n < MAXOUT && outfile[n] != NULL) getdiv(n);
       }
   } else {
       for (n = 1; n < MAXOUT; n++)
     if (outfile[n] != NULL) getdiv(n);
   }
    }


/*  dosub(<? substr [string [offset [length [background]]]>)
           0 1       2       3       4       5
    This function provides the substring feature.
    The basic idea is to discard the first {offset} characters of
    {string} and return the next {length} of them.
    If the length is omitted, it returns all of the characters
    that remain after the first {offset} of them are discarded.
    But what are we to do if offset or string is omitted,
    if length < 0, offset < 0, or length+offset > len(string)?
    The UNIX manuals, even the System V Interface Definition, do not
    say when parameters are out of range, or parameters other than
    length are missing.

    My original choice was to clip the parameters back.  It turns out
    that System V Release 3 and System V Release 4 do exactly that,
    treating an omitted string as empty and an omitted offset as 0,
    which again was my choice.

    However, I have decided on something which I now regard as more
    useful.  We regard the string as embedded in an infinite sequence
    of background characters (the default is blanks), and we take a
    slice out of this infinite string.  In particular, this means that
    To left justify  S in W characters truncating right: substr(S,0,W)
    To right justify S in W characters truncating left : substr(S,len(S)-W,W)
    To left justify  S in W or more with no truncation : S`'substr(,0,W-len(S))
    To right justify S in W or more with no truncation : substr(,0,W-len(S))`'S
    To get N copies of character B                     : substr(,0,N,B)
*/
void
dosub H2(stae const *,argv, int,argc)
    {
   char *ap = argc > 2 ? arg(2) : ""; /* string */
   int   al = strlen(ap);       /* but strlen -> size_t! */
   int   df = argc > 3 ? expr(arg(3)) : 0;/* offset */
   int   nc = argc > 4 ? expr(arg(4)) : al-df;
   int   t  = df + nc;       /* apparent end */
#ifdef  EXTENDED
   int B = argc > 5 && arg(5)[0] != '\0'  /* background */
         ? *(unsigned char *)arg(5)   /* supplied */
         : ' ';           /* defaults to ' ' */

   if (nc <= 0) return;
   for (; nc > 0 && t > al; t--, nc--) putback(B);
   if (nc <= 0) return;
   if (t > 0) {
       for (ap += t; nc > 0 && t > 0; t--, nc--) putback(*--ap);
   }
   for (; nc > 0; nc--) putback(B);
#else
   if (df > al) df = al; else if (df < 0) df = 0;
   if (t  > al) t  = al; else if (t < df) t = df;
   /* Now 0 <= df <= t <= al; 0 <= t-df <= al */
   nc = t-df, ap += t;
   while (--nc >= 0) putback(*--ap);
#endif
    }


/*  translit(buff, from, to, iterate)

    Let buff be b1...bk
    Let from be f1...fm
    Let to   be t1...tn

    If n > m, the trailing n-m characters of to are ignored.
    If n < m, imagine m-n trailing \0 characters being added to to.
    So now from is f1...fp
           to   is t1...tp
    and some of the characters of to may be \0.

    This rewrites buff:
   if bi = fj for some j, take the rightmost such j and then
       if tj is \0, delete bi
       if tj is not \0, replace bi by tj

    Until November 1997, pdm4 used a standard implementation of Icon's
    map(s, from, to) function, which is why this function used to be
    called map().  It was renamed to avoid a name clash in a pre-ANSI
    compiler.  Icon's map() uses a loop instead of an if:

   while bi = fj for some j, take the rightmost such j and then
       if tj is \0, delete bi and exit the loop
       if tj is not \0, replace bi by tj and continue the loop.

    If the iterate argument is true, translit() will do what it always did.
    If the iterate argument is false, translit() will do what GNU M4's and
    System V M4's translit() do.

    It has never really been clear what translit() is _supposed_ to do;
    I have met other versions of m4 which agree with old pdm4.

    This is a standard implementation of Icon's map(s,from,to) function.
    Within translit(), we replace every character of "from" with the
    corresponding character in "to".  If "to" is shorter than "from",
    then the corresponding entries are null, which means that those
    characters disappear altogether.  Furthermore, imagine a call like
    translit(dest, "sourcestring", "srtin", "rn..*"). In this case, `s' maps
    to `r', `r' maps to `n' and `n' maps to `*'. Thus, `s' ultimately
    maps to `*'. In order to achieve this effect in an efficient manner
    (i.e. without multiple passes over the destination string), we loop
    over mapvec, starting with the initial source character.  If the
    character value (dch) in this location is different from the source
    character (sch), sch becomes dch, once again to index into mapvec,
    until the character value stabilizes (i.e. sch = dch, in other words
    mapvec[n] == n).  Even if the entry in the mapvec is null for an
    ordinary character, it will stabilize, since mapvec[0] == 0 at all
    times.  At the end, we restore mapvec* back to normal where
    mapvec[n] == n for 0 <= n <= UCHAR_MAX.  This strategy, along with the
    restoration of mapvec, is about 5 times faster than any algorithm
    that makes multiple passes over the destination string.

    The destination and source strings used to be separated, but they
    _can_ be the same, and in all uses in this program they _are_ the
    same.  One day it might be nice to add 'restrict' to various
    arguments, so now we can say clearly buff mustn't overlap f or t.
*/
void
translit H4(
    char       *,b,
    char const *,f,
    char const *,t,
    int         ,r
)
    {
   register unsigned char       *dest = (unsigned char       *)b;
   register unsigned char const *src  = (unsigned char const *)b;
            unsigned char const *from = (unsigned char const *)f;
   register unsigned char const *to   = (unsigned char const *)t;
   register unsigned char const *tmp;
   register unsigned char sch, dch;
   static   unsigned char mapvec[1+UCHAR_MAX] = {1};

   if (mapvec[0]) {
       register int i;
       for (i = 0; i <= UCHAR_MAX; i++) mapvec[i] = i;
   }
   if (src && *src) {
       /* create a mapping between "from" and "to" */
       if (to == NULL) to = (unsigned char *)null;
       for (tmp = from; (sch = *tmp++) != EOS; )
     if ((mapvec[sch] = *to) != EOS) to++;
         mapvec[sch] = EOS;
       if (r) {
     while ((sch = *src++) != EOS) {
         while ((dch = mapvec[sch]) != sch) sch = dch;
         if ((*dest = dch) != EOS) dest++;
     }
       } else {
     while ((sch = *src++) != EOS) {
         if ((*dest = mapvec[sch]) != EOS) dest++;
     }
       }
       /* restore all the changed characters */
       for (tmp = from; (sch = *tmp++) != EOS; )
     mapvec[sch] = sch;
   }
   *dest = EOS;
    }


#ifdef  EXTENDED

/*  dopaste(FileName)
    copy a given file to the output stream without any macro processing.
*/
int
dopaste H1(char const *,filename)
    {
   register FILE *get;
   register FILE *put;
   register int   c;

   if (!incopen(filename)) return 0;
   get = ifile;
   put = active;
   while ((c = getc(get)) != EOF) putc(c, put);
   inclose();
   return 1;
    }


/*  m4trim(<? m4trim [string [leading [trailing [middle [rep]]]]]>)
       0 1       2       3        4         5       6

    (1) Any prefix consisting of characters in the "leading" set is removed.
   The default is " \t\n".
    (2) Any suffix consisting of characters in the "trailing" set is removed.
   The default is to be the same as leading.
    (3) Any block of consecutive characters in the "middle" set is replaced
   by the rep string.  The default for middle is " \t\n", and the
   default for rep is the first character of middle.
*/
void
m4trim H2(stae const *,argv, int,argc)
    {
   static unsigned char repbuf[2] = " ";
   static unsigned char layout[] = " \t\n\r\f";
   unsigned char *string   = argc > 2 ? ucArgv(2) : repbuf+1;
   unsigned char *leading  = argc > 3 ? ucArgv(3) : layout;
   unsigned char *trailing = argc > 4 ? ucArgv(4) : leading;
   unsigned char *middle   = argc > 5 ? ucArgv(5) : trailing;
   unsigned char *rep      = argc > 6 ? ucArgv(6) :
                  (repbuf[0] = *middle, repbuf);
   static unsigned char sets[1+UCHAR_MAX];
#  define PREF 1
#  define SUFF 2
#  define MIDL 4
   register int i, n;

   for (i = UCHAR_MAX; i >= 0; ) sets[i--] = 0;
   while (*leading)  sets[*leading++]  |= PREF;
   while (*trailing) sets[*trailing++] |= SUFF;
   while (*middle)   sets[*middle++]   |= MIDL;

   while (*string && sets[*string]&PREF) string++;
   n = strlen((char *)string);
   while (n > 0 && sets[string[n-1]]&SUFF) n--;
   while (n > 0) {
       i = string[--n];
       if (sets[i]&MIDL) {
     pbstr((char*)rep);
     while (n > 0 && sets[string[n-1]]&MIDL) n--;
       } else {
     putback(i);
       }
   }
    }


/*  defquote(MacroName # The name of the "quoter" macro to be defined.
   [, Opener  # default: "'".  The characters to place at the
        # beginning of the result.
   [, Separator  # default: ",".  The characters to place between
        # successive arguments.
   [, Closer  # default: same as Opener.  The characters to
        # place at the end of the result.
   [, Escape  # default: `'  The escape character to put in
        # front of things that need escaping.
   [, Default # default: simple.  Possible values are
        # [lL].* = letter, corresponds to PLAIN1.
        # [dD].* = digit,  corresponds to PLAIN2.
        # [sS].* = simple, corresponds to SIMPLE.
        # [eE].* = escaped,corresponds to SCAPED.
        # .*,              corresponds to FANCY
   [, Letters # default: `'.  The characters of type "L".
   [, Digits  # default: `'.  The characters of type "D".
   [, Simple  # default: `'.  The characters of type "S".
   [, Escaped # default: `'.  The characters of type "E".
   {, Fancy   # default: none.  Each has the form `C'`Repr'
        # saying that the character C is to be represented
        # as Repr.  Can be used for trigraphs, \n, &c.
   }]]]]]]]]])

    Examples:
   defquote(DOUBLEQT, ")
   defquote(SINGLEQT, ')
    After these definitions,
   DOUBLEQT(a, " b", c)   => "a,"" b"",c"
   SINGLEQT("Don't`, 'he said.") => '"Don''t, he said."'
    Other examples defining quote styles for several languages will be
    provided later.

    A quoter is represented in M4 by a special identifying number and a
    pointer to a Quoter record.  I expect that there will be few quoters
    but that they will need to go fairly fast.

*/

#define PLAIN1   0
#define PLAIN2   1
#define SIMPLE   2
#define SCAPED   3
#define FANCY 4

struct Quoter
    {
   char *opener;
   char *separator;
   char *closer;
   char *escape;
   char *fancy[1+UCHAR_MAX];
   char class[1+UCHAR_MAX];
     };

#if 0
static void
freeQuoter H1(struct Quoter *,q)
    {
   int i;

   free(q->opener);
   free(q->separator);
   free(q->closer);
   free(q->escape);
   for (i = UCHAR_MAX; i >= 0; i--)
       if (q->fancy[i]) free(q->fancy[i]);
   free((char *)q);
    }
#endif

/*  dodefqt(<
   0 ?
   1 defquote
   2 MacroName
      [ 3  Opener
      [ 4  Separator
      [ 5  Closer
      [ 6  Escape
      [ 7  Default
      [ 8  Letters
      [ 9  Digits
      [10  Simple
      [11  Escaped
      [11+i   Fancy[i] ]]]]]]]]]]>)
*/
void
dodefqt H2(stae const *,argv, int,argc)
    {
   struct Quoter q, *r;
   register int i;
   register unsigned char *s;
   register int c;
   ndptr p;

   if (!(argc > 2 && *arg(2) != EOS)) error1(nuldefmsg);
   switch (argc > 7 ? arg(7)[0] : '\0') {
       case 'l': case 'L':   c = PLAIN1; break;
       case 'd': case 'D': c = PLAIN2; break;
       case 'e': case 'E': c = SCAPED; break;
       case 'f': case 'F': c = FANCY;  break;
       default:            c = SIMPLE;
   }
   for (i = UCHAR_MAX; --i >= 0; ) q.class[i] = c;
   for (i = UCHAR_MAX; --i >= 0; ) q.fancy[i] = 0;
   q.opener = dupstr(argc > 3 ? arg(3) : "");
   q.separator = dupstr(argc > 4 ? arg(4) : ",");
   q.closer = dupstr(argc > 5 ? arg(5) : q.opener);
   q.escape = dupstr(argc > 6 ? arg(6) : "");
   if (argc > 8)
       for (s = (unsigned char *)arg(8); (c = *s++) != '\0'; )
     q.class[c] = PLAIN1;
   if (argc > 9)
       for (s = (unsigned char *)arg(9); (c = *s++) != '\0'; )
     q.class[c] = PLAIN2;
   if (argc > 10)
       for (s = (unsigned char *)arg(10); (c = *s++) != '\0'; )
     q.class[c] = SIMPLE;
   if (argc > 11)
       for (s = (unsigned char *)arg(11); (c = *s++) != '\0'; )
     q.class[c] = SCAPED;
   for (i = 12; i < argc; i++) {
       s = (unsigned char *)arg(i);
       c = *s++;
       q.fancy[c] = dupstr((char *)s);
       q.class[c] = FANCY;
   }
   /*  Now we have to make sure that the closing quote works.  */
   if ((c = q.closer[0]) && q.class[c] <= SIMPLE) {
       if (q.escape[0]) {
     q.class[c] = SCAPED;
       } else {
     char qbuf[3];
     qbuf[0] = c, qbuf[1] = c, qbuf[2] = '\0';
     q.fancy[c] = dupstr(qbuf);
        q.class[c] = FANCY;
       }
   }
   /*  We also have to make sure that the escape (if any) works.  */
   if ((c = q.escape[0]) && q.class[c] <= SIMPLE) {
       q.class[c] = SCAPED;
   }
   r = (struct Quoter *)malloc(sizeof *r);
   if (r == NULL) error1(memsg);
   *r = q;
        p = addent(arg(2));
        p->defn = (char *)r;
        p->type = QUTRTYPE;
    }


/*  doqutr(<DB MN A1 ... An>)
        0  1  2      n+1 argc
    arg(0) points to the struct Quoter.
    arg(1) points to the name of this quoting macro
    argv[2..argc-1] point to the arguments.
    This applies a user-defined quoting macro.  For example, we could
    define a macro to produce Prolog identifiers:
   defquote(plid, ', , ', , simple,
       abcdefghijklmnopqrstuvwxyz,
       ABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789)

    After doing that,
   plid(foo)     => foo
   plid(*)       => '*'
   plid(Don't)      => 'Don''t'
   plid(foo,)    => 'foo'
*/
void
doqutr H2(stae const *,argv, int,argc)
    /* DEFINITION-BLOCK MacroName Arg1 ... Argn
       0                1         2        n-1   argc
    */
    {
   struct Quoter *r = (struct Quoter *)arg(0);
   char *p;
   register unsigned char *b, *e;
   int i;
   register int c;

   /* If there is exactly one argument and it has the form */
   /*   <PLAIN1> <PLAIN2>*            */
   /* then it is copied without quotes.  Prolog wants this */
   /* and it doesn't hurt languages with no PLAIN1 chars.  */
   if (argc == 3) {
       b = ucArgv(2);
       e = b + strlen((char*)b);
       if (e != b && r->class[*b++] == PLAIN1) {
     while (b != e && r->class[*b] <= PLAIN2) b++;
     if (b == e) {
         pbstr(arg(2));
         return;
     }
       }
   }

   /* We now know that quotes are needed */

   p = r->closer;
   if (argc < 3) {
       pbstr(p);
   } else
   for (i = argc-1; i >= 2; i--) {
       pbstr(p);
       b = ucArgv(i);
       e = b+strlen((char *)b);
       while (e != b)
     switch (r->class[c = *--e]) {
         case FANCY:
        p = r->fancy[c];
        if (p) {
            pbstr(p);
        } else {
            pbrad(c, 8, 1);
            pbstr(r->escape);
        }
        break;
         case SCAPED:
        putback(c);
        pbstr(r->escape);
        break;
         default:
        putback(c);
        break;
           }
       p = r->separator;
   }
   pbstr(r->opener);
    }

#endif

