/* * Read Lisp expressions * Don Hopkins */ #include "lisp.h" #include #define OPEN_TOKEN -1 #define CLOSE_TOKEN -2 #define QUOTE_TOKEN -3 #define DOT_TOKEN -4 /* * Return true if c is a whitespace character. Whitespace characters * are space, newline, return, tab, and form feed. */ int is_white(c) int c; { return(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f'); } #define NOTHING_UNGOTTEN -1 int un_gotten = NOTHING_UNGOTTEN; /* * Push a character back onto the input stream. */ un_get_char(c) int c; { un_gotten = c; } /* * Get a character from the input stream. Echo it if echo is true and * not reading from the keyboard. On end of file, if reading from * keyboard, quit, else pop the input file and return a newline. */ int get_char(echo) int echo; { int c; object obj; if (un_gotten != NOTHING_UNGOTTEN) { c = un_gotten; un_gotten = NOTHING_UNGOTTEN; } else { c = getc(Input_File); if (echo && Input_Stack != Nil && c != EOF) putc(c, Output_File); } if (c == EOF) { if((obj = pop_input()) == Nil) quit("End of file"); fprintf(Output_File, "\n<* End of file on stream %s *>\n", Atom_Name(obj)); c = '\n'; } return(c); } /* * Parse a token from the input stream. Skips leading white space. * Returns OPEN_TOKEN, CLOSE_TOKEN, QUOTE_TOKEN, DOT_TOKEN, an atom, * or a number. Ignores comments starting with a semicolon and ending * with a newline. */ object get_token() { char *name, atom_name[ATOM_SIZE], last; double value; int c, i = 0; while (1) { while (is_white(c = get_char(1))) ; /* Skip white space */ switch (c) { /* * Deal with simple single character tokens. */ case '(': return(OPEN_TOKEN); case ')': return(CLOSE_TOKEN); case '\'': return(QUOTE_TOKEN); /* * Skip over comments and keep reading. */ case ';': while ((c = get_char(1)) != '\n') ; continue; /* * Quoted atom. */ case '|': while ((c = get_char(1)) != '|') { if (i < ATOM_SIZE - 1) atom_name[i++] = c; } atom_name[i++] = '\0'; goto new_atom; /* * It's either a dot, an atom, or a number. */ default: /* * Read token into atom_name. */ do { if (i < ATOM_SIZE - 1) atom_name[i++] = c; c = get_char(1); } while (!is_white(c) && c != '(' && c != ')' && c != '\'' && c != ';'); atom_name[i++] = '\0'; /* * Put char back if necessary. */ if ((i < ATOM_SIZE) && !is_white(c)) un_get_char(c); /* * Is it a dot token? */ if (atom_name[0] == '.' && atom_name[1] == '\0') return(DOT_TOKEN); /* * Hack to see if it's a number. The variable last will not be changed * if the entire string is parsed as a floating point number. It's only * considered a number if it begins with the right characters, and sscanf * uses all of it. */ if (isdigit(atom_name[0]) || (atom_name[0] == '-' && (isdigit(atom_name[1]) || (atom_name[1] == '.' && isdigit(atom_name[2])))) || (atom_name[0] == '.' && isdigit(atom_name[1]))) { last = '\0'; sscanf(atom_name, "%F%c", &value, &last); if (last == '\0') return(make_number(value)); } /* * What the hell, it must be an atom! Allocate space for the name and * return an atom. */ new_atom: name = (char *)malloc(strlen(atom_name) + 1); strcpy(name, atom_name); return(make_atom(name)); } } } /* * Read an S-expression from the input stream. Reads a token. It it's * an atom or number, then return it. If it's an open paren, read a * list. If it's a quote, return a quoted s-expression. */ object sread() { object token = get_token(); if (token >= 0) return(token); if (token == OPEN_TOKEN) return(read_list(1)); if (token == QUOTE_TOKEN) return(make_cons(Quote, make_cons(sread(), Nil))); return(error("Syntax error during sread")); } /* * Read a list from the input stream. The variable head is 1 when * reading the head of a list, and 0 when reading the tail. Read a * token. Return Nil for a close paren token. When reading the tail, * a dot token, followed by an s-expression specifying the return * value (to go in the cdr field of the previous node), followed by a * close paren token ending this list, is allowed. Otherwise, for an * open paren token, recursivly read a list as this list element; for * a quote token, read and quote an s-expression as this list element; * for an atom or number token, use the atom or number as this list * element; then make a new cons node whose car is this list element, * and whose cdr is the recursivly read tail of the list. */ object read_list(head) int head; { object token = get_token(), obj; if (token == CLOSE_TOKEN) return(Nil); Ob_Push(); if (token == DOT_TOKEN) if (head) obj = error("Nothing before . in read_list"); /* * Read in an s-expression to return. */ else { Ob_Save(obj = sread()); if (get_token() != CLOSE_TOKEN) obj = error("Too much after . in read_list"); } else { /* * If it's an open paren, read a list as this element. */ if (token == OPEN_TOKEN) obj = read_list(1); /* * If it's a quote, read in the next s-expression and quote it. */ else if (token == QUOTE_TOKEN) obj = make_cons(Quote, make_cons(sread(), Nil)); /* * Token is an atom or number. */ else obj = token; Ob_Save(obj); /* * Read in the rest of the list. */ obj = make_cons(obj, read_list(0)); } Ob_Pop(); return(obj); } /* * Skip white space to the end of the line and read the newline. */ read_line() { int c; while (is_white(c = get_char(0))) { putc(c, Output_File); if (c == '\n') break; } if (!is_white(c)) un_get_char(c); }