/*
 * Read Lisp expressions
 * Don Hopkins
 */

#include "lisp.h"
#include <ctype.h>

#define OPEN_TOKEN -1
#define CLOSE_TOKEN -2
#define QUOTE_TOKEN -3
#define DOT_TOKEN -4

/*
 * Return true if c is a whitespace character. Whitespace characters
 * are space, newline, return, tab, and form feed. 
 */

int is_white(c)
     int c;
{
  return(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f');
}

#define NOTHING_UNGOTTEN -1

int un_gotten = NOTHING_UNGOTTEN;

/*
 * Push a character back onto the input stream.
 */

un_get_char(c)
     int c;
{
  un_gotten = c;
}

/*
 * Get a character from the input stream. Echo it if echo is true and
 * not reading from the keyboard. On end of file, if reading from
 * keyboard, quit, else pop the input file and return a newline. 
 */

int get_char(echo)
     int echo;
{
  int c;
  object obj;

  if (un_gotten != NOTHING_UNGOTTEN) {
    c = un_gotten;
    un_gotten = NOTHING_UNGOTTEN;
  }
  else {
    c = getc(Input_File);
    if (echo &&
	Input_Stack != Nil &&
	c != EOF)
      putc(c, Output_File);
  }

  if (c == EOF) {
    if((obj = pop_input()) == Nil)
      quit("End of file");
    fprintf(Output_File, "\n<* End of file on stream %s *>\n",
	    Atom_Name(obj));
    c = '\n';
  }

  return(c);
}

/*

 * Parse a token from the input stream. Skips leading white space.
 * Returns OPEN_TOKEN, CLOSE_TOKEN, QUOTE_TOKEN, DOT_TOKEN, an atom,
 * or a number. Ignores comments starting with a semicolon and ending
 * with a newline.
 */

object get_token()
{
  char *name, atom_name[ATOM_SIZE], last;
  double value;
  int c, i = 0;

  while (1) {
    while (is_white(c = get_char(1))) ;	/* Skip white space */

    switch (c) {

/*
 * Deal with simple single character tokens.
 */

    case '(':
      return(OPEN_TOKEN);

    case ')':
      return(CLOSE_TOKEN);

    case '\'':
      return(QUOTE_TOKEN);

/*
 * Skip over comments and keep reading.
 */

    case ';':
      while ((c = get_char(1)) != '\n') ; 
      continue;				  

/*
 * Quoted atom.
 */

    case '|':
      while ((c = get_char(1)) != '|') {
	if (i < ATOM_SIZE - 1)
	  atom_name[i++] = c;
      }
      
      atom_name[i++] = '\0';

      goto new_atom;
      
/*
 * It's either a dot, an atom, or a number.
 */
 
    default:

/*
 * Read token into atom_name.
 */

      do { 
	if (i < ATOM_SIZE - 1)
	  atom_name[i++] = c;
	c = get_char(1);
      } while (!is_white(c) && c != '(' && c != ')' && c != '\'' && c != ';');
      
      atom_name[i++] = '\0';
      
/* 
 * Put char back if necessary.
 */

      if ((i < ATOM_SIZE) && !is_white(c)) 
	un_get_char(c);
      
/* 
 * Is it a dot token? 
 */

      if (atom_name[0] == '.' && atom_name[1] == '\0') 
	return(DOT_TOKEN);
      
/*
 * Hack to see if it's a number. The variable last will not be changed
 * if the entire string is parsed as a floating point number. It's only
 * considered a number if it begins with the right characters, and sscanf
 * uses all of it.
 */

      if (isdigit(atom_name[0]) ||
	  (atom_name[0] == '-' &&
	   (isdigit(atom_name[1]) ||
	    (atom_name[1] == '.' && isdigit(atom_name[2])))) ||
	  (atom_name[0] == '.' && isdigit(atom_name[1]))) {

	last = '\0';
	sscanf(atom_name, "%F%c", &value, &last);
	
	if (last == '\0')
	  return(make_number(value));
      }

/* 
 * What the hell, it must be an atom! Allocate space for the name and
 * return an atom.
 */
    new_atom:
      name = (char *)malloc(strlen(atom_name) + 1);
      strcpy(name, atom_name);

      return(make_atom(name));
    }
  }
}

/*
 * Read an S-expression from the input stream. Reads a token. It it's
 * an atom or number, then return it. If it's an open paren, read a
 * list. If it's a quote, return a quoted s-expression.
 */

object sread()
{
  object token = get_token();

  if (token >= 0)
    return(token);

  if (token == OPEN_TOKEN)
    return(read_list(1));

  if (token == QUOTE_TOKEN)
    return(make_cons(Quote, make_cons(sread(), Nil)));

  return(error("Syntax error during sread"));
}

/*
 * Read a list from the input stream. The variable head is 1 when
 * reading the head of a list, and 0 when reading the tail. Read a
 * token. Return Nil for a close paren token.  When reading the tail,
 * a dot token, followed by an s-expression specifying the return
 * value (to go in the cdr field of the previous node), followed by a
 * close paren token ending this list, is allowed. Otherwise, for an
 * open paren token, recursivly read a list as this list element; for
 * a quote token, read and quote an s-expression as this list element;
 * for an atom or number token, use the atom or number as this list
 * element; then make a new cons node whose car is this list element,
 * and whose cdr is the recursivly read tail of the list.
 */

object read_list(head)
     int head;
{
  object token = get_token(), obj;
  
  if (token == CLOSE_TOKEN)
    return(Nil);

  Ob_Push();

  if (token == DOT_TOKEN)
    if (head)
      obj = error("Nothing before . in read_list");

/*
 * Read in an s-expression to return.
 */

    else {
      Ob_Save(obj = sread());

      if (get_token() != CLOSE_TOKEN)
	obj = error("Too much after . in read_list");
    }

  else {

/*
 * If it's an open paren, read a list as this element.
 */

    if (token == OPEN_TOKEN)
      obj = read_list(1);

/*
 * If it's a quote, read in the next s-expression and quote it. 
 */

    else if (token == QUOTE_TOKEN)
      obj = make_cons(Quote, make_cons(sread(), Nil));

/* 
 * Token is an atom or number.
 */

    else
      obj = token;
    
    Ob_Save(obj);

/*
 * Read in the rest of the list.
 */

    obj = make_cons(obj, read_list(0));
  }

  Ob_Pop();
  return(obj);
}

/*
 * Skip white space to the end of the line and read the newline.
 */

read_line()
{
  int c;

  while (is_white(c = get_char(0))) {
    putc(c, Output_File);
    if (c == '\n')
      break;
  }

  if (!is_white(c))
    un_get_char(c);
}