/*
 *   Format FORTH strings and screens
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <ctype.h>
#include "forthfmt.h"


/** Clean up a NUL-terminated string in place.
 *  - every control character (code below 0x20) is turned into a space
 *  - leading and trailing spaces are removed
 *  - if the result starts with a backslash, the string is made empty
 *  @param s  ptr to the string (modified in place)
 */
void ffmt_cleanup_string(char *s)
{
  char *src, *dst;

  /* pass 1: control characters become spaces */
  for (src = s; *src != 0; src++)
  {
    if ((uint8_t)*src < 0x20)
      *src = 0x20;
  }

  /* pass 2: skip the leading spaces and move the rest to the front */
  src = s;
  while (*src == 0x20)
    src++;

  dst = s;
  while (*src != 0)
    *dst++ = *src++;
  *dst = 0;

  /* pass 3: cut off the trailing spaces (dst points to the terminator) */
  while ((dst > s) && (dst[-1] == 0x20))
    *--dst = 0;

  /* a string that begins with a backslash is discarded completely */
  if (*s == '\\')
    *s = 0;
}


/** Trim a line that is given as buffer + length.
 *  Spaces and control characters (codes <= 0x20) are dropped from the
 *  end of the line. Control characters (codes < 0x20, spaces are kept)
 *  are dropped from the start, and the remaining characters are moved
 *  to the beginning of the buffer.
 *  No NUL terminator is written; use the returned length.
 *  @param s    ptr to the string
 *  @param len  string length
 *  @return final string length
 */
int ffmt_remove_trailing_whitespaces(char *s, int len)
{
  int first = 0;
  int count = 0;

  /* shorten the line while its last character is a space or control code */
  while ((len > 0) && (((uint8_t)s[len-1]) <= 0x20))
    len--;

  /* find the first character that is not a control code */
  while ((first < len) && (((uint8_t)s[first]) < 0x20))
    first++;

  /* shift the kept part down to index 0 */
  while (first < len)
    s[count++] = s[first++];

  return count;
}


/** Replace unprintable bytes in a buffer with spaces.
 *  A byte counts as unprintable when it is below 0x20 or equal to 0xFF.
 *  @param s    ptr to the buffer (need not be NUL-terminated)
 *  @param len  number of bytes to process
 */
void ffmt_make_printable(char *s, int len)
{
  int k;

  for (k = 0; k < len; k++)
  {
    uint8_t c = (uint8_t)s[k];

    if ((c == 0xFF) || (c < 0x20))
      s[k] = 0x20;
  }
}


/** Check if a string contains a line that is used
 *  for a mynor-tool internal comment.
 *  After skipping leading spaces the line must begin with one of the
 *  four-character markers "----", "\---", "\ --" or "\ \ ".
 *  @param s    ptr to the NUL-terminated string
 *  @return 1 if the line is such a comment, otherwise 0
 */
int ffmt_is_internal_comment_line(char *s)
{
  static const char *const markers[] = {
    "----", "\\---", "\\ --", "\\ \\ "
  };
  size_t k;

  /* ignore the indentation */
  s += strspn(s, " ");

  for (k = 0; k < sizeof(markers) / sizeof(markers[0]); k++)
  {
    if (strncmp(s, markers[k], 4) == 0)
      return 1;
  }

  return 0;
}


/* State of the screen parser. All four variables are reset by
 * ffmt_screen_parser_init() and updated by ffmt_screen_parse_line(). */
static char *forthfilename_g = NULL;  /* file name printed in error messages */
static int filelinenumber_g = 0;      /* number of lines handed to the parser so far */
static int screenline_g = 0;          /* lines counted for the current screen, 0 = no screen in progress */
static int screenstartline_g = 0;     /* file line number of the first line of the current screen */


/** Reset the screen parser before the first line of a file is parsed.
 *  All line counters are set back to zero and the file name is stored.
 *  @param filename   name of the source file, used for error messages
 *                    (only the pointer is kept, the string is not copied)
 */
void ffmt_screen_parser_init(char *filename)
{
  /* no line read yet, no screen in progress (screen lines count 0 - 15) */
  filelinenumber_g = screenline_g = screenstartline_g = 0;

  forthfilename_g = filename;
}


/** Parses a FORTH line and writes it to the screen buffer.
 *  A screen is 16 lines of 64 characters (1024 bytes), padded with spaces.
 *  The buffer is cleared when the first line of a new screen arrives.
 *  A separator line ("----", "\---", "\ --", each possibly followed by more
 *  dashes) finishes the current screen. Lines starting with "\ \ " are
 *  ignored. If a 17th (or later) non-empty line arrives and the first row
 *  of the screen is blank, the screen is scrolled up by one row to make room.
 *  @param string   a NUL-terminated string read from a file
 *  @param scrbuf   output screen buffer, must provide 1024 bytes
 *  @param used     set to 1 when a non-empty line is stored in the screen;
 *                  not touched otherwise
 *  @return 0 on success, 1 if the screen is finished and
 *          can be further processed, -1 if an error occurred
 *          (a message is printed to stderr)
 */
int ffmt_screen_parse_line(char *string, uint8_t *scrbuf, int *used)
{
  enum { LINE_LEN = 64, MAX_LINES = 16, SCREEN_SIZE = LINE_LEN * MAX_LINES };
  char *s, *d;
  int i, len;

  if (screenline_g == 0)
    memset(scrbuf, 0x20, SCREEN_SIZE);

  filelinenumber_g++;

  /* skip control characters in front of the line */
  s = string;
  while ((*s != 0) && ((uint8_t)*s < 0x20)) s++;

  /* Count the characters of a possible separator: an optional backslash,
   * an optional space after the backslash, then a run of dashes. */
  d = s;
  while (*d == 0x20) d++;
  i = 0;
  if ((d[i] == '\\') || (d[i] == '-')) {
    i++;
    if ((d[i-1] == '\\') && (d[i] == 0x20))
      i++;
    while (d[i] == '-')
      i++;
  }
  if (i >= 4)
  {
    /* found a screen separator */
    if (screenline_g == 0)
      return 0;  // no screen in progress, ignore it

    screenline_g = 0;
    return 1;
  }

  /* Use s (not string) so that leading control characters are ignored
   * here in the same way as in the separator test above. */
  if (ffmt_is_internal_comment_line(s))
    return 0; // ignore this line

  if (screenline_g == 0)
    screenstartline_g = filelinenumber_g;

  screenline_g++;
  len = ffmt_remove_trailing_whitespaces(s, (int)strlen(s));
  if (len > LINE_LEN)
  {
    fprintf(stderr, "ERROR: %s: The line %d is longer than 64 characters\n", forthfilename_g, filelinenumber_g);
    return -1;
  }

  /* empty lines only advance the line counter */
  if (len == 0)
    return 0;

  /* The screen is full: try to gain room by dropping blank rows at the top. */
  while (screenline_g > MAX_LINES)
  {
    for (i=0; (i<LINE_LEN) && (scrbuf[i] == 0x20); i++);
    if (i < LINE_LEN)
    {
      fprintf(stderr, "ERROR: %s: The screen starting at line %d has more than 16 lines.\n", forthfilename_g, screenstartline_g);
      fprintf(stderr, "Hint: Write a line with four or more dashes (----) to separate the screens.\n");
      return -1;
    }

    memmove(scrbuf, scrbuf + LINE_LEN, SCREEN_SIZE - LINE_LEN);
    /* Blank the vacated last row. Without this a stale copy of the old
     * last row would stay in the screen when more than one row is dropped. */
    memset(scrbuf + SCREEN_SIZE - LINE_LEN, 0x20, LINE_LEN);
    screenline_g--;
  }

  /* store the line, padded with spaces to the full row width */
  d = (char*)(scrbuf + (screenline_g - 1) * LINE_LEN);
  memcpy(d, s, (size_t)len);
  memset(d + len, 0x20, (size_t)(LINE_LEN - len));

  *used = 1;

  return 0;
}


/*----------------------------------------------------------------------------*/

/* Word lists used by the line splitter in ffmt_load_and_compress_file().
 * Every list is terminated by an empty string. Words are compared
 * case-insensitively (see wordcmp()). */

/* When the splitter meets one of these words it stops looking
 * for further cut points in the line. */
static char *badwords_g[] = {
  "parse", ".(", ""
};

/* The splitter steps over one of these words together with the
 * token that follows it, without recording a cut point. */
static char *skipwords_g[] = {
  "char", "[char]", "'", "[']", "CREATE", "POSTPONE", ""
};

/* A long line may be broken directly before one of these words and,
 * if the word still fits into the 64 columns, directly after it. */
static char *goodwords_g[] = {
  "dup", "swap", "drop", "tuck", "rot", "over", "pick", "emit",
  ">r", "r>", "2dup", "2drop", "-rot", "0=", "0<>", "0<=", "1+",
  "1-", "@", "<#", "#>", "mod", "abs", "dabs", "s>d", "nip",
  "endof", "cr", "cell", "cells", "cell+", "2drop", "2swap",
  "2dup", "does>", "do", "?do", "loop", "begin", "until",
  "again", "if", "else", "then",
  ""
};

/** Check whether a string starts with a given word (case-insensitive).
 *  The word must be followed in the string by a space or by the end
 *  of the string, so "dup" matches "DUP swap" but not "dupe".
 *  @param word  NUL-terminated word to look for
 *  @param str   NUL-terminated text, compared from its first character
 *  @return 1 if str starts with word, otherwise 0
 */
static int wordcmp(char *word, char *str)
{
  for (; *word != 0; word++, str++)
  {
    /* toupper() requires a value representable as unsigned char (or EOF);
     * a plain char may be negative, so convert before the call. */
    if (toupper((unsigned char)*word) != toupper((unsigned char)*str))
      return 0;
  }

  return ((*str == 0x20) || (*str == 0)) ? 1 : 0;
}

/** Look up the first word of a text in a word list.
 *  Leading spaces of the text are skipped before the comparison.
 *  @param list  array of words, terminated by an empty string
 *  @param str   NUL-terminated text
 *  @return 1 if the text starts with a word of the list, otherwise 0
 *          (also 0 if the text is empty or contains only spaces)
 */
static int find_in_wordlist(char* list[], char *str)
{
  char **entry;

  while (*str == 0x20)
    str++;

  if (*str == 0)
    return 0;

  /* walk the array up to the empty-string terminator */
  for (entry = list; **entry != 0; entry++)
  {
    if (wordcmp(*entry, str))
      return 1; // word is in the list
  }

  return 0; // not in this list
}

/** Look up the first word of a text in a packed word list.
 *  The list is a sequence of NUL-terminated words stored back to back;
 *  an empty word marks its end. Leading spaces of the text are skipped.
 *  @param list  ptr to the packed word list
 *  @param str   NUL-terminated text
 *  @return 1 if the text starts with a word of the list, otherwise 0
 *          (also 0 if the text is empty or contains only spaces)
 */
static int find_in_str_wordlist(char* list, char *str)
{
  char *entry;

  while (*str == 0x20)
    str++;

  if (*str == 0)
    return 0;

  /* step from word to word: each one ends with its own NUL byte */
  for (entry = list; *entry != 0; entry += strlen(entry) + 1)
  {
    if (wordcmp(entry, str))
      return 1; // word is in the list
  }

  return 0;
}

/** Length of the first line of a text.
 *  @param str  NUL-terminated text
 *  @return number of characters before the first '\n',
 *          or before the end of the string if there is no '\n'
 */
static int strnllen(char *str)
{
  return (int)strcspn(str, "\n");
}


/** Remove all trailing spaces (0x20) from a NUL-terminated line, in place. */
static void ffmt_strip_trailing_spaces(char *line)
{
  char *end = line + strlen(line);

  while ((end > line) && (end[-1] == 0x20))
    *--end = 0;
}


/** Load and compress a FORTH source code text file.
 *  Make lines no longer than 64 characters. Remove
 *  comments where necessary and possible.
 *
 *  The work is done in two passes. The first pass processes the file
 *  line by line: comment-only lines and "----" divider lines are dropped,
 *  trailing comments are stripped and lines longer than 64 characters are
 *  broken at suitable words. The second pass joins consecutive lines again
 *  as long as the result fits into 64 characters; a line that starts with
 *  ':' is never appended to the line before it.
 *
 *  @param filename   name of the text file
 *  @param flags  Optional. If not NULL, this variable
 *                will be set to some flags  (see FFMT_xxx defines)
 *                FFMT_RUN    : a definition of the word "run" was found
 *                FFMT_SCRFMT : a "----" divider line was found, i.e. the
 *                              source code is preformatted screen code
 *  @return ptr to "compressed" text data (NUL-terminated, lines separated
 *          by '\n').
 *          NULL is returned when an error occurred.
 *          This function prints also the corresponding
 *          error message to stderr.
 *          The user is responsible for freeing the returned
 *          memory blob by calling the free() - function.
 */
char* ffmt_load_and_compress_file(const char *filename, int *flags)
{
  char *textfile = NULL;
  char *wordlist, *wlp;
  char *in, *out, *nextline, *s;
  char eol = 0;
  long filesize = -1;
  size_t size, region;
  int linectr = 0;
  int thisline = 0;
  int len, i;
  FILE *f;

  if (flags != NULL)
    *flags = 0;

  f = fopen(filename, "rb");
  if (f == NULL)
  {
    fprintf(stderr, "Failed to open source file %s\n", filename);
    return NULL;
  }

  if (fseek(f, 0, SEEK_END) == 0)
  {
    filesize = ftell(f);
    if (fseek(f, 0, SEEK_SET) != 0)
      filesize = -1;
  }
  if (filesize < 0)
  {
    fclose(f);
    fprintf(stderr, "Failed to get size of source file %s\n", filename);
    return NULL;
  }
  size = (size_t)filesize;

  /* One allocation, three disjoint regions of equal size:
   *   [0]          output text   (never longer than the input plus one '\n')
   *   [region]     list of words defined in the file (packed, NUL separated)
   *   [2*region]   raw file content
   * The few spare bytes per region hold the terminators and keep the
   * one-character look-ahead behind the end of the input inside the
   * buffer. calloc() zeroes everything, so all regions start terminated. */
  region = size + 8;
  textfile = (char*)calloc(3, region);
  if (textfile == NULL)
  {
    fclose(f);
    fprintf(stderr, "Failed to allocate memory for source file %s\n", filename);
    return NULL;
  }
  out = textfile;
  wordlist = textfile + region;
  in = textfile + 2 * region;
  wlp = wordlist;

  if (fread(in, 1, size, f) != size)
  {
    free(textfile);
    fclose(f);
    fprintf(stderr, "Failed to read source file %s\n", filename);
    return NULL;
  }
  fclose(f);
  in[size] = 0;

  /* compress the file (remove unnecessary comments and white spaces, make lines no longer than 64 characters) */

  while (*in != 0)
  {
    /* extract one line */
    thisline = ++linectr;
    nextline = in;
    if (eol == 0)
    {
      /* line ending not known yet: stop at CR or LF and remember which one came first */
      while ((*nextline != 0) && (*nextline != '\r') && (*nextline != '\n')) nextline++;
      eol = *nextline;
    }
    else
    {
      while ((*nextline != 0) && (*nextline != eol)) nextline++;
    }

    /* terminate the line; a two-character line ending (CR LF or LF CR) is consumed as a whole */
    if ((nextline[0] == eol) && (nextline[1] == ((eol == '\r') ? '\n' : '\r')))
    {
      *nextline = 0;
      nextline+=2;
    }
    else
    if (*nextline != 0)
      *nextline++ = 0;

    /* replace control characters with spaces */
    s = in;
    while (*s != 0)
    {
      if ((uint8_t)*s < 0x20) *s = 0x20;
      s++;
    }

    /* remove spaces from the beginning of the line */
    while (*in == 0x20) in++;

    /* remove lines that contain only a comment */
    s = in;
    if ((s[0] == '\\') && ((s[1] == 0x20) || (s[1] == 0)))
      goto skipline;

    /* remove a dividing line that separates screens from the file */
    /* NOTE(review): the "\ " alternative below can never match, because
     * lines starting with "\ " were already dropped by the test above.
     * Only "----" lines set FFMT_SCRFMT - confirm that this is intended. */
    if ((((s[0] == '\\') && (s[1] == ' ') && (s[4] == ']')) || ((s[0] == '-') && (s[1] == '-'))) &&
        ((s[2] == '-') && (s[3] == '-')))
    {
      if (flags != NULL)
        *flags |= FFMT_SCRFMT;
      goto skipline;
    }

    /* remove all spaces from the end of the line */
    ffmt_strip_trailing_spaces(in);

    /* Check if the line contains a new word definition. If yes, save the new word. */
    if (wordcmp(":", in))
    {
      /* start right behind the ':' so that a line consisting of a single ':' is not overrun */
      s = in + 1;
      while (*s == ' ') s++;
      if (*s != 0)
      {
        if (wordcmp("run", s))
        {
          if (flags != NULL)
            *flags |= FFMT_RUN;
        }
        while ((*s != 0) && (*s != 0x20))
        {
          *wlp++ = *s++;
        }
        *wlp++ = 0;
        *wlp = 0;  /* empty word = end of list */
      }
    }

    len = (int)strlen(in);

    /* check if there is a ( ) comment at the end of the line that can be removed */
    if ((len > 1) && (in[len-2] == 0x20) && (in[len-1] == ')'))
    {
      len -= 2;
      while (len > 0)
      {
        len--;
        if ((in[len] == 0x20) && (in[len+1] == '(') && (in[len+2] == 0x20))
        {
          in[len] = 0;
          break;
        }
        if ((len == 0) && (in[len] == '(') && (in[len+1] == 0x20))
        {
          in[len] = 0;
          break;
        }
      }
    }

    /* remove all spaces from the end of the line */
    ffmt_strip_trailing_spaces(in);

    /* remove a \ comment from the end of the line completely */
    /* The scan runs backwards and gives up at the first double quote, so a
     * backslash inside a string is never treated as a comment.
     * NOTE(review): a comment that contains a double quote, or a second
     * backslash comment further left, is therefore kept; the joining pass
     * at the end may then append the next line to it - verify. */
    len = (int)strlen(in);
    while (len > 0)
    {
      len--;
      if (in[len] == 0x22)
        break;
      if ((in[len] == 0x20) && (in[len+1] == '\\') && ((in[len+2] == 0x20) || (in[len+2] == 0)))
      {
        while ((len > 0) && (in[len] == 0x20)) len--;
        in[len+1] = 0;
        break;
      }
    }

    /* remove all spaces from beginning of the line */
    while (*in == 0x20) in++;

    /* remove a ( -- ) -comment behind a new word definition */
    if ((in[0] == ':') && (in[1] == ' '))
    {
      i = 2;
      while (in[i] == 0x20) i++;                      /* spaces before the name */
      while ((in[i] != 0) && (in[i] != 0x20)) i++;    /* the name itself */
      while (in[i] == 0x20) i++;                      /* spaces behind the name */
      if (in[i] == '(')
      {
        s = in+i;
        while ((in[i] != 0) && (in[i] != ')')) i++;
        if (in[i] == ')') i++;
        while (in[i] == 0x20) i++;
        /* move the rest of the line over the comment */
        while (in[i] != 0)
        {
          *s++ = in[i++];
        }
        *s = 0;
      }
    }

    /* if the line is still too long, try to break it into multiple lines */
    while (strlen(in) > 64)
    {
      /* find the "cut point" that is closest to the 64 character mark */
      i = 0;
      len = 0;
      do
      {
        while (in[i] == 0x20) i++;
        if ((in[i] == ':') && (in[i+1] == 0x20))
        {
          /* step over ": name" in one go */
          i++;
          while (in[i] == 0x20) i++;
          while ((in[i] != 0) && (in[i] != 0x20)) i++;
        }
        if (i >= 64)
          break;

        if (find_in_wordlist(goodwords_g, in+i) || find_in_str_wordlist(wordlist, in+i))
        {
          /* cutting in front of this word is possible ... */
          len = i;
          while (in[i] == 0x20) i++;
          while ((in[i] != 0) && (in[i] != 0x20)) i++;
          /* ... and behind it too, if the word still fits */
          if ((i < 64) && (in[i] == 0x20))
            len = i;
        }
        else
        {
          while (in[i] == 0x20) i++;

          if (find_in_wordlist(badwords_g, in+i))
            break; // quit!!

          if (find_in_wordlist(skipwords_g, in+i))
          {
            /* skip the word and the token that follows it */
            while ((in[i] != 0) && (in[i] != 0x20)) i++;
            while (in[i] == 0x20) i++;
            while ((in[i] != 0) && (in[i] != 0x20)) i++;
          }
          else
          {
            while ((in[i] != 0) && (in[i] != 0x20))
            {
              if (in[i] == 0x22)
              {
                /* a word containing a double quote: stop the search */
                i = 100; // quit!!
                break;
              }
              i++;
            }
          }
        }
      }
      while ((i < 64) && (in[i] != 0));

      // copy partial line
      /* Any cut point > 0 must be used. Skipping a cut at column 1 would
       * leave the line unchanged and this loop would never terminate. */
      if (len > 0)
      {
        while ((len > 0) && (in[len-1] == 0x20)) len--;
        memcpy(out, in, (size_t)len);
        out += len;
        *out++ = '\n';
        in += len;
      }

      /* remove all spaces from beginning of the line */
      while (*in == 0x20) in++;

      if ((len == 0) && (strlen(in) > 64))
      {
        /* no cut point found */
        free(textfile);
        fprintf(stderr, "ERROR: Line %d is longer than 64 characters!\n", thisline);
        return NULL;
      }
    }

    /* check if line is empty, if not, take it (it has at most 64 characters here) */
    if (*in != 0)
    {
      len = (int)strlen(in);
      memcpy(out, in, (size_t)len);
      out += len;
      *out++ = '\n';
    }
skipline:
    in = nextline;
  }
  *out = 0;

  /* now compress the text, put as many characters as possible into a line */
  out = textfile;
  len = strnllen(out);
  out += len;
  s = out;
  if (*s != 0) s++;

  // out = ptr to the '\n' that ends the prev. line
  // len = length of prev. line
  // s   = ptr to the current line
  while (*s != 0)
  {
    i = strnllen(s);
    if ((*s != ':') && ((len + i + 1) <= 64))
    {
      /* join: the line break in front of the current line becomes a space */
      *out = 0x20;
      len += i + 1;
    }
    else
    {
      /* keep the line break, the current line starts a new output line */
      len = i;
    }
    s += i;
    out = s;
    if (*s != 0) s++;
  }

  return textfile;
}
