/* This is part of the NEWTRACK eyetracking software, (c) 2004 by  */
/* Eric Auer. NEWTRACK is free software; you can redistribute it   */
/* and modify it under the terms of the GNU General Public License */
/* as published by the Free Software Foundation; either version 2  */
/* of the License, or (at your option) any later version.          */
/*     NEWTRACK is distributed in the hope that it will be useful, */
/*     but WITHOUT ANY WARRANTY; without even the implied warranty */
/*     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     */
/*     See the GNU General Public License for more details.        */
/* You should have received a copy of the GNU General Public       */
/* License (license.txt) along with this program; if not, check    */
/* www.gnu.org or write to the Free Software Foundation, Inc., 59  */
/* Temple Place, Suite 330, Boston, MA  02111-1307 USA.            */

/* parsing inside a huge multiline text string */
/* UPDATE 4/2005: Convert some ISO 8859-1 (Latin 1) charset Umlauts */
/* to their DOS (codepage 437) counterparts on the fly. Note that   */
/* there is no conversion back to ISO in the created log files.     */

#include "tracker.h"	/* include all needed headers */

int get_word(char * str, char * word, char ** afterword, int skipcomments)
{   /* Copies the first word found in str into word (NUL terminated) and */
    /* returns its length. Leading spaces / tabs are skipped; a word ends */
    /* at space, tab, linebreak, end of string or after 70 chars.         */
    /*                                                                    */
    /* str          text to parse (NUL terminated). If NULL, word is set  */
    /*              to "" and 0 is returned.                              */
    /* word         receives the word; needs room for 70 chars plus the   */
    /*              terminating NUL. If NULL, 0 is returned at once.      */
    /* afterword    if not NULL, receives the position where parsing has  */
    /*              to continue. Left untouched if word or str is NULL.   */
    /* skipcomments if nonzero, ';' or '#' start a comment which lasts    */
    /*              until (and including) the next linebreak.             */
    /*                                                                    */
    /* A return value of 0 (empty word) is used for: str starting with a  */
    /* linebreak (which is then skipped), a comment without a word before */
    /* it, and the end of the string.                                     */
    /*                                                                    */
    /* Backslash handling: \n becomes a newline char and \_ becomes       */
    /* INVSPACE; both end the word. Backslash followed by a linebreak     */
    /* removes the backslash, ends the word and skips the linebreak and   */
    /* following whitespace. For any other \X, the backslash stays in the */
    /* word and X is copied without the ISO to DOS translation.           */
  const int maxchars = 70;	/* most chars ever copied into word */
  int wordlen = 0;
  char lastchar = 0;		/* previously consumed char of this word */

  if (word == NULL)
      return wordlen;

  word[wordlen] = '\0';
  if (str == NULL)
      return wordlen;

  if ((str[0] == 13) || (str[0] == 10)) {	/* special: hit new line */
      while ((str[0] == 13) || (str[0] == 10))
          str++;				/* skip linebreak marker(s) */
      if (afterword != NULL)			/* caller may not want it */
          *afterword = str;			/* do not hit it again! */
      return wordlen;				/* return magic value 0 */
  }

  while ((str[0] == ' ') || (str[0] == '\t'))
      str++;					/* skip leading whitespace */

  while ((wordlen < maxchars) && (str[0] != 0) &&	/* limit size, check EOF */
         (str[0] != ' ') && (str[0] != '\t')) {	/* whitespace ends the word */

      if ( (skipcomments) && ((str[0] == ';') || (str[0] == '#')) ) {
          /* If word ended with a comment marker, skip over rest of line.   */
          /* You cannot protect a linebreak in a comment from being parsed. */
          str++;		/* skip comment marker itself (!) */
          while ((str[0] != 13) && (str[0] != 10) && (str[0]))
              str++;		/* skip until end of line */
          while ((str[0] == 13) || (str[0] == 10))
              str++;		/* skip linebreak marker(s) */
          break;		/* we hit a comment */
      }

      if ( (str[0] == 13) || (str[0] == 10) ) {	/* hit a linebreak? */
          if (lastchar == '\\') {	/* protected from parsing?  */
              /* lastchar is only a backslash after it has been stored, */
              /* so wordlen is at least 1 here.                         */
              wordlen--;
              word[wordlen] = '\0';	/* backslash becomes word end */
              while ( (str[0] == 13) || (str[0] == 10) )
                  str++;		/* skip over the linebreak(s) */
              while ( (str[0] == ' ') || (str[0] == '\t') )
                  str++;		/* skip over whitespace */
          }
          /* Protected: word ends here, but the line does not.          */
          /* Unprotected: linebreak stays in str for the next call.     */
          break;
      } /* linebreak processing */

      if (lastchar == '\\') {		/* special escape sequence? */
          lastchar = *str;
          switch (lastchar) {
              case 'n': word[wordlen-1] = '\n';
                  lastchar = 0;		/* word ends here */
                  str++;		/* do not process n again */
                  break; /* transform backslash into \n */
              case '_': word[wordlen-1] = INVSPACE;
                  lastchar = 0;		/* (logical) word ends here */
                  str++;		/* do not process _ again */
                  break; /* transform backslash into invisible area break */
              default: word[wordlen] = lastchar;	/* use raw char */
                  wordlen++;		/* (backslash is kept in word) */
                  break;		/* do nothing special */
          } /* switch */
          if (!lastchar)
              break;			/* leave this word */
      } else {
          unsigned char lc;
          lastchar = *str;
          lc = (unsigned char)(lastchar);	/* Must not be signed char!  */
          switch (lc) {			/* new 4/2005: partial ISO to DOS,   */
             /* ONLY German umlauts handled yet, no other accented chars!    */
             case 0xe4: lc = 0x84; break;	/* &auml; (Epsilon)          */
             case 0xf6: lc = 0x94; break;	/* &ouml; (-:- style "div")  */
             case 0xfc: lc = 0x81; break;	/* &uuml; (superscript n)    */
             case 0xc4: lc = 0x8e; break;	/* &Auml; (upper half block) */
             case 0xd6: lc = 0x99; break;	/* &Ouml; (horizontal line)  */
             case 0xdc: lc = 0x9a; break;	/* &Uuml; (!!~ frame corner) */
             case 0xdf: lc = 0xe1; break;	/* &szlig; (low half block)  */
             default: break;			/* all other chars: as is    */
             /* in () is the DOS char which would be shown w/o translation.  */
          } /* switch */		/* to bypass this for char X, use \X */
          lastchar = (char)lc;		/* Convert back to char...   */
          word[wordlen] = lastchar;
          wordlen++;
      } /* no escape sequence */
      str++;

  } /* while in word */

  if (afterword != NULL)
      *afterword = str;
  word[wordlen] = '\0';
  return wordlen;

} /* get_word */

