#include "gd.h"

/* Reduced stand-in for the real decoder: only the returned byte count
 * matters here, so nothing is ever stored through chPtr.
 *
 * str   - points at the first byte of the character to measure; the
 *         caller is expected to pass a position that is not EOS.
 * chPtr - accepted for interface compatibility, never written.
 *
 * Returns how many bytes the character at str occupies:
 *   - the full length, including the closing ';', of a numeric entity
 *     of the form "&#<decimal digits>;" or "&#x<hex digits>;";
 *   - 2 or 3 for a UTF-8 lead byte followed by the required number of
 *     continuation bytes;
 *   - 1 in every other case.
 * Every byte that is skipped has been checked to be a digit, a ';' or a
 * continuation byte, so the count never reaches past a terminating EOS.
 */
static int
gdTcl_UtfToUniChar (char *str, Tcl_UniChar * chPtr)
{
  const unsigned char *s = (const unsigned char *) str;
  unsigned char lead;

  /* "&#" introduces a numeric character reference. */
  if (s[0] == '&' && s[1] == '#') {
    int pos;
    int value = 0;       /* decoded code point; not used by this reduced version */
    int terminated = 0;  /* set when the digit run stops on a ';' */

    if (s[2] == 'x' || s[2] == 'X') {
      /* Hexadecimal form: digits occupy positions 3..7 at most. */
      for (pos = 3; pos < 8; pos++) {
        int c = s[pos];
        int digit;

        if (c >= '0' && c <= '9') {
          digit = c - '0';
        }
        else if (c >= 'a' && c <= 'f') {
          digit = c - 'a' + 10;
        }
        else if (c >= 'A' && c <= 'F') {
          digit = c - 'A' + 10;
        }
        else {
          terminated = (c == ';');
          break;
        }
        value = value * 16 + digit;
      }
    }
    else {
      /* Decimal form: digits occupy positions 2..7 at most. */
      for (pos = 2; pos < 8; pos++) {
        int c = s[pos];

        if (c < '0' || c > '9') {
          terminated = (c == ';');
          break;
        }
        value = value * 10 + (c - '0');
      }
    }

    /* pos indexes the ';', so the entity spans pos + 1 bytes.  When the
     * digit run is too long or ends on anything else, fall through and
     * measure the '&' as an ordinary byte below.
     */
    if (terminated) {
      return pos + 1;
    }
  }

  /* Plain UTF-8: accept a multi-byte sequence only when every expected
   * continuation byte (10xxxxxx) is actually present.
   */
  lead = s[0];
  if (lead >= 0xC0 && lead < 0xE0) {
    if ((s[1] & 0xC0) == 0x80) {
      return 2;
    }
  }
  else if (lead >= 0xE0 && lead < 0xF0) {
    if ((s[1] & 0xC0) == 0x80 && (s[2] & 0xC0) == 0x80) {
      return 3;
    }
  }

  /* ASCII, stray continuation bytes, malformed sequences and lead bytes
   * of 0xF0 and above all count as a single byte.
   */
  return 1;
}



/* Heavily reduced version of the string renderer: everything unrelated
 * to walking over "string" has been stripped out.
 *
 * The function picks an arbitrary encoding via nondet_int(), returns
 * immediately unless it lies in 0..2, and otherwise steps through
 * "string" one character at a time until it reaches EOS. How far each
 * step advances depends on the encoding.
 */
void gdImageStringFTEx (char *string) {
  int pos = 0;      /* index of the next unread byte in string */
  int charset;
  int glyphs;       /* iteration counter; not read anywhere in this version */
  int ch;

  charset = nondet_int();
  if (charset < 0 || charset > 2)
    return;

  /* OK */
__TESTCLAIM_1:
  for (glyphs = 0; string[pos] != EOS; glyphs++)
    {
      /* Hold the current byte in an int so the wider decoders below
       * have room to work with it.
       */
      ch = string[pos];

      /* Carriage returns and newlines are single bytes in every encoding. */
      if (ch == '\r' || ch == '\n')
        {
          pos++;
          continue;
        }

      if (charset == gdFTEX_Unicode)
        {
          /* The helper reports how many bytes the character occupies. */
          pos += gdTcl_UtfToUniChar (string + pos, &ch);
        }
      else if (charset == gdFTEX_Shift_JIS)
        {
          unsigned char lead = (unsigned char) string[pos];

          /* Bytes in 0xA1..0xFE are treated as the first half of a
           * two-byte character: step over the lead byte first.
           */
          if (lead >= 0xA1 && lead <= 0xFE)
            {
              pos++;
            }
          /* Advance once more, but never across the terminator. */
          if (string[pos] != EOS)
            {
              pos++;
            }
        }
      else if (charset == gdFTEX_Big5)
        {
          ch = string[pos] & 0xFF;	/* mask so the value is not sign-extended */
          pos++;
          /* A lead byte of 161 or above takes a trail byte with it,
           * unless the string ends right after the lead byte.
           */
          if (ch >= 161)
            {
              if (string[pos] != EOS)
                {
                  pos++;
                }
            }
        }
    }
}

/* Driver: hands an INSZ-byte buffer to gdImageStringFTEx. */
int main ()
{
  char text [INSZ];

  /* Only the final byte is set, which guarantees a terminator; every
   * other byte of the buffer is left unassigned.
   */
  text [INSZ - 1] = EOS;

  gdImageStringFTEx (text);

  return 0;
}

