/*   -*- coding: utf-8 -*-  */

#include "Utf8Converter.h"
#include "Except.h"
#include <ctype.h>
#include <stdlib.h>

Utf8Converter *Utf8ConverterCreate()
{
  Utf8Converter *newConverter;
   if ((newConverter = (Utf8Converter *)malloc(sizeof(Utf8Converter))))
      {
	newConverter->targetBuf=BufferCreate (100,100);
	return newConverter;
      }
   else
     {
       ExceptSetError("Utf8Converter","MA", "memory allocation failed","","",2);
       return NULL;
     }
}

/*
 * Attach the string s1 to the converter c1 and start a new conversion:
 * the target buffer is passed to BufferReset(), the return code is
 * cleared and the read position is placed on the first byte of s1.
 *
 * s1 is not copied; the converter only keeps a pointer to it.
 *
 * Returns c1.
 */
Utf8Converter *Utf8ConverterSetString(Utf8Converter *c1, char *s1)
{
  BufferReset(c1->targetBuf);
  c1->returnCode = '\0';
  c1->currentPosition = s1;

  return c1;
}


/*
 * Convert the single character found at cv1->currentPosition and append
 * the result to cv1->targetBuf.
 *
 * mode selects the output form:
 *    'a' : alpha lower case
 *    'A' : alpha upper case
 *    's' : sort lower case
 *    'S' : sort upper case
 *    'C' : Camel (sort) first letter
 * For ASCII letters, 'a' and 's' append the lower-case letter and every
 * other mode appends the upper-case letter.  ASCII digits are appended
 * unchanged.  For the two-byte sequences listed below the output is
 * produced by the Utf8VoyDiac / Utf8Lig macros, which are defined outside
 * this file (NOTE(review): they are expected to use `cv1` and `mode` and
 * to end with a `break` out of the inner switch -- confirm before
 * restructuring the call sites).
 *
 * On success the read position is advanced past the character (1 or 2
 * bytes) and BufferString(cv1->targetBuf) is returned.
 *
 * On failure NULL is returned, the read position is NOT moved and
 * cv1->returnCode tells why:
 *    0   : end of string
 *    'n' : the character is not a handled alphanumeric character
 *    'X' : the string ends in the middle of a multi-byte sequence
 */
char *Utf8AlphaCharConvert (Utf8Converter *cv1, int mode)
{
  unsigned char c1;
  unsigned char c2;

  if (!(c1=cv1->currentPosition[0]))
    {
      cv1->returnCode=0;
      return NULL;
    }

  if ((c1 & 0x80)==0)   /* ascii */
    {
      if (isalpha(c1))
        {
          switch(mode)
            {
            case 'a': case 's':
              BufferCharCat(cv1->targetBuf,tolower(c1));
              break;
            default:
              BufferCharCat(cv1->targetBuf,toupper(c1));
            }
        }
      else if (isdigit(c1))
        {
          BufferCharCat(cv1->targetBuf,c1);
        }
      else
        {
          /* Punctuation such as '-' and '.' is deliberately left to the
             caller, which decides what to do with separators. */
          cv1->returnCode='n';   /* non alpha */
          return NULL;
        }
      cv1->currentPosition++;
      return BufferString(cv1->targetBuf);
    }

  if (!(c2=cv1->currentPosition[1]))
    {
      cv1->returnCode='X';   /* bad formed at end */
      return NULL;
    }

  /* Two-byte sequences: c1 is the lead byte, c2 the continuation byte.
     In each `case` pair the two bytes are the upper- and lower-case
     encodings of the same letter. */
  switch(c1)
    {
    case 0xC2:
      /* C2 8C / C2 9C encode U+008C / U+009C; presumably accepted because
         they are what the Windows-1252 bytes for the oe ligature become
         when the text was converted as Latin-1 -- TODO confirm. */
      switch(c2)
        {
        case 0x9C: case 0x8C: Utf8Lig ("œ","Œ","Oe","oe","OE");
        default:
          cv1->returnCode='n';   /* non alpha */
          return NULL;
        }
      cv1->currentPosition+=2;
      return BufferString(cv1->targetBuf);
    case 0xC3:
      /* NOTE(review): several Latin-1 letters (e.g. the ones at C3 84/A4,
         C3 91/B1, C3 92/B2, C3 93/B3) have no entry here and are reported
         as 'n' -- confirm whether that is intended. */
      switch(c2)
        {
        case 0x80: case 0xA0: Utf8VoyDiac("à","À","a","A");
        case 0x81: case 0xA1: Utf8VoyDiac("á","Á","a","A");
        case 0x82: case 0xA2: Utf8VoyDiac("â","Â","a","A");
        case 0x83: case 0xA3: Utf8VoyDiac("ã","Ã","a","A");
        case 0x87: case 0xA7: Utf8VoyDiac("ç","Ç","c","C");
        case 0x88: case 0xA8: Utf8VoyDiac("è","È","e","E");
        case 0x89: case 0xA9: Utf8VoyDiac("é","É","e","E");
        case 0x8A: case 0xAA: Utf8VoyDiac("ê","Ê","e","E");
        case 0x8B: case 0xAB: Utf8VoyDiac("ë","Ë","e","E");
        case 0x8C: case 0xAC: Utf8VoyDiac("ì","Ì","i","I");
        case 0x8D: case 0xAD: Utf8VoyDiac("í","Í","i","I");
        case 0x8E: case 0xAE: Utf8VoyDiac("î","Î","i","I");
        case 0x8F: case 0xAF: Utf8VoyDiac("ï","Ï","i","I");
        case 0x94: case 0xB4: Utf8VoyDiac("ô","Ô","o","O");
        case 0x96: case 0xB6: Utf8VoyDiac("ö","Ö","o","O");
        case 0x99: case 0xB9: Utf8VoyDiac("ù","Ù","u","U");
        case 0x9A: case 0xBA: Utf8VoyDiac("ú","Ú","u","U");
        case 0x9B: case 0xBB: Utf8VoyDiac("û","Û","u","U");
        case 0x9C: case 0xBC: Utf8VoyDiac("ü","Ü","u","U");
        default:
          cv1->returnCode='n';   /* non alpha */
          return NULL;
        }
      cv1->currentPosition+=2;
      return BufferString(cv1->targetBuf);
    case 0xC4:
      /* U+010C / U+010D (C with caron) are encoded C4 8C / C4 8D.  They
         used to be listed under lead byte 0xC5 with the second bytes
         0x4C / 0x4D, which are ASCII letters and can never be
         continuation bytes, so the real characters were never matched. */
      switch(c2)
        {
        case 0x8C: case 0x8D: Utf8VoyDiac("č","Č","c","C");
        default:
          cv1->returnCode='n';   /* non alpha */
          return NULL;
        }
      cv1->currentPosition+=2;
      return BufferString(cv1->targetBuf);
    case 0xC5:
      switch(c2)
        {
        case 0x81: case 0x82: Utf8VoyDiac("ł","Ł","l","L");
        case 0x92: case 0x93: Utf8Lig ("œ","Œ","Oe","oe","OE");
        case 0xA0: case 0xA1: Utf8VoyDiac("š","Š","s","S");
        case 0xAE: case 0xAF: Utf8VoyDiac("ů","Ů","u","U");
        case 0xBD: case 0xBE: Utf8VoyDiac("ž","Ž","z","Z");
        default:
          cv1->returnCode='n';   /* non alpha */
          return NULL;
        }
      cv1->currentPosition+=2;
      return BufferString(cv1->targetBuf);
    default:
      cv1->returnCode='n';   /* non alpha */
      return NULL;
    }
}

/* Converter shared by Utf8StringToLower() and Utf8StringToSort(): it is
   created by whichever of them is called first and reused by every later
   call.  Nothing in this file frees it. */
Utf8Converter *Utf8StringConverter=NULL;

/*
 * Convert the rest of the string attached to cv1 in mode 'a' (alpha lower
 * case), appending the result to cv1->targetBuf.
 *
 * Characters accepted by Utf8AlphaCharConvert() are converted one by one.
 * Between them:
 *    - a space or a '-' is copied unchanged;
 *    - any other rejected character is skipped with
 *      Utf8ConverterJumpToAlpha() and replaced by a single space.
 *
 * The conversion stops at the end of the string, when
 * Utf8ConverterJumpToAlpha() fails, or when the string ends in the middle
 * of a multi-byte sequence (return code 'X').  The last two cases used to
 * re-enter the function without advancing the read position; they now
 * end the conversion, as Utf8ConvertStringToSort() does.
 *
 * Implemented as a loop so that the stack depth does not grow with the
 * number of separators in the input.
 *
 * Returns BufferString(cv1->targetBuf).
 */
char *Utf8ConvertStringToLower(Utf8Converter *cv1)
{
  char nextChar;

  for (;;)
    {
      while(Utf8AlphaCharConvert(cv1,'a')) continue;

      if (!(nextChar=cv1->currentPosition[0]))
        {
          return BufferString(cv1->targetBuf);
        }

      if (cv1->returnCode!='n')
        {
          /* 'X' (truncated sequence) or an unknown code: the read
             position cannot advance, so keep what was converted. */
          return BufferString(cv1->targetBuf);
        }

      switch(nextChar)
        {
        case ' ':
        case '-':
          BufferCharCat(cv1->targetBuf,nextChar);
          cv1->currentPosition++;
          break;
        default:
          if (!Utf8ConverterJumpToAlpha(cv1))
            {
              return BufferString(cv1->targetBuf);
            }
          BufferStrcat(cv1->targetBuf," ");
          break;
        }
    }
}


/*
 * Convert the rest of the string attached to cv1 in mode 's' (sort lower
 * case), appending the result to cv1->targetBuf.
 *
 * Characters accepted by Utf8AlphaCharConvert() are converted one by one.
 * Between them:
 *    - a space is copied and a '-' is replaced by a space;
 *    - a '.' is dropped;
 *    - any other rejected character is skipped with
 *      Utf8ConverterJumpToAlpha() and replaced by a single space.
 *
 * The conversion stops at the end of the string, when
 * Utf8ConverterJumpToAlpha() fails, or on return code 'X'.
 *
 * Returns BufferString(cv1->targetBuf).
 */
char *Utf8ConvertStringToSort(Utf8Converter *cv1)
{
  for (;;)
    {
      char pending;

      /* Consume every character the converter accepts. */
      while (Utf8AlphaCharConvert(cv1,'s') != NULL)
        {
        }

      pending = cv1->currentPosition[0];
      if (pending == '\0')
        {
          break;
        }

      if (cv1->returnCode == 'n')
        {
          if (pending == ' ' || pending == '-')
            {
              BufferCharCat(cv1->targetBuf,' ');
              cv1->currentPosition++;
              continue;
            }
          if (pending == '.')
            {
              cv1->currentPosition++;
              continue;
            }
          if (Utf8ConverterJumpToAlpha(cv1))
            {
              BufferStrcat(cv1->targetBuf," ");
              continue;
            }
          /* Nothing left to jump to: finish exactly like the 'X' case. */
          break;
        }

      if (cv1->returnCode == 'X')
        {
          break;
        }
      /* Any other return code: try again from the same position. */
    }

  return BufferString(cv1->targetBuf);
}

/*
 * Convert the NUL-terminated UTF-8 string s1 with
 * Utf8ConvertStringToLower(), using the shared converter
 * Utf8StringConverter (created on the first call).
 *
 * Returns the converted string, or NULL when s1 is NULL or the shared
 * converter cannot be created.
 * NOTE(review): the result presumably lives in the shared converter's
 * buffer, which is reset at the start of every call -- copy it if it must
 * outlive the next conversion; confirm against the Buffer module.
 */
char *Utf8StringToLower(char *s1)
{
  if (!s1)
    {
      return NULL;
    }
  if (!Utf8StringConverter)
    {
      Utf8StringConverter=Utf8ConverterCreate();
      if (!Utf8StringConverter)
        {
          /* Creation failed: do not dereference a NULL converter. */
          return NULL;
        }
    }
  Utf8ConverterSetString(Utf8StringConverter, s1);
  return Utf8ConvertStringToLower(Utf8StringConverter);
}

/*
 * Convert the NUL-terminated UTF-8 string s1 with
 * Utf8ConvertStringToSort(), using the shared converter
 * Utf8StringConverter (created on the first call).
 *
 * Returns the converted string, or NULL when s1 is NULL or the shared
 * converter cannot be created.
 * NOTE(review): the result presumably lives in the shared converter's
 * buffer, which is reset at the start of every call -- copy it if it must
 * outlive the next conversion; confirm against the Buffer module.
 */
char *Utf8StringToSort(char *s1)
{
  if (!s1)
    {
      return NULL;
    }
  if (!Utf8StringConverter)
    {
      Utf8StringConverter=Utf8ConverterCreate();
      if (!Utf8StringConverter)
        {
          /* Creation failed: do not dereference a NULL converter. */
          return NULL;
        }
    }
  Utf8ConverterSetString(Utf8StringConverter, s1);
  return Utf8ConvertStringToSort(Utf8StringConverter);
}
