/*   -*- coding: utf-8 -*-  */
#include "Utf8Text.h"
#include "Except.h"

/*
 * Allocate a new split engine and give every field its default value:
 * no transliteration mode (0), no text to parse, no minimum word length (0),
 * a fresh converter, no stop-word dictionary and an empty path buffer.
 *
 * Returns the new engine, or NULL when the engine itself cannot be allocated.
 * NOTE(review): the results of Utf8ConverterCreate() and NewBuffer() are
 * stored unchecked; their failure behaviour is not visible from this file.
 */
Utf8TextSplitEngine *Utf8TextSplitEngineCreate()
{
  Utf8TextSplitEngine *newEngine;

  newEngine=malloc(sizeof *newEngine);
  /* Do not write through a NULL pointer when the allocation fails. */
  if (!newEngine) return NULL;

  newEngine->transMode=0;
  newEngine->toParse=NULL;
  newEngine->minLenght=0;
  newEngine->converter=Utf8ConverterCreate();
  newEngine->stopWordDict=NULL;
  newEngine->bufPath=NewBuffer();
  return newEngine;
}

/*
 * Point the engine at the text to be split.
 * Only the pointer is stored: the string is not copied here.
 * Returns the engine so that setter calls can be chained.
 */
Utf8TextSplitEngine *Utf8SplitEngineSetText(Utf8TextSplitEngine *engine, char *str)
{
  Utf8TextSplitEngine *self = engine;

  self->toParse = str;
  return self;
}

/*
 * Record the minimum length a word must have to be returned by the engine.
 * A value of 0 (the default set at creation) disables the length filter.
 * Returns the engine so that setter calls can be chained.
 */
Utf8TextSplitEngine *Utf8SplitEngineSetMinLenght(Utf8TextSplitEngine *engine, int lenght)
{
  Utf8TextSplitEngine *self = engine;

  self->minLenght = lenght;
  return self;
}

/*
 * Record the conversion mode applied to each extracted word.
 * A mode of 0 (the default set at creation) means words are returned
 * without going through the converter.
 * Returns the engine so that setter calls can be chained.
 */
Utf8TextSplitEngine *Utf8SplitEngineSetTransMode(Utf8TextSplitEngine *engine, int mode)
{
  Utf8TextSplitEngine *self = engine;

  self->transMode = mode;
  return self;
}

/*
 * Load the stop-word dictionary used to filter words.
 *
 * envVar   : name of an environment variable holding a base directory, or
 *            NULL to use filePath as given.
 * filePath : path of the dictionary file; when envVar is given the file is
 *            looked up as "<value of envVar>/<filePath>".
 *
 * The dictionary returned by StrDictFromFile() is stored in the engine.
 * When envVar is given but not set in the environment, the error is reported
 * through ExceptSetError() and the engine is returned with its dictionary
 * left unchanged.
 * NOTE(review): any dictionary already attached to the engine is overwritten
 * without being released here - confirm whether that is intended.
 *
 * Returns the engine so that setter calls can be chained.
 */
Utf8TextSplitEngine *Utf8SplitEngineSetStopWordDict(Utf8TextSplitEngine *engine, char *envVar, char *filePath)
{
  char *targetPath;
  if (envVar)
    {
      char *var;
      var=getenv(envVar);
      if (!var)
	{
	  ExceptSetError("Utf8SplitEngine", "ND","Environement variable", envVar, "is not défined",2);
	  /* Whether ExceptSetError() returns is not visible from here; if it
	     does, stop now rather than hand a NULL string to BufferStrcpy(). */
	  return engine;
	}
      BufferStrcpy(engine->bufPath, var);
      BufferStrcat(engine->bufPath, "/");
      BufferStrcat(engine->bufPath, filePath);
      targetPath=BufferString(engine->bufPath);
    }
  else
    {
      targetPath=filePath;
    }
  engine->stopWordDict =StrDictFromFile(targetPath);
  return engine;
}

/*
 * Return the next acceptable word from the engine's text, or NULL when
 * Utf8GetAlphaString() finds no further word.
 *
 * Each call advances engine->toParse past the words it consumed.  A word is
 * skipped when:
 *   - minLenght is positive and Utf8Length(word) is smaller than it (the
 *     length is measured on the word before any conversion), or
 *   - a stop-word dictionary is set and contains the (converted) word.
 *
 * When transMode is non-zero the word is run through the engine's converter
 * and the returned pointer is the string of the converter's target buffer;
 * otherwise the pointer obtained from Utf8GetAlphaString() is returned as is.
 * NOTE(review): the lifetime of the returned string depends on those helpers
 * and is presumably only valid until the next call - confirm.
 *
 * Rejected words are skipped with a loop rather than a recursive call, so
 * stack use does not grow with the number of consecutive rejected words.
 */
char *Utf8SplitGetStringItem(Utf8TextSplitEngine *engine)
{
  char *word;
  char *targetWord;

  for (;;)
    {
      word=Utf8GetAlphaString(engine->toParse, &engine->toParse, 0);
      if (!word) return NULL;

      if (engine->minLenght>0)
	{
	  int len;
	  len=Utf8Length(word);
	  if (len<engine->minLenght) continue;   /* too short: try next word */
	}

      if (engine->transMode!=0)
	{
	  Utf8ConverterSetString(engine->converter, word);
	  /* Convert character by character until the converter reports the end. */
	  while (Utf8AlphaCharConvert(engine->converter, engine->transMode))
	    {
	    }
	  targetWord=BufferString(engine->converter->targetBuf);
	}
      else targetWord=word;

      if (engine->stopWordDict)
	{
	  /* Stop word: try next word. */
	  if (StrDictSearch(engine->stopWordDict, targetWord)) continue;
	}
      return targetWord;
    }
}

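/*
  NOTE(review): the two SXML helpers below are commented out and are not
  compiled.  As written they would not build: the switch tests an undeclared
  `x`, the SXML_NODE_ELEMENT / SXML_NODE_TEXT labels lack the `case` keyword,
  and the second function misspells Utf8SplitSxmlTextAppendAlpha and
  resultText.  Fix these before re-enabling the code.
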
SxmlNode *Utf8SplitSxmlTextAppendAlpha(SxmlNode *resultText, SxmlNode *sxmlText, int mode)
{
  SxmlNode *nodeText;
  char *toParse;
  char *word;
  switch(SxmlNodeType(x))
    {
    default: 
      return NULL;

    SXML_NODE_ELEMENT:
      SxmlReset(sxmlText);
      while((nodeText=SxmlNextNode(sxmlText)))
	{
	  Utf8SplitSxmlTextAppendAlpha(resultText, nodeText, mode);
	}
      return resultText;
    SXML_NODE_TEXT:
      toParse=SxmlNodeValue(sxmlText);
      while ((word=Utf8GetAlphaString(toParse, &toParse, mode)))
	{
	  SxmlAppendChild(resultText, SxmlLeafCreate("n", word));
	}
    }
}

SxmlNode *Utf8SplitSxmlTextAlpha(SxmlNode *sxmlText, int mode)
{
  SxmlNode *resultText;
  if (!sxmlText)return NULL;
  resultText=SxmlElementCreate("text");
  Utf8SplitSxmlTextAappendAlpha(resultText, sxmlText, mode);
  if (SxmlFirstChild(resulText))return resultText;
  else
    {
      SxmlFree(resultText);
      return NULL;
    }
}

*/
