
/****************************************************************************

     Module   : SgmlText
     Commande : SgmlWordSplitUsual
     Fichier  : SgmlWordSplitUsual.c
     Auteur   : Jacques DUCLOY
     Date     : 18/7/94
     Modif    : cf ligne 175 - L. Mirtain oct 95
     $Id: SgmlWordSplitUsual.c,v 1.2 2003/03/17 16:58:15 parmentf Exp $

****************************************************************************/
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "SgmlPath.h"
#include "SgmlInput.h"
#include "SgmlText.h"

/* Text iterator created in main(); analPar() applies the -m and -s options
   to it and main() uses it to extract words from each data string. */
SgmlTextIterator *i1;

/* K&R-style (non-prototype) declaration of atoi. */
int atoi();

/* List of input field paths: created in main(), handed to
   SgmlPathList_GetOpt() for the -i option, iterated for every record. */
SgmlPathList  *listInputField;
/* Word-list node built in main() for the record currently being processed. */
SgmlNode      *listWord;
/* Tag name used to create the word-list node (-l option, default "lw"). */
char          *listTag;
/* Tag name used for each word leaf (-w option, default "w"). */
char          *wordTag;
/* Value of the -m option (default 2, see initPar()). */
int            minLen; 

/* Value of the -O option (default 'l'); main() maps 'l', 'f', 'a', 'b' to
   SgmlAddLast, SgmlAddFirst, SgmlInsertAfter, SgmlInsertBefore. */
static char outputMode;
/* Path iterator compiled from the -o option; NULL when -o was not given. */
static SgmlPathIterator *outputPath=NULL;
/*
 * Write the command synopsis to stderr and terminate the program with
 * exit status 1.  Does not return.
 */
void usage()
{
  static const char synopsis[] =
    "usage: SgmlWordSplitUsual -i inputField [inputField...] [-l listTag] [-m minLen][-o outputField] [-O a|b|f|l] [-s stopWordList ][-w wordTag] \n";

  fputs(synopsis, stderr);
  exit(1);
}

/*
 * Install the default option values.  main() calls this before analPar(),
 * so any option given on the command line overrides these values.
 */
void initPar()
{
  /* 'l' selects SgmlAddLast in main(). */
  outputMode = 'l';

  minLen  = 2;

  /* Default element names: word leaves and the list that holds them. */
  wordTag = "w";
  listTag = "lw";
}

/* Hand-written declarations of the getopt() interface used by analPar()
   (POSIX provides these in <unistd.h>). */
int getopt();
extern char *optarg;
extern int optind;

/*
 * Parse the command-line options and record them in the file-level settings.
 *
 *   -i  input field path(s), delegated to SgmlPathList_GetOpt()
 *   -l  tag name of the generated word list
 *   -m  minimum word length (non-negative decimal integer)
 *   -o  path of the output anchor inside the record
 *   -O  output mode, one of a, b, f, l (only the first character is read)
 *   -s  stop-word table, added to the text iterator i1
 *   -w  tag name of each generated word
 *
 * Any unknown option, a malformed -m value or an unsupported -O mode ends
 * the program through usage().
 *
 * argc, argv: the arguments received by main().
 */
void analPar(int argc, char **argv)
{
  int cod_arg;

  /* POSIX specifies -1 (not EOF) as the end-of-options return value. */
  while ((cod_arg = getopt(argc,argv,"i:l:m:o:O:s:w:")) != -1)
    {
      switch(cod_arg)
        {
        case 'i':
          SgmlPathList_GetOpt(listInputField,argc,argv);
          break;

        case 'l':
          listTag=optarg;
          break;

        case 'm':
          {
            char *end;
            long value;

            /* strtol instead of atoi: atoi silently turns garbage into 0. */
            errno = 0;
            value = strtol(optarg, &end, 10);
            if (end == optarg || *end != '\0' || errno == ERANGE
                || value < 0 || value > INT_MAX)
              {
                usage();
              }
            else
              {
                minLen=(int)value;
                SgmlTextWordSetMinLen(i1,minLen);
              }
          }
          break;

        case 'o':
          outputPath=SgmlPathIteratorCreate(SgmlPathCompile(optarg));
          break;

        case 'O':
          /* Reject modes main() has no case for; the explicit '\0' test is
             needed because strchr() also matches the string terminator. */
          if (optarg[0] == '\0' || strchr("abfl", optarg[0]) == NULL)
            {
              usage();
            }
          else
            {
              outputMode = optarg[0];
            }
          break;

        case 'w':
          wordTag=optarg;
          break;

        case 's':
          SgmlTextAddStopWordTable(i1,optarg);
          break;

        default:
          usage();
          break;
        }
    }
}

/*
 * Program entry point.
 *
 * Sets up the text iterator and the input field list, reads the options,
 * then for every record delivered by SgmlInputNextRecord():
 *   - creates a listTag node and fills it with one wordTag leaf per word
 *     returned by SgmlTextNextWord() for each data string of each input
 *     field;
 *   - when at least one word was found, attaches the list to the record at
 *     the anchor selected by -o (or SgmlLast(SgmlInputRecord) without -o)
 *     using the function selected by outputMode;
 *   - otherwise frees the empty list;
 *   - prints the record.
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
  SgmlTreeIterator *iterString;
  SgmlNode *outputAnchor;

  i1=SgmlTextIteratorCreate();
  SgmlTextSetTransco(i1,'l');
  /* NOTE(review): this initial minimum length (3) differs from the default
     stored in minLen by initPar() (2); only -m brings the two in line.
     Confirm which default is intended. */
  SgmlTextWordSetMinLen(i1,3);

  iterString    =SgmlTreeIteratorCreate();
  listInputField=SgmlPathList_Create();
  initPar();

  analPar(argc,argv);

  while(SgmlInputNextRecord())
    {
      SgmlNode *field;
      char     *str;

      listWord=SgmlCreateMark(listTag);
      SgmlPathList_Init(listInputField ,SgmlInputRecord);

      while((field=SgmlPathList_Next(listInputField)))
        {
          SgmlTreeIteratorInit(iterString,field);
          while ((str= SgmlTreeNextData(iterString)))
            {
              char *word;
              SgmlTextIteratorInit(i1,str);
              while ((word=SgmlTextNextWord(i1)))
                {
                  SgmlAddSon(listWord,SgmlCreateLeaf(wordTag,word));
                }
            }
        }

      if(SgmlFirst(listWord))
        {
          /* Set once the list has been handed over to the record tree. */
          int attached = 0;

          if(outputPath)
            {
              SgmlPathIteratorInit(outputPath, SgmlInputRecord);
              outputAnchor=SgmlPathNext(outputPath);
            }
          else
            {
              outputAnchor=SgmlLast(SgmlInputRecord);
            }

          /* Presumably the anchor lookup yields NULL when the -o path does
             not match this record (TODO confirm); never pass a NULL anchor
             to the attach functions. */
          if(outputAnchor)
            {
              switch(outputMode)
                {
                case 'l':
                  SgmlAddLast(outputAnchor, listWord);
                  attached = 1;
                  break;
                case 'f':
                  SgmlAddFirst(outputAnchor, listWord);
                  attached = 1;
                  break;
                case 'a':
                  SgmlInsertAfter(outputAnchor, listWord);
                  attached = 1;
                  break;
                case 'b':
                  SgmlInsertBefore(outputAnchor, listWord);
                  attached = 1;
                  break;
                default:
                  /* Unknown mode: nothing is attached. */
                  break;
                }
            }

          /* A list that could not be attached would otherwise be leaked. */
          if(!attached)
            {
              SgmlFree(listWord);
            }
        }
      else
        {
          SgmlFree(listWord);
        }

      SgmlInputRecordPrint();
    }
  return 0;
}

