/***********************************************************************
 *
 *  Projet    : DilibPro
 *  Module    : Sgml
 *  Fichier   : SgmlStrAnal
 *  Auteur    : Ducloy
 *  Date      : 22/7/93
 *  $Id: SgmlStrAnal.c,v 1.2 2005/08/30 13:30:53 parmentf Exp $
 ***********************************************************************
 *
 * Copyright (c) 1994 CNRS/CRIN & INRIA Lorraine
 * 
 ***********************************************************************/

/****************************************************************************

        SgmlStrAnal.c
        SgmlStringAnalysor

        Interface utilisateur : 
	 SgmlNode *SgmlFromString()

****************************************************************************/

#include "SgmlString.h"
#include "Buffer.h"
#include "Except.h"
#include <string.h>
#include <stdio.h>
#include "RegExp.h"
#include <stdlib.h>


/********************** pre declarations ********************************/

/*SgmlNode *SgmlCreateNode();*/
/* Node constructors defined outside this file; in this file SgmlCreateMark
   is called with a tag name and SgmlCreateData with no argument. */
SgmlNode *SgmlCreateMark();
SgmlNode *SgmlCreateData();
/* Debug dump of the analyser state, defined at the end of this file. */
void SgmlAnalysorVisual();

/**************************************************************************

     Objets elementaires,
     constructeurs et initialiseurs.

**************************************************************************/


/*
 * Allocate a new, unlinked stack element.
 *
 * Returns the element with its `next` and `object` fields set to NULL, or
 * NULL when malloc fails (the failure is reported through ExceptSetError).
 * The `type` field is not initialised here.
 */
SgmlStackElem*  SgmlStackElemCreate()
{
  SgmlStackElem *fresh = (SgmlStackElem *) malloc (sizeof (SgmlStackElem));

  if (fresh == NULL)
    {
      ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlStackElemCreate","","",2);
      return NULL;
    }

  fresh->object = NULL;
  fresh->next = NULL;
  return fresh;
}


/* Compiled regular expressions shared by the whole file.  Both are created
   on first use in SgmlAnalysorCreate(). */
static RegExp *reOnTag=NULL;   /* identifier pattern used by SgmlAnalysorGetIdentifier() */
static RegExp *reEqual=NULL;   /* '=' connector pattern used by SgmlAnalysorGetAttVal() */

/* The analyser instance used by every function in this file. */
SgmlAnalysor *SgmlStringAnalysor;
/* When equal to 1, SgmlFromString() calls SgmlAnalysorVisual() on each loop turn. */
int SgmlAnalysorDebug=0;
/* 0 until SgmlAnalysorInit() has created the analyser, 1 afterwards. */
int SgmlAnalysorFlag=0;
/* Short alias for the analyser instance. */
#define SSA SgmlStringAnalysor

/*
 * Allocate an analyser with an empty stack, an empty free list and no
 * input string attached.  The two file-level regular expressions are
 * compiled here the first time they are found to be NULL.
 *
 * Returns the new analyser, or NULL when malloc fails (the failure is
 * reported through ExceptSetError).
 */
SgmlAnalysor *SgmlAnalysorCreate()
{
  SgmlAnalysor *fresh = (SgmlAnalysor *) malloc (sizeof (SgmlAnalysor));

  if (fresh == NULL)
    {
      ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlAnalysorCreate","","",2);
      return NULL;
    }

  fresh->begin = NULL;
  fresh->left = NULL;
  fresh->top = NULL;
  fresh->listFreeElem = NULL;

  if (reOnTag == NULL)
    reOnTag = RegExpCreate("^[^ =/><\t][^ =/><\t]*");
  if (reEqual == NULL)
    reEqual = RegExpCreate("[ ]*=[ ]*");

  return fresh;
}

/* Forward declaration: SgmlAnalysorInit() below calls SgmlAnalysorPull()
   before its definition appears. */
SgmlStackElem *SgmlAnalysorPull();

void SgmlAnalysorInit(str)
     char *str;
{
  if (SgmlAnalysorFlag==0)
    {
      SgmlAnalysorFlag=1;
      SSA=SgmlAnalysorCreate();
    };
  SSA->begin=str;
  SSA->left=str;
  while(SSA->top)SgmlAnalysorPull();
  return;
}


/************************************************************************

          GESTION DE LA PILE DE L'ANALYSEUR

************************************************************************/

/*
 * Return the type letter stored in the element on top of the stack,
 * or '\0' when the stack is empty.
 */
char SgmlAnalysorTypeOfTop()
{
  SgmlStackElem *head = SSA->top;

  return head ? head->type : '\0';
}

/*
 * Push a node on the analyser stack.
 *
 * A stack element is taken from the free list when one is available and
 * allocated otherwise.  If that allocation fails nothing is pushed and the
 * stack is left unchanged (the error has already been reported by
 * SgmlStackElemCreate).
 *
 *   node : the node to store in the new top element
 *   typ  : the type letter recorded with it
 */
void SgmlAnalysorPush(node, typ)
     SgmlNode *node;
     char typ;
{
  SgmlStackElem *elem = SSA->listFreeElem;

  if (elem)
    {
      /* reuse a recycled element */
      SSA->listFreeElem = elem->next;
    }
  else
    {
      elem = SgmlStackElemCreate();
      if (!elem) return;
    }

  elem->type = typ;
  elem->object = node;
  elem->next = SSA->top;
  SSA->top = elem;
}

/*
 * Pop the top element of the stack and put it at the head of the free list.
 *
 * Returns the popped element (its `object` and `type` fields are left
 * untouched, its `next` now links into the free list), or NULL when the
 * stack is empty.
 */
SgmlStackElem *SgmlAnalysorPull()
{
  SgmlStackElem *popped = SSA->top;

  if (popped == NULL)
    return NULL;

  SSA->top = popped->next;
  popped->next = SSA->listFreeElem;
  SSA->listFreeElem = popped;
  return popped;
}

/*
 * Walk the stack from the top and return the first element whose node tag
 * (SgmlTag of its object) equals `tag`, or NULL when there is none.
 */
SgmlStackElem *SgmlAnalysorSearchTag(tag)
     char *tag;
{
  SgmlStackElem *cur;

  for (cur = SSA->top; cur != NULL; cur = cur->next)
    {
      if (strcmp(tag, SgmlTag(cur->object)) == 0)
	return cur;
    }
  return NULL;
}

/*
 * Walk the stack from the top and return the first element of type 'B'
 * whose node tag equals `tag`, or NULL when there is none.  Elements of
 * any other type are skipped without looking at their tag.
 */
SgmlStackElem *SgmlAnalysorSearchTypeBTag(tag)
     char *tag;
{
  SgmlStackElem *cur;

  for (cur = SSA->top; cur != NULL; cur = cur->next)
    {
      if (cur->type != 'B')
	continue;
      if (strcmp(tag, SgmlTag(cur->object)) == 0)
	return cur;
    }
  return NULL;
}
/***********************************************************************

 reconnaissance des objets elementaires

***********************************************************************/
/*
 * Advance the cursor by one character.
 *
 * Returns the new cursor position, or NULL when there is no input string
 * or the cursor already sits on the terminating '\0' (in which case the
 * cursor is not moved).
 */
char* SgmlAnalysorSkip()
{
  if (SSA->left == NULL)
    return NULL;
  if (*SSA->left == '\0')
    return NULL;

  SSA->left += 1;
  return SSA->left;
}

/*
 * Return the character under the cursor without moving it,
 * or 0 when no input string is attached.
 */
char SgmlAnalysorTestChar()
{
  return SSA->left ? SSA->left[0] : 0;
}

/*
 * Return the character that follows the one under the cursor, without
 * moving the cursor.
 *
 * Returns 0 when no input string is attached, and also when the cursor is
 * already on the terminating '\0': in that case there is no next character
 * and reading left[1] would go past the end of the string.
 */
char SgmlAnalysorTestNextChar()
{
  if (SSA->left && SSA->left[0] != '\0') return (SSA->left[1]);
  return 0;
}



/*
 * Move the cursor past any run of ' ' characters.
 *
 * Returns the new cursor position; NULL when no input string is attached
 * or when the cursor is already on the terminating '\0'.
 *
 * The end-of-string test compares with '\0'.  It used to compare with the
 * digit '0', which made the function refuse to work in front of a zero
 * and never detect the real end of the string.
 */
char* SgmlAnalysorSkipSpaces()
{
  if (SSA->left) 
    {
      if(SSA->left[0]=='\0')return (NULL);
      while (SSA->left[0]==' ')SSA->left++;
    }
  return (SSA->left);
}

/*
 * Move the cursor past any run of ' ' and '\n' characters.
 *
 * Returns the new cursor position; NULL when no input string is attached
 * or when the cursor is already on the terminating '\0'.
 *
 * The end-of-string test compares with '\0'.  It used to compare with the
 * digit '0', which made the function refuse to work in front of a zero
 * and never detect the real end of the string.
 */
char* SgmlAnalysorSkipSpaceLike()
{
  if (SSA->left) 
    {
      if(SSA->left[0]=='\0')return (NULL);
      while ((SSA->left[0]==' ')||(SSA->left[0]=='\n'))SSA->left++;
    }
  return (SSA->left);
}

/*
 * Move the cursor to the next occurrence of character c (found with
 * strchr from the current position).
 *
 * Returns the new cursor position.  Returns NULL and leaves the cursor
 * where it was when c does not occur, or when no input string is attached.
 */
char* SgmlAnalysorSkipUntil(c)
     char c;
{
  char *hit;

  if (!SSA->left)
    return NULL;

  hit = strchr(SSA->left, c);
  if (hit)
    SSA->left = hit;
  return hit;
}
/*
 * Copy the text lying between the cursor and the next occurrence of the
 * string s1, then move the cursor onto that occurrence.
 *
 * Returns a newly malloc'ed, NUL-terminated copy (possibly empty).
 * Returns NULL, without moving the cursor, when no input string is
 * attached, when s1 does not occur, or when malloc fails (that last case
 * is reported through ExceptSetError).
 */
char* SgmlAnalysorGetUntilStr(s1)
     char *s1;
{
  char *hit;
  char *copy;
  int span;

  if (!SSA->left)
    return NULL;

  hit = strstr(SSA->left, s1);
  if (!hit)
    return NULL;

  span = hit - (SSA->left);
  copy = (char *) malloc(span + 1);
  if (!copy)
    {
      ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlAnalysorGetUntilStr","","",2);
      return NULL;
    }

  /* the span cannot contain a '\0', so a raw copy is enough */
  memcpy(copy, SSA->left, span);
  copy[span] = '\0';

  SSA->left = hit;
  return copy;
}

/*
 * Copy the text lying between the cursor and the next occurrence of the
 * character c, then move the cursor onto that occurrence.
 *
 * Returns a newly malloc'ed, NUL-terminated copy (possibly empty).
 * Returns NULL, without moving the cursor, when no input string is
 * attached, when c does not occur, or when malloc fails (that last case
 * is reported through ExceptSetError).
 */
char* SgmlAnalysorGetUntil(c)
     char c;
{
  char *hit;
  char *copy;
  int span;

  if (!SSA->left)
    return NULL;

  hit = strchr(SSA->left, c);
  if (!hit)
    return NULL;

  span = hit - (SSA->left);
  copy = (char *) malloc(span + 1);
  if (!copy)
    {
      ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlAnalysorGetUntil","","",2);
      return NULL;
    }

  /* the span cannot contain a '\0', so a raw copy is enough */
  memcpy(copy, SSA->left, span);
  copy[span] = '\0';

  SSA->left = hit;
  return copy;
}


/*
 * Copy the text lying between the cursor and the first character that
 * belongs to the set s1 (strpbrk), then move the cursor onto that
 * character.
 *
 * Returns a newly malloc'ed, NUL-terminated copy (possibly empty).
 * Returns NULL, without moving the cursor, when no input string is
 * attached, when no character of s1 occurs, or when malloc fails (that
 * last case is reported through ExceptSetError).
 */
char* SgmlAnalysorGetUntilBrk(s1)
     char *s1;
{
  char *hit;
  char *copy;
  int span;

  if (!SSA->left)
    return NULL;

  hit = strpbrk(SSA->left, s1);
  if (!hit)
    return NULL;

  span = hit - (SSA->left);
  copy = (char *) malloc(span + 1);
  if (!copy)
    {
      ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlAnalysorGetUntilBrk","","",2);
      return NULL;
    }

  /* the span cannot contain a '\0', so a raw copy is enough */
  memcpy(copy, SSA->left, span);
  copy[span] = '\0';

  SSA->left = hit;
  return copy;
}

/*
 * Read an identifier at the cursor.
 *
 * The work is done inside the RegExpIf_Ta macro, called with the reOnTag
 * expression and the current cursor.
 * NOTE(review): presumably the macro binds `ident` to the matched text and
 * `suite` to what follows it, runs the first block on a match and the
 * second one otherwise -- confirm against RegExp.h.
 *
 * In the first block the identifier is copied into a newly malloc'ed
 * string and the cursor is set to `suite`; in the second block the result
 * is NULL and the cursor is not touched.
 *
 * Returns the malloc'ed identifier (the callers in this file free it), or
 * NULL when no identifier was recognised or when malloc failed (that last
 * case is reported through ExceptSetError).
 */
char* SgmlAnalysorGetIdentifier()
{
  char *id;
  RegExpIf_Ta 
    (reOnTag, SSA->left,
     ident, suite,
     {
       id = (char *) malloc(strlen (ident) +1);
       if(!id)
	 {
	   ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlAnalysorGetIdentifier","","",2);
	   return NULL;
	 }
       strcpy (id, ident);
       SSA->left = suite;
     }
     ,   /* else */
     {
       id=NULL;
     }
    );

  return (id);
}

 
/*
 * Read an attribute value written without quotes.
 *
 * Characters are consumed one by one and accumulated in a function-local
 * static Buffer (created on the first call, reset on the following ones).
 * Reading stops, without consuming the stopping character, on:
 *   - a space,
 *   - '>',
 *   - '/' immediately followed by '>',
 *   - the end of the input.
 * A '/' that is not followed by '>' is part of the value.
 *
 * Returns what BufferSave() yields for the accumulated text.
 * NOTE(review): presumably a heap copy, since the result ends up freed in
 * SgmlAnalysorGetAttVal() -- confirm in Buffer.h.
 */
char* SgmlAnalysorGetUnquotedValue()
{
  static Buffer *locBuf = NULL;
  char cur;

  if (locBuf)
    BufferReset(locBuf);
  else
    locBuf = BufferCreate(10,10);

  while ((cur = SgmlAnalysorTestChar()) != '\0')
    {
      if (cur == ' ' || cur == '>')
	break;
      if (cur == '/' && SgmlAnalysorTestNextChar() == '>')
	break;

      SgmlAnalysorSkip();
      BufferCharCat(locBuf, cur);
    }

  return BufferSave(locBuf);
}

/*
 * Read an attribute value at the cursor.
 *
 * If the cursor is on a double or single quote, the value is the text up
 * to the next identical quote; both quotes are consumed.  When the closing
 * quote cannot be found, a syntax error is reported and NULL is returned
 * (the opening quote stays consumed).
 *
 * Otherwise the value is read with SgmlAnalysorGetUnquotedValue().
 *
 * Returns the value string, or NULL on failure.
 */
char* SgmlAnalysorGetValOfAttribute()
{
  char *value;
  char quote = SgmlAnalysorTestChar();

  if (quote != '\"' && quote != '\'')
    return SgmlAnalysorGetUnquotedValue();

  SgmlAnalysorSkip();                    /* opening quote */
  value = SgmlAnalysorGetUntil(quote);
  if (value)
    {
      SgmlAnalysorSkip();                /* skip the remaining quote */
      return value;
    }

  if (quote == '\"')
    ExceptSetError("SgmlFromString","01","syntax error : \" unbalanced ",SSA->left-1,"",1);
  else
    ExceptSetError("SgmlFromString","01","syntax error : \' unbalanced ",SSA->left-1,"",1);
  return NULL;
}

/*
 * Read one `name = value` attribute at the cursor and add it to the node
 * held by the top element of the stack.
 *
 * The '=' connector is located with the reEqual expression.  The attribute
 * name is the text between the cursor and RegExpLoc1, and the value is
 * read starting at RegExpLoc2.
 * NOTE(review): RegExpLoc1 / RegExpLoc2 look like the start and the end of
 * the last match -- confirm against RegExp.h.
 *
 * The temporary name and value strings are released on every path; the
 * name used to be leaked when the value could not be read.
 *
 * Returns 1 when the attribute was added.  Returns 0 after reporting the
 * problem through ExceptSetError when the connector is missing, the value
 * cannot be read, or memory allocation fails.
 */
int SgmlAnalysorGetAttVal()
{
  char *attId;
  char *attVal;
  char *strVal;
  unsigned RegExp_pos;

  if (!RegExpExec(reEqual, SSA->left))
    {
      ExceptSetError("SgmlFromString","02",
		     "syntax error : = connector missing ",
		     SSA->left-1,"",1);    
      return(0);
    }

  /* read the match locations before anything else can run the matcher again */
  strVal=RegExpLoc2;
  RegExp_pos=RegExpLoc1-SSA->left;

  attId=(char*)malloc(RegExp_pos+1);
  if(!attId)
    {
      ExceptSetError("SgmlFromString","MA", "memory allocation failed in SgmlAnalysorGetAttVal","","",2);
      return 0;
    }
  attId[0]='\0';
  strncat(attId, SSA->left, RegExp_pos); 

  SSA->left=strVal;
  attVal=SgmlAnalysorGetValOfAttribute();
  if (!attVal)
    {
      free (attId);   /* no longer needed: nothing will be added */
      ExceptSetError("SgmlFromString","02",
		     "syntax error : attribute without value"
		     ,SSA->left-1,"",1);
      return(0);
    }

  SgmlAddAtt(
	     (SSA->top)->object,
	     attId,
	     attVal);
  free (attVal);
  free (attId);
  return(1);
}

/*
 * Read a begin tag.  The cursor is expected to be on the '<' character.
 *
 * The tag identifier is read, a mark node is created for it and pushed on
 * the stack with type 'B'.  Attributes are then read one after the other
 * until the tag is closed:
 *   - by '>'  : the cursor is moved past it;
 *   - by '/>' : the cursor is moved past it and the mark is flagged with
 *               XmlSetEmpty.
 *
 * Returns the mark node, or NULL after reporting a syntax error when the
 * identifier is missing, an attribute is malformed, a '/' is not followed
 * by '>', or the input ends before the tag is closed.  On those failures
 * after the push, the mark stays on the stack.
 *
 * A '/' not followed by '>' now aborts right after the error is reported,
 * like every other syntax error here; it used to fall through into
 * attribute parsing.
 */
SgmlNode* SgmlAnalysorGetMark()
{
  char *tag;
  SgmlNode *mark;
  char c;

  SgmlAnalysorSkip();                         /* '<' */
  tag = SgmlAnalysorGetIdentifier();
  if (!tag)
    {
      ExceptSetError("SgmlFromString","03","syntax error : no Gid on Begin Tag ",SSA->left-1,"",1);
      return(NULL);
    }
  mark = SgmlCreateMark(tag);
  SgmlAnalysorPush(mark, 'B'); 
  free(tag);

  SgmlAnalysorSkipSpaceLike();

  while ((c = SgmlAnalysorTestChar()))
    {
      if (c == '>')
	{
	  SgmlAnalysorSkip();
	  return (mark);
	}

      if (c == '/')
	{
	  if (SgmlAnalysorTestNextChar() != '>')
	    {
	      ExceptSetError("SgmlFromString","01","syntax error (?/?) : ", SSA->left-1,"",1);
	      return (NULL);
	    }
	  SgmlAnalysorSkip();                 /* '/' */
	  SgmlAnalysorSkip();                 /* '>' */
	  XmlSetEmpty(mark);
	  return (mark);
	}

      /* anything else must be an attribute */
      if (!SgmlAnalysorGetAttVal())
	return (NULL);
      SgmlAnalysorSkipSpaceLike();
    }

  ExceptSetError("SgmlFromString","03","syntax error : no > in Begin Tag ",SSA->left-1,"",1);
  return (NULL);
}

SgmlNode *SgmlAnalysorGetComment()
{
  char *strComment;
  char *strBegin;
  SgmlNode *nComment;
  strBegin=SSA->left;
  SSA->left+=4;
  if ((strComment=SgmlAnalysorGetUntilStr("-->")))
    {
      nComment=SgmlCommentCreate(strComment);
      SgmlAnalysorPush(nComment,'D');
      SSA->left+=3;
      return(nComment);
    }
  else
    {
      ExceptSetError("SgmlFromString/GetComment","03","syntax error : End Comment missing ",strBegin,"",1);  
      return 0;  
    };
}

/*
 * Handle a "<!" construct.  When the two characters after "<!" are "--"
 * the construct is read as a comment; anything else is not handled and
 * yields NULL.
 */
SgmlNode *SgmlAnalysorGetDtdOrComment()
{
  /* the second test is only evaluated when left[2] is '-', hence not '\0' */
  if (SSA->left[2] == '-' && SSA->left[3] == '-')
    return SgmlAnalysorGetComment();

  return NULL;
}

/*
 * Read character data up to the next '<'.
 *
 * The text is extracted first, and the data node is created only once the
 * text is available.  The node used to be created up front and was lost
 * when the extraction failed.  On success the node receives the extracted
 * string (it is stored, not copied) and is pushed on the stack with type
 * 'D'; the cursor is left on the '<'.
 *
 * Returns the data node.  Returns NULL after reporting a syntax error when
 * the text cannot be extracted, and NULL when the node cannot be created
 * (the extracted text is released in that case).
 */
SgmlNode *SgmlAnalysorGetData()
{
  SgmlNode *nData;
  char *strData;

  strData = SgmlAnalysorGetUntil('<');
  if (!strData)
    {
      ExceptSetError("SgmlFromString/GetData","03","syntax error : End Mark missing ",SSA->left-1,"",1);  
      return NULL;
    }

  nData = SgmlCreateData();
  if (!nData)
    {
      free(strData);
      return NULL;
    }

  SgmlDataString(nData)=strData;
  SgmlAnalysorPush(nData,'D');
  return(nData);
}

SgmlNode* SgmlAnalysorGetEndMark()
{
  char *tag;

  SgmlAnalysorSkip();
  SgmlAnalysorSkip();
  if ((tag=SgmlAnalysorGetIdentifier()))
    {
      SgmlStackElem *newTop;
      SgmlStackElem *elem;
      char typTop;

      /*   on recherche un noeud de tag identique  que l'on marque a T(top) */
      if ((newTop=SgmlAnalysorSearchTypeBTag(tag)))
	{
	  newTop->type='T';
	}
      else
	{
	  ExceptSetError("SgmlFromString","03","syntax error : End Mark missing ",tag ,SSA->left-1,1);
	  return (NULL);
	};
      free(tag);

      /*  on depile */
      while ((typTop=(SSA->top)->type)!='T')
	{
	  elem = SgmlAnalysorPull();
	  if (typTop=='B')SgmlSetEmpty(elem->object);
	  SgmlAddFirst(newTop->object, elem->object);
	}
      /* on remet Top a M */
      newTop->type='M';
      /* on termine la balise */
      SgmlAnalysorSkipSpaces();
      SgmlAnalysorSkip();          /*     >   */
      return (newTop->object);
    }
  else
    {
      return (NULL);
    }
  
}

/*
 * Build an Sgml tree from a string.
 *
 * The analyser is (re)initialised on str, the text before the first '<'
 * is ignored and the first begin tag is read; its stack element is the
 * bottom of the stack.  The input is then consumed piece by piece (end
 * tag, comment, begin tag or character data) until the bottom element
 * reaches type 'M', i.e. until its end tag has been read.
 *
 * Two shortcuts return the first node immediately: when it is already
 * flagged empty, and when the input ends right after the first tag (the
 * node is then flagged with SgmlSetEmpty).
 *
 *   str : the string to analyse; it must stay valid during the analysis.
 *
 * Returns the root node, or NULL when str is NULL, when the analyser
 * cannot be set up, when str contains no '<' (this case used to read an
 * uninitialised pointer), or when any piece fails to parse.
 */
SgmlNode *SgmlFromString(str)
     char *str;
{
  char c, c1;
  SgmlStackElem  *bottom;

  if(!str)return NULL;

  SgmlAnalysorInit(str);
  if (!SSA) return NULL;                      /* analyser could not be created */

  if (!SgmlAnalysorSkipUntil('<')) return NULL;   /* no tag at all */
  if (!SgmlAnalysorGetMark()) return NULL;

  bottom=SSA->top;
  if (!bottom) return NULL;                   /* the first mark could not be pushed */

  if (SgmlIsEmpty(bottom->object))
    return (bottom->object);
  if (!(SSA->left[0]))
    {
      SgmlSetEmpty(bottom->object);
      return (bottom->object);
    }

  while (bottom->type!='M')
    {
      if (SgmlAnalysorDebug==1)SgmlAnalysorVisual();

      c=SSA->left[0];
      if (!c) return (NULL);                  /* input ended before the root was closed */

      if (c!='<')
	{
	  if(!SgmlAnalysorGetData()) return (NULL);
	  continue;
	}

      c1=SSA->left[1];
      if (!c1) return (NULL);                 /* lone '<' at the end of the input */

      if (c1=='/') 
	{if(!SgmlAnalysorGetEndMark())return (NULL);}
      else if (c1=='!')
	{if(!SgmlAnalysorGetDtdOrComment())return (NULL);}
      else 
	{if (!SgmlAnalysorGetMark())return (NULL);}
    }

  return (bottom->object);
}

/*
 * Debug helper: print the analyser state on standard output.
 *
 * Prints the whole input string, what remains from the cursor on, then
 * one line per stack element from the top down (rank, type letter, and
 * either the data string for type 'D' or the node printed with SgmlPrint
 * for types 'B', 'M' and 'T').  Ends with the number of elements on the
 * stack, on the free list, and their total.
 */
void SgmlAnalysorVisual()
{
  SgmlStackElem *cur;
  int neStack = 0;
  int neFreeElem = 0;

  printf("str begin: %s\n", SSA->begin);
  printf("str left : %s\n", SSA->left);
  printf("stack:\n");

  if (SSA->top == NULL)
    printf("*** empty ***\n");

  for (cur = SSA->top; cur != NULL; cur = cur->next)
    {
      neStack++;
      printf("%d:%c:", neStack, cur->type);

      if (cur->type == 'D')
	{
	  printf("%s\n", SgmlDataString(cur->object));
	}
      else if (cur->type == 'B' || cur->type == 'M' || cur->type == 'T')
	{
	  SgmlPrint(cur->object);
	  printf("\n");
	}
      /* any other type: nothing more is printed for this element */
    }

  for (cur = SSA->listFreeElem; cur != NULL; cur = cur->next)
    neFreeElem++;

  printf ("\n stack :%d free :%d total: %d \n",
	    neStack,   neFreeElem, neStack+neFreeElem);
}


