/**********************************************************************
*
*  module   : SgmlFast
*  commande : SgmlStreamExtract
*  fichier  : SgmlStreamExtract.c
*  Auteur   : Jacques DUCLOY
*  Date     : Avril 2010
*
***********************************************************************/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

/* getopt(3) entry point and its option-argument pointer, declared here. */
int getopt();
extern char *optarg;
char *tag;       /* name of the element to extract (-t option); NULL when not given */
char *key;       /* -k option value; printed, followed by a tab, before each extracted element; NULL when not given */
int curPos;      /* most recent character read from stdin */
int curPos1;     /* look-ahead character read right after curPos */

/*
 * testEndTag - check whether the input continues with the tag name
 * followed by '>'.
 *
 * Called once "</" has been read.  Every character consumed here is
 * echoed to stdout, so a mismatching end tag is still copied verbatim.
 *
 * Returns 1 when "<tag>" name and the closing '>' were both matched,
 * 0 on the first mismatch or when the input ends.
 */
int testEndTag()
{
  int expected;
  int indic;

  indic = 0;

  /* Compare as unsigned char: getchar() yields 0..255, whereas a plain
     char may be negative for bytes above 0x7F. */
  while ((expected = (unsigned char)tag[indic++]))
    {
      curPos = getchar();
      if (curPos == EOF)
        return 0;               /* never hand EOF to putchar() */
      putchar(curPos);
      if (curPos != expected)
        return 0;
    }

  curPos = getchar();
  if (curPos == EOF)
    return 0;
  putchar(curPos);

  return (curPos == '>') ? 1 : 0;
}

/*
 * elemUntilEndTag - copy element content from stdin to stdout until the
 * end tag of the extracted element has been copied, or the input ends.
 *
 * White space is normalised while copying:
 *   - runs of blanks, tabs, carriage returns and newlines collapse to a
 *     single blank;
 *   - white space at the very start of the content is dropped;
 *   - a newline that directly follows a '>' is dropped instead of being
 *     turned into a blank.
 *
 * Always returns 0.
 */
int elemUntilEndTag()
{
  int space;      /* 1 while further white space must be dropped */
  int afterTag;   /* 1 when the previous character copied was '>' */

  space = 1;
  afterTag = 0;

  while ((curPos = getchar()) != EOF)
    {
      switch (curPos)
        {
        case ' ':
        case '\015':
        case '\t':
          if (space == 0)
            {
              afterTag = 0;
              putchar(' ');
              space = 1;
            }
          continue;

        case '\n':
          if (space == 1)
            continue;
          if (afterTag == 1)
            afterTag = 0;       /* newline right after a tag: no blank */
          else
            putchar(' ');
          space = 1;
          continue;

        case '<':
          afterTag = 0;
          putchar(curPos);
          space = 0;
          curPos1 = getchar();
          if (curPos1 == EOF)
            return 0;           /* input ended right after '<' */
          putchar(curPos1);
          if (curPos1 == '/' && testEndTag() == 1)
            return 0;           /* matching end tag copied: done */
          continue;

        case '>':
          putchar(curPos);
          space = 0;
          afterTag = 1;
          continue;

        default:
          afterTag = 0;
          putchar(curPos);
          space = 0;
        }
    }

  /* Input ended before the end tag was found. */
  return 0;
}

/*
 * elemStartTag - copy the remainder of a start tag (its attributes) to
 * stdout.
 *
 * On '>' the tag is closed and the element content is copied with
 * elemUntilEndTag().  On "/>" the element is empty and the function
 * returns right after writing "/>".
 *
 * Always returns 0.
 */
int elemStartTag()
{
  while ((curPos = getchar()) != EOF)
    {
      switch (curPos)
        {
        case '>':
          putchar('>');
          elemUntilEndTag();
          return (0);

        case '/':
          curPos1 = getchar();
          if (curPos1 == '>')
            {
              printf("/>");
              return (0);
            }
          putchar(curPos);
          if (curPos1 == EOF)
            return (0);         /* never hand EOF to putchar() */
          /* Push the look-ahead back so it is examined like any other
             character; this way the second '/' of "//>" is recognised
             as the start of "/>". */
          ungetc(curPos1, stdin);
          continue;

        default:
          putchar(curPos);
          continue;
        }
    }

  /* Input ended inside the start tag. */
  return (0);
}

/*
 * skipPI - placeholder for skipping a processing instruction or
 * declaration.  It reads nothing and writes nothing.
 *
 * Always returns 0.
 */
int skipPI()
{
  const int status = 0;

  return status;
}

/*
 * testTagTag - check whether the element being read is the requested
 * one, given that its first letter already matched tag[0], and if so
 * print it.
 *
 * The remaining letters of the name are compared with the input.  The
 * character that follows the name decides what happens next:
 *   "/>"         empty element: it is printed on its own line;
 *   white space  attributes follow: handled by elemStartTag();
 *   '>'          content follows: handled by elemUntilEndTag();
 *   other        a different (longer) name: nothing is printed.
 * When key is set, each printed element is prefixed with key and a tab.
 *
 * Always returns 0.
 */
int testTagTag()
{
  int expected;
  int indic;

  indic = 1;

  /* Compare as unsigned char: getchar() yields 0..255, whereas a plain
     char may be negative for bytes above 0x7F. */
  while ((expected = (unsigned char)tag[indic++]))
    {
      curPos = getchar();
      if (curPos != expected)
        return 0;
    }

  curPos = getchar();
  switch (curPos)
    {
    case '/':
      curPos = getchar();
      if (curPos == '>')
        {
          if (key) printf("%s\t<%s/>\n", key, tag);
          else printf("<%s/>\n", tag);
        }
      return 0;

    case ' ':
    case '\n':
    case '\015':
    case '\t':
      if (key) printf("%s\t<%s ", key, tag);
      else printf("<%s ", tag);
      elemStartTag();
      putchar('\n');
      return 0;

    case '>':
      if (key) printf("%s\t<%s>", key, tag);
      else printf("<%s>", tag);
      elemUntilEndTag();
      putchar('\n');
      return 0;

    default:
      /* The name only shares a prefix with tag, or the input ended. */
      break;
    }

  return 0;
}

/*
 * printPI - copy a processing instruction to stdout, up to and
 * including its closing "?>", followed by a newline.
 *
 * The two characters already held in curPos and curPos1 are written
 * first.  If the input ends before "?>" is seen, the program exits with
 * status 0.  A NUL byte in the input also ends the copy.
 *
 * Returns 0.
 */
int printPI()
{
  putchar(curPos);
  putchar(curPos1);

  for (;;)
    {
      curPos = getchar();

      if (curPos == 0)
        break;                  /* NUL byte: stop copying */
      if (curPos == EOF)
        exit(0);

      if (curPos != '?')
        {
          putchar(curPos);
          continue;
        }

      /* A '?' only closes the instruction when '>' follows it. */
      curPos1 = getchar();
      if (curPos1 == EOF)
        exit(0);
      if (curPos1 == '>')
        {
          printf("?>\n");
          return 0;
        }

      /* Lone '?': write it and re-read the look-ahead next time round. */
      putchar(curPos);
      ungetc(curPos1, stdin);
    }

  return 0;
}

/*
 * testElem - dispatch on the character that follows a '<'.
 *
 *   EOF   the input ends on a bare '<': report a syntax error on stderr
 *         and exit with status 1;
 *   '?'   processing instruction: printed with printPI() when no tag
 *         was requested, otherwise passed to skipPI();
 *   '!'   declaration or comment: passed to skipPI();
 *   other when a tag was requested and this character equals its first
 *         letter, testTagTag() checks the rest of the name.
 *
 * Always returns 0.
 */
int testElem()
{
  curPos1 = getchar();

  if (curPos1 == EOF)
    {
      /* Not an errno-related failure, so perror() would append a
         misleading system error text. */
      fputs("syntax error\n", stderr);
      exit(1);
    }

  switch (curPos1)
    {
    case '?':
      if (tag) skipPI();
      else printPI();
      break;

    case '!':      /* not right: skipPI() does not consume anything */
      skipPI();
      break;

    default:
      /* Compare as unsigned char: getchar() yields 0..255, whereas a
         plain char may be negative for bytes above 0x7F. */
      if (tag && curPos1 == (unsigned char)tag[0])
        testTagTag();
      break;
    }

  return 0;
}

/*
 * main - parse the command line, then scan stdin for elements.
 *
 * Options:
 *   -t tag   name of the element to extract;
 *   -k key   text printed, followed by a tab, before each extracted
 *            element.
 *
 * Every '<' found in the input is handed to testElem().
 * Returns 0 once the input is exhausted.
 */
int main(int argc, char **argv)
{
  int c;

  tag = NULL;
  key = NULL;

  /* getopt() returns -1 once all options have been parsed. */
  while ((c = getopt(argc, argv, "k:t:")) != -1)
    {
      switch (c)
        {
        case 'k':
          key = optarg;
          break;
        case 't':
          tag = optarg;
          break;
        default:
          /* Unrecognised options are ignored here. */
          break;
        }
    }

  while ((curPos = getchar()) != EOF)
    {
      if (curPos == '<')
        testElem();
    }

  return 0;
}

