/*   -*- coding: utf-8 -*-  */
/**********************************************************************
*
*  Module   : Explor
*  fichier  : ExplorExtractContent.c
*  Auteur   : Jacques DUCLOY
*  Date     : Decembre 2016

This tool extracts content units to generate triplets < hfdKey / weight / text >
from the title, abstract, body, keywords, and titles in the bibliography.

*
*
***********************************************************************/


#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "Hfd.h"
#include "Buffer.h"
#include "SxmlNode.h"
#include "TeiHandler.h"
#include "SxPath.h"

/* Output layout read by printUnit(), one character per field:
   'k' = record key, 'w' = weight, 'c' = content.
   main() sets it to "ckw" and replaces it with the argument of -f. */
char *format;

/* Compiled form of the path "fulltext/istex:fulltextTEI/text/body/div";
   built once in main() and evaluated there against each ISTEX record. */
SxPathResult *istexBodyDivPath;

/*
 * printUnit - write one output record for a content unit on stdout.
 *
 * The global `format` string is read one character at a time; each
 * character selects the field printed at that position:
 *   'k'  the current record key (SxmlInputRecordKey)
 *   'w'  the weight w, printed with "%08.3f"
 *   'c'  the text passed in content
 * Any other character yields an empty field.  Fields are separated by a
 * tab and the record is terminated by a newline.  Nothing is printed when
 * `format` is NULL or empty.
 *
 * content: text of the unit; NULL is printed as an empty field
 * w:       weight attached to the unit
 */
void printUnit(char *content, double w)
{
  const char *field;

  if (!format)
    return;

  for (field = format; *field; field++)
    {
      switch (*field)
        {
        case 'k':
          printf("%s", SxmlInputRecordKey);
          break;
        case 'w':
          printf("%08.3f", w);
          break;
        case 'c':
          /* Passing NULL to "%s" is undefined behavior, so skip it. */
          if (content)
            fputs(content, stdout);
          break;
        default:
          /* Unknown selector: keep the column, leave it empty. */
          break;
        }
      /* Tab between fields, newline after the last one. */
      putchar(field[1] ? '\t' : '\n');
    }
}

    /* POSIX getopt(3) interface, declared by hand in this file.
       A full prototype is given so that calls are type-checked; an empty
       parameter list would mean "no parameters" under C23. */
    int getopt(int argc, char * const argv[], const char *optstring);
    extern char *optarg;

/*
 * Program entry point.
 *
 * Command line: -f <format> replaces the default output format "ckw"
 * (see printUnit for the meaning of each character).
 *
 * For every record delivered by TeiInputNextRecord() the following units
 * are printed through printUnit():
 *   - each node named "title" found while iterating Tei_titleStmt, weight 10
 *   - the first "front" child of Tei_root, if any, weight 3
 *   - for records whose Explor source is "ISTEX": every node iterated under
 *     the first node matched by istexBodyDivPath, weight 1
 */
int main(int argc, char **argv)
{
  SxmlNode *recordTop;
  SxmlNode *titleCandidate;
  int opt;

  format = "ckw";
  istexBodyDivPath = SxPathFirstCompile("fulltext/istex:fulltextTEI/text/body/div");

  /* Option parsing: only -f is acted upon, anything else is ignored. */
  for (;;)
    {
      opt = getopt(argc, argv, "f:");
      if (opt == EOF)
        break;
      if (opt == 'f')
        format = optarg;
    }

  while (TeiInputNextRecord())
    {
      SxmlNode *istexTop;
      SxmlNode *firstDiv;
      SxmlNode *bodyPart;

      recordTop = SxmlParent(Tei_root);

      /* Titles found under the title statement. */
      SxmlReset(Tei_titleStmt);
      for (;;)
        {
          titleCandidate = SxmlNextNode(Tei_titleStmt);
          if (!titleCandidate)
            break;
          if (!SxmlNodeHasName(titleCandidate, "title"))
            continue;
          printUnit(SxmlGetTextContent(titleCandidate), 10.0);
        }

      /* Whole text of the "front" element, when the record has one. */
      Tei_front = SxmlGetFirstChildByTagName(Tei_root, "front");
      if (Tei_front)
        {
          printUnit(SxmlGetTextContent(Tei_front), 3.0);
        }

      /* The remaining units only exist for ISTEX-sourced records. */
      if (!TeiHandlerHasExplorSource("ISTEX"))
        continue;

      istexTop = SxmlGetFirstChildByTagName(recordTop, "istex");
      if (!istexTop)
        continue;

      firstDiv = SxPathFirstResultNode(istexBodyDivPath, istexTop);
      if (!firstDiv)
        continue;

      SxmlReset(firstDiv);
      for (;;)
        {
          bodyPart = SxmlNextNode(firstDiv);
          if (!bodyPart)
            break;
          printUnit(SxmlGetTextContent(bodyPart), 1.0);
        }
    }
  exit(EXIT_SUCCESS);
}
