/*   -*- coding: utf-8 -*-  */
/***********************************************************************
 
   Module   : Nlm
   Commande : NlmPmcExplorShell
   Fichier  : NlmPmcExplorShell.c
   Auteur   : DUCLOY
   Date     : 2012
 
************************************************************************/

#include <stdio.h>     /* for printf */
#include <stdlib.h>    /* for exit */
#include <string.h>    /* for strcmp */
#include "Explor.h"

/* getopt() interface used by main(); declared by hand here
   (no <unistd.h> include is visible in this file). */
extern char *optarg;
extern int   optind;
int getopt();

/* Step node currently visited by the loop in main(); the step generators
   read it to find the preceding step with SxmlPreviousSibling(). */
SxmlNode *ExplorStepNode;
/* Step list of the stream, built in main() from the "listSteps" entry
   of the "Area" dictionary. */
SxmlNode *streamListSteps;

/*
 * Writes one prerequisite of the Corpus/biblio Makefile rule on stdout.
 *
 *   count    : number of prerequisites already written; incremented here
 *   indent   : blanks printed in front of the path
 *   fileName : file name, relative to <ExplorAreaDir>/Import
 *
 * The line continuation is printed BEFORE the prerequisite, never after it,
 * so the list can never end with a dangling backslash, whatever combination
 * of import sources is configured.
 */
static void generNlmPmcCorpusPrerequisite(int *count, const char *indent, const char *fileName)
{
  /* The first separator carries one extra blank after the newline, to keep
     the layout of the rule as it was for the configurations that already
     produced a valid Makefile. */
  printf ("%s%s%s/Import/%s", (*count == 0) ? " \\\n " : " \\\n", indent, ExplorAreaDir, fileName);
  (*count)++;
}

/*
 * Writes on stdout, for the "Corpus" step of the stream ExplorStreamCode:
 *   - a "<?dilib file=... ?>" section holding the shell script
 *     <area>/bin/<stream>CorpusBiblio.sh;
 *   - a "<?dilib appendFile=... ?>" section holding the matching
 *     Makefile rule for <area>/bin/area.mk.
 *
 * Three optional entries of the "Area" dictionary give the input files,
 * all located under the Import directory:
 *   Corpus/Nlm:PmcImportFile      one file, piped through NlmPmc2Sxml
 *   Corpus/Nlm:PmcSxmlImportFile  a list of files that are only "cat"-ed
 *   Corpus/importFile             a list of files piped through NlmPmc2Sxml
 *
 * Every configured file becomes a prerequisite of the Makefile rule.
 */
void generNlmPmcShellCorpusBiblio()
{
  char *pmcImportFile;
  char *strListPmcSxmlImportFile;
  char *strListPmcImportFile;
  SxmlNode *nodeInput;
  SxmlNode *listPmcSxmlImportFile = NULL;
  SxmlNode *listPmcImportFile = NULL;
  SxmlNode *itemPmcImportFile;
  int prerequisiteCount = 0;

  printf("<?dilib file=\"%s/bin/%sCorpusBiblio.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode );
  printf ("#!/bin/sh\n");
  pmcImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/Nlm:PmcImportFile");
  strListPmcSxmlImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/Nlm:PmcSxmlImportFile");
  strListPmcImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/importFile");
  printf ("# This file is generated by Dilib / Nlm / NlmPmcExplorDataShells \n");
  printf ("echo \"---- Executing sh $EXPLOR_AREA/bin/%sCorpusBiblio.sh\" \n", ExplorStreamCode );
  ExplorGenerDeleteHfd(ExplorStreamCode, "Corpus", "biblio");

  /* ---- shell script: all the sources are gathered in one subshell ---- */
  printf (" (\n");
  if (pmcImportFile)
    {
      printf ("cat $EXPLOR_AREA/Import/%s   \\\n", pmcImportFile);
      printf ("   | NlmPmc2Sxml   \\\n");
      printf ("   | Utf8FromHexEntity  \\\n");
      printf ("   | SxmlUnIndent  \n\n");
    }
  if (strListPmcSxmlImportFile)
    {
      listPmcSxmlImportFile=SxmlFromString(strListPmcSxmlImportFile);
      while ((nodeInput=SxmlNextNode(listPmcSxmlImportFile)))
	{
	  printf (" cat $EXPLOR_AREA/Import/%s   \n", SxmlLeafText(nodeInput));
	}
    }
  if (strListPmcImportFile)
    {
      listPmcImportFile=SxmlFromString(strListPmcImportFile);
      printf("   ( \n");
      SxmlReset(listPmcImportFile);
      while ((itemPmcImportFile=SxmlNextNode(listPmcImportFile)))
	{
	  printf ("cat $EXPLOR_AREA/Import/%s   \\\n", SxmlLeafText(itemPmcImportFile));
	  printf ("   | NlmPmc2Sxml   \\\n");
	  printf ("   | Utf8FromHexEntity  \\\n");
	  printf ("   | SxmlUnIndent  \n\n");
	}
      printf("   ) \n");
    }

  printf (" ) | sort -u   -T $DILIB_WORKSPACE            \\\n");
  printf ("   | HcsSxmlRecordPutKey  \\\n");
  printf ("   | NlmPmc2CorpusTei   \\\n");
  printf ("   | TeiPutRefToIdno -t wicri:Area/%s/Corpus -c PMC -S %s -s Corpus \\\n", ExplorStreamCode, ExplorStreamCode );
  printf ("   | HfdBuild -bh $EXPLOR_AREA/Data/%s/Corpus/repository\n\n",  ExplorStreamCode);

  printf (" echo \"==== \" %s, step Corpus, repository done\n", ExplorStreamCode);
  printf (" HfdCat $EXPLOR_AREA/Data/%s/Corpus/repository.hfd | wc \n", ExplorStreamCode);

  printf (" HfdCat $EXPLOR_AREA/Data/%s/Corpus/repository.hfd \\\n", ExplorStreamCode);
  printf ("   | NlmPmcCleanCorpus   \\\n");
  printf ("   | HfdBuild -h $EXPLOR_AREA/Data/%s/Corpus/biblio\n\n",  ExplorStreamCode);

  printf (" echo \"==== \" %s, step Corpus, biblio done\n", ExplorStreamCode);
  printf (" HfdCat $EXPLOR_AREA/Data/%s/Corpus/biblio.hfd | wc \n", ExplorStreamCode);

  /* ---- Makefile rule: target, one prerequisite per input file, recipe ---- */
  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n", ExplorAreaDir );
  printf("# This part is generated by Dilib / Nlm / NlmPmcExplorDataShells \n");
  printf("%s/Make/%s/Corpus/biblio:", ExplorAreaDir, ExplorStreamCode);
  if (pmcImportFile)
    {
      generNlmPmcCorpusPrerequisite(&prerequisiteCount, "                ", pmcImportFile);
    }
  if (strListPmcSxmlImportFile)
    {
      SxmlReset (listPmcSxmlImportFile);
      while ((nodeInput=SxmlNextNode(listPmcSxmlImportFile)))
	{
	  generNlmPmcCorpusPrerequisite(&prerequisiteCount, "           ", SxmlLeafText(nodeInput));
	}
    }
  if (strListPmcImportFile)
    {
      SxmlReset(listPmcImportFile);
      while ((itemPmcImportFile=SxmlNextNode(listPmcImportFile)))
	{
	  generNlmPmcCorpusPrerequisite(&prerequisiteCount, "           ", SxmlLeafText(itemPmcImportFile));
	}
    }
  /* Closes the target line, with or without prerequisites, so that the
     recipe below always starts on a line of its own. */
  printf ("\n");
  printf("\tsh %s/bin/%sCorpusBiblio.sh \n", ExplorAreaDir, ExplorStreamCode );
  printf("\ttouch %s/Make/%s/Corpus/biblio\n\n", ExplorAreaDir, ExplorStreamCode);
}

/*
 * Writes on stdout, for the "Curation" step of the stream ExplorStreamCode:
 *   - the shell script <area>/bin/<stream>CurationBiblio.sh, which reads
 *     biblio.hfd of the preceding step and rebuilds Curation/biblio;
 *   - the Makefile rule appended to <area>/bin/area.mk, depending on the
 *     ".step" target of the preceding step.
 * The preceding step is the previous sibling of the global ExplorStepNode.
 */
void generNlmPmcShellCurationBiblio()
{
  SxmlNode *priorNode;
  char *priorCode;

  /* Script header. */
  printf ("<?dilib file=\"%s/bin/%sCurationBiblio.sh\" ?>\n"
          "#!/bin/sh\n"
          "# This shell has been generated by NlmPmcExplorDataShells \n"
          "echo \"---- Executing sh %s/bin/%sCurationBiblio.sh\" \n",
          ExplorAreaDir, ExplorStreamCode,
          ExplorAreaDir, ExplorStreamCode);
  ExplorGenerDeleteHfd(ExplorStreamCode, "Curation", "biblio");

  /* Head of the pipeline: records of the preceding step. */
  priorNode = SxmlPreviousSibling(ExplorStepNode);
  priorCode = SxmlLeafText(priorNode);
  printf (" HfdCat %s/Data/%s/%s/biblio.hfd \\\n"
          "   | SgmlFast -c 1  \\\n",
          ExplorAreaDir, ExplorStreamCode, priorCode);

  /* Stream specific filters are inserted by the Explor library. */
  ExplorCurationRecordOperations(ExplorStreamCode, "Curation");

  /* Tail of the pipeline, then the report printed by the script. */
  printf ("   | TeiPutRefToIdno -t wicri:Area/%s/Curation -S %s -s Curation  \\\n"
          "   | NlmPmcAffiliationSetPays    \\\n"
          "   | HfdBuild -h %s/Data/%s/Curation/biblio\n\n"
          " echo \"==== \" %s, step Curation, biblio done\n"
          " HfdCat %s/Data/%s/Curation/biblio.hfd | wc \n",
          ExplorStreamCode, ExplorStreamCode,
          ExplorAreaDir, ExplorStreamCode,
          ExplorStreamCode,
          ExplorAreaDir, ExplorStreamCode);

  /* Makefile rule. */
  printf ("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n"
          "# This part is generated by NlmMedlinesExplorDataShells /  generNlmPmcShellCurationBiblio() \n"
          "%s/Make/%s/Curation/biblio: \\\n"
          "                %s/Make/%s/%s.step \n"
          "\tsh %s/bin/%sCurationBiblio.sh \n"
          "\ttouch %s/Make/%s/Curation/biblio\n\n",
          ExplorAreaDir,
          ExplorAreaDir, ExplorStreamCode,
          ExplorAreaDir, ExplorStreamCode, priorCode,
          ExplorAreaDir, ExplorStreamCode,
          ExplorAreaDir, ExplorStreamCode);
}

/*
 * Writes on stdout, for the checkpoint step `step` of the stream
 * ExplorStreamCode:
 *   - the shell script <area>/bin/<stream><step>Biblio.sh, which reads
 *     biblio.hfd of the preceding step;
 *   - the Makefile rule appended to <area>/bin/area.mk, depending on the
 *     ".step" target of the preceding step.
 *
 *   step : code of the step being generated ("Checkpoint" for the caller
 *          in main()).  It is used for every path and target, so the
 *          script name, the deleted HFD and the Makefile rule always agree.
 *
 * The preceding step is the previous sibling of the global ExplorStepNode.
 */
void generNlmPmcShellCheckpointBiblio(char *step)
{
  SxmlNode *previousStep;
  char *codePrevious;

  printf("<?dilib file=\"%s/bin/%s%sBiblio.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode, step );
  printf ("#!/bin/sh\n");
  ExplorGenerDeleteHfd(ExplorStreamCode, step, "biblio");
  previousStep=SxmlPreviousSibling(ExplorStepNode);
  codePrevious=SxmlLeafText(previousStep);
  printf (" HfdCat %s/Data/%s/%s/biblio.hfd \\\n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf ("   | NlmCurationCountry   \\\n");
  /* The middle and the end of the pipeline are produced by the Explor library. */
  ExplorExplorationCommonOperations(ExplorStreamCode, step, 1);
  printf ("   | TeiBuildAffiliationTree    \\\n");
  ExplorGenerBuildBiblio(ExplorStreamCode, step, NULL);

  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n", ExplorAreaDir );
  printf("# This part is generated by NlmPmcExplorDataShells /  generNlmPmcShellCheckpointBiblio() \n");
  printf("%s/Make/%s/%s/biblio: \\\n", ExplorAreaDir, ExplorStreamCode, step);
  printf("                %s/Make/%s/%s.step \n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf("\tsh %s/bin/%s%sBiblio.sh \n", ExplorAreaDir, ExplorStreamCode, step );
  printf("\ttouch %s/Make/%s/%s/biblio\n\n", ExplorAreaDir, ExplorStreamCode, step);
}

/*
 * Writes on stdout, for the exploration step `step` of the stream
 * ExplorStreamCode:
 *   - the shell script <area>/bin/<stream><step>Biblio.sh, which reads
 *     biblio.hfd of the preceding step and builds <step>/biblio;
 *   - the Makefile rule appended to <area>/bin/area.mk.
 *
 *   step : code of the step being generated ("Exploration" for the caller
 *          in main()).
 *
 * The preceding step is the previous sibling of the global ExplorStepNode.
 * The Makefile rule depends on the ".step" target of that same step, since
 * its biblio.hfd is the input of the script.
 */
void generNlmPmcShellExplorationBiblio(char *step)
{
  SxmlNode *previousStep;
  char *codePrevious;

  printf("<?dilib file=\"%s/bin/%s%sBiblio.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode, step );
  printf ("#!/bin/sh\n");
  previousStep=SxmlPreviousSibling(ExplorStepNode);
  codePrevious=SxmlLeafText(previousStep);

  printf ("# This file is generated by Dilib / Inist / NlmPmcExplorDataShells \n");
  /* The trace names the script really being run (it used to name the Corpus one). */
  printf ("echo \"---- Executing sh %s/bin/%s%sBiblio.sh\" \n", ExplorAreaDir, ExplorStreamCode, step );

  printf (" HfdCat %s/Data/%s/%s/biblio.hfd \\\n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf ("   | SgmlSelect -g record/TEI/teiHeader/fileDesc/publicationStmt/date@when@# -p @g1 -p @2 \\\n");
  printf ("   | sort -rn    -T $DILIB_WORKSPACE  \\\n");
  printf ("   | SgmlFast -c 1  \\\n");
  printf ("   | NlmCurationCountry   \\\n");
  printf ("   | TeiPutRefToIdno -t wicri:Area/%s/%s  \\\n", ExplorStreamCode,step );
  printf ("   | TeiBuildAffiliationTree    \\\n");
  printf ("   | HfdBuild -h %s/Data/%s/%s/biblio\n\n",ExplorAreaDir, ExplorStreamCode, step);

  printf (" echo \"==== \" %s, step %s, biblio done\n", ExplorStreamCode, step);
  printf (" HfdCat %s/Data/%s/%s/biblio.hfd | wc \n", ExplorAreaDir, ExplorStreamCode, step);

  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n", ExplorAreaDir );
  printf("# This part is generated by NlmPmcExplorDataShells /  generNlmPmcShellExplorationBiblio() \n");
  printf("%s/Make/%s/%s/biblio: \\\n", ExplorAreaDir, ExplorStreamCode, step);
  /* Prerequisite: the step whose biblio.hfd is read above. */
  printf("                %s/Make/%s/%s.step \n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf("\tsh %s/bin/%s%sBiblio.sh \n", ExplorAreaDir, ExplorStreamCode, step );
  printf("\ttouch %s/Make/%s/%s/biblio\n\n", ExplorAreaDir, ExplorStreamCode, step);
}

   
/*
 * Writes on stdout, for the step `step` of the stream ExplorStreamCode:
 *   - the shell script <area>/bin/<stream><step>IndexAutAff.i.sh, which
 *     builds the AutAff.i HFD and the AutAff.i.sort file from biblio.hfd;
 *   - the Makefile rule appended to <area>/bin/area.mk, whose target
 *     AutAff.i.index depends on the biblio target of the same step.
 *
 *   step : code of the step whose biblio.hfd is indexed.
 */
void generNlmPmcIndexAutAff(char *step)
{
  /* Script header. */
  printf ("<?dilib file=\"%s/bin/%s%sIndexAutAff.i.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode, step);
  fputs ("#!/bin/sh\n"
         "#  This shell is generated by NlmMedlineExplorDataShells / generIndexAutAff\n", stdout);
  ExplorGenerDeleteHfd(ExplorStreamCode, step, "AutAff.i");

  /* Opens the subshell; 1st pipeline: titleStmt authors, affiliation fields. */
  printf ("("
          " HfdCat %s/Data/%s/%s/biblio.hfd\\\n"
          "  | SxmlSelect -s record/TEI/teiHeader/fileDesc/titleStmt/author -p @s1 -p @1  \\\n"
          "  | SxmlSelect -s author/name/attribute::uniqKey -p @s1 -g author/name/1 -p @g1   \\\n"
          " -s author/affiliation/nlm:aff author/affiliation/wicri:noCountry -p @s2  -p @2       \n\n",
          ExplorAreaDir, ExplorStreamCode, step);

  /* 2nd pipeline: titleStmt author lines without "</nlm:aff>", marked NONE. */
  printf (" HfdCat %s/Data/%s/%s/biblio.hfd\\\n"
          "  | SxmlSelect -s record/TEI/teiHeader/fileDesc/titleStmt/author -p @s1 -p @1  \\\n"
          "  | grep -v \"</nlm:aff>\"     \\\n"
          "  | SxmlSelect -s author/name/attribute::uniqKey -p @s1 -g author/name/1 -p @g1   \\\n"
          "          -p \"<nlm:aff>NONE</nlm:aff>\"   -p @2       \n\n",
          ExplorAreaDir, ExplorStreamCode, step);

  /* 3rd pipeline: authors of the listBibl entries, marked NONE (BIBLIO). */
  printf (" HfdCat %s/Data/%s/%s/biblio.hfd\\\n"
          "  | SxmlSelect -s record/TEI/back/div1/listBibl/biblStruct/analytic/author -p @s1 -p @1  \\\n"
          "  | SxmlSelect -s author/name/attribute::uniqKey -p @s1 -g author/name/1 -p @g1   \\\n"
          "          -p \"<nlm:aff>NONE (BIBLIO)</nlm:aff>\"   -p @2       \n\n",
          ExplorAreaDir, ExplorStreamCode, step);

  /* Closes the subshell, sorts, groups and stores the index. */
  printf (" ) | sort  -u   -T $DILIB_WORKSPACE    \\\n"
          "  | SxmlGrouping | HfdBuild -h %s/Data/%s/%s/AutAff.i   \n",
          ExplorAreaDir, ExplorStreamCode, step);

  printf (" IndexBuildHid -h %s/Data/%s/%s/AutAff.i -e k   \n", ExplorAreaDir, ExplorStreamCode, step);

  /* Sorted extract, limited to 500 lines by "head". */
  printf (" HfdCat %s/Data/%s/%s/AutAff.i.hfd     \\\n"
          "      |  SgmlSelect -g g/t# -g g/k# -p @g1 -p \"<idx><kw>@g2</kw><f>@g1</f></idx>\"     \\\n"
          "      | sort -rn   -T $DILIB_WORKSPACE    \\\n"
          "      | SgmlFast -c 1     \\\n"
          "      | head -500 > %s/Data/%s/%s/AutAff.i.sort    \n",
          ExplorAreaDir, ExplorStreamCode, step,
          ExplorAreaDir, ExplorStreamCode, step);

  /* Report printed by the script. */
  printf (" echo \"==== \" %s, step %s, index aut-aff done\n"
          " HfdCat %s/Data/%s/%s/AutAff.i.hfd | wc \n",
          ExplorStreamCode, step,
          ExplorAreaDir, ExplorStreamCode, step);

  /* Makefile rule. */
  printf ("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n"
          "# This part is generated by NlmMedulineExplorDataShells / generIndexAutAff \n",
          ExplorAreaDir);
  printf ("%s/Make/%s/%s/AutAff.i.index:  \\\n "
          "                %s/Make/%s/%s/biblio \n",
          ExplorAreaDir, ExplorStreamCode, step,
          ExplorAreaDir, ExplorStreamCode, step);
  printf ("\tsh %s/bin/%s%sIndexAutAff.i.sh \n"
          "\ttouch %s/Make/%s/%s/AutAff.i.index \n\n",
          ExplorAreaDir, ExplorStreamCode, step,
          ExplorAreaDir, ExplorStreamCode, step);
}


int main (int argc, char **argv) {
  int cOption;

  char *pathParamFile;

  pathParamFile=NULL;
  
   while((cOption=getopt(argc,argv,"s:t:"))!=EOF)
     {switch (cOption)
	 {
	 case 's':
	   ExplorStreamCode=optarg;
	   break;
	 case 't':
	   pathParamFile=optarg;
	   break;
	 }
     }
   ExplorParamInit(pathParamFile);
   streamListSteps=SxmlFromString(ExplorGetFromDictK3("Area", ExplorStreamCode, "listSteps"));
   ExplorGenerShellCreateData( ExplorStreamCode, streamListSteps);

   SxmlReset(streamListSteps);
   while ((ExplorStepNode=SxmlNextNode(streamListSteps)))
    {
      char *stepCode;
      stepCode=SxmlLeafText(ExplorStepNode);
      if(strcmp(stepCode, "Corpus")==0)
	{
	  ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
	  generNlmPmcShellCorpusBiblio();
	  continue;
	}
      if(strcmp(stepCode, "Curation")==0)
	{
	  ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
	  generNlmPmcShellCurationBiblio();
	  continue;
	}
      if(strcmp(stepCode, "Checkpoint")==0)
	{
	  ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
	  generNlmPmcShellCheckpointBiblio("Checkpoint");
	  continue;
	}
      if(strcmp(stepCode, "Exploration")==0)
	{
	  ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
	  generNlmPmcShellExplorationBiblio("Exploration");
	  continue;
	}
    }
   ExplorGenerAllShellsIndex(ExplorStreamCode);
   generNlmPmcIndexAutAff("Corpus");
   exit (EXIT_SUCCESS);
}

