/*   -*- coding: utf-8 -*-  */
/***********************************************************************
 
   Module   : Nlm
   Commande : NlmMedlineExplorShell
   Fichier  : NlmMedlineExplorShell.c
   Auteur   : DUCLOY
   Date     : 2012
 
************************************************************************/

#include <stdio.h>     /* for printf */
#include <stdlib.h>    /* for exit */
#include <string.h>    /* for strcmp */
#include "Explor.h"

/* getopt() interface, declared by hand in this file; main() uses getopt()
   and optarg to read the -s and -t options. */
extern char *optarg;
extern int   optind;
int getopt();

/* Step node currently being processed: main() assigns it while walking
   streamListSteps, and the Curation / Checkpoint / Exploration generators
   pass it to SxmlPreviousSibling() to find the step that precedes them. */
SxmlNode *ExplorStepNode;
/* Step list of the stream, built in main() from the "listSteps" entry of the
   "Area" dictionary; walked again by generNlmMedlineAllShellsIndex(). */
SxmlNode *streamListSteps;

/*
 * Print one makefile prerequisite ("<ExplorAreaDir>/Import/<file>") for each
 * node of `list`, each one on its own continuation line.
 * The list is rewound first, so it may already have been walked.
 * A NULL list prints nothing.
 */
static void generNlmMedlineImportPrerequisites(SxmlNode *list)
{
  SxmlNode *fileNode;

  if (!list) return;
  SxmlReset (list);
  while ((fileNode=SxmlNextNode(list)))
    {
      printf (" \\\n");
      printf ("           %s/Import/%s", ExplorAreaDir, SxmlLeafText(fileNode));
    }
}

/*
 * Print, on stdout, the "Corpus" step generator output for the current
 * stream (ExplorStreamCode):
 *   - the script <ExplorAreaDir>/bin/<stream>CorpusBiblio.sh, which
 *     concatenates the import files, pipes them through the conversion
 *     commands and builds Data/<stream>/Corpus/biblio;
 *   - a rule appended to <ExplorAreaDir>/bin/area.mk whose prerequisites are
 *     the same import files.
 *
 * The import files come from three entries of the "Area" dictionary:
 *   Corpus/Nlm:PubMedImportFile (or Corpus/Nlm:pubMedImportFile)
 *                                    one file, piped through Nlm2Sxml;
 *   Corpus/importFile                a list of files, each piped through
 *                                    Nlm2Sxml;
 *   Corpus/Nlm:PubMedSxmlImportFile  a list of files emitted with a plain
 *                                    cat (to be deprecated).
 * A missing entry, or an entry that cannot be turned into a list, is skipped.
 */
void generNlmMedlineShellCorpusBiblio()
{
  char *pubMedImportFile;
  char *strListPubMedSxmlImportFile;
  char *strImportFile;
  SxmlNode *listPubMedSxmlImportFile=NULL;  /*  to be deprecated */
  SxmlNode *listImportFile=NULL;
  SxmlNode *fileNode;

  printf("<?dilib file=\"%s/bin/%sCorpusBiblio.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode );
  printf ("#!/bin/sh\n");
  printf ("# This file is generated by Dilib / Nlm / NlmMedlineExplorDataShells \n");
  printf ("echo \"---- Executing sh %s/bin/%sCorpusBiblio.sh\" \n", ExplorAreaDir, ExplorStreamCode );

  ExplorGenerDeleteHfd(ExplorStreamCode, "Corpus", "biblio");
  pubMedImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/Nlm:PubMedImportFile");
  /* accept the key spelled with a lower-case initial as well */
  if(!pubMedImportFile)pubMedImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/Nlm:pubMedImportFile");
  strListPubMedSxmlImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/Nlm:PubMedSxmlImportFile");
  strImportFile=ExplorGetFromDictK3("Area", ExplorStreamCode, "Corpus/importFile");

  /* ---- shell script: every source is emitted inside one "( ... )" group */
  printf (" (\n");
  if(pubMedImportFile)
    {
      printf ("cat $EXPLOR_AREA/Import/%s   \\\n",  pubMedImportFile);
      printf ("   | Nlm2Sxml      \\\n");
      printf ("   | SxmlUnIndent  \n\n");
    }
  if(strImportFile)listImportFile=SxmlFromString(strImportFile);
  if(listImportFile)
    {
      SxmlReset (listImportFile);
      while ((fileNode=SxmlNextNode(listImportFile)))
        {
          printf ("cat $EXPLOR_AREA/Import/%s   \\\n", SxmlLeafText(fileNode));
          printf ("   | Nlm2Sxml      \\\n");
          printf ("   | SxmlUnIndent  \n\n");
        }
    }
  if(strListPubMedSxmlImportFile)listPubMedSxmlImportFile=SxmlFromString(strListPubMedSxmlImportFile);
  if(listPubMedSxmlImportFile)
    {
      /* rewind before the first walk, as is done for every other list */
      SxmlReset (listPubMedSxmlImportFile);
      while ((fileNode=SxmlNextNode(listPubMedSxmlImportFile)))
        {
          printf (" cat %s/Import/%s   \n", ExplorAreaDir, SxmlLeafText(fileNode));
        }
    }
  printf (" )  | HcsSxmlRecordPutKey \\\n");
  printf ("   | NlmPubMed2CorpusTei  -t $EXPLOR_AREA/Input/AreaParam.data.tab -s %s   \\\n", ExplorStreamCode);
  printf ("   | TeiPutRefToIdno -t wicri:Area/%s/Corpus -c PubMed -S %s -s Corpus  \\\n", ExplorStreamCode,  ExplorStreamCode );
  printf ("   | HfdBuild -h $EXPLOR_AREA/Data/%s/Corpus/biblio\n\n", ExplorStreamCode);

  printf (" echo \"==== \" %s, step Corpus, biblio done\n", ExplorStreamCode);
  printf (" HfdCat $EXPLOR_AREA/Data/%s/Corpus/biblio.hfd | wc \n", ExplorStreamCode);

  /* ---- makefile rule: the target depends on every import file */
  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n", ExplorAreaDir );
  printf("# This part is generated by Dilib / Nlm / NlmMedlineExplorDataShells \n");
  printf("%s/Make/%s/Corpus/biblio: ", ExplorAreaDir, ExplorStreamCode);
  if(pubMedImportFile)
    {
      printf (" \\\n");
      printf("                %s/Import/%s", ExplorAreaDir,  pubMedImportFile);
    }
  generNlmMedlineImportPrerequisites(listPubMedSxmlImportFile);
  generNlmMedlineImportPrerequisites(listImportFile);

  printf("\n\tsh %s/bin/%sCorpusBiblio.sh \n", ExplorAreaDir, ExplorStreamCode );
  printf("\ttouch %s/Make/%s/Corpus/biblio\n\n", ExplorAreaDir, ExplorStreamCode);
}

/*
 * Print, on stdout, the "Curation" step generator output for the current
 * stream (ExplorStreamCode):
 *   - the script <ExplorAreaDir>/bin/<stream>CurationBiblio.sh, which reads
 *     biblio.hfd of the step preceding ExplorStepNode, applies the curation
 *     record operations and the Nlm/Tei filters, and builds
 *     Data/<stream>/Curation/biblio;
 *   - a rule appended to <ExplorAreaDir>/bin/area.mk that depends on
 *     Geo2WicriFr.tab and on the ".step" target of the preceding step.
 */
void generNlmMedlineShellCurationBiblio()
{
  SxmlNode *precedingNode;
  char *precedingCode;

  /* script header */
  printf("<?dilib file=\"%s/bin/%sCurationBiblio.sh\" ?>\n"
         "#!/bin/sh\n"
         "# This shell has been generated by NlmMedlineExplorDataShells \n"
         "echo \"---- Executing sh %s/bin/%sCurationBiblio.sh\" \n",
         ExplorAreaDir, ExplorStreamCode,
         ExplorAreaDir, ExplorStreamCode);
  ExplorGenerDeleteHfd(ExplorStreamCode, "Curation", "biblio");

  /* the input of this step is the output of the step just before it */
  precedingNode = SxmlPreviousSibling(ExplorStepNode);
  precedingCode = SxmlLeafText(precedingNode);

  /* head of the pipeline */
  printf(" HfdCat $EXPLOR_AREA/Data/%s/%s/biblio.hfd \\\n"
         "   | SgmlFast -c 1  \\\n",
         ExplorStreamCode, precedingCode);
  ExplorCurationRecordOperations(ExplorStreamCode, "Curation");

  /* tail of the pipeline and completion report */
  printf("   | TeiPutRefToIdno -t wicri:Area/%s/Curation -S %s -s Curation \\\n"
         "   | NlmMedlineAffiliationSetPays    \\\n"
         "   | NlmPubMedMeshFre    \\\n"
         "   | TeiKeywordsFromTable -s MESH -t geographic -m $DILIB/data/Nlm/Geo2WicriFr.tab -S Wicri -T geographic -L fr \\\n"
         "   | HfdBuild -h $EXPLOR_AREA/Data/%s/Curation/biblio\n\n"
         " echo \"==== \" %s, step Curation, biblio done\n"
         " HfdCat $EXPLOR_AREA/Data/%s/Curation/biblio.hfd | wc \n",
         ExplorStreamCode, ExplorStreamCode,
         ExplorStreamCode,
         ExplorStreamCode,
         ExplorStreamCode);

  /* makefile rule */
  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n"
         "# This part is generated by NlmMedlinesExplorDataShells /  generNlmMedlineShellCurationBiblio() \n"
         "%s/Make/%s/Curation/biblio: \\\n"
         "                $(DILIB)/data/Nlm/Geo2WicriFr.tab \\\n"
         "                %s/Make/%s/%s.step \n"
         "\tsh %s/bin/%sCurationBiblio.sh \n"
         "\ttouch %s/Make/%s/Curation/biblio\n\n",
         ExplorAreaDir,
         ExplorAreaDir, ExplorStreamCode,
         ExplorAreaDir, ExplorStreamCode, precedingCode,
         ExplorAreaDir, ExplorStreamCode,
         ExplorAreaDir, ExplorStreamCode);
}

/*
 * Print, on stdout, the "Checkpoint" step generator output for the current
 * stream (ExplorStreamCode):
 *   - the script <ExplorAreaDir>/bin/<stream>CheckpointBiblio.sh, which reads
 *     biblio.hfd of the step preceding ExplorStepNode, runs the country
 *     curation, the common exploration operations and the person curation,
 *     and builds Data/<stream>/Checkpoint/biblio;
 *   - a rule appended to <ExplorAreaDir>/bin/area.mk that depends on the
 *     ".step" target of the preceding step.
 */
void generNlmMedlineShellCheckpointBiblio()
{
  SxmlNode *previousStep;
  char *codePrevious;
  char *step;

  step="Checkpoint";
  printf("<?dilib file=\"%s/bin/%sCheckpointBiblio.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode );
  printf ("#!/bin/sh\n");
  /* the input of this step is the output of the step just before it */
  previousStep=SxmlPreviousSibling(ExplorStepNode);
  codePrevious=SxmlLeafText(previousStep);
  /* provenance line: name this generator (it used to name another one) */
  printf ("# This shell has been generated by NlmMedlineExplorDataShells / generNlmMedlineShellCheckpointBiblio \n");
  printf ("echo \"---- Executing sh %s/bin/%sCheckpointBiblio.sh\" \n", ExplorAreaDir, ExplorStreamCode );
  ExplorGenerDeleteHfd(ExplorStreamCode, "Checkpoint", "biblio");
  printf (" HfdCat $EXPLOR_AREA/Data/%s/%s/biblio.hfd \\\n", ExplorStreamCode, codePrevious);
  printf ("   | NlmCurationCountry   \\\n");
  ExplorExplorationCommonOperations(ExplorStreamCode, step, 1);
  printf ("   | TeiCurationPers -p $DILIB/data/Wicri/pers.dict -o $DILIB/data/Wicri/org.dict  \\\n");
  /* -S takes the stream code and -s the step name, as in the Corpus and
     Curation generators (the two values used to be swapped here) */
  printf ("   | TeiPutRefToIdno -t wicri:Area/%s/Checkpoint -S %s -s Checkpoint \\\n", ExplorStreamCode,  ExplorStreamCode );
  printf ("   | TeiBuildAffiliationTree    \\\n");
  printf ("   | HfdBuild -h $EXPLOR_AREA/Data/%s/Checkpoint/biblio\n\n", ExplorStreamCode);

  printf (" echo \"==== \" %s, step Checkpoint, biblio done\n", ExplorStreamCode);
  printf (" HfdCat %s/Data/%s/Checkpoint/biblio.hfd | wc \n", ExplorAreaDir, ExplorStreamCode);

  /* makefile rule */
  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n", ExplorAreaDir );
  printf("# This part is generated by NlmMedlineExplorDataShells /  generNlmMedlineShellCheckpointBiblio() \n");
  printf("%s/Make/%s/Checkpoint/biblio: \\\n", ExplorAreaDir, ExplorStreamCode);
  printf("                %s/Make/%s/%s.step \n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf("\tsh %s/bin/%sCheckpointBiblio.sh \n", ExplorAreaDir, ExplorStreamCode );
  printf("\ttouch %s/Make/%s/Checkpoint/biblio\n\n", ExplorAreaDir, ExplorStreamCode);
}

/*
 * Print, on stdout, the "Exploration" step generator output for the current
 * stream (ExplorStreamCode):
 *   - the script <ExplorAreaDir>/bin/<stream>ExplorationBiblio.sh, which
 *     reads biblio.hfd of the step preceding ExplorStepNode, runs the country
 *     curation and the common exploration operations, and builds
 *     Data/<stream>/Exploration/biblio;
 *   - a rule appended to <ExplorAreaDir>/bin/area.mk that depends on the
 *     ".step" target of that same preceding step.
 */
void generNlmMedlineShellExplorationBiblio()
{
  SxmlNode *previousStep;
  char *codePrevious;

  printf("<?dilib file=\"%s/bin/%sExplorationBiblio.sh\" ?>\n", ExplorAreaDir, ExplorStreamCode );
  printf ("#!/bin/sh\n");
  printf ("# This shell has been generated by NlmMedlineExplorDataShells \n");
  printf ("echo \"---- Executing sh %s/bin/%sExplorationBiblio.sh\" \n", ExplorAreaDir, ExplorStreamCode );
  /* the input of this step is the output of the step just before it */
  previousStep=SxmlPreviousSibling(ExplorStepNode);
  codePrevious=SxmlLeafText(previousStep);
  ExplorGenerDeleteHfd(ExplorStreamCode, "Exploration", "biblio");
  printf (" HfdCat %s/Data/%s/%s/biblio.hfd \\\n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf ("   | TeiPutRefToIdno -t wicri:Area/%s/Exploration  \\\n", ExplorStreamCode );
  printf ("   | NlmCurationCountry   \\\n");
  ExplorExplorationCommonOperations(ExplorStreamCode, "Exploration", 1);
  /* printf ("   | TeiCurationPers -p $DILIB/data/Wicri/pers.dict -o $DILIB/data/Wicri/org.dict  \\\n"); */
  printf ("   | TeiBuildAffiliationTree    \\\n");
  printf ("   | HfdBuild -h %s/Data/%s/Exploration/biblio\n\n",ExplorAreaDir, ExplorStreamCode);

  printf (" echo \"==== \" %s, step Exploration, biblio done\n", ExplorStreamCode);
  printf (" HfdCat %s/Data/%s/Exploration/biblio.hfd | wc \n", ExplorAreaDir, ExplorStreamCode);

  /* makefile rule */
  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n", ExplorAreaDir );
  /* provenance line: name this generator (it used to name the Curation one) */
  printf("# This part is generated by NlmMedlineExplorDataShells /  generNlmMedlineShellExplorationBiblio() \n");
  printf("%s/Make/%s/Exploration/biblio: \\\n", ExplorAreaDir, ExplorStreamCode);
  /* depend on the step whose biblio.hfd the script actually reads; this was
     hard-coded to "Curation", which is wrong when another step (for example
     Checkpoint) sits between Curation and Exploration */
  printf("                %s/Make/%s/%s.step \n", ExplorAreaDir, ExplorStreamCode, codePrevious);
  printf("\tsh %s/bin/%sExplorationBiblio.sh \n", ExplorAreaDir, ExplorStreamCode );
  printf("\ttouch %s/Make/%s/Exploration/biblio\n\n", ExplorAreaDir, ExplorStreamCode);
}

/*
 * Print, on stdout, the generator output for the "AutAff.i" index of one
 * step of the current stream (ExplorStreamCode):
 *   - the script $EXPLOR_AREA/bin/<stream><step>IndexAutAff.i.sh, which
 *     selects the author elements from <step>/biblio.hfd in two passes (the
 *     second pass filters out lines containing </nlm:affiliation> and emits
 *     a "NONE" affiliation), sorts and groups the result into AutAff.i,
 *     builds its index and writes a 500-line AutAff.i.sort extract;
 *   - a rule appended to <ExplorAreaDir>/bin/area.mk that depends on the
 *     step's biblio target.
 *
 * step: code of the step, used in file names and data paths.
 */
void generNlmMedlineIndexAutAff(char *step)
{
  /* script header */
  printf("<?dilib file=\"%s/bin/%s%sIndexAutAff.i.sh\" ?>\n"
         "#!/bin/sh\n"
         "#  This shell is generated by NlmMedlineExplorDataShells / generIndexAutAff\n",
         ExplorAreaDir, ExplorStreamCode, step);
  ExplorGenerDeleteHfd(ExplorStreamCode, step, "AutAff.i");

  /* first pass, opening the "( ... )" group */
  printf("("
         " HfdCat $EXPLOR_AREA/Data/%s/%s/biblio.hfd\\\n"
         "  | SxmlSelect -s record/TEI/teiHeader/fileDesc/titleStmt/author -p @s1 -p @1  \\\n"
         /*  "  | SgmlSelect -s author/name@uniqKey@# -p @s1 -g author/name# -p @g1   \\\n" */
         "  |  SxmlSelect -s author/name/attribute::uniqKey -p @s1 -g author/name/1 -p @g1  \\\n"
         " -s author/affiliation/nlm:affiliation author/affiliation/wicri:noCountry -p @s2  -p @2       \n\n",
         ExplorStreamCode, step);

  /* second pass */
  printf(" HfdCat $EXPLOR_AREA/Data/%s/%s/biblio.hfd\\\n"
         "  | SxmlSelect -s record/TEI/teiHeader/fileDesc/titleStmt/author -p @s1 -p @1  \\\n"
         "  | grep -v \"</nlm:affiliation>\"     \\\n"
         "  | SxmlSelect -s author/name/attribute::uniqKey -p @s1 -g author/name/1 -p @g1   \\\n"
         "          -p \"<nlm:affiliation>NONE</nlm:affiliation>\"   -p @2       \n\n",
         ExplorStreamCode, step);

  /* close the group, then sort, group and index */
  printf(" ) | sort      \\\n"
         "  | SxmlGrouping | HfdBuild -h $EXPLOR_AREA/Data/%s/%s/AutAff.i   \n"
         " IndexBuildHid -h $EXPLOR_AREA/Data/%s/%s/AutAff.i -e k   \n",
         ExplorStreamCode, step,
         ExplorStreamCode, step);

  /* sorted extract */
  printf(" HfdCat $EXPLOR_AREA/Data/%s/%s/AutAff.i.hfd     \\\n"
         "      |  SxmlSelect -g g/t/1 -g g/k/1 -p @g1 -p \"<idx><kw>@g2</kw><f>@g1</f></idx>\"     \\\n"
         "      | sort -rn    \\\n"
         "      | SgmlFast -c 1     \\\n"
         "      | head -500 > %s/Data/%s/%s/AutAff.i.sort    \n",
         ExplorStreamCode, step,
         ExplorAreaDir, ExplorStreamCode, step);

  /* completion report */
  printf(" echo \"==== \" %s, step %s, index aut-aff done\n"
         " HfdCat %s/Data/%s/%s/AutAff.i.hfd | wc \n",
         ExplorStreamCode, step,
         ExplorAreaDir, ExplorStreamCode, step);

  /* makefile rule */
  printf("<?dilib appendFile=\"%s/bin/area.mk\" ?>\n"
         "# This part is generated by NlmMedlineExplorDataShells / generIndexAutAff \n"
         "$(EXPLOR_AREA)/Make/%s/%s/AutAff.i.index:  \\\n "
         "                $(EXPLOR_AREA)/Make/%s/%s/biblio \n"
         "\tsh $(EXPLOR_AREA)/bin/%s%sIndexAutAff.i.sh \n"
         "\ttouch $(EXPLOR_AREA)/Make/%s/%s/AutAff.i.index \n\n",
         ExplorAreaDir,
         ExplorStreamCode, step,
         ExplorStreamCode, step,
         ExplorStreamCode, step,
         ExplorStreamCode, step);
}

/*
 * Print the index-building shells for the current stream (ExplorStreamCode).
 *
 * ExplorGenerAllShellsIndex() is called first.  Then, for every step of
 * streamListSteps, the step's "listIndexes" entry of the "Area" dictionary
 * is walked; for each index whose "indexType" value starts with a "builtin"
 * node and whose code is "AutAff.i", generNlmMedlineIndexAutAff() is called.
 *
 * A step without an index list is skipped.  So is an index with no
 * "indexType" entry, an unparsable one, or one whose first child is missing
 * or has no text.
 */
void generNlmMedlineAllShellsIndex()
{
  SxmlNode *stepNode;

  ExplorGenerAllShellsIndex(ExplorStreamCode);
  SxmlReset (streamListSteps);
  while ((stepNode=SxmlNextNode(streamListSteps)))
    {
      char     *step;
      SxmlNode *stepListIndexes;
      SxmlNode *indexNode;

      step=SxmlLeafText(stepNode);
      stepListIndexes=SxmlFromString(ExplorGetFromDictK4("Area",ExplorStreamCode, step, "listIndexes"));
      if (!stepListIndexes) continue;

      SxmlReset (stepListIndexes);
      while ((indexNode=SxmlNextNode(stepListIndexes)))
        {
          char     *indexCode;
          char     *indexTypeStr;
          char     *indexKind;
          SxmlNode *indexTypeNode;
          SxmlNode *kindNode;

          indexCode=SxmlLeafText(indexNode);
          if (!indexCode) continue;

          indexTypeStr=ExplorGetFromDictK5 ("Area", ExplorStreamCode, step, indexCode, "indexType");
          if (!indexTypeStr) continue;

          indexTypeNode=SxmlFromString(indexTypeStr);
          if (!indexTypeNode) continue;

          kindNode=SxmlFirstChild(indexTypeNode);
          if (!kindNode) continue;

          indexKind=SxmlLeafText(kindNode);
          if (!indexKind) continue;

          /* only builtin indexes are generated here, and AutAff.i is the
             only one this module knows how to build */
          if (strcmp(indexKind, "builtin")==0 && strcmp(indexCode,"AutAff.i")==0)
            generNlmMedlineIndexAutAff(step);
        }
    }
}

/*
 * Entry point: print, on stdout, every shell script and makefile fragment
 * of one stream.
 *
 * Options:
 *   -s code   stream code, stored in ExplorStreamCode
 *   -t path   parameter file, passed to ExplorParamInit()
 *
 * After ExplorParamInit(), the stream's "listSteps" entry of the "Area"
 * dictionary is parsed into streamListSteps.  ExplorGenerShellCreateData()
 * is called, then each step named Corpus, Curation, Checkpoint or
 * Exploration gets ExplorGenerShellCreateStep() followed by its own
 * generator; steps with any other name are ignored.  The index shells are
 * generated last.
 */
int main (int argc, char **argv) {
  int cOption;
  char *pathParamFile;

  pathParamFile=NULL;

  /* POSIX getopt() signals the end of the options with -1 */
  while ((cOption=getopt(argc,argv,"s:t:"))!=-1)
    {
      switch (cOption)
        {
        case 's':
          ExplorStreamCode=optarg;
          break;
        case 't':
          pathParamFile=optarg;
          break;
        default:
          /* unrecognised option or missing argument: getopt() has already
             printed a diagnostic; processing goes on as it always did */
          break;
        }
    }
  ExplorParamInit(pathParamFile);
  streamListSteps=SxmlFromString(ExplorGetFromDictK3("Area", ExplorStreamCode, "listSteps"));
  ExplorGenerShellCreateData( ExplorStreamCode, streamListSteps);

  /* ExplorStepNode is a global: the generators read it to find the step
     that precedes the one being generated */
  SxmlReset(streamListSteps);
  while ((ExplorStepNode=SxmlNextNode(streamListSteps)))
    {
      char *stepCode;

      stepCode=SxmlLeafText(ExplorStepNode);
      if (strcmp(stepCode, "Corpus")==0)
        {
          ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
          generNlmMedlineShellCorpusBiblio();
        }
      else if (strcmp(stepCode, "Curation")==0)
        {
          ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
          generNlmMedlineShellCurationBiblio();
        }
      else if (strcmp(stepCode, "Checkpoint")==0)
        {
          ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
          generNlmMedlineShellCheckpointBiblio();
        }
      else if (strcmp(stepCode, "Exploration")==0)
        {
          ExplorGenerShellCreateStep(ExplorStreamCode , stepCode);
          generNlmMedlineShellExplorationBiblio();
        }
    }
  generNlmMedlineAllShellsIndex();
  exit (EXIT_SUCCESS);
}
