/*   -*- coding: utf-8 -*-  */

/*
           NlmPmc2CorpusTei.c

 */

#include <stdio.h>     /* for printf */
#include <stdlib.h>    /* for exit */
#include <string.h>    /* for strcmp, strdup */
#include <getopt.h>
#include <ctype.h>

#include "TeiHandler.h"
#include "Buffer.h"
#include "StrDict.h"
#include "Utf8Converter.h"
/* #include "Explor.h" */


/* Document element currently being converted; assigned in main's read loop. */
SxmlNode *docInput;
/* "record" element created in main for each article and printed at the end. */
SxmlNode *corpusRecord;
/* Node returned by TeiHandlerNew(); appended under corpusRecord in main. */
SxmlNode *teiRoot;

/* Sections of the input article, located by tag name in main. */
SxmlNode *pmcFront;        /* "front" child of docInput */
SxmlNode *pmcArticleMeta;  /* "article-meta" child of pmcFront */
SxmlNode *pmcJournalMeta;  /* "journal-meta" child of pmcFront */
SxmlNode *pmcBack;         /* "back" child of docInput */

/* Scratch buffers created once in main. */
Buffer *bufName;  /* not used by the active code visible in this file */
Buffer *bufSort;  /* not used by the active code visible in this file */
Buffer *bufUniq;  /* not used by the active code visible in this file */
Buffer *bufCat;   /* used by pmcComputeIdnoPmc to build the url and RBID values */

/*
 * Leaf-checked variant of SxmlLeafText(): yields the text of `leaf` when
 * SxmlIsLeaf() accepts the node, and NULL for every other node.
 */
char *NlmJastLeafText(SxmlNode *leaf)
{
  return SxmlIsLeaf(leaf) ? SxmlLeafText(leaf) : NULL;
}

/*
 * Walk the "article-id" children of pmcArticleMeta and register one idno per
 * recognised "pub-id-type" through TeiAppendIdno():
 *   - "pmid" -> idno "pmid"
 *   - "pmc"  -> idno "pmc", plus a "url" idno (PMC article URL) and an
 *               "RBID" idno of the form "PMC:<id>", both built in bufCat
 *   - "doi"  -> idno "doi"
 * Ids without a "pub-id-type" attribute, or with another type, are skipped.
 * When no "pmid" idno was obtained, a "pmid" idno with value "NONE" is added.
 *
 * Returns the node produced for the last "RBID" idno, or NULL if there was
 * no "pmc" identifier.
 */
SxmlNode *pmcComputeIdnoPmc()
{
  SxmlNode *articleId;
  SxmlNode *pmidIdno;
  SxmlNode *rbidIdno;

  pmidIdno=NULL;
  rbidIdno=NULL;
  for (articleId=SxmlGetFirstChildByTagName(pmcArticleMeta, "article-id");
       articleId;
       articleId=SxmlGetNextSiblingByTagName(articleId,"article-id"))
    {
      char *kind;

      kind=SxmlGetAttribute(articleId, "pub-id-type");
      if (!kind) continue;                         /* to be improved */

      if (strcmp(kind,"pmid")==0)
        {
          pmidIdno=TeiAppendIdno("pmid", SxmlLeafText(articleId));
        }
      else if (strcmp(kind,"pmc")==0)
        {
          char *pmcNumber;

          pmcNumber=SxmlLeafText(articleId);
          TeiAppendIdno(kind, pmcNumber);

          /* Public URL of the article on the PMC site. */
          BufferStrcpy(bufCat,"http://www.ncbi.nlm.nih.gov/pmc/articles/PMC");
          BufferStrcat(bufCat, pmcNumber);
          TeiAppendIdno("url", BufferString(bufCat));

          /* Record identifier: "PMC:" followed by the PMC number. */
          BufferStrcpy(bufCat,"PMC:");
          BufferStrcat(bufCat, pmcNumber);
          rbidIdno=TeiAppendIdno("RBID", BufferString(bufCat));
        }
      else if (strcmp(kind,"doi")==0)
        {
          TeiAppendIdno("doi", SxmlLeafText(articleId));
        }
    }
  if (!pmidIdno) TeiAppendIdno("pmid", "NONE");
  return rbidIdno;
}

/*
 * Copy the article title ("title-group"/"article-title" under
 * pmcArticleMeta) into the TEI tree.
 *
 * A clone of the source node is appended to Tei_titleStmt, renamed "title"
 * and tagged xml:lang="en"; a clone of that clone is appended to
 * Tei_analytic with level="a" and type="main".
 *
 * Returns the title node placed in Tei_titleStmt, or NULL when the article
 * has no title-group or no article-title.
 */
SxmlNode *pmcComputeTitle()
{
  SxmlNode *group;
  SxmlNode *sourceTitle;
  SxmlNode *headerTitle;
  SxmlNode *analyticTitle;

  group=SxmlGetFirstChildByTagName(pmcArticleMeta, "title-group");
  if (!group) return NULL;
  sourceTitle=SxmlGetFirstChildByTagName(group,"article-title");
  if (!sourceTitle) return NULL;

  headerTitle=SxmlClone(sourceTitle);
  SxmlAppendChild(Tei_titleStmt, headerTitle);
  SxmlSetNodeName(headerTitle,strdup("title"));
  SxmlSetAttribute(headerTitle,"xml:lang","en");

  /* Cloned after the rename, so the analytic copy is a "title" too. */
  analyticTitle=SxmlClone(headerTitle);
  SxmlAppendChild(Tei_analytic, analyticTitle);
  SxmlSetAttribute(analyticTitle, "level", "a");
  SxmlSetAttribute(analyticTitle, "type", "main");  /* to be improved */

  return headerTitle;
}

/*
 * Hand the first "abstract" child of pmcArticleMeta to
 * TeiHandlerStoreAbstract() with language "en".
 *
 * Returns the source abstract node, or NULL when the article has none.
 */
SxmlNode *pmcComputeAbstract()
{
  SxmlNode *abstractNode;

  abstractNode=SxmlGetFirstChildByTagName(pmcArticleMeta, "abstract");
  if (abstractNode) TeiHandlerStoreAbstract(abstractNode, "en");
  return abstractNode;
}


/*                                                                                           Affiliations */
/*
 * Flush the text accumulated in bAff into teiNlmAff as a new text child.
 *
 * If the buffered text ends with "; and", ", and" or " and" (tested in that
 * order), that suffix is cut from the text and recorded on teiNlmAff in the
 * "wicri:cut" attribute.  The buffer is reset after the text has been added.
 *
 * Returns teiNlmAff, or NULL (leaving everything untouched) when the buffer
 * is empty.
 */
SxmlNode *appendAffText(SxmlNode *teiNlmAff, Buffer *bAff)
{
  /* Trailing conjunctions with the number of characters to cut for each. */
  static const struct { char *tail; int width; } conjunctions[] = {
    { "; and", 5 },
    { ", and", 5 },
    { " and",  4 }
  };
  char *removed;
  unsigned int i;

  if (!(BufferLen(bAff)>0)) return NULL;

  removed=NULL;
  if (BufferTailCmp(bAff,"and")==0)
    {
      for (i=0; i<sizeof conjunctions/sizeof conjunctions[0]; i++)
        {
          if (BufferTailCmp(bAff,conjunctions[i].tail)!=0) continue;
          BufferTailCut(bAff,conjunctions[i].width);
          removed=conjunctions[i].tail;
          break;
        }
    }

  SxmlAppendChild(teiNlmAff, SxmlTextCreate(BufferString(bAff)));
  if (removed) SxmlSetAttribute(teiNlmAff, "wicri:cut", removed);
  BufferReset(bAff);
  return teiNlmAff;
}

SxmlNode *buildTeiNlmAff(SxmlNode *teiAffiliation, SxmlNode *nlmAff, char *numAff)
{
  int nbChildren;
  SxmlNode *teiNlmAff;
  char *idAtt;
  static Buffer *bAff=NULL;
  SxmlNode *elemAff;
  SxmlNode *firstItemAff;
  char *tagIdent;
  SxmlNode *beginAff;

  if (!bAff) bAff=NewBuffer();
  tagIdent=NULL;
  firstItemAff=SxmlFirstChild(nlmAff);
  if (!firstItemAff) return NULL;
  if ((SxmlIsElement(firstItemAff))
      &&((SxmlElementIsEmpty(firstItemAff)
	  ||(SxmlIsLeaf(firstItemAff)))))
    {
      if(SxmlNodeHasName(firstItemAff,"sup"))tagIdent="sup";
      else
	{
	  if(SxmlNodeHasName(firstItemAff,"label"))tagIdent="label";
	}
    }
  nbChildren=SxmlLength (nlmAff);
  if (!tagIdent)
    {
      if (nbChildren==1)
	{
	  teiNlmAff=SxmlLeafCreate ("nlm:aff", SxmlLeafText(nlmAff)); 
	  SxmlAppendChild(teiAffiliation,  teiNlmAff);
	  if ((idAtt=SxmlGetAttribute(nlmAff, "id"))) SxmlSetAttribute(teiNlmAff,"id", idAtt);
	}
      else
	{
	  teiNlmAff=SxmlElementCreate ("nlm:aff");
	  while ((elemAff=SxmlNextNode(nlmAff)))
	    {
	      if (SxmlIsElement(elemAff))
		{
		  if (SxmlNodeHasName(elemAff, "break"))
		    {
		      BufferStrcat(bAff,", ");
		      continue;
		    }
		  appendAffText(teiNlmAff, bAff);
		  SxmlAppendChild(teiNlmAff, SxmlClone(elemAff));
		}
	      else BufferStrcat(bAff, SxmlNodeValue(elemAff));
	    } /* end while */
	  if ((idAtt=SxmlGetAttribute(nlmAff, "id"))) SxmlSetAttribute(teiNlmAff,"id", idAtt);
	  SxmlAppendChild(teiAffiliation,  teiNlmAff);
	}
      return teiAffiliation;
    }
  SxmlReset(nlmAff);
  beginAff=NULL;
  while ((elemAff=SxmlNextNode(nlmAff)))
    {
      if (SxmlIsElement(elemAff))
	{
	  if ((SxmlNodeHasName(elemAff, tagIdent)))
	    {
	      if(SxmlElementIsEmpty(elemAff))
		{
		  beginAff=elemAff;
		  break;
		}
	      if((numAff)
		 &&(strcmp(SxmlLeafText(elemAff), numAff)==0))
		{
		  beginAff=elemAff;
		  break;
		}
	      else continue;
	    }
	  else continue;
	}
    }
  if (!beginAff)
    {
      SxmlAppendChild(teiAffiliation,  SxmlLeafCreate("nlm:aff", "NONE"));
      return NULL;
    }
  elemAff=beginAff;
  BufferReset(bAff);
  teiNlmAff=SxmlElementCreate("nlm:aff");
  while ((elemAff=SxmlNextSibling(elemAff)))
    {
      if (SxmlIsElement(elemAff))
	{
	  if (SxmlNodeHasName(elemAff, "break"))
	    {
	      BufferStrcat(bAff,", ");
	      continue;
	    }
	  appendAffText(teiNlmAff, bAff);
	  if (SxmlNodeHasName(elemAff, tagIdent))
	    {
	      if ((idAtt=SxmlGetAttribute(nlmAff, "id"))) SxmlSetAttribute(teiNlmAff,"id", idAtt);
	      SxmlAppendChild(teiAffiliation,  teiNlmAff);
	      return teiAffiliation;
	    }
	  SxmlAppendChild(teiNlmAff, SxmlClone(elemAff));
	}
      else
	{
	  BufferStrcat(bAff,SxmlNodeValue(elemAff));
	  continue;
	}
    }
  appendAffText(teiNlmAff, bAff);
  if ((idAtt=SxmlGetAttribute(nlmAff, "id"))) SxmlSetAttribute(teiNlmAff,"id", idAtt);
  SxmlAppendChild(teiAffiliation,  teiNlmAff);
  return teiAffiliation;
}

SxmlNode *pmcComputeContrib()
{
  SxmlNode *pmcContribGroup;
  SxmlNode *pmcAff;
  static StrDict *tableAff=NULL;
  char *idAff;

  if (tableAff)
    {
      StrDictFree(tableAff);
      tableAff=NULL;
    }
  tableAff=NewStrDict();
  pmcContribGroup=SxmlGetFirstChildByTagName(pmcArticleMeta, "contrib-group");
  if ((pmcAff=SxmlGetFirstChildByTagName(pmcArticleMeta, "aff")))
    {
      while((pmcAff)
	    &&(idAff=SxmlGetAttribute(pmcAff,"id")))
	{
	  StrDictAddNewDatum(tableAff,idAff, (char *)pmcAff);
	  pmcAff=SxmlGetNextSiblingByTagName(pmcAff, "aff");
	}
    }
  if ( pmcContribGroup)
    {
      if ((pmcAff=SxmlGetFirstChildByTagName(pmcContribGroup, "aff")))
	{
	  while((pmcAff)
		&&(idAff=SxmlGetAttribute(pmcAff,"id")))
	    {
	      StrDictAddNewDatum(tableAff,idAff, (char *)pmcAff);
	      pmcAff=SxmlGetNextSiblingByTagName(pmcAff, "aff");
	    }
	}
    }

    /*  if ((pmcContribGroup=SxmlGetFirstChildByTagName(pmcArticleMeta, "contrib-group"))) */
  if(pmcContribGroup)
    {
      SxmlNode *contribNode;
      while ((contribNode=SxmlNextNode(pmcContribGroup)))
	{
	  SxmlNode *teiName;
	  SxmlNode *teiAuthor;
	  SxmlNode *pmcName;
	  SxmlNode *pmcSurname;
	  SxmlNode *pmcGiven;
	  char *pmcStrSurname;
	  char *pmcStrGiven;
	  
	  if ((SxmlHasAttribute(contribNode, "contrib-type", "author"))
	      && (pmcName=SxmlGetFirstChildByTagName(contribNode,"name"))
	      && (pmcGiven=SxmlGetFirstChildByTagName(pmcName,"given-names"))
	      && (pmcSurname=SxmlGetFirstChildByTagName(pmcName,"surname"))
	      && (pmcStrSurname=NlmJastLeafText(pmcSurname))
	      && (pmcStrGiven=NlmJastLeafText(pmcGiven))
	      )
	    {
	      SxmlNode *pmcXref;

	      teiAuthor=SxmlElementCreate("author");
	      SxmlAppendChild(Tei_titleStmt,teiAuthor);
	      /*
	      BufferStrcpy(bufName, pmcStrGiven);
	      givenLen=strlen(pmcStrGiven);
	      if ((givenLen>3)
		  && (pmcStrGiven[givenLen-1] !='.')
		  && (isupper(pmcStrGiven[givenLen-1]))
		  && (pmcStrGiven[givenLen-2] ==' ')) BufferStrcat(bufName, ".");
	      BufferStrcat(bufName, " ");
	      BufferStrcat(bufName, pmcStrSurname);
	      SxmlAppendChild(teiAuthor, teiName=SxmlLeafCreate("name", BufferString(bufName)));
	      BufferStrcpy(bufSort, Utf8NameToWikiSort(pmcStrSurname));
	      BufferStrcpy(bufUniq, Utf8NameToWikiSort(pmcStrSurname));
	      BufferStrcat(bufSort, ", ");
	      BufferStrcat(bufUniq, " ");
	      BufferStrcat(bufSort, Utf8NameToWikiSort(pmcStrGiven));
	      BufferCatChar(bufUniq, Utf8NameToWikiSort(pmcStrGiven)[0]);
	      BufferCatChar(bufUniq, '\0');
	      SxmlSetAttribute(teiName, "sortKey", BufferString(bufSort));
	      SxmlSetAttribute(teiName, "uniqKey", BufferString(bufUniq));
	      */
	      teiName=TeiAuthorNameFromFirstLastNamesStr(pmcStrGiven, pmcStrSurname);
	      SxmlAppendChild(teiAuthor, teiName);
	      if ((pmcXref=SxmlGetFirstChildByTagName(contribNode, "xref")))
		{
		  while (pmcXref)
		    {
		      char *id;
		      SxmlNode *newAff;
		      /* SxmlNode *newAff1; */
		      SxmlNode *teiAffiliation;
		      if ((SxmlHasAttribute(pmcXref, "ref-type","aff"))
			  &&(tableAff)
			  &&(id=SxmlGetAttribute(pmcXref, "rid"))
			  &&(newAff=(SxmlNode *)StrDictSearch(tableAff,id))
			  )
			{
			  char *numAff;
			  if (SxmlIsLeaf(pmcXref))numAff=SxmlLeafText(pmcXref);
			  else 
			    {
			      SxmlNode *fc;
			      fc=SxmlFirstChild(pmcXref);
			      if (fc && SxmlIsLeaf(fc))numAff=SxmlLeafText(fc);
			      else numAff=NULL;
			    }
			  /*
			    if (numAff)
			    { */
			  SxmlAppendChild(teiAuthor, teiAffiliation=SxmlElementCreate("affiliation"));
			  buildTeiNlmAff(teiAffiliation,newAff, numAff);
			  /*  } */
			}
		      pmcXref=SxmlGetNextSiblingByTagName(pmcXref, "xref");
		    }
		}
	      SxmlAppendChild(Tei_analytic,SxmlClone(teiAuthor));
	    } /* end author... */
	} /* end contribNode */
    } /* end contribGroup */
  return Tei_titleStmt;
}

/*
 * Record an unknown publication date: a "date" leaf with text "????" and
 * when="????" is appended to Tei_publicationStmt, and a clone of it to
 * Tei_seriesImprint when that node is set.
 *
 * Returns the date node placed in Tei_publicationStmt.
 */
SxmlNode *pmcBadDate()
{
  SxmlNode *unknownDate;

  unknownDate=SxmlLeafCreate("date", "????");
  SxmlAppendChild(Tei_publicationStmt, unknownDate);
  SxmlSetAttribute(unknownDate,"when", "????");
  if (!Tei_seriesImprint) return unknownDate;

  SxmlAppendChild (Tei_seriesImprint, SxmlClone(unknownDate));
  return unknownDate;
}

/*
 * Record a known publication year: a "date" leaf whose text and "when"
 * attribute are both yearStr is appended to Tei_publicationStmt, and a
 * clone of it to Tei_seriesImprint when that node is set.
 *
 * nDate (the source pub-date node) is accepted but not used.
 * Returns the date node placed in Tei_publicationStmt.
 */
SxmlNode *pmcGoodDate(SxmlNode *nDate, char *yearStr)
{
  SxmlNode *knownDate;

  knownDate=SxmlLeafCreate("date", yearStr);
  SxmlAppendChild(Tei_publicationStmt, knownDate);
  SxmlSetAttribute(knownDate,"when",yearStr);
  if (!Tei_seriesImprint) return knownDate;

  SxmlAppendChild (Tei_seriesImprint, SxmlClone(knownDate));
  return knownDate;
}
/*
 * Choose the publication year of the article from the "pub-date" children
 * of pmcArticleMeta and store it through pmcGoodDate()/pmcBadDate().
 *
 * Only pub-type "epub" and "ppub" are considered (the last one of each kind
 * wins).  When both exist, the year that compares lower with strcmp() is
 * kept; ppub is used when the two are equal.  An unknown date is recorded
 * when:
 *   - pmcArticleMeta is unset or has no pub-date;
 *   - any pub-date lacks a "pub-type" attribute or a "year" child;
 *   - any "year" child has no leaf text;
 *   - there is neither an epub nor a ppub date.
 *
 * Returns the date node created by pmcGoodDate() or pmcBadDate().
 *
 * Fix relative to the previous version: the year text is read through
 * NlmJastLeafText() and checked, so a non-leaf "year" can no longer reach
 * strcmp() or pmcGoodDate().  The set-but-unused "otherDate" local is gone.
 */
SxmlNode *pmcComputePubDate()
{
  SxmlNode *pmcPubDate;
  SxmlNode *epub;
  SxmlNode *ppub;
  char *yearEpub;
  char *yearPpub;

  if (!pmcArticleMeta) return pmcBadDate();

  epub=NULL;
  ppub=NULL;
  yearEpub=NULL;
  yearPpub=NULL;
  for (pmcPubDate=SxmlGetFirstChildByTagName(pmcArticleMeta, "pub-date");
       pmcPubDate;
       pmcPubDate=SxmlGetNextSiblingByTagName(pmcPubDate, "pub-date"))
    {
      SxmlNode *year;
      char *pubType;
      char *yearStr;

      if (!(pubType=SxmlGetAttribute(pmcPubDate, "pub-type"))) return pmcBadDate();
      if (!(year=SxmlGetFirstChildByTagName(pmcPubDate, "year"))) return pmcBadDate();
      if (!(yearStr=NlmJastLeafText(year))) return pmcBadDate();

      if (strcmp(pubType,"epub")==0)
        {
          yearEpub=yearStr;
          epub=pmcPubDate;
        }
      else if (strcmp(pubType,"ppub")==0)
        {
          yearPpub=yearStr;
          ppub=pmcPubDate;
        }
      /* any other pub-type is ignored */
    }

  if (!epub && !ppub) return pmcBadDate();
  if (!epub) return pmcGoodDate(ppub, yearPpub);
  if (!ppub) return pmcGoodDate(epub, yearEpub);
  /* Both present: keep the earlier year, preferring ppub on a tie. */
  if (strcmp(yearPpub, yearEpub)>0) return pmcGoodDate(epub, yearEpub);
  return pmcGoodDate(ppub, yearPpub);
}

/*
 * Convert the authors of one citation (pmcCite) into teiBibl.
 *
 * "name" elements are looked up directly under pmcCite or, failing that,
 * under its first "person-group".  If none is found nothing is added.
 * Otherwise an "analytic" element is appended to teiBibl and, for every
 * sibling "name" that has a "surname", an "author" wrapping the node built
 * by TeiAuthorNameFromFirstLast() is added to it (names for which that
 * call returns NULL are skipped).  A missing "given-names" is passed as
 * NULL.
 */
void pmcProceedCitation(SxmlNode *pmcCite, SxmlNode *teiBibl)
{
  SxmlNode *nameNode;
  SxmlNode *analyticNode;

  nameNode=SxmlGetFirstChildByTagName(pmcCite,"name");
  if (!nameNode)
    {
      SxmlNode *personGroup;

      personGroup=SxmlGetFirstChildByTagName(pmcCite,"person-group");
      if (personGroup) nameNode=SxmlGetFirstChildByTagName(personGroup,"name");
    }
  if (!nameNode) return;

  analyticNode=SxmlElementCreate("analytic");
  SxmlAppendChild(teiBibl, analyticNode);

  for (; nameNode; nameNode=SxmlGetNextSiblingByTagName(nameNode, "name"))
    {
      SxmlNode *surnameNode;
      SxmlNode *givenNode;
      SxmlNode *nameTei;
      SxmlNode *authorTei;
      char *givenText;

      surnameNode=SxmlGetFirstChildByTagName(nameNode,"surname");
      if (!surnameNode) continue;

      givenNode=SxmlGetFirstChildByTagName(nameNode,"given-names");
      givenText=givenNode ? SxmlLeafText(givenNode) : NULL;

      nameTei=TeiAuthorNameFromFirstLast(givenText, SxmlLeafText(surnameNode));
      if (!nameTei) continue;

      authorTei=SxmlElementCreate("author");
      SxmlAppendChild(analyticNode, authorTei);
      SxmlAppendChild(authorTei, nameTei);
    }
}

/*
 * Convert the bibliography ("ref-list" under pmcBack) into the TEI back
 * matter.
 *
 * TeiSetBackListBibl() is called once a ref-list is found; then one
 * "biblStruct" is appended to Tei_back_listBibl per "ref" child, and its
 * "mixed-citation" (or, failing that, "element-citation") is passed to
 * pmcProceedCitation().
 *
 * Returns the last biblStruct created, or NULL when pmcBack is unset, has
 * no ref-list, or the ref-list contains no "ref".
 *
 * Fix relative to the previous version: the result is initialised, so a
 * ref-list without any "ref" no longer returns an indeterminate pointer.
 */
SxmlNode *pmcComputeBiblio()
{
  SxmlNode *pmcRefList;
  SxmlNode *pmcRef;
  SxmlNode *teiBibl;

  teiBibl=NULL;
  if (!pmcBack) return NULL;
  pmcRefList=SxmlGetFirstChildByTagName(pmcBack, "ref-list");
  if (!pmcRefList) return NULL;
  TeiSetBackListBibl();

  SxmlReset (pmcRefList);
  while ((pmcRef=SxmlNextNode(pmcRefList)))
    {
      SxmlNode *pmcCite;

      if (!SxmlNodeHasName(pmcRef,"ref")) continue;
      teiBibl=SxmlElementCreate("biblStruct");
      SxmlAppendChild(Tei_back_listBibl, teiBibl);

      /* mixed-citation takes precedence over element-citation. */
      pmcCite=SxmlGetFirstChildByTagName(pmcRef,"mixed-citation");
      if (!pmcCite) pmcCite=SxmlGetFirstChildByTagName(pmcRef,"element-citation");
      if (pmcCite) pmcProceedCitation(pmcCite, teiBibl);
    }
  return teiBibl;
}

int main(int argc, char **argv)
{
  bufName=BufferCreate(10,10);
  bufSort=BufferCreate(10,10);
  bufUniq=BufferCreate(10,10);
  bufCat=BufferCreate(10,10);

  /* ExplorParamInit(pathParamFile); */  

  while ((docInput=SxmlInputNextDocumentElement()))
    {
      if(SxmlNodeHasName(docInput,"article"))
	{
	  SxmlNode *docu;

	  corpusRecord=SxmlElementCreate("record");

	  SxmlAppendChild(corpusRecord, teiRoot=TeiHandlerNew());
	  SxmlSetAttribute(teiRoot, "xmlns:nlm", "http://www.nlm.org");

	  docu=SxmlClone(docInput);
	  SxmlSetNodeName(docu,strdup("pmc")); /* strdup pour SxmlFree() */
	  SxmlAppendChild(corpusRecord,docu);
	  TeiAppendIdno("wicri:source","PMC");

          if ((pmcFront=SxmlGetFirstChildByTagName(docInput, "front"))
              &&(pmcArticleMeta=SxmlGetFirstChildByTagName(pmcFront, "article-meta")))
            {
	      /* SxmlAppendChild(Tei_biblStruct, Tei_analytic=SxmlElementCreate("analytic")); */
	      pmcJournalMeta=SxmlGetFirstChildByTagName(pmcFront, "journal-meta");
	      if (pmcJournalMeta)
		{
		  SxmlNode *jTitle;
		  SxmlNode *issnNode;
		  SxmlAppendChild(Tei_biblStruct, Tei_series=SxmlElementCreate("series"));
		  jTitle=SxmlGetFirstChildByTagName(pmcJournalMeta, "journal-title");
		  if (!jTitle)
		    {
		      SxmlNode *tg;
		      tg=SxmlGetFirstChildByTagName(pmcJournalMeta, "journal-title-group");
		      if (tg) jTitle=SxmlGetFirstChildByTagName(tg, "journal-title");
		    }
		  if (jTitle)
		    {
		      SxmlNode *teiTitle;
		      SxmlAppendChild(Tei_series, teiTitle=SxmlLeafCreate("title", SxmlLeafText(jTitle)));
		      SxmlSetAttribute(teiTitle, "level", "j");
		    }
		  issnNode=SxmlGetFirstChildByTagName(pmcJournalMeta, "issn");
		  while(issnNode)
		    {
		      SxmlNode *idno;
		      SxmlAppendChild(Tei_series, idno=SxmlLeafCreate("idno", SxmlLeafText(issnNode)));
		      if (SxmlHasAttribute(issnNode, "pub-type","ppub")) SxmlSetAttribute(idno, "type", "ISSN");
		      else if  (SxmlHasAttribute(issnNode, "pub-type","epub")) SxmlSetAttribute(idno, "type", "eISSN");
		      issnNode=SxmlGetNextSiblingByTagName(issnNode,"issn");
		    }
		  SxmlAppendChild(Tei_series, Tei_seriesImprint=SxmlElementCreate("imprint"));
		  
		}
	      pmcComputeIdnoPmc();
	      pmcComputeTitle();
	      pmcComputeContrib();
	      pmcComputePubDate();
	      pmcComputeAbstract();
	    }
	  pmcBack=SxmlGetFirstChildByTagName(docInput, "back");
	  pmcComputeBiblio();
	  SxmlPrint(corpusRecord);
	  putchar('\n');
	  SxmlFree(corpusRecord);
	}
    }
   exit(EXIT_SUCCESS);
}
