Wicri:Dilib source, module Nlm, commande NlmPubMed2Wicri

De Wicri Outils

Cette commande accepte un flot de notices issues de PubMed dans un serveur d'exploration.

Elle génère des couples de pages (français, anglais) pour donner des versions initiales.

Code source

/*   -*- coding: utf-8 -*-  */

/*
           NlmPubMed2Wicri.c

 */

#include <stdio.h>     /* for printf */
#include <stdlib.h>    /* for exit */
#include <string.h>    /* for strchr, strdup */
#include <getopt.h>
#include "SxmlNode.h"
#include "SxPath.h"
#include "StrDict.h"

/*
 * File-level state shared by all functions of this command.
 * Everything below is (re)assigned either once in main() or once per
 * record in initRecord().
 */
int traceLevel;        /* command options: trace level (-t); > 0 prints trace lines in initRecord */
char *wiki;            /* -w option, defaults to "Santé" in main(); printed in the Explor link */
char *area;            /* -a option; when NULL, computeExplorLink() prints nothing */
StrDict *affiliationsDict;       /* affiliation text -> first <affiliation> node carrying it */
StrDict *affiliationsDictById;   /* decimal rank string -> same <affiliation> node */
char  strInt[10];                /* scratch buffer used to format a rank as text */

SxPathResult *pathTeiAnalytic;      /* TEI part: path compiled once in main() */
SxmlNode *TeiNode;                  /* the nodes below are located by initRecord() */
SxmlNode *TeiFileDesc;
SxmlNode *TeiProfileDesc;
SxmlNode *TeiPublicationStmt;
SxmlNode *TeiAnalytic;
SxmlNode *TeiTextClass;

char *RBID;                         /* text of the <idno type="RBID"> found in publicationStmt */

SxmlNode *PubMedRoot;            /* PubMed part: nodes located by initRecord() */
SxmlNode *MedlineCitation;
SxmlNode *PubMedArticle;
SxmlNode *PubMedJournal;
SxmlNode *PubMedJournalIssue;
SxmlNode *PubMedDateCompleted;   /* NOTE(review): never assigned in the code shown here */
SxmlNode *PubMedAuthorList;
SxmlNode *PubMedAbstract;

SxmlNode *PubDate;

char *PMID;                      /* leaf texts extracted by initRecord() */
char *JournalAbbrevTitle;
char *JournalTitle;
char *PubMedArticleTitle;
char *PubDateYear;

char *WicriPageName;           /* Wicri part: page name, set in main() from computePageName() */

void buildAffiliationsDict()
{
    SxmlNode *analyticNode;
    int rank;


    SxmlReset(TeiAnalytic);
    rank=1;
    while((analyticNode=SxmlNextNode(TeiAnalytic)))
      {
         SxmlNode *affiliationNode;
         if (SxmlNodeHasName(analyticNode, "author"))
          {
              affiliationNode=SxmlGetFirstChildByTagName(analyticNode, "affiliation");
              while (affiliationNode)
                 {
                    char * key;
                    SxmlNode *previousAff;
                    previousAff=NULL;
                    key = SxmlLeafText( 
                             SxmlGetFirstChildByTagName(affiliationNode,"nlm:affiliation"));
                    if (rank>1)
                       {
                          previousAff=(SxmlNode *)StrDictSearch(affiliationsDict, key);
                       }
                    if (previousAff)
                       {
                           SxmlSetAttribute(affiliationNode, "rank", 
                              SxmlGetAttribute(previousAff, "rank"));
                       }
                    else
                       {
                           SxmlSetIntAttribute(affiliationNode, "rank", rank);                   
                           StrDictAddNewDatum(affiliationsDict, key, (char *) affiliationNode);
                           sprintf(strInt, "%d", rank);
                           StrDictAddNewDatum(affiliationsDictById, strdup(strInt) , (char *) affiliationNode);
                           rank++;
                       } 
                    affiliationNode=SxmlGetNextSiblingByTagName(affiliationNode, "affiliation");
                 }
             
          }
      }
}

/*
 * Locates, inside one input record, every node and leaf text used by the
 * page generators and stores them in the file-level variables.
 *
 * recInput: the record element; its "TEI" child feeds the Tei* variables
 *           and its "pubmed" child feeds the PubMed* variables.
 *
 * Ends by calling buildAffiliationsDict() so that affiliations are ranked
 * before any page is printed. Trace lines are printed when traceLevel > 0.
 */
void initRecord(SxmlNode *recInput)
{
   if (traceLevel>0) printf("------ initRecord -----\n");

   /* --- TEI side --- */
   TeiNode = SxmlGetFirstChildByTagName(recInput, "TEI");
   TeiFileDesc = SxmlGetFirstChildByTagName(
                    SxmlGetFirstChildByTagName(TeiNode, "teiHeader"), "fileDesc");
   TeiProfileDesc = SxmlGetFirstChildByTagName(
                    SxmlGetFirstChildByTagName(TeiNode, "teiHeader"), "profileDesc");
   TeiTextClass = SxmlGetFirstChildByTagName(TeiProfileDesc, "textClass");
   TeiPublicationStmt = SxmlGetFirstChildByTagName(TeiFileDesc, "publicationStmt");
   RBID = SxmlLeafText(
             SxmlGetFirstChildTagAtt(TeiPublicationStmt, "idno", "type", "RBID"));
   TeiAnalytic = SxPathFirstResultNode(pathTeiAnalytic, TeiNode);

   /* --- PubMed side: structural nodes --- */
   PubMedRoot = SxmlGetFirstChildByTagName(recInput, "pubmed");
   MedlineCitation = SxmlGetFirstChildByTagName(PubMedRoot, "MedlineCitation");
   PubMedArticle = SxmlGetFirstChildByTagName(MedlineCitation, "Article");
   PubMedJournal = SxmlGetFirstChildByTagName(PubMedArticle, "Journal");
   PubMedAbstract = SxmlGetFirstChildByTagName(PubMedArticle, "Abstract");
   PubMedJournalIssue = SxmlGetFirstChildByTagName(PubMedJournal, "JournalIssue");
   PubMedAuthorList = SxmlGetFirstDescendantByTagName(PubMedArticle, "AuthorList");
   PubDate = SxmlGetFirstChildByTagName(PubMedJournalIssue, "PubDate");

   /* --- PubMed side: leaf texts --- */
   PMID = SxmlLeafText(SxmlGetFirstChildByTagName(MedlineCitation, "PMID"));
   JournalAbbrevTitle = SxmlLeafText(
                           SxmlGetFirstChildByTagName(PubMedJournal, "ISOAbbreviation"));
   JournalTitle = SxmlLeafText(SxmlGetFirstChildByTagName(PubMedJournal, "Title"));
   PubMedArticleTitle = SxmlLeafText(
                           SxmlGetFirstChildByTagName(PubMedArticle, "ArticleTitle"));
   PubDateYear = SxmlLeafText(SxmlGetFirstChildByTagName(PubDate, "Year"));

   buildAffiliationsDict();

   if (traceLevel>0) printf("------ initRecord done -----\n");
}

char *computePageName()
{
    static Buffer* bufName=NULL;
    if (!bufName) bufName=NewBuffer();
    BufferStrcpy(bufName, JournalAbbrevTitle);
    BufferStrcat(bufName, " (");
    BufferStrcat(bufName, PubDateYear);
    BufferStrcat(bufName, ") ");
    BufferStrcat(bufName, 
         SxmlLeafText(SxmlGetFirstChildByTagName
                         (SxmlGetFirstChildByTagName(PubMedAuthorList, "Author"), "LastName")));
    return BufferString(bufName);
}

/*
 * Prints the French bibliographic box ("Boîte bibliographique droite") of
 * the current record on standard output:
 *   - the authors, each followed by a "Lien affiliation" template listing
 *     the ranks of its affiliations;
 *   - the list of distinct affiliations, one anchor per rank;
 *   - the journal and the publication year.
 *
 * Relies on the ranks and dictionaries prepared by buildAffiliationsDict().
 */
void computeBoiteBiblioFrance()
{
    SxmlNode *analyticNode;
    int firstAuthor = 1;
    char *rankStr;

    printf("{{Boîte bibliographique droite|\n");
    printf(";Auteurs:");
    SxmlReset(TeiAnalytic);
    while ((analyticNode = SxmlNextNode(TeiAnalytic)))
      {
         SxmlNode *affiliationNode;
         int linkCount = 0;    /* affiliation links already printed for this author */

         if (!SxmlNodeHasName(analyticNode, "author")) continue;

         if (firstAuthor) { firstAuthor = 0; printf("[[A pour premier auteur::"); }
         else             { printf(", [[A pour auteur::"); }
         printf("%s]]", SxmlLeafText(SxmlGetFirstChildByTagName(analyticNode, "name")));

         for (affiliationNode = SxmlGetFirstChildByTagName(analyticNode, "affiliation");
              affiliationNode;
              affiliationNode = SxmlGetNextSiblingByTagName(affiliationNode, "affiliation"))
           {
              char *rank = SxmlGetAttribute(affiliationNode, "rank");

              if (!rank) continue;
              /* The parameter index is written as one character, hence
                 meaningful for at most 9 affiliations per author. */
              if (linkCount == 0) printf ("{{Lien affiliation|l1=%s", rank);
              else printf("|l%c=%s", '1' + linkCount, rank);
              linkCount++;
           }
         if (linkCount > 0) printf ("}}");
      }
    printf("\n");

    printf(";Affiliations:\n");
    /* Fix: rewind the dictionary that is actually iterated below (the
       original rewound affiliationsDict instead), as computeBiblioBox()
       does for the English page. */
    StrDictIteratorReset(affiliationsDictById);
    while ((rankStr = StrDictNext(affiliationsDictById)))
      {
         char *nlmAffiliation = SxmlLeafText(
            SxmlGetFirstChildByTagName((SxmlNode *) StrDictValue(affiliationsDictById),
                 "nlm:affiliation"));
         printf( "* {{Ancre affiliation|l=%s}} %s\n", rankStr, nlmAffiliation);
      }

    printf(";In:");
    printf("[[Est dans la revue::%s (revue)|%s]], ", JournalTitle, JournalTitle);
    printf("([[A pour date d'édition::%s]])", PubDateYear);
    printf("\n");

    printf(";En ligne:\n");

    printf("}}\n");
}

/*
 * Prints the "Explor lien" template pointing back to the exploration
 * server entry of the current record (identified by RBID).
 * Nothing is printed when no area was given on the command line.
 */
void computeExplorLink()
{
   if (area == NULL)
       return;

   fputs(";Sur les serveurs d'explorations:\n", stdout);
   printf("{{Explor lien\n  |wiki=%s\n   |area=%s\n", wiki, area);
   fputs("  |flux= Main\n   |étape= Exploration\n   |type=  RBID\n", stdout);
   printf("  |clé = %s\n", RBID);
   printf("  |texte=%s\n}}\n", PubMedArticleTitle);
}

/*
 * Prints the initial French wiki page of the current record: title
 * template, bibliographic box, placeholders to be translated by hand,
 * links (Explor server, PubMed), one category per French keyword and the
 * interlanguage link to the English page.
 *
 * A keyword that already contains ')' is used as is for the category
 * name; otherwise " (MeSH)" is appended.
 */
void proceedFrenchPage()
{
    SxmlNode *keywordList;
    SxmlNode *keyword;

    /* Header and title */
    printf("<!--                                     %s  \n   Page pour wiki (fr) \n-->", WicriPageName );
    fputs("{{Titre page article|titre={{Rouge|[[A pour titre::Mettre ici une traduction du titre]]}}\n", stdout);
    printf("  |sous-titre=%s}}<!--\n-->", PubMedArticleTitle);

    computeBoiteBiblioFrance();

    /* Body skeleton with placeholders */
    fputs("{{Rouge|mettre ici une introduction}}\n", stdout);
    fputs("__TOC__\n", stdout);
    fputs("{{Clr}}\n", stdout);
    fputs("==Résumé==\n", stdout);
    fputs("{{Rouge|mettre ici une traduction du résumé}}\n", stdout);
    fputs("==Voir aussi==\n", stdout);

    computeExplorLink();

    fputs(";Liens externes:\n", stdout);
    printf("* [https://pubmed.ncbi.nlm.nih.gov/%s/ Lien vers Entrez PubMed]\n", PMID);

    /* Categories from the French keyword list */
    keywordList = SxmlGetFirstChildTagAtt(TeiTextClass, "keywords", "scheme", "KwdFr");
    SxmlReset (keywordList);
    while ((keyword = SxmlNextNode(keywordList)) != NULL)
      {
         char *term = SxmlLeafText(keyword);

         if (strchr(term, ')') == NULL)
             printf ("[[Catégorie:%s (MeSH)]]\n", term);
         else
             printf ("[[Catégorie:%s]]\n", term);
      }

    fputs("__SHOWFACTBOX__\n", stdout);
    printf("[[en:%s]]\n", WicriPageName );
}

/*
 * Prints the English bibliographic box ("Bibliobox right") of the current
 * record on standard output:
 *   - the authors, each followed by a "Link to affiliation" template
 *     listing the ranks of its affiliations;
 *   - the list of distinct affiliations, one anchor per rank;
 *   - the journal and the publication year.
 *
 * Relies on the ranks and dictionaries prepared by buildAffiliationsDict().
 */
void computeBiblioBox()
{
    SxmlNode *entry;
    int isFirst = 1;
    char *rankKey;

    printf("{{Bibliobox right|\n");
    printf(";Authors:");
    SxmlReset(TeiAnalytic);
    while ((entry = SxmlNextNode(TeiAnalytic)))
      {
         SxmlNode *aff;
         int printed = 0;    /* affiliation links already printed for this author */

         if (!SxmlNodeHasName(entry, "author")) continue;

         if (isFirst) { isFirst = 0; printf("[[Has first author::"); }
         else         { printf(", [[Has author::"); }
         printf("%s]]", SxmlLeafText(SxmlGetFirstChildByTagName(entry, "name")));

         for (aff = SxmlGetFirstChildByTagName(entry, "affiliation");
              aff;
              aff = SxmlGetNextSiblingByTagName(aff, "affiliation"))
           {
              char *rank = SxmlGetAttribute(aff, "rank");

              if (!rank) continue;
              /* The parameter index is written as one character, hence
                 meaningful for at most 9 affiliations per author. */
              if (printed == 0) printf ("{{Link to affiliation|l1=%s", rank);
              else printf("|l%c=%s", '1' + printed, rank);
              printed++;
           }
         if (printed > 0) printf ("}}");
      }
    printf("\n");

    printf(";Affiliations:\n");
    StrDictIteratorReset(affiliationsDictById);
    while ((rankKey = StrDictNext(affiliationsDictById)))
      {
         SxmlNode *affNode = (SxmlNode *) StrDictValue(affiliationsDictById);
         char *affText = SxmlLeafText(
                            SxmlGetFirstChildByTagName(affNode, "nlm:affiliation"));

         printf( "* {{Affiliation anchor|l=%s}} %s\n", rankKey, affText);
      }

    printf(";In:");
    printf("[[Is in journal::%s (journal)|%s]], ", JournalTitle, JournalTitle);
    printf("([[Publishing date::%s]])", PubDateYear);
    printf("\n");

    printf(";On line:\n");

    printf("}}\n");
}

/*
 * Prints the "Abstract" section of the English page: one paragraph per
 * child node of PubMedAbstract. When a child carries a "Label" attribute,
 * the label is printed first as a definition-list heading. Paragraphs are
 * separated by an empty line, except after the last one.
 */
void computeAbstract()
{
    SxmlNode *part;

    fputs("==Abstract==\n", stdout);
    SxmlReset(PubMedAbstract);
    while ((part = SxmlNextNode(PubMedAbstract)) != NULL)
      {
         char *label = SxmlGetAttribute(part, "Label");

         if (label != NULL)
             printf(";%s:\n", label);
         printf("%s\n", SxmlLeafText(part));

         /* blank separator only when another node follows */
         if (SxmlNextSibling(part))
             printf("\n");
      }
}

/*
 * Prints the initial English wiki page of the current record: title
 * template, bibliographic box, abstract, PubMed link, one category per
 * English keyword and the interlanguage link to the French page.
 *
 * A keyword that already contains ')' is used as is for the category
 * name; otherwise " (MeSH)" is appended.
 */
void proceedEnglishPage()
{
    SxmlNode *keywordList;
    SxmlNode *keyword;

    /* Header and title */
    printf("<!--                                     %s  \n   Page for wiki (en) \n-->", WicriPageName );
    printf("{{Page title for an article\n |title=[[Has title::%s]]}}<!--\n-->", PubMedArticleTitle);

    computeBiblioBox();
    computeAbstract();

    fputs("==See also==\n", stdout);
    fputs(";External links:\n", stdout);
    printf("* [https://pubmed.ncbi.nlm.nih.gov/%s/ Link toward PubMed]\n", PMID);

    /* Categories from the English keyword list */
    keywordList = SxmlGetFirstChildTagAtt(TeiTextClass, "keywords", "scheme", "KwdEn");
    SxmlReset (keywordList);
    while ((keyword = SxmlNextNode(keywordList)) != NULL)
      {
         char *term = SxmlLeafText(keyword);

         if (strchr(term, ')') == NULL)
             printf ("[[Category:%s (MeSH)]]\n", term);
         else
             printf ("[[Category:%s]]\n", term);
      }

    fputs("__SHOWFACTBOX__\n", stdout);
    printf("[[fr:%s]]\n", WicriPageName );
}


/*
 * Entry point: reads document elements from the input and, for every
 * element named "record", prints a French page followed by an English
 * page on standard output.
 *
 * Options:
 *   -a area    exploration area name; enables the Explor link section
 *   -t level   trace level (0 by default)
 *   -w wiki    wiki name used in the Explor link ("Santé" by default)
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
  SxmlNode *docInput;
  int cOption;

  /* Defaults and one-time initialisations */
  traceLevel=0;
  wiki="Santé";
  area=NULL;
  pathTeiAnalytic=SxPathFirstCompile("teiHeader/fileDesc/sourceDesc/biblStruct/analytic");
  affiliationsDict=NewStrDict();
  affiliationsDictById=NewStrDict();

  /* getopt() is specified to return -1 at the end of the options;
     EOF is only guaranteed to be negative. */
  while((cOption=getopt(argc,argv,"a:t:w:"))!=-1)
    {switch (cOption)
      {
         case 'a':
            area=optarg;
            break;
         case 't':
            traceLevel=atoi(optarg);
            break;
         case 'w':
            wiki=optarg;
            break;
         default:
            /* Unknown option or missing argument: getopt() has already
               reported it; it is ignored here, as before. */
            break;
      }
    }

  while ((docInput=SxmlInputNextDocumentElement()))
    {
      if(SxmlNodeHasName(docInput,"record"))
        {
          initRecord(docInput);
          WicriPageName=computePageName();
          proceedFrenchPage();
          proceedEnglishPage();
        }
    }

  return 0;
}

Mise en œuvre

Cette mise en œuvre suppose de travailler dans un répertoire de travail qui contient un répertoire nommé « testDilib ».

Exemple :

cd monRepertoireDeTest
mkdir testDilib
Importation
WicriGetPage -l wicri-outils.fr -p "Wicri:Dilib source, module Nlm, commande NlmPubMed2Wicri"   \
      | MediaWikiExtractSources -w | HfdStoreFile
Compilation
gcc testDilib/NlmPubMed2Wicri.c $DILIB_CC  -o testDilib/NlmPubMed2Wicri

Tests

Avec le serveur sur la grippe en Belgique

Contexte : voir wicri-sante.fr:Serveur d'exploration sur la grippe en Belgique

EXPLOR_AREA=$WICRI_ROOT/Sante/explor/GrippeBelgique.storage/GrippeBelgiqueV2

echo $EXPLOR_AREA
ls  $EXPLOR_AREA

HfdIndexSelect -h $EXPLOR_AREA/Data/Main/Exploration/RBID.i  \
                -Sk "pubmed:22994451" \
         | HfdSelect -Kh $EXPLOR_AREA/Data/Main/Exploration/biblio.hfd \
         | ./testDilib/NlmPubMed2Wicri -a BelgiqueV2


HfdIndexSelect -h $EXPLOR_AREA/Data/Main/Exploration/RBID.i  \
                -Sk "pubmed:19660245" \
         | HfdSelect -Kh $EXPLOR_AREA/Data/Main/Exploration/biblio.hfd \
         | ./testDilib/NlmPubMed2Wicri -a GrippeBelgiqueV2

HfdIndexSelect -h $EXPLOR_AREA/Data/Main/Exploration/RBID.i  \
                -Sk "pubmed:30802260" \
         | HfdSelect -Kh $EXPLOR_AREA/Data/Main/Exploration/biblio.hfd \
         | ./testDilib/NlmPubMed2Wicri -a GrippeBelgiqueV2

Voir aussi

Cette commande est intégrée à la version V0.6.35.

Voir Dilib, module Nlm, commande NlmPubMed2Wicri