Wicri:Dilib source, module Nlm, commande NlmPubMed2Wicri
De Wicri Outils
Cette commande accepte un flot de notices issues de PubMed dans un serveur d'exploration.
Elle génère des couples de pages (français, anglais) pour donner des versions initiales.
Sommaire
Code source
/* -*- coding: utf-8 -*- */
/*
NlmPubMed2Wicri.c
*/
#include <stdio.h> /* for printf */
#include <stdlib.h> /* for exit */
#include <string.h> /* for strchr, strdup */
#include <getopt.h>
#include "SxmlNode.h"
#include "SxPath.h"
#include "StrDict.h"
/*
 * File-scope state shared by all the functions of this command.
 * The option-related variables are set in main(); the node and text
 * variables are refreshed by initRecord() for every input record.
 */
int traceLevel; /* trace verbosity (option -t); values > 0 make initRecord print markers */
char *wiki; /* wiki name printed in the "Explor lien" template (option -w) */
char *area; /* exploration area (option -a); computeExplorLink prints nothing while NULL */
StrDict *affiliationsDict; /* affiliation node stored under its nlm:affiliation text */
StrDict *affiliationsDictById; /* affiliation node stored under its rank as a decimal string */
char strInt[10]; /* scratch buffer where a rank is formatted before being duplicated as a key */
/* ---- TEI part of the record ---- */
SxPathResult *pathTeiAnalytic; /* compiled path to the analytic element, built once in main */
SxmlNode *TeiNode; /* "TEI" child of the record */
SxmlNode *TeiFileDesc; /* teiHeader/fileDesc */
SxmlNode *TeiProfileDesc; /* teiHeader/profileDesc */
SxmlNode *TeiPublicationStmt; /* publicationStmt child of TeiFileDesc */
SxmlNode *TeiAnalytic; /* first result of pathTeiAnalytic applied to TeiNode */
SxmlNode *TeiTextClass; /* textClass child of TeiProfileDesc (holds the keywords lists) */
char *RBID; /* text of the idno whose type attribute is "RBID" */
/* ---- PubMed part of the record ---- */
SxmlNode *PubMedRoot; /* "pubmed" child of the record */
SxmlNode *MedlineCitation; /* MedlineCitation child of PubMedRoot */
SxmlNode *PubMedArticle; /* Article child of MedlineCitation */
SxmlNode *PubMedJournal; /* Journal child of PubMedArticle */
SxmlNode *PubMedJournalIssue; /* JournalIssue child of PubMedJournal */
SxmlNode *PubMedDateCompleted; /* declared here but not assigned in the visible code */
SxmlNode *PubMedAuthorList; /* first AuthorList descendant of PubMedArticle */
SxmlNode *PubMedAbstract; /* Abstract child of PubMedArticle */
SxmlNode *PubDate; /* PubDate child of PubMedJournalIssue */
char *PMID; /* text of the PMID child of MedlineCitation */
char *JournalAbbrevTitle; /* text of Journal/ISOAbbreviation */
char *JournalTitle; /* text of Journal/Title */
char *PubMedArticleTitle; /* text of Article/ArticleTitle */
char *PubDateYear; /* text of PubDate/Year */
/* ---- Wicri part ---- */
char *WicriPageName; /* page name returned by computePageName, set in main for each record */
/*
 * buildAffiliationsDict
 *
 * Iterates over the nodes delivered by TeiAnalytic and, for every node
 * named "author", gives each of its "affiliation" children a "rank"
 * attribute:
 *   - an affiliation whose nlm:affiliation text is already registered in
 *     affiliationsDict receives the rank of the registered node;
 *   - otherwise it receives the next free rank (starting at 1) and is
 *     registered in affiliationsDict (key: text) and in
 *     affiliationsDictById (key: rank written in decimal).
 *
 * Reads the globals TeiAnalytic, affiliationsDict, affiliationsDictById
 * and uses strInt as scratch space.  Terminates the program if the key
 * for affiliationsDictById cannot be allocated.
 *
 * NOTE(review): the text key comes straight from SxmlLeafText(); whether
 * it can be NULL for an affiliation without nlm:affiliation child, and
 * how StrDict treats such a key, cannot be told from this file - confirm.
 */
void buildAffiliationsDict()
{
    SxmlNode *analyticNode;
    int rank = 1;

    SxmlReset(TeiAnalytic);
    while ((analyticNode = SxmlNextNode(TeiAnalytic)))
    {
        SxmlNode *affiliationNode;

        if (!SxmlNodeHasName(analyticNode, "author"))
            continue;

        for (affiliationNode = SxmlGetFirstChildByTagName(analyticNode, "affiliation");
             affiliationNode;
             affiliationNode = SxmlGetNextSiblingByTagName(affiliationNode, "affiliation"))
        {
            SxmlNode *previousAff = NULL;
            char *idKey;
            char *key = SxmlLeafText(
                SxmlGetFirstChildByTagName(affiliationNode, "nlm:affiliation"));

            /* Nothing has been registered yet while rank is still 1,
               so the lookup is skipped for the very first affiliation. */
            if (rank > 1)
                previousAff = (SxmlNode *)StrDictSearch(affiliationsDict, key);

            if (previousAff)
            {
                /* Same affiliation text as an earlier one: share its rank. */
                SxmlSetAttribute(affiliationNode, "rank",
                                 SxmlGetAttribute(previousAff, "rank"));
                continue;
            }

            SxmlSetIntAttribute(affiliationNode, "rank", rank);
            StrDictAddNewDatum(affiliationsDict, key, (char *)affiliationNode);

            /* Bounded formatting: strInt is a fixed-size buffer. */
            snprintf(strInt, sizeof strInt, "%d", rank);
            /* The dictionary keeps the key pointer, hence a private copy. */
            idKey = strdup(strInt);
            if (idKey == NULL)
            {
                fprintf(stderr, "NlmPubMed2Wicri: out of memory while indexing affiliations\n");
                exit(EXIT_FAILURE);
            }
            StrDictAddNewDatum(affiliationsDictById, idKey, (char *)affiliationNode);
            rank++;
        }
    }
}
/*
 * initRecord
 *
 * Caches, in the file-scope variables, the nodes and leaf texts of one
 * input record that the page generators use afterwards, then numbers the
 * affiliations through buildAffiliationsDict().
 *
 * recInput: the record element; its "TEI" and "pubmed" children are read.
 *
 * When traceLevel is greater than 0 a marker line is printed on entry
 * and on exit.
 */
void initRecord(SxmlNode *recInput)
{
    SxmlNode *teiHeader;

    if (traceLevel > 0)
        printf("------ initRecord -----\n");

    /* --- TEI side --- */
    TeiNode = SxmlGetFirstChildByTagName(recInput, "TEI");
    teiHeader = SxmlGetFirstChildByTagName(TeiNode, "teiHeader");

    TeiFileDesc = SxmlGetFirstChildByTagName(teiHeader, "fileDesc");
    TeiPublicationStmt = SxmlGetFirstChildByTagName(TeiFileDesc, "publicationStmt");
    RBID = SxmlLeafText(
        SxmlGetFirstChildTagAtt(TeiPublicationStmt, "idno", "type", "RBID"));

    TeiProfileDesc = SxmlGetFirstChildByTagName(teiHeader, "profileDesc");
    TeiTextClass = SxmlGetFirstChildByTagName(TeiProfileDesc, "textClass");

    TeiAnalytic = SxPathFirstResultNode(pathTeiAnalytic, TeiNode);

    /* --- PubMed side: nodes --- */
    PubMedRoot = SxmlGetFirstChildByTagName(recInput, "pubmed");
    MedlineCitation = SxmlGetFirstChildByTagName(PubMedRoot, "MedlineCitation");
    PubMedArticle = SxmlGetFirstChildByTagName(MedlineCitation, "Article");
    PubMedJournal = SxmlGetFirstChildByTagName(PubMedArticle, "Journal");
    PubMedJournalIssue = SxmlGetFirstChildByTagName(PubMedJournal, "JournalIssue");
    PubDate = SxmlGetFirstChildByTagName(PubMedJournalIssue, "PubDate");
    PubMedAbstract = SxmlGetFirstChildByTagName(PubMedArticle, "Abstract");
    PubMedAuthorList = SxmlGetFirstDescendantByTagName(PubMedArticle, "AuthorList");

    /* --- PubMed side: leaf texts --- */
    PMID = SxmlLeafText(SxmlGetFirstChildByTagName(MedlineCitation, "PMID"));
    JournalAbbrevTitle = SxmlLeafText(
        SxmlGetFirstChildByTagName(PubMedJournal, "ISOAbbreviation"));
    JournalTitle = SxmlLeafText(SxmlGetFirstChildByTagName(PubMedJournal, "Title"));
    PubMedArticleTitle = SxmlLeafText(
        SxmlGetFirstChildByTagName(PubMedArticle, "ArticleTitle"));
    PubDateYear = SxmlLeafText(SxmlGetFirstChildByTagName(PubDate, "Year"));

    buildAffiliationsDict();

    if (traceLevel > 0)
        printf("------ initRecord done -----\n");
}
char *computePageName()
{
static Buffer* bufName=NULL;
if (!bufName) bufName=NewBuffer();
BufferStrcpy(bufName, JournalAbbrevTitle);
BufferStrcat(bufName, " (");
BufferStrcat(bufName, PubDateYear);
BufferStrcat(bufName, ") ");
BufferStrcat(bufName,
SxmlLeafText(SxmlGetFirstChildByTagName
(SxmlGetFirstChildByTagName(PubMedAuthorList, "Author"), "LastName")));
return BufferString(bufName);
}
/*
 * computeBoiteBiblioFrance
 *
 * Prints on stdout the French bibliographic box
 * ({{Boîte bibliographique droite|...}}) of the current record:
 *   - the authors found while iterating TeiAnalytic, each followed by a
 *     {{Lien affiliation|l1=..|l2=..}} call listing the ranks of its
 *     affiliations (only affiliations carrying a "rank" attribute);
 *   - the list of affiliations, read from affiliationsDictById;
 *   - the journal title and the publication year.
 *
 * The position index of the template parameters (l1, l2, ...) is an
 * integer, so an author with ten or more affiliations yields l10, l11...
 */
void computeBoiteBiblioFrance()
{
    SxmlNode *analyticNode;
    int firstAuthor;
    char *nlmAffiliation;
    char *rankStr;

    printf("{{Boîte bibliographique droite|\n");
    printf(";Auteurs:");

    SxmlReset(TeiAnalytic);
    firstAuthor = 1;
    while ((analyticNode = SxmlNextNode(TeiAnalytic)))
    {
        SxmlNode *affiliationNode;
        int linkCount = 0; /* number of affiliation links printed for this author */

        if (!SxmlNodeHasName(analyticNode, "author"))
            continue;

        if (firstAuthor == 1)
        {
            firstAuthor = 0;
            printf("[[A pour premier auteur::");
        }
        else
        {
            printf(", [[A pour auteur::");
        }
        printf("%s]]", SxmlLeafText(SxmlGetFirstChildByTagName(analyticNode, "name")));

        for (affiliationNode = SxmlGetFirstChildByTagName(analyticNode, "affiliation");
             affiliationNode;
             affiliationNode = SxmlGetNextSiblingByTagName(affiliationNode, "affiliation"))
        {
            char *rank = SxmlGetAttribute(affiliationNode, "rank");

            if (!rank)
                continue;
            linkCount++;
            if (linkCount == 1)
                printf("{{Lien affiliation|l1=%s", rank); /* opens the template */
            else
                printf("|l%d=%s", linkCount, rank);
        }
        if (linkCount > 0)
            printf("}}"); /* closes the template opened above */
    }
    printf("\n");

    printf(";Affiliations:\n");
    /* The loop below walks affiliationsDictById, so that is the
       dictionary whose iterator has to be rewound. */
    StrDictIteratorReset(affiliationsDictById);
    while ((rankStr = StrDictNext(affiliationsDictById)))
    {
        nlmAffiliation = SxmlLeafText(
            SxmlGetFirstChildByTagName((SxmlNode *)StrDictValue(affiliationsDictById),
                                       "nlm:affiliation"));
        printf("* {{Ancre affiliation|l=%s}} %s\n", rankStr, nlmAffiliation);
    }

    printf(";In:");
    printf("[[Est dans la revue::%s (revue)|%s]], ", JournalTitle, JournalTitle);
    printf("([[A pour date d'édition::%s]])", PubDateYear);
    printf("\n");
    printf(";En ligne:\n");
    printf("}}\n");
}
/*
 * computeExplorLink
 *
 * Prints the "Explor lien" template that points back to the exploration
 * server, filled with the globals wiki, area, RBID and
 * PubMedArticleTitle.  Prints nothing when no area was given.
 */
void computeExplorLink()
{
    if (area == NULL)
        return;

    fputs(";Sur les serveurs d'explorations:\n", stdout);
    printf("{{Explor lien\n |wiki=%s\n |area=%s\n", wiki, area);
    fputs(" |flux= Main\n |étape= Exploration\n |type= RBID\n", stdout);
    printf(" |clé = %s\n", RBID);
    printf(" |texte=%s\n}}\n", PubMedArticleTitle);
}
/*
 * proceedFrenchPage
 *
 * Prints on stdout the initial French wiki page of the current record:
 * a comment carrying WicriPageName, the title template (with a
 * placeholder for the translated title), the bibliographic box,
 * placeholders for the introduction and the abstract translation, the
 * "Voir aussi" section (exploration link and PubMed link), one category
 * per term of the keywords list whose scheme is "KwdFr", and the
 * interlanguage link to the English page.
 *
 * A term that already contains ')' is used as the category name as is;
 * any other term gets the " (MeSH)" suffix.
 *
 * NOTE(review): the keywords node is handed to SxmlReset() without a
 * NULL check; whether a record may lack a "KwdFr" list is not visible
 * here - confirm.
 */
void proceedFrenchPage()
{
    SxmlNode *keywords;
    SxmlNode *termNode;

    /* Page header */
    printf("<!-- %s \n Page pour wiki (fr) \n-->", WicriPageName);
    fputs("{{Titre page article|titre={{Rouge|[[A pour titre::Mettre ici une traduction du titre]]}}\n",
          stdout);
    printf(" |sous-titre=%s}}<!--\n-->", PubMedArticleTitle);

    computeBoiteBiblioFrance();

    /* Body placeholders */
    fputs("{{Rouge|mettre ici une introduction}}\n", stdout);
    fputs("__TOC__\n", stdout);
    fputs("{{Clr}}\n", stdout);
    fputs("==Résumé==\n", stdout);
    fputs("{{Rouge|mettre ici une traduction du résumé}}\n", stdout);

    /* Links */
    fputs("==Voir aussi==\n", stdout);
    computeExplorLink();
    fputs(";Liens externes:\n", stdout);
    printf("* [https://pubmed.ncbi.nlm.nih.gov/%s/ Lien vers Entrez PubMed]\n", PMID);

    /* Categories */
    keywords = SxmlGetFirstChildTagAtt(TeiTextClass, "keywords", "scheme", "KwdFr");
    for (SxmlReset(keywords); (termNode = SxmlNextNode(keywords)) != NULL; )
    {
        char *label = SxmlLeafText(termNode);

        if (strchr(label, ')') == NULL)
            printf("[[Catégorie:%s (MeSH)]]\n", label);
        else
            printf("[[Catégorie:%s]]\n", label);
    }

    fputs("__SHOWFACTBOX__\n", stdout);
    printf("[[en:%s]]\n", WicriPageName);
}
/*
 * computeBiblioBox
 *
 * Prints on stdout the English bibliographic box
 * ({{Bibliobox right|...}}) of the current record:
 *   - the authors found while iterating TeiAnalytic, each followed by a
 *     {{Link to affiliation|l1=..|l2=..}} call listing the ranks of its
 *     affiliations (only affiliations carrying a "rank" attribute);
 *   - the list of affiliations, read from affiliationsDictById;
 *   - the journal title and the publication year.
 *
 * The position index of the template parameters (l1, l2, ...) is an
 * integer, so an author with ten or more affiliations yields l10, l11...
 */
void computeBiblioBox()
{
    SxmlNode *analyticNode;
    int firstAuthor;
    char *nlmAffiliation;
    char *rankStr;

    printf("{{Bibliobox right|\n");
    printf(";Authors:");

    SxmlReset(TeiAnalytic);
    firstAuthor = 1;
    while ((analyticNode = SxmlNextNode(TeiAnalytic)))
    {
        SxmlNode *affiliationNode;
        int linkCount = 0; /* number of affiliation links printed for this author */

        if (!SxmlNodeHasName(analyticNode, "author"))
            continue;

        if (firstAuthor == 1)
        {
            firstAuthor = 0;
            printf("[[Has first author::");
        }
        else
        {
            printf(", [[Has author::");
        }
        printf("%s]]", SxmlLeafText(SxmlGetFirstChildByTagName(analyticNode, "name")));

        for (affiliationNode = SxmlGetFirstChildByTagName(analyticNode, "affiliation");
             affiliationNode;
             affiliationNode = SxmlGetNextSiblingByTagName(affiliationNode, "affiliation"))
        {
            char *rank = SxmlGetAttribute(affiliationNode, "rank");

            if (!rank)
                continue;
            linkCount++;
            if (linkCount == 1)
                printf("{{Link to affiliation|l1=%s", rank); /* opens the template */
            else
                printf("|l%d=%s", linkCount, rank);
        }
        if (linkCount > 0)
            printf("}}"); /* closes the template opened above */
    }
    printf("\n");

    printf(";Affiliations:\n");
    StrDictIteratorReset(affiliationsDictById);
    while ((rankStr = StrDictNext(affiliationsDictById)))
    {
        nlmAffiliation = SxmlLeafText(
            SxmlGetFirstChildByTagName((SxmlNode *)StrDictValue(affiliationsDictById),
                                       "nlm:affiliation"));
        printf("* {{Affiliation anchor|l=%s}} %s\n", rankStr, nlmAffiliation);
    }

    printf(";In:");
    printf("[[Is in journal::%s (journal)|%s]], ", JournalTitle, JournalTitle);
    printf("([[Publishing date::%s]])", PubDateYear);
    printf("\n");
    printf(";On line:\n");
    printf("}}\n");
}
/*
 * computeAbstract
 *
 * Prints the "==Abstract==" section: every node obtained by iterating
 * PubMedAbstract is written as one paragraph, preceded by a ";Label:"
 * line when the node carries a "Label" attribute.  An empty line
 * separates a paragraph from the next one when the node has a next
 * sibling.
 *
 * NOTE(review): PubMedAbstract is passed to SxmlReset() unchecked;
 * whether a record may come without Abstract is not visible here -
 * confirm.
 */
void computeAbstract()
{
    SxmlNode *section;

    fputs("==Abstract==\n", stdout);

    for (SxmlReset(PubMedAbstract); (section = SxmlNextNode(PubMedAbstract)) != NULL; )
    {
        char *heading = SxmlGetAttribute(section, "Label");

        if (heading != NULL)
            printf(";%s:\n", heading);

        printf("%s\n", SxmlLeafText(section));

        if (SxmlNextSibling(section))
            fputs("\n", stdout);
    }
}
/*
 * proceedEnglishPage
 *
 * Prints on stdout the initial English wiki page of the current record:
 * a comment carrying WicriPageName, the title template, the
 * bibliographic box, the abstract, the "See also" section with the
 * PubMed link, one category per term of the keywords list whose scheme
 * is "KwdEn", and the interlanguage link to the French page.
 *
 * A term that already contains ')' is used as the category name as is;
 * any other term gets the " (MeSH)" suffix.
 *
 * NOTE(review): the keywords node is handed to SxmlReset() without a
 * NULL check; whether a record may lack a "KwdEn" list is not visible
 * here - confirm.
 */
void proceedEnglishPage()
{
    SxmlNode *keywords;
    SxmlNode *termNode;

    /* Page header */
    printf("<!-- %s \n Page for wiki (en) \n-->", WicriPageName);
    printf("{{Page title for an article\n |title=[[Has title::%s]]}}<!--\n-->",
           PubMedArticleTitle);

    computeBiblioBox();
    computeAbstract();

    /* Links */
    fputs("==See also==\n", stdout);
    fputs(";External links:\n", stdout);
    printf("* [https://pubmed.ncbi.nlm.nih.gov/%s/ Link toward PubMed]\n", PMID);

    /* Categories */
    keywords = SxmlGetFirstChildTagAtt(TeiTextClass, "keywords", "scheme", "KwdEn");
    for (SxmlReset(keywords); (termNode = SxmlNextNode(keywords)) != NULL; )
    {
        char *label = SxmlLeafText(termNode);

        if (strchr(label, ')') == NULL)
            printf("[[Category:%s (MeSH)]]\n", label);
        else
            printf("[[Category:%s]]\n", label);
    }

    fputs("__SHOWFACTBOX__\n", stdout);
    printf("[[fr:%s]]\n", WicriPageName);
}
/*
 * main
 *
 * Options:
 *   -a area   exploration area name (no exploration link is printed
 *             when the option is absent)
 *   -t level  trace level (parsed with atoi)
 *   -w wiki   wiki name (default "Santé")
 *
 * Reads document elements with SxmlInputNextDocumentElement() and, for
 * each element named "record", prints the French page followed by the
 * English page.  Returns 0.
 */
int main(int argc, char **argv)
{
    SxmlNode *docInput;
    int cOption;

    traceLevel = 0;
    wiki = "Santé";
    area = NULL;
    pathTeiAnalytic = SxPathFirstCompile("teiHeader/fileDesc/sourceDesc/biblStruct/analytic");

    /* getopt() signals the end of the options with -1. */
    while ((cOption = getopt(argc, argv, "a:t:w:")) != -1)
    {
        switch (cOption)
        {
        case 'a':
            area = optarg;
            break;
        case 't':
            traceLevel = atoi(optarg);
            break;
        case 'w':
            wiki = optarg;
            break;
        default:
            /* Unknown option or missing argument: getopt() has already
               printed a diagnostic; processing goes on as before. */
            break;
        }
    }

    while ((docInput = SxmlInputNextDocumentElement()))
    {
        if (!SxmlNodeHasName(docInput, "record"))
            continue;

        /* buildAffiliationsDict() only ever adds entries, so each record
           gets its own pair of dictionaries; otherwise affiliations of
           earlier records would be found again (and listed) for later
           ones.
           NOTE(review): the previous pair is simply abandoned because no
           StrDict release function is visible in this file - free it
           here once the proper call is confirmed. */
        affiliationsDict = NewStrDict();
        affiliationsDictById = NewStrDict();

        initRecord(docInput);
        WicriPageName = computePageName();
        proceedFrenchPage();
        proceedEnglishPage();
    }

    return 0;
}
Mise en œuvre
Cette mise en œuvre suppose de travailler dans un répertoire de travail qui contient un répertoire nommé « testDilib ».
Exemple :
cd monRepertoireDeTest
mkdir testDilib
- Importation
WicriGetPage -l wicri-outils.fr -p "Wicri:Dilib source, module Nlm, commande NlmPubMed2Wicri" \
| MediaWikiExtractSources -w | HfdStoreFile
- Compilation
gcc testDilib/NlmPubMed2Wicri.c $DILIB_CC -o testDilib/NlmPubMed2Wicri
Tests
Avec le serveur sur la grippe en Belgique
Contexte : voir wicri-sante.fr:Serveur d'exploration sur la grippe en Belgique
EXPLOR_AREA=$WICRI_ROOT/Sante/explor/GrippeBelgique.storage/GrippeBelgiqueV2
echo $EXPLOR_AREA
ls $EXPLOR_AREA
HfdIndexSelect -h $EXPLOR_AREA/Data/Main/Exploration/RBID.i \
-Sk "pubmed:22994451" \
| HfdSelect -Kh $EXPLOR_AREA/Data/Main/Exploration/biblio.hfd \
| ./testDilib/NlmPubMed2Wicri -a GrippeBelgiqueV2
HfdIndexSelect -h $EXPLOR_AREA/Data/Main/Exploration/RBID.i \
-Sk "pubmed:19660245" \
| HfdSelect -Kh $EXPLOR_AREA/Data/Main/Exploration/biblio.hfd \
| ./testDilib/NlmPubMed2Wicri -a GrippeBelgiqueV2
HfdIndexSelect -h $EXPLOR_AREA/Data/Main/Exploration/RBID.i \
-Sk "pubmed:30802260" \
| HfdSelect -Kh $EXPLOR_AREA/Data/Main/Exploration/biblio.hfd \
| ./testDilib/NlmPubMed2Wicri -a GrippeBelgiqueV2
Voir aussi
Cette commande est intégrée à la version V0.6.35.