Wicri:Dilib source, module Wicri, commande WicriExpandApiFromList

De Wicri Outils

Code source

/*   -*- coding: utf-8 -*-  */

/*
           WicriExpandApiFromList.c

 */

#include <stdio.h>     /* for printf */
#include <stdlib.h>    /* for exit */
#include <string.h>    /* for memcpy, strstr */
#include <getopt.h>
#include "SxmlNode.h"

/*   CURL  */

#include <curl/curl.h>

/* Easy handle shared by all requests: created in CurlInit() and given its
   URL and performed in WicriApiGetDocumentByUrl(). */
CURL *curl_handle;

/* Growable in-memory buffer that accumulates the bytes handed to
   WriteMemoryCallback(). */
struct MemoryStruct {
  char *memory;   /* heap block; kept NUL-terminated by WriteMemoryCallback() */
  size_t size;    /* number of payload bytes stored, terminating NUL excluded */
};

/* 0 until WicriApiGetDocumentByUrl() has been called once; 1 afterwards,
   meaning WicriChunk.memory holds a block to free before the next request. */
int WicriCurlFlagMemory;

/*
 * Write callback registered with CURLOPT_WRITEFUNCTION: appends the
 * size * nmemb bytes at `contents` to the struct MemoryStruct pointed to by
 * `userp`, and keeps the buffer NUL-terminated.
 *
 * Returns the number of bytes consumed (size * nmemb) on success, or 0 when
 * the chunk cannot be stored (arithmetic overflow or allocation failure).
 * On failure the buffer is left exactly as it was, so the caller can still
 * read or free it.
 */
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
  struct MemoryStruct *mem = (struct MemoryStruct *)userp;
  size_t realsize;
  char *grown;

  /* size * nmemb must not wrap around. */
  if (nmemb != 0 && size > (size_t)-1 / nmemb) {
    fprintf(stderr, "response chunk too large (size overflow)\n");
    return 0;
  }
  realsize = size * nmemb;

  /* mem->size + realsize + 1 (for the NUL) must not wrap around either. */
  if (mem->size >= (size_t)-1 - realsize) {
    fprintf(stderr, "response too large (size overflow)\n");
    return 0;
  }

  /* Use a temporary so the existing block is not lost if realloc fails. */
  grown = realloc(mem->memory, mem->size + realsize + 1);
  if (grown == NULL) {
    /* out of memory! Reported on stderr: stdout carries the program's data. */
    fprintf(stderr, "not enough memory (realloc returned NULL)\n");
    return 0;
  }
  mem->memory = grown;

  if (realsize > 0)
    memcpy(&(mem->memory[mem->size]), contents, realsize);
  mem->size += realsize;
  mem->memory[mem->size] = 0;

  return realsize;
}
 
/* Result code of the most recent curl_easy_perform() call, stored by
   WicriApiGetDocumentByUrl(). */
CURLcode resCurl;
/* Buffer receiving the response body; registered as CURLOPT_WRITEDATA in
   CurlInit() and reset at the start of each WicriApiGetDocumentByUrl() call. */
struct MemoryStruct WicriChunk;

void CurlInit()
{

  WicriCurlFlagMemory=0;
  curl_global_init(CURL_GLOBAL_ALL);
  curl_handle = curl_easy_init();
  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&WicriChunk);

}

/*
 * Fetch the document at `url` with the shared curl handle and return its
 * body as a NUL-terminated string.
 *
 * The returned pointer is WicriChunk.memory: it is owned by this module and
 * is freed at the start of the next call, so the caller must neither free it
 * nor keep it across calls.  CurlInit() must have been called beforehand.
 *
 * Terminates the program with EXIT_FAILURE if memory cannot be allocated or
 * if curl_easy_perform() reports an error.
 */
char *WicriApiGetDocumentByUrl(char *url)
{
  /* Release the buffer of the previous request, if there was one. */
  if (WicriCurlFlagMemory==1) free(WicriChunk.memory);
  else WicriCurlFlagMemory=1;

  WicriChunk.memory = malloc(1);  /* will be grown as needed by the write callback */
  if (WicriChunk.memory == NULL)
    {
      fprintf(stderr, "not enough memory (malloc returned NULL)\n");
      exit(EXIT_FAILURE);
    }
  /* An empty response never triggers the write callback, so terminate the
     string here; otherwise the caller would read an uninitialised byte. */
  WicriChunk.memory[0] = '\0';
  WicriChunk.size = 0;    /* no data at this point */

  curl_easy_setopt(curl_handle, CURLOPT_URL, url);

  resCurl = curl_easy_perform(curl_handle);

  if(resCurl != CURLE_OK)
    {
      fprintf(stderr, "curl_easy_perform() failed: %s\n",
              curl_easy_strerror(resCurl));
      exit(EXIT_FAILURE);
    }
  return WicriChunk.memory;
}



/*  fin CURL  */


/* Query-string fragment appended after "api.php?action=query&format=xml&"
   in proceedWiki(); set from the -a command-line option. */
char *apiAction;
/* Base URL placed in front of each wiki's <url> value in proceedWiki();
   main() assigns a default and the -d option overrides it. */
char *domainUrlWicriRoot;

/*
 * Query the MediaWiki API of one wiki and print the answer on stdout.
 *
 * `wiki` is a <wiki> element; the text of its <url> child is appended to
 * domainUrlWicriRoot to form the API address, and the text of its <name>
 * child labels the output line.  The line written is
 *     name <TAB> response-from-"<api"-onwards
 *
 * Returns 0 when a line was printed, -1 when the record was skipped because
 * the <url> text is missing or the response contains no "<api" marker (a
 * diagnostic is then written to stderr).
 *
 * NOTE(review): what SxmlGetFirstChildByTagName/SxmlLeafText do for a
 * missing child is not visible here; only the returned text is checked.
 */
int proceedWiki(SxmlNode *wiki)
{
    static Buffer *bufUrl=NULL;   /* reused across calls to avoid reallocating */
    char *wikiPath;
    char *name;
    char *result;
    char *apiStart;

    if (!bufUrl) bufUrl=NewBuffer();

    wikiPath=SxmlLeafText(SxmlGetFirstChildByTagName(wiki, "url"));
    if (!wikiPath)
      {
        fprintf(stderr, "proceedWiki: <wiki> record without <url> text, skipped\n");
        return -1;
      }

    /* The <url> text is consumed before <name> is read, as in the original
       ordering, in case SxmlLeafText reuses its result storage. */
    BufferStrcpy (bufUrl,  domainUrlWicriRoot);
    BufferStrcat (bufUrl, "/");
    BufferStrcat (bufUrl,  wikiPath);
    BufferStrcat (bufUrl, "/api.php?action=query&format=xml&");
    if (apiAction) BufferStrcat (bufUrl, apiAction);

    name=SxmlLeafText(SxmlGetFirstChildByTagName(wiki, "name"));
    if (!name) name="";   /* passing NULL to printf("%s") is undefined */

    result=WicriApiGetDocumentByUrl(BufferString(bufUrl));

    /* Skip whatever precedes the <api ...> root element (XML prolog, ...). */
    apiStart=strstr (result, "<api");
    if (!apiStart)
      {
        fprintf(stderr, "%s: no <api> element in the response from %s\n",
                name, BufferString(bufUrl));
        return -1;
      }

    printf ("%s\t%s\n", name, apiStart);
    return 0;
}

int main(int argc, char **argv)
{
  int cOption;
  SxmlNode *docInput;

  domainUrlWicriRoot="https://wicri-demo.istex.fr/Wicri";

  while((cOption=getopt(argc,argv,"a:d:"))!=EOF)
    {switch (cOption)
      {  
      case 'a':
	apiAction=optarg;
	break;
      case 'd':
	domainUrlWicriRoot=optarg;
	break;
      }
    }
    
  CurlInit();

  while ((docInput=SxmlInputGetDocumentElement()))
    {
      if(SxmlNodeHasName(docInput,"wiki"))
	{
	  proceedWiki(docInput);
	}
    }
}

Mise en œuvre

Importation
WicriGetPage -l wicri-outils.fr -p "Wicri:Dilib source, module Wicri, commande WicriExpandApiFromList"   \
      | MediaWikiExtractSources -w | HfdStoreFile
Compilation
gcc testDilib/WicriExpandApiFromList.c $DILIB_CC  -l curl -o testDilib/WicriExpandApiFromList

Tests

Les tests utilisent la table générée à partir de la page Site lorExplor sur Wicri/Base 1.30

Module statistique de l'API

cat testDilib/wiki.dict   \
 | ./testDilib/WicriExpandApiFromList -a "meta=siteinfo&siprop=statistics"

Présence de pages

cat testDilib/wiki.dict    \
  | ./testDilib/WicriExpandApiFromList -a "list=allpages&apfrom=Luxembourg" \
  | SxmlIndent -k    \
  | grep Luxembourg

Changements récents

cat testDilib/wiki.dict    \
  | ./testDilib/WicriExpandApiFromList  \
    -a  "list=recentchanges&rcprop=title|ids|sizes|flags|user|timestamp&rclimit=3"

Voir aussi