Wicri:Dilib source, module Utf8, commande Utf8FromHexEntity

De Wicri Outils
LogoDilib.gif
Panneau travaux.png
Bibliothèque Dilib (ressources numériques)
Module Utf8

Cette page introduit le fichier source pour générer la commande de conversion Utf8FromHexEntity.

 

Code source

/********************************************************************************
ATTENTION : la version de référence de ce source est sur le wiki Wicri/Outils
WARNING:    reference source code is on Wicri/Outils (fr)  

http://ticri.univ-lorraine.fr/wicri-outils.fr/index.php/Wicri:Dilib_source,_module_Utf8,_commande_Utf8FromHexEntity
================================================================================

     Module   : Utf8
     Command  : Utf8FromHexEntity
     File     : Utf8FromHexEntity.lex

 ********************************************************************************/
%{
#include "stdio.h"
#include "string.h"
#include <stdlib.h> 
#include "Buffer.h"

/*
 * Hexa2bin - convert a string of hexadecimal digits to its numeric value.
 *
 * hexa: NUL-terminated string, expected to contain only [0-9A-Fa-f]
 *       (no "0x" prefix, no sign).
 *
 * Returns the value of the digits read as a base-16 number; an empty
 * string yields 0.
 *
 * Digits are recognised with explicit range comparisons, so no <ctype.h>
 * function is needed (this file does not include that header).  The value
 * is accumulated in an unsigned variable so that an over-long digit string
 * wraps around instead of triggering signed-overflow undefined behaviour.
 */
int Hexa2bin(char *hexa)
{
  unsigned int value = 0;
  const char *p;

  for (p = hexa; *p != '\0'; p++)
    {
      unsigned int digit;

      if (*p >= '0' && *p <= '9')
	digit = (unsigned int)(*p - '0');
      else
	/* 'A'..'F' and 'a'..'f' both have 1..6 in their three low bits */
	digit = ((unsigned int)*p & 0x07u) + 9u;
      value = value * 16u + digit;
    }
  return (int)value;
}

/* Scratch buffer reused by every call to Utf8CharFromHexa; created on first use. */
Buffer *Utf8CharBuffer=NULL;

/*
 * Utf8CharFromHexa - translate the hexadecimal digits of a numeric character
 * reference ("&#xHHHH;") into the text that should replace the reference.
 *
 * hexa: the hexadecimal digits only, without the leading "&#x" and the
 *       trailing ";".
 *
 * Result, by code point (as computed by Hexa2bin):
 *   - below 33 (control characters and space), or negative: the reference
 *     is rebuilt unchanged as "&#x" hexa ";"
 *   - 38, 60, 62 ('&', '<', '>'): the XML entities "&amp;", "&lt;", "&gt;"
 *   - other values below 128: the character itself
 *   - 0x80..0x7FF: two-byte UTF-8 sequence
 *   - 0x800..0xFFFF except surrogates: three-byte UTF-8 sequence
 *   - 0x10000..0x10FFFF: four-byte UTF-8 sequence
 *   - surrogates (0xD800..0xDFFF) and anything above 0x10FFFF have no valid
 *     UTF-8 encoding: the reference is rebuilt unchanged
 *
 * Returns either a string literal or the string held by Utf8CharBuffer.
 * That buffer is reset at the start of every call, so the caller should
 * consume the result before calling again (NOTE(review): exact lifetime
 * depends on the Buffer module -- confirm).
 */
char *Utf8CharFromHexa(char *hexa)
{
  int charNumber;
  if(!Utf8CharBuffer)Utf8CharBuffer=BufferCreate(10,10);
  BufferReset(Utf8CharBuffer);
  charNumber=Hexa2bin(hexa);
  if (charNumber<33)                           /* ASCII control char or space */
    {
      BufferStrcpy(Utf8CharBuffer, "&#x");
      BufferStrcat(Utf8CharBuffer, hexa);
      BufferStrcat(Utf8CharBuffer, ";");
      return (BufferString(Utf8CharBuffer));
    }
  if (charNumber<128)                            /* ASCII with XML exceptions */
    {
      switch (charNumber)
	{
	case 38: return ("&amp;");               /* entity must end with ';' */
	case 60: return ("&lt;");
	case 62: return ("&gt;");
	default: 
	  BufferCharCat(Utf8CharBuffer, charNumber);
	  return (BufferString(Utf8CharBuffer));
	}
    }
  if  (charNumber<2048)                          /* 110xxxxx 10xxxxxx */
    {
      int n1, n2;
      n2=(charNumber & 0x3F) | 0x80;
      n1=((charNumber & 0x7C0)/0x40) | 0xC0;
      BufferCharCat(Utf8CharBuffer,n1);
      BufferCharCat(Utf8CharBuffer,n2);
      return (BufferString(Utf8CharBuffer));
    }
  if (charNumber < 0x10000)                      /* 1110xxxx 10xxxxxx 10xxxxxx */
    {
      /* UTF-16 surrogates are not characters: fall through and keep the entity */
      if (charNumber < 0xD800 || charNumber > 0xDFFF)
	{
	  int n1, n2, n3;
	  n3=(charNumber & 0x3F) | 0x80;
	  n2=((charNumber & 0xFC0)/0x40) | 0x80;
	  n1=((charNumber & 0xF000)/0x1000) | 0xE0;
	  BufferCharCat(Utf8CharBuffer,n1);
	  BufferCharCat(Utf8CharBuffer,n2);
	  BufferCharCat(Utf8CharBuffer,n3);
	  return (BufferString(Utf8CharBuffer));
	}
    }
  else if (charNumber <= 0x10FFFF)               /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    {
      int n1, n2, n3, n4;
      n4=(charNumber & 0x3F) | 0x80;
      n3=((charNumber & 0xFC0)/0x40) | 0x80;
      n2=((charNumber & 0x3F000)/0x1000) | 0x80;
      n1=((charNumber & 0x1C0000)/0x40000) | 0xF0;
      BufferCharCat(Utf8CharBuffer,n1);
      BufferCharCat(Utf8CharBuffer,n2);
      BufferCharCat(Utf8CharBuffer,n3);
      BufferCharCat(Utf8CharBuffer,n4);
      return (BufferString(Utf8CharBuffer));
    }
  /* no valid UTF-8 encoding for this value: emit the reference as it was read */
  BufferStrcpy(Utf8CharBuffer, "&#x");
  BufferStrcat(Utf8CharBuffer, hexa);
  BufferStrcat(Utf8CharBuffer, ";");
  return (BufferString(Utf8CharBuffer));
}
%}

%%
"&#x"[A-Fa-f0-9]+";"  {
                        /* A complete hexadecimal character reference.
                           Matching "&#x", the digits and ";" in one rule
                           means that malformed input (no digits, or no
                           closing ";") is never partially consumed: it
                           matches no rule and is copied to the output
                           unchanged by the default action.
                           The trailing ';' is overwritten with NUL so that
                           yytext+3 is exactly the digit string. */
                        yytext[yyleng-1] = '\0';
                        fputs(Utf8CharFromHexa(yytext+3), stdout);
                      }
%%

/*
 * Entry point: run the scanner over the lexer's input stream until end of
 * input, then report success.  Text that matches no rule is handled by the
 * lexer's default action.
 */
int main(void)
{
  yylex();
  return EXIT_SUCCESS;
}

Voir aussi