Wicri:Dilib source, module Utf8, commande Utf8FromHexEntity
De Wicri Outils
Module Utf8 |
Cette page introduit le fichier source pour générer la commande de conversion Utf8FromHexEntity.
Sommaire |
Liste des modules Dilib
|
Code source
/********************************************************************************
ATTENTION : la version de référence de ce source est sur le wiki Wicri/Outils
WARNING: reference source code is on Wicri/Outils (fr)
http://ticri.univ-lorraine.fr/wicri-outils.fr/index.php/Wicri:Dilib_source,_module_Utf8,_commande_Utf8FromHexEntity
================================================================================
Module : Utf8
Command : Utf8FromHexEntity
File : Utf8FromHexEntity.lex
********************************************************************************/
%{
#include <ctype.h>
#include <limits.h>
#include "stdio.h"
#include <stdlib.h>
#include "string.h"
#include "Buffer.h"
/*
 * Convert a NUL-terminated string of hexadecimal digits to its integer value.
 *
 * hexa : digits 0-9, a-f or A-F (the lexer only passes such strings).
 *        An empty string yields 0.
 * Returns the value, saturated at INT_MAX when the digits do not fit in an
 * int, so that an oversized reference can never wrap around to a small
 * (and wrong) code point.
 */
int Hexa2bin(char *hexa)
{
    const char *p;
    int res = 0;

    for (p = hexa; *p != '\0'; p++)
    {
        int digit;

        /* <ctype.h> functions require an unsigned char value. */
        if (isdigit((unsigned char)*p))
            digit = *p & 0x0F;          /* '0'..'9' -> 0..9 */
        else
            digit = (*p & 0x07) + 9;    /* 'a'..'f' / 'A'..'F' -> 10..15 */

        /* Would res*16 + digit exceed INT_MAX?  Check before computing it. */
        if (res > (INT_MAX - digit) / 16)
            return INT_MAX;
        res = res * 16 + digit;
    }
    return res;
}
/* Scratch buffer reused by Utf8CharFromHexa; created on the first call. */
Buffer *Utf8CharBuffer=NULL;

/*
 * Translate the hexadecimal digits of a numeric character reference
 * (the part between "&#x" and ";") into the text to write in its place.
 *
 * hexa : the hexadecimal digits, without the "&#x" prefix or ";" suffix.
 *
 * Returns:
 *   - "&amp;", "&lt;" or "&gt;" for U+0026, U+003C and U+003E, so that the
 *     output stays well-formed XML;
 *   - the UTF-8 encoding of the code point for every other value from
 *     0x21 to 0x10FFFF, surrogates excluded;
 *   - the reference rebuilt unchanged ("&#x" + hexa + ";") for values below
 *     0x21, for surrogates (U+D800..U+DFFF, which have no valid UTF-8 form)
 *     and for values above 0x10FFFF.
 *
 * The result is either a string literal or the string held by
 * Utf8CharBuffer, which is reset at the start of every call.
 * NOTE(review): presumably the returned pointer is only valid until the
 * next call - confirm against BufferString() in Buffer.h.
 */
char *Utf8CharFromHexa(char *hexa)
{
    int charNumber;

    if (!Utf8CharBuffer) Utf8CharBuffer = BufferCreate(10, 10);
    BufferReset(Utf8CharBuffer);
    charNumber = Hexa2bin(hexa);

    if (charNumber >= 33 && charNumber < 128)   /* ASCII with XML exceptions */
    {
        switch (charNumber)
        {
        case 38: return ("&amp;");
        case 60: return ("&lt;");
        case 62: return ("&gt;");
        default:
            BufferCharCat(Utf8CharBuffer, charNumber);
            return (BufferString(Utf8CharBuffer));
        }
    }

    if (charNumber >= 128 && charNumber < 0x800)   /* 2 bytes, e.g. &#x999; */
    {
        BufferCharCat(Utf8CharBuffer, 0xC0 | (charNumber >> 6));
        BufferCharCat(Utf8CharBuffer, 0x80 | (charNumber & 0x3F));
        return (BufferString(Utf8CharBuffer));
    }

    if (charNumber >= 0x800 && charNumber < 0x10000   /* 3 bytes, e.g. &#x9999; */
        && !(charNumber >= 0xD800 && charNumber <= 0xDFFF))
    {
        BufferCharCat(Utf8CharBuffer, 0xE0 | (charNumber >> 12));
        BufferCharCat(Utf8CharBuffer, 0x80 | ((charNumber >> 6) & 0x3F));
        BufferCharCat(Utf8CharBuffer, 0x80 | (charNumber & 0x3F));
        return (BufferString(Utf8CharBuffer));
    }

    if (charNumber >= 0x10000 && charNumber <= 0x10FFFF)   /* 4 bytes, e.g. &#x1F600; */
    {
        BufferCharCat(Utf8CharBuffer, 0xF0 | (charNumber >> 18));
        BufferCharCat(Utf8CharBuffer, 0x80 | ((charNumber >> 12) & 0x3F));
        BufferCharCat(Utf8CharBuffer, 0x80 | ((charNumber >> 6) & 0x3F));
        BufferCharCat(Utf8CharBuffer, 0x80 | (charNumber & 0x3F));
        return (BufferString(Utf8CharBuffer));
    }

    /* Control characters and space, surrogates, out-of-range values:
       hand the reference back exactly as it was written. */
    BufferStrcpy(Utf8CharBuffer, "&#x");
    BufferStrcat(Utf8CharBuffer, hexa);
    BufferStrcat(Utf8CharBuffer, ";");
    return (BufferString(Utf8CharBuffer));
}
%}
%%
"&#x"[A-Fa-f0-9]+";" {
    /* Match a complete hexadecimal character reference in one pattern.
       Anything that is not a well-formed reference (e.g. "&#x" with no
       hex digit or no closing ";") falls through to the default rule and
       is copied unchanged, instead of leaving the scanner in a state
       where later text would be converted or a ";" silently dropped. */
    yytext[yyleng-1]='\0';                      /* cut the trailing ";" */
    printf("%s",Utf8CharFromHexa(yytext+3));    /* skip the leading "&#x" */
}
%%
/*
 * Program entry point: run the generated scanner once over its input,
 * then report success.  Takes no command-line arguments.
 */
int main(void)
{
    yylex();
    return EXIT_SUCCESS;
}