src/ccv.c

/* [<][>]
[^][v][top][bottom][index][help] */

FUNCTIONS

This source file includes the following functions.
  1. NAMEPP
  2. vf_ccv_init
  3. ccv_no_conv
  4. ccv_jis2kuten
  5. ccv_jis2euc
  6. ccv_jis2sjis
  7. ccv_kuten2jis
  8. ccv_euc2jis
  9. ccv_euc2sjis
  10. ccv_sjis2jis
  11. ccv_rc2wansung
  12. ccv_wansung2rc
  13. ccv_jis2seq2_0
  14. ccv_jis2seq2_1
  15. vf_ccv_require
  16. cmp_alias
  17. ccv_add_conv_info
  18. make_canonical_charset_name
  19. cmp_charset_name
  20. cmp_enc_name
  21. vf_ccv_autoload
  22. vf_ccv_install_func
  23. ccv_load
  24. ccv_read_header
  25. ccv_read_file
  26. ccv_read_aliases
  27. ccv_read_file_alloc_blocks
  28. ccv_read_file_block_array
  29. ccv_read_file_block_random_array
  30. ccv_file_read_list
  31. ccv_file_read_elem
  32. vf_ccv_conv
  33. ccv_conv_array
  34. ccv_conv_random_array
  35. main

   1 /* ccv.c  --- charset conversion
   2  * by Hirotsugu Kakugawa
   3  *
   4  *  28 Jul 1997  
   5  *  20 Jan 1999  Added to check /usr/local/share/site/
    6  *  16 Feb 1999  Added encoding conversion funcs for Row-Cell and Wansung.
   7  *  14 Sep 1999  Added alias for charset & encoding names (but not tested yet)
   8  */
   9 /*
  10  * Copyright (C) 1997-1999  Hirotsugu Kakugawa. 
  11  * All rights reserved.
  12  *
  13  * This file is part of the VFlib Library.  This library is free
  14  * software; you can redistribute it and/or modify it under the terms of
  15  * the GNU Library General Public License as published by the Free
  16  * Software Foundation; either version 2 of the License, or (at your
  17  * option) any later version.  This library is distributed in the hope
  18  * that it will be useful, but WITHOUT ANY WARRANTY; without even the
  19  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  20  * PURPOSE.  See the GNU Library General Public License for more details.
  21  * You should have received a copy of the GNU Library General Public
  22  * License along with this library; if not, write to the Free Software
  23  * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  24  */
  25 
  26 #include "config.h"
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <ctype.h>
  30 #ifdef HAVE_UNISTD_H
  31 #  include <unistd.h>
  32 #endif
  33 #include <sys/types.h>
  34 #include <sys/param.h>
  35 
  36 #include "VFlib-3_6.h"
  37 #include "VFsys.h"
  38 #include "consts.h"
  39 #include "str.h"
  40 #include "path.h"
  41 #include "vflpaths.h"
  42 #include "ccv.h"
  43 
  44 #define NAMEPP(s)  ((s!=NULL)?s:"*")
     /* [<][>][^][v][top][bottom][index][help] */
  45 
     /* Separator characters that are all folded to CS_NAME_XCHARS_TO when
      * charset/encoding names are canonicalized or compared. */
  46 #define CS_NAME_XCHARS     ".-_ "
  47 #define CS_NAME_XCHARS_TO  '_'
  48 
     /* Number of entries allocated for ccv_tbl by the first vf_ccv_init(). */
  49 #define CCV_TBL_INITIAL_SIZE   32
     /* ccv_tbl_index: number of entries in use (index of the next free slot).
      * ccv_tbl_size:  number of entries currently allocated.
      * ccv_tbl:       the conversion table itself; NULL until initialized. */
  50 static int                 ccv_tbl_index = 0;
  51 static int                 ccv_tbl_size = 0;
  52 static struct s_ccv_info  *ccv_tbl = NULL;
  53 
     /* Table registration and conversion-file reading. */
  54 Private int  ccv_add_conv_info(struct s_ccv_info*,int,char*);
  55 Private int  ccv_load(struct s_ccv_info *ccvi);
  56 Private int  ccv_read_header(struct s_ccv_info *ccvi);
  57 Private void ccv_read_aliases(FILE *fp, char ***oalias_tbl);
  58 Private int  ccv_read_file(struct s_ccv_info *ccvi,int);
  59 Private int  ccv_read_file_alloc_blocks(struct s_ccv_info *);
  60 Private int  ccv_read_file_block_array(FILE*, struct s_ccv_info*, int);
  61 Private int  ccv_read_file_block_random_array(FILE*, struct s_ccv_info*, int);
  62 Private int  ccv_file_read_list(FILE *fp, char *buff, int nbuff);
  63 Private int  ccv_file_read_elem(FILE *fp, char *buff, int nbuff);
  64 
     /* Charset / encoding name handling; the cmp_* functions return 0 on a
      * match and -1 otherwise. */
  65 Private char *make_canonical_charset_name(char*);
  66 Private int   cmp_alias(char **alias_tbl, char *name, int how_compare);
  67 Private int   cmp_charset_name(char*,char*);
  68 Private int   cmp_enc_name(char*,char*);
  69 
     /* Conversion functions stored in the `conv' member of table entries;
      * each takes a table index and a code point and returns the converted
      * code point. */
  70 Private long  ccv_conv_array(int,long);
  71 Private long  ccv_conv_random_array(int,long);
  72 Private long  ccv_no_conv(int ccvi_index, long code_point);
  73 Private long  ccv_jis2kuten(int ccvi_index, long code_point);
  74 Private long  ccv_jis2euc(int ccvi_index, long code_point);
  75 Private long  ccv_jis2sjis(int ccvi_index, long code_point);
  76 Private long  ccv_kuten2jis(int ccvi_index, long code_point);
  77 Private long  ccv_euc2jis(int ccvi_index, long code_point);
  78 Private long  ccv_euc2sjis(int ccvi_index, long code_point);
  79 Private long  ccv_sjis2jis(int ccvi_index, long code_point);
  80 Private long  ccv_wansung2rc(int ccvi_index, long code_point);
  81 Private long  ccv_rc2wansung(int ccvi_index, long code_point);
  82 Private long  ccv_jis2seq2_0(int ccvi_index, long code_point);
  83 Private long  ccv_jis2seq2_1(int ccvi_index, long code_point);
  84 
  85 
  86 Glocal int
  87 vf_ccv_init(void)
     /* [<][>][^][v][top][bottom][index][help] */
  88 {
  89   int   new_size, i;
  90   static struct s_ccv_info   *ccv_tbl_new;
  91 
  92   if (ccv_tbl == NULL){  /* initialization */
  93     ccv_tbl_index = 0;
  94     ccv_tbl_size = CCV_TBL_INITIAL_SIZE;
  95     ALLOCN_IF_ERR(ccv_tbl, struct s_ccv_info, ccv_tbl_size){
  96       ccv_tbl_size = 0;
  97       vf_error = VF_ERR_NO_MEMORY;
  98       return -1;
  99     }
 100 
 101     vf_ccv_install_func(NULL, "ISO",       NULL, "ISO",      ccv_no_conv);
 102     vf_ccv_install_func(NULL, "ISO2022",   NULL, "ISO2022",  ccv_no_conv);
 103     vf_ccv_install_func(NULL, "UNICODE",   NULL, "UNICODE",  ccv_no_conv);
 104     vf_ccv_install_func(NULL, "JIS",       NULL, "JIS",      ccv_no_conv);
 105     vf_ccv_install_func(NULL, "SJIS",      NULL, "SJIS",     ccv_no_conv);
 106     
 107     vf_ccv_install_func(NULL, "JIS",       NULL, "SJIS",     ccv_jis2sjis);
 108     vf_ccv_install_func(NULL, "ISO2022",   NULL, "SJIS",     ccv_jis2sjis);
 109     vf_ccv_install_func(NULL, "SJIS",      NULL, "JIS",      ccv_sjis2jis);
 110     vf_ccv_install_func(NULL, "SJIS",      NULL, "ISO2022",  ccv_sjis2jis);
 111     vf_ccv_install_func(NULL, "EUC",       NULL, "JIS",      ccv_euc2jis);
 112     vf_ccv_install_func(NULL, "EUC",       NULL, "ISO2022",  ccv_euc2jis);
 113     vf_ccv_install_func(NULL, "EUC",       NULL, "SJIS",     ccv_euc2sjis);
 114     vf_ccv_install_func(NULL, "KU-TEN",    NULL, "JIS",      ccv_kuten2jis);
 115     vf_ccv_install_func(NULL, "ROW-CELL",  NULL, "JIS",      ccv_kuten2jis);
 116     vf_ccv_install_func(NULL, "KU-TEN",    NULL, "ISO2022",  ccv_kuten2jis);
 117     vf_ccv_install_func(NULL, "ROW-CELL",  NULL, "ISO2022",  ccv_kuten2jis);
 118     vf_ccv_install_func(NULL, "JIS",       NULL, "ROW-CELL", ccv_jis2kuten);
 119     vf_ccv_install_func(NULL, "ISO2022",   NULL, "ROW-CELL", ccv_jis2kuten);
 120     vf_ccv_install_func(NULL, "JIS",       NULL, "KU-TEN",   ccv_jis2kuten);
 121     vf_ccv_install_func(NULL, "ISO2022",   NULL, "KU-TEN",   ccv_jis2kuten);
 122     vf_ccv_install_func(NULL, "JIS",    NULL, "SEQUENTIAL2-0",ccv_jis2seq2_0);
 123     vf_ccv_install_func(NULL, "ISO2022",NULL, "SEQUENTIAL2-0",ccv_jis2seq2_0);
 124     vf_ccv_install_func(NULL, "JIS",    NULL, "SEQUENTIAL2-1",ccv_jis2seq2_1);
 125     vf_ccv_install_func(NULL, "ISO2022",NULL, "SEQUENTIAL2-1",ccv_jis2seq2_1);
 126 
 127     vf_ccv_install_func(NULL, "ISO2022",   NULL, "WANSUNG",  ccv_jis2euc);
 128     vf_ccv_install_func(NULL, "JIS",       NULL, "WANSUNG",  ccv_jis2euc);
 129     vf_ccv_install_func(NULL, "ROW-CELL",  NULL, "WANSUNG",  ccv_rc2wansung);
 130     vf_ccv_install_func(NULL, "KU-TEN",    NULL, "WANSUNG",  ccv_rc2wansung);
 131     vf_ccv_install_func(NULL, "WANSUNG",   NULL, "ROW-CELL", ccv_wansung2rc);
 132     vf_ccv_install_func(NULL, "WANSUNG",   NULL, "KU-TEN",   ccv_wansung2rc);
 133 
 134     return 0;
 135   }
 136 
 137   /* expansion */
 138   new_size = ccv_tbl_size + 8;
 139   ALLOCN_IF_ERR(ccv_tbl_new, struct s_ccv_info, new_size){
 140     return -1;
 141   }
 142   for (i = 0; i < ccv_tbl_index; i++){
 143     memcpy(&ccv_tbl_new[i], &ccv_tbl[i], sizeof(struct s_ccv_info));
 144   }
 145   vf_free(ccv_tbl);
 146   ccv_tbl_size = new_size;
 147   ccv_tbl = ccv_tbl_new;
 148 
 149   return 0;
 150 }
 151 
 152 
 153 Private long
 154 ccv_no_conv(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 155 {
 156   return code_point;
 157 }
 158 
 159 Private long
 160 ccv_jis2kuten(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 161 {
 162   unsigned int   c1, c2;
 163 
 164   if (code_point < 256)
 165     return code_point;
 166   c1 = code_point / 0x100;
 167   c2 = code_point % 0x100;
 168   return (c1 - 0x20)*0x100 + (c2 - 0x20);
 169 }
 170 
 171 Private long
 172 ccv_jis2euc(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 173 {
 174   unsigned int   c1, c2;
 175 
 176   if (code_point < 256)
 177     return code_point;
 178   c1 = code_point / 0x100;
 179   c2 = code_point % 0x100;
 180   return (c1 + 0x80)*0x100 + (c2 + 0x80);
 181 }
 182 
 183 Private long
 184 ccv_jis2sjis(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 185 {
 186   unsigned int   c1, c2;
 187   int            row_offset, cell_offset;
 188 
 189   if (code_point < 256)
 190     return code_point;
 191   /* Snarfed from 'jis2sjis()' in UJIP by Ken R Lunde. */
 192   c1 = code_point / 0x100;
 193   c2 = code_point % 0x100;
 194   row_offset  = (c1 < 95) ? 112 : 176;
 195   cell_offset = (c1 % 2) ? ((c2 > 95) ? 32 : 31) : 126;
 196   c1 = ((c1 + 1) >> 1) + row_offset;
 197   c2 += cell_offset;
 198   return c1*0x100 + c2;
 199 }
 200 
 201 Private long
 202 ccv_kuten2jis(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 203 {
 204   unsigned int   c1, c2;
 205 
 206   if (code_point < 256)
 207     return code_point;
 208   c1 = code_point / 0x100;
 209   c2 = code_point % 0x100;
 210   return (c1 + 0x20)*0x100 + (c2 + 0x20);
 211 }
 212 
 213 Private long
 214 ccv_euc2jis(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 215 {
 216   unsigned int   c1, c2;
 217 
 218   if (code_point < 256)
 219     return code_point;
 220   c1 = code_point / 0x100;
 221   c2 = code_point % 0x100;
 222   return (c1 - 0x80)*0x100 + (c2 - 0x80);
 223 }
 224 
 225 Private long
 226 ccv_euc2sjis(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 227 {
 228   long  code_point_jis;
 229 
 230   code_point_jis = ccv_euc2jis(ccvi_index, code_point);
 231   return  ccv_jis2sjis(ccvi_index, code_point_jis);
 232 }
 233 
 234 Private long
 235 ccv_sjis2jis(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 236 {
 237   unsigned int   c1, c2;
 238   int            row_offset, cell_offset, adjust;
 239 
 240   if (code_point < 256)
 241     return code_point;
 242   /* Snarfed from 'sjis2jis()' in UJIP by Ken R Lunde. */
 243   c1 = code_point / 0x100;
 244   c2 = code_point % 0x100;
 245   if (c2 < 159)
 246     adjust = 1;
 247   else
 248     adjust = 0;
 249   row_offset  = (c1 < 160) ? 112 : 176;
 250   cell_offset = (adjust == 1) ? ((c2 > 127) ? 32 : 31) : 126;
 251   c1 = ((c1 - row_offset) << 1) - adjust;
 252   c2 -= cell_offset;
 253   return c1*0x100 + c2;
 254 }
 255 
 256 Private long
 257 ccv_rc2wansung(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 258 {
 259   unsigned int   c1, c2;
 260 
 261   if (code_point < 256)
 262     return code_point;
 263   c1 = code_point / 0x100;
 264   c2 = code_point % 0x100;
 265   return (c1 + 0xa0)*0x100 + (c2 + 0xa0);
 266 }
 267 
 268 Private long
 269 ccv_wansung2rc(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 270 {
 271   unsigned int   c1, c2;
 272 
 273   if (code_point < 256)
 274     return code_point;
 275   c1 = code_point / 0x100;
 276   c2 = code_point % 0x100;
 277   return (c1 - 0xa0)*0x100 + (c2 - 0xa0);
 278 }
 279 
 280 
 281 Private long
 282 ccv_jis2seq2_0(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 283 {
 284   int   b0, b1;
 285 
 286   b0 = (code_point / 0x1)   % 0x100;
 287   b1 = (code_point / 0x100) % 0x100;
 288 
 289   if ((b0 < 0x21) || (b0 > 0x7e))
 290     return -1;
 291   if ((b1 < 0x21) || (b1 > 0x7e))
 292     return -1;
 293 
 294   return  94 * (b1-0x21) + (b0-0x21) + 0;
 295 }
 296 
 297 Private long
 298 ccv_jis2seq2_1(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 299 {
 300   int   b0, b1;
 301 
 302   b0 = (code_point / 0x1)   % 0x100;
 303   b1 = (code_point / 0x100) % 0x100;
 304 
 305   if ((b0 < 0x21) || (b0 > 0x7e))
 306     return -1;
 307   if ((b1 < 0x21) || (b1 > 0x7e))
 308     return -1;
 309 
 310   return  94 * (b1-0x21) + (b0-0x21) + 1;
 311 }
 312 
 313 
 314 
 315 Glocal int
 316 vf_ccv_require(char *cs1_name, char *cs1_enc,
     /* [<][>][^][v][top][bottom][index][help] */
 317                char *cs2_name, char *cs2_enc)
 318 {
 319   int  i;
 320   
 321   if (vf_dbg_ccv == 1)
 322     printf(">> CCV searching conversion: %s %s => %s %s\n", 
 323            NAMEPP(cs1_name), NAMEPP(cs1_enc),
 324            NAMEPP(cs2_name), NAMEPP(cs2_enc));
 325 
 326   if ((cs1_name == NULL) && (cs1_enc == NULL))
 327     return -1;
 328 
 329   for (i = 0; i < ccv_tbl_index; i++){
 330     if (   ((ccv_tbl[i].cs1_name == NULL)
 331             || (cmp_charset_name(ccv_tbl[i].cs1_name, cs1_name) == 0)
 332             || (cmp_alias(ccv_tbl[i].cs1_name_aliases, cs1_name, 0) == 0))
 333         && ((ccv_tbl[i].cs2_name == NULL)
 334             || (cmp_charset_name(ccv_tbl[i].cs2_name, cs2_name) == 0)
 335             || (cmp_alias(ccv_tbl[i].cs2_name_aliases, cs2_name, 0) == 0))
 336         && ((ccv_tbl[i].cs1_enc == NULL) || (cs1_enc == NULL)
 337             || (cmp_enc_name(ccv_tbl[i].cs1_enc, cs1_enc) == 0)
 338             || (cmp_alias(ccv_tbl[i].cs1_enc_aliases, cs1_enc, 1) == 0))
 339         && ((ccv_tbl[i].cs2_enc == NULL) || (cs2_enc == NULL)
 340             || (cmp_enc_name(ccv_tbl[i].cs2_enc, cs2_enc) == 0)
 341             || (cmp_alias(ccv_tbl[i].cs2_enc_aliases, cs2_enc, 1) == 0)) ){
 342       if (vf_dbg_ccv == 1){
 343         printf(">> CCV use conversion  #%d [%s %s => %s %s]\n", i, 
 344                NAMEPP(ccv_tbl[i].cs1_name), NAMEPP(ccv_tbl[i].cs1_enc),
 345                NAMEPP(ccv_tbl[i].cs2_name), NAMEPP(ccv_tbl[i].cs2_enc));
 346       }
 347       if (ccv_tbl[i].load_stat == CCV_STAT_AUTOLOAD){
 348         if (ccv_load(&ccv_tbl[i]) < 0)
 349           return -1;
 350         ccv_tbl[i].load_stat = CCV_STAT_LOADED;
 351       }
 352       return i;
 353     }
 354   }
 355   return -1;
 356 }
 357 
 358 Private int 
 359 cmp_alias(char **alias_tbl, char *name, int how_compare)
     /* [<][>][^][v][top][bottom][index][help] */
 360 {
 361   char  **pp;
 362 
 363   if (alias_tbl == NULL)
 364     return -1;
 365 
 366   if (how_compare == 0){
 367     for (pp = alias_tbl; *pp != NULL; pp++){
 368       if (cmp_charset_name(*pp, name) == 0)
 369         return 0;
 370     }
 371   } else {
 372     for (pp = alias_tbl; *pp != NULL; pp++){
 373       if (cmp_enc_name(*pp, name) == 0)
 374         return 0;
 375     }
 376   }
 377   return -1;
 378 }
 379    
 380 
 381 
 382 Private int
 383 ccv_add_conv_info(struct s_ccv_info *ccvi, 
     /* [<][>][^][v][top][bottom][index][help] */
 384                   int load_stat, char *file_name)
 385 {
 386   char **pp, *q;
 387 
 388   if (ccv_tbl_index >= ccv_tbl_size){
 389     if (vf_dbg_ccv == 1)
 390       printf(">> CCV expand table\n");
 391     if (vf_ccv_init() < 0){
 392       fprintf(stderr, "VFlib: too many code coversion table.\n");
 393       return 0;
 394     }
 395   }
 396 
 397   ccv_tbl[ccv_tbl_index].cs1_name          = ccvi->cs1_name;
 398   ccv_tbl[ccv_tbl_index].cs1_name_aliases  = ccvi->cs1_name_aliases;
 399   ccv_tbl[ccv_tbl_index].cs1_enc           = ccvi->cs1_enc;
 400   ccv_tbl[ccv_tbl_index].cs1_enc_aliases   = ccvi->cs1_enc_aliases;
 401   ccv_tbl[ccv_tbl_index].cs2_name          = ccvi->cs2_name;
 402   ccv_tbl[ccv_tbl_index].cs2_name_aliases  = ccvi->cs2_name_aliases;
 403   ccv_tbl[ccv_tbl_index].cs2_enc           = ccvi->cs2_enc;
 404   ccv_tbl[ccv_tbl_index].cs2_enc_aliases   = ccvi->cs2_enc_aliases;
 405   ccv_tbl[ccv_tbl_index].block_size = ccvi->block_size;
 406   ccv_tbl[ccv_tbl_index].load_stat  = load_stat;
 407   ccv_tbl[ccv_tbl_index].conv       = ccvi->conv;
 408   ccv_tbl[ccv_tbl_index].arg        = ccvi->arg;
 409   ccv_tbl[ccv_tbl_index].arg_type   = ccvi->arg_type;
 410   ccv_tbl[ccv_tbl_index].c1min      = ccvi->c1min;
 411   ccv_tbl[ccv_tbl_index].c1max      = ccvi->c1max;
 412   ccv_tbl[ccv_tbl_index].c2min      = ccvi->c2min;
 413   ccv_tbl[ccv_tbl_index].c2max      = ccvi->c2max;
 414   ccv_tbl[ccv_tbl_index].nblocks    = ccvi->nblocks;
 415   if (file_name != NULL){
 416     ccv_tbl[ccv_tbl_index].file_name  = vf_strdup(file_name);
 417     if (ccv_tbl[ccv_tbl_index].file_name == NULL){
 418       vf_error = VF_ERR_NO_MEMORY;
 419       return -1;
 420     }
 421   } else {
 422     ccv_tbl[ccv_tbl_index].file_name  = NULL;
 423   }
 424 
 425   if (vf_dbg_ccv == 1){
 426     printf(">> CCV installed #%d [%s %s => %s %s]\n", 
 427            ccv_tbl_index, 
 428            NAMEPP(ccv_tbl[ccv_tbl_index].cs1_name), 
 429            NAMEPP(ccv_tbl[ccv_tbl_index].cs1_enc),
 430            NAMEPP(ccv_tbl[ccv_tbl_index].cs2_name), 
 431            NAMEPP(ccv_tbl[ccv_tbl_index].cs2_enc));
 432     if ((pp = ccv_tbl[ccv_tbl_index].cs1_name_aliases) != NULL){
 433       q = NAMEPP(ccv_tbl[ccv_tbl_index].cs1_name);
 434       for ( ; *pp != NULL; pp++)
 435         printf(">>   alias %s: %s\n", *pp, q);
 436     }
 437     if ((pp = ccv_tbl[ccv_tbl_index].cs1_enc_aliases) != NULL){
 438       q = NAMEPP(ccv_tbl[ccv_tbl_index].cs1_enc);
 439       for ( ; *pp != NULL; pp++)
 440         printf(">>   alias %s: %s\n", *pp, q);
 441     }
 442     if ((pp = ccv_tbl[ccv_tbl_index].cs2_name_aliases) != NULL){
 443       q = NAMEPP(ccv_tbl[ccv_tbl_index].cs2_name);
 444       for ( ; *pp != NULL; pp++)
 445         printf(">>   alias %s: %s\n", *pp, q);
 446     }
 447     if ((pp = ccv_tbl[ccv_tbl_index].cs2_enc_aliases) != NULL){
 448       q = NAMEPP(ccv_tbl[ccv_tbl_index].cs2_enc);
 449       for ( ; *pp != NULL; pp++)
 450         printf(">>   alias %s: %s\n", *pp, q);
 451     }
 452   }
 453 
 454   ccv_tbl_index++;
 455   return 0;
 456 }
 457 
 458 Private char*
 459 make_canonical_charset_name(char *cs_name)
     /* [<][>][^][v][top][bottom][index][help] */
 460 {
 461   char *canon, *p;
 462 
 463   if (cs_name == NULL)
 464     return NULL;
 465 
 466   if ((canon = vf_strdup(cs_name)) == NULL)
 467     return NULL;
 468   for (p = canon; *p != '\0'; p++){
 469     if (vf_index(CS_NAME_XCHARS, *p) != NULL)
 470       *p = CS_NAME_XCHARS_TO;
 471     else
 472       *p = toupper(*p);
 473   }
 474   return canon;
 475 }
 476 
 477 Private int
 478 cmp_charset_name(char *canon, char *name)
     /* [<][>][^][v][top][bottom][index][help] */
 479 {
 480   char  *p, *q, cp, cq;
 481 
 482   if (canon == NULL)
 483     return 0;
 484 
 485   p = canon;
 486   q = name;
 487   do {
 488     cp = toupper(*p);
 489     if (vf_index(CS_NAME_XCHARS, *p) != NULL)
 490       cp = CS_NAME_XCHARS_TO;
 491     cq = toupper(*q);
 492     if (vf_index(CS_NAME_XCHARS, *q) != NULL)
 493       cq = CS_NAME_XCHARS_TO;
 494     if (cp != cq)
 495       return -1;
 496     p++;
 497     q++;
 498   } while ((*p != '\0') && (*q != '\0'));
 499   return 0;
 500 }
 501 
 502 Private int
 503 cmp_enc_name(char *enc1, char *enc2)
     /* [<][>][^][v][top][bottom][index][help] */
 504 {
 505   char  *p, *q, cp, cq;
 506 
 507   p = enc1;
 508   q = enc2;
 509   do {
 510     cp = toupper(*p);
 511     if (vf_index(CS_NAME_XCHARS, *p) != NULL)
 512       cp = CS_NAME_XCHARS_TO;
 513     cq = toupper(*q);
 514     if (vf_index(CS_NAME_XCHARS, *q) != NULL)
 515       cq = CS_NAME_XCHARS_TO;
 516     if (cp != cq)
 517       return -1;
 518     p++;
 519     q++;
 520   } while ((*p != '\0') && (*q != '\0'));
 521   return 0;
 522 }
 523 
 524 
 525 
 526 Glocal int
 527 vf_ccv_autoload(char *file_name)
     /* [<][>][^][v][top][bottom][index][help] */
 528 {
 529   struct s_ccv_info ccv_info;
 530 
 531   if (vf_dbg_ccv == 1)
 532     printf(">> CCV autoload: %s\n", file_name);
 533 
 534   ccv_info.file_name = vf_strdup(file_name);
 535 
 536   if (ccv_read_header(&ccv_info) < 0){
 537     if (vf_dbg_ccv == 1)
 538       printf(">> CCV failed autoload\n");
 539     return -1;
 540   }
 541 
 542 #if 0
 543   printf("  %s %s => %s %s  0x%02x 0x%02x 0x%02x 0x%02x\n", 
 544          ccv_info.cs1_name, ccv_info.cs1_enc, 
 545          ccv_info.cs2_name, ccv_info.cs2_enc, 
 546          ccv_info.c1min, ccv_info.c1max, ccv_info.c2min, ccv_info.c2max);
 547 #endif
 548 
 549   ccv_add_conv_info(&ccv_info, CCV_STAT_AUTOLOAD, file_name);
 550 
 551   if (vf_dbg_ccv == 1)
 552     printf(">> CCV autoload done.\n");
 553 
 554   return 0;
 555 }
 556 
 557 
 558 Glocal int
 559 vf_ccv_install_func(char *cs1_name, char *cs1_enc, 
     /* [<][>][^][v][top][bottom][index][help] */
 560                     char *cs2_name, char *cs2_enc,
 561                     long (*conv)(int,long))
 562 {
 563   struct s_ccv_info ccv_info;
 564 
 565   ccv_info.cs1_name          = cs1_name;
 566   ccv_info.cs1_name_aliases  = NULL;
 567   ccv_info.cs1_enc           = cs1_enc;
 568   ccv_info.cs1_enc_aliases   = NULL;
 569   ccv_info.cs2_name          = cs2_name;
 570   ccv_info.cs2_name_aliases  = NULL;
 571   ccv_info.cs2_enc           = cs2_enc; 
 572   ccv_info.cs2_enc_aliases   = NULL; 
 573   ccv_info.arg_type  = CCV_ARG_TYPE_FUNC;
 574   ccv_info.conv      = conv;
 575   ccv_info.arg       = 0;
 576 
 577   ccv_info.block_size = 0;
 578   ccv_info.c1min      = 0;
 579   ccv_info.c1max      = 0;
 580   ccv_info.c2min      = 0;
 581   ccv_info.c2max      = 0;
 582   ccv_info.nblocks    = 0;
 583   ccv_info.file_name  = NULL;
 584 
 585   ccv_add_conv_info(&ccv_info, CCV_STAT_LOADED, NULL);
 586   return 0;
 587 }
 588 
 589 
 590 
 591 Private int
 592 ccv_load(struct s_ccv_info *ccvi)
     /* [<][>][^][v][top][bottom][index][help] */
 593 {
 594   if (vf_dbg_ccv == 1)
 595     printf(">> CCV loading %s\n", ccvi->file_name);
 596 
 597   return ccv_read_file(ccvi, 0);
 598 }
 599 
 600 Private int
 601 ccv_read_header(struct s_ccv_info *ccvi)
     /* [<][>][^][v][top][bottom][index][help] */
 602 {
 603   return ccv_read_file(ccvi, 1);
 604 }
 605 
 606 Private int
 607 ccv_read_file(struct s_ccv_info *ccvi, int header_only)
     /* [<][>][^][v][top][bottom][index][help] */
 608 {
 609   FILE  *fp;
 610   char   key[256], val[256];
 611   int    block, v;
 612 
 613   ccvi->file_path = vf_path_find_runtime_file("ccv", ccvi->file_name,
 614                                               VF_ENV_CCV_DIR);
 615   
 616   if (vf_dbg_ccv == 1){
 617     if (ccvi->file_path != NULL)
 618       printf(">> CCV autoload file %s: %s\n",
 619              ccvi->file_name, ccvi->file_path);
 620     else
 621       printf(">> CCV autoload file %s: not found\n", ccvi->file_name);
 622   }
 623 
 624   if (ccvi->file_path == NULL)
 625     return -1;
 626 
 627   if ((fp = vf_fm_OpenTextFileStream(ccvi->file_path)) == NULL)
 628     return -1;
 629 
 630   if (header_only == 1){
 631     ccvi->cs1_name_aliases = NULL;
 632     ccvi->cs1_enc_aliases  = NULL;
 633     ccvi->cs2_name_aliases = NULL;
 634     ccvi->cs2_enc_aliases  = NULL;
 635   }
 636 
 637   while (ccv_file_read_list(fp, key, sizeof(key)) >= 0){
 638     v = ccv_file_read_elem(fp, val, sizeof(val));
 639     if (v == -1)
 640       break;
 641     else if (v == 0)
 642       continue;
 643     if ((   (vf_strcmp_ci(key, "charset-from-name") == 0) 
 644          || (vf_strcmp_ci(key, "charset-external-name") == 0)) 
 645         && (header_only == 1)){
 646       ccvi->cs1_name = make_canonical_charset_name(val);
 647       ccv_read_aliases(fp, &ccvi->cs1_name_aliases);
 648     } else if ((   (vf_strcmp_ci(key, "charset-from-encoding") == 0)
 649                 || (vf_strcmp_ci(key, "charset-external-encoding") == 0))
 650                && (header_only == 1)){
 651       ccvi->cs1_enc = vf_strdup(val);
 652       ccv_read_aliases(fp, &ccvi->cs1_enc_aliases);
 653     } else if ((   (vf_strcmp_ci(key, "charset-to-name") == 0)
 654                 || (vf_strcmp_ci(key, "charset-internal-name") == 0))
 655                && (header_only == 1)){
 656       ccvi->cs2_name = make_canonical_charset_name(val);
 657       ccv_read_aliases(fp, &ccvi->cs2_name_aliases);
 658     } else if ((   (vf_strcmp_ci(key, "charset-to-encoding") == 0)
 659                 || (vf_strcmp_ci(key, "charset-internal-encoding") == 0))
 660                && (header_only == 1)){
 661       ccvi->cs2_enc = vf_strdup(val);
 662       ccv_read_aliases(fp, &ccvi->cs2_enc_aliases);
 663     } else if ((vf_strcmp_ci(key, "table-type") == 0)
 664                && (header_only == 1)){
 665       ccvi->conv = NULL;
 666       if (vf_strcmp_ci(val, "array") == 0){
 667         ccvi->arg_type = CCV_ARG_TYPE_ARRAY;
 668         ccvi->conv = ccv_conv_array;
 669       } else if (vf_strcmp_ci(val, "random-arrays") == 0){
 670         ccvi->arg_type = CCV_ARG_TYPE_RANDOM_ARRAY;
 671         ccvi->conv = ccv_conv_random_array;
 672       } else {
 673         fprintf(stderr, "VFlib: broken code conversion file: %s\n", 
 674                 ccvi->file_name);
 675         return -1;
 676       }
 677     } else if ((vf_strcmp_ci(key, "nblocks") == 0) && (header_only == 1)){
 678       sscanf(val, "%i", &ccvi->nblocks);
 679     } else if ((vf_strcmp_ci(key, "c1-min") == 0) && (header_only == 1)){
 680       sscanf(val, "%i", &ccvi->c1min);
 681     } else if ((vf_strcmp_ci(key, "c1-max") == 0) && (header_only == 1)){
 682       sscanf(val, "%i", &ccvi->c1max);
 683     } else if ((vf_strcmp_ci(key, "c2-min") == 0) && (header_only == 1)){
 684       sscanf(val, "%i", &ccvi->c2min);
 685     } else if ((vf_strcmp_ci(key, "c2-max") == 0) && (header_only == 1)){
 686       sscanf(val, "%i", &ccvi->c2max);
 687     } else if ((vf_strcmp_ci(key, "block-size") == 0) && (header_only == 1)){
 688       sscanf(val, "%i", &ccvi->block_size);
 689     } else if (vf_strcmp_ci(key, "block") == 0){
 690       if (header_only == 1){
 691         break;
 692       } else {
 693         ccv_read_file_alloc_blocks(ccvi);
 694         sscanf(val, "%i", &block);
 695         if (ccvi->arg_type == CCV_ARG_TYPE_ARRAY){
 696           ccv_read_file_block_array(fp, ccvi, block);
 697         } else {
 698           ccv_read_file_block_random_array(fp, ccvi, block);
 699         }
 700       }
 701     }
 702   }
 703 
 704   return 0;
 705 }
 706 
 707 Private void
 708 ccv_read_aliases(FILE  *fp, char ***alias_tbl)
     /* [<][>][^][v][top][bottom][index][help] */
 709 {
 710   char   val[128];
 711   char  *names[64];
 712   int    nnames, i;
 713 
 714   nnames = 0;
 715   while (ccv_file_read_elem(fp, val, sizeof(val)) > 0){
 716     names[nnames++] = vf_strdup(val);
 717   }
 718 
 719   if (nnames == 0){
 720     *alias_tbl = NULL;
 721     return;
 722   }
 723   if ((*alias_tbl = (char**)malloc(sizeof(char*) * (nnames + 1))) != NULL){
 724     for (i = 0; i < nnames; i++){
 725       (*alias_tbl)[i] = names[i];
 726     }
 727     (*alias_tbl)[nnames] = NULL;
 728   }
 729 }
 730 
 731 Private int
 732 ccv_read_file_alloc_blocks(struct s_ccv_info *ccvi)
     /* [<][>][^][v][top][bottom][index][help] */
 733 {
 734   long              *array;
 735   CCV_RANDOM_ARRAY  rarray;
 736 
 737   switch (ccvi->arg_type){
 738   case CCV_ARG_TYPE_ARRAY:
 739     array = (long*)calloc(ccvi->nblocks * (ccvi->c2max - ccvi->c2min + 1),
 740                           sizeof(long));
 741     if (array == NULL)
 742       return -1;
 743     ccvi->arg = (long)array;
 744     break;
 745   case CCV_ARG_TYPE_RANDOM_ARRAY:
 746   default:
 747     rarray = (CCV_RANDOM_ARRAY)calloc(1, sizeof(struct s_ccv_random_array));
 748     if (rarray == NULL)
 749       return -1;
 750     rarray->block_index
 751       = (int*)calloc(ccvi->nblocks, sizeof(int));
 752     rarray->tbl 
 753       = (long*)calloc(ccvi->nblocks * (ccvi->c2max - ccvi->c2min + 1),
 754                       sizeof(long));
 755     if ((rarray == NULL)
 756         || (rarray->block_index == NULL) || (rarray->tbl == NULL))
 757       return -1;
 758     ccvi->arg = (long)rarray;
 759     break;
 760   }
 761   return 0;
 762 }
 763 
 764 
 765 Private int
 766 ccv_read_file_block_array(FILE *fp, struct s_ccv_info *ccvi, int block)
     /* [<][>][^][v][top][bottom][index][help] */
 767 {
 768   int   code, base, b, c, v;
 769   char  key[256], val[256];
 770   long  *tbl;
 771 
 772   if (vf_dbg_ccv == 1)
 773     printf(">> CCV Reding table (array) nblocks=%d\n", ccvi->nblocks);
 774 
 775   tbl = (long*)ccvi->arg;
 776   for (b = ccvi->c1min; ; ){
 777     if (vf_dbg_ccv == 1)
 778       printf("  Block %d", block);
 779     base = block * (ccvi->c2max - ccvi->c2min + 1);
 780     for (c = ccvi->c2min; c <= ccvi->c2max; c++){
 781       if ((v = ccv_file_read_elem(fp, val, sizeof(val))) <= 0){
 782         fprintf(stderr, "VFlib warning: broken code conversion table: %s\n",
 783                 ccvi->file_name);
 784         return -1;
 785       }
 786       sscanf(val, "%i", &code);
 787       tbl[base + (c - ccvi->c2min)] = code;
 788 #if defined(DEBUG) && 0
 789       printf("\n   0x%04x ==> 0x%04x", b*ccvi->block_size+c, code);
 790 #endif
 791     }
 792 
 793     if ((++b) > ccvi->c1max)
 794       break;
 795 
 796     for (;;){
 797       if (ccv_file_read_list(fp, key, sizeof(key)) < 0){
 798         fprintf(stderr, "VFlib warning: broken code conversion table: %s\n",
 799                 ccvi->file_name);
 800         return -1;
 801       }
 802       if (vf_strcmp_ci(key, "block") == 0)
 803         break;
 804     }
 805     ccv_file_read_elem(fp, val, sizeof(val));
 806     sscanf(val, "%i", &block);
 807   }
 808   if (vf_dbg_ccv == 1)
 809     printf("\n");
 810 
 811   return 0;
 812 }
 813 
 814 Private int
 815 ccv_read_file_block_random_array(FILE *fp, struct s_ccv_info *ccvi, int block)
     /* [<][>][^][v][top][bottom][index][help] */
 816 {
 817   int   code, base, b, c, v;
 818   char  key[256], val[256];
 819   int   *block_index, index;
 820   long  *tbl;
 821   CCV_RANDOM_ARRAY rarray;
 822 
 823   if (vf_dbg_ccv == 1)
 824     printf(">> CCV reding table (random-arrays) nblocks=%d", ccvi->nblocks);
 825 
 826   rarray = (CCV_RANDOM_ARRAY)ccvi->arg;
 827   block_index = rarray->block_index;
 828   tbl = rarray->tbl;
 829   for (b = 0; ; ){
 830     if (vf_dbg_ccv == 1)
 831       printf("  Block %d", block);
 832     rarray->block_index[b] = block;
 833     base = b * (ccvi->c2max - ccvi->c2min + 1);
 834     for (c = ccvi->c2min; c <= ccvi->c2max; c++){
 835       if ((v = ccv_file_read_elem(fp, val, sizeof(val))) <= 0){
 836         fprintf(stderr, "VFlib warning: broken code conversion table: %s\n",
 837                 ccvi->file_name);
 838         return -1;
 839       }
 840       sscanf(val, "%i", &code);
 841       index = base + (c - ccvi->c2min);
 842       tbl[index] = code;
 843 #if defined(DEBUG) && 0
 844       printf("\n   0x%04x ==> 0x%04x  (%d)", 
 845              (block + ccvi->c1min)*ccvi->block_size+c, code, index);
 846 #endif
 847     }
 848 
 849     if ((++b) >= ccvi->nblocks)
 850       break;
 851 
 852     for (;;){
 853       if ((v = ccv_file_read_list(fp, key, sizeof(key))) == 0)
 854         break;
 855       if (v < 0){
 856         fprintf(stderr, "VFlib warning: broken code conversion table: %s\n",
 857                 ccvi->file_name);
 858         return -1;
 859       }
 860       if (vf_strcmp_ci(key, "block") == 0)
 861         break;
 862     }
 863     ccv_file_read_elem(fp, val, sizeof(val));
 864     sscanf(val, "%i", &block);
 865   }
 866 
 867   return 0;
 868 }
 869 
 870 
 /*
  * ccv_file_read_list
  *
  * Skip forward in FP to the next '(' and copy the token that follows it
  * (the car of the s-expression) into BUFF, which holds NBUFF bytes.
  * While skipping, a ';' starts a comment that runs to the end of the line.
  *
  * The token ends at white space, at ')' or at end of file; that
  * terminating character is consumed.  A token longer than NBUFF-1
  * characters is truncated and the rest of it is left unread in FP.
  * BUFF is always NUL-terminated.
  *
  * Returns 1 when a '(' was found, -1 when end of file was reached first
  * (or NBUFF is not positive).
  */
 Private int 
 ccv_file_read_list(FILE *fp, char *buff, int nbuff)
 {
   int  ch, i;

   if (nbuff <= 0)
     return -1;
   buff[0] = '\0';

   /* skip until '(' */
   while ((ch = getc(fp)) != '('){
     if (ch == EOF)
       return -1;
     if (ch == ';'){    /* comment begins. skip until the eol */
       while ((ch = getc(fp)) != '\n'){
         if (ch == EOF)
           return -1;
       }
     }
   }

   /* read the car part of an s-exp */
   i = 0;
   while (i < nbuff-1){   /* test for room first so no character is lost */
     ch = getc(fp);
     if ((ch == EOF) || (ch == ')') || isspace(ch))
       break;
     buff[i++] = (char)ch;
   }
   buff[i] = '\0';
   return 1;
 }
 911 
 /*
  * ccv_file_read_elem
  *
  * Read the next white-space delimited token from FP into BUFF, which
  * holds NBUFF bytes.  Leading white space is skipped.
  *
  * Returns  1  when a token was read.  The token ends at white space
  *             (consumed), at ')' (pushed back, so that the next call
  *             reports the end of the list) or at end of file.
  *          0  when the first non-space character is ')', i.e. the list
  *             has no more elements; the ')' is consumed.
  *         -1  at end of file (or when NBUFF is not positive).
  *
  * A token longer than NBUFF-1 characters is truncated and the rest of it
  * is left unread in FP.  BUFF is always NUL-terminated.
  */
 Private int 
 ccv_file_read_elem(FILE *fp, char *buff, int nbuff)
 {
   int  ch, i;

   if (nbuff <= 0)
     return -1;
   buff[0] = '\0';

   do {
     ch = getc(fp);
   } while ((ch != EOF) && isspace(ch));
   if (ch == ')')
     return 0;
   if (ch == EOF)
     return -1;

   i = 0;
   if (nbuff > 1)         /* a one-byte buffer only has room for the NUL */
     buff[i++] = (char)ch;
   /* test for room before reading: a delimiter that directly follows a
      token filling the buffer (a ')' in particular) must not be lost */
   while (i < nbuff-1){
     ch = getc(fp);
     if ((ch == EOF) || isspace(ch))
       break;
     if (ch == ')'){
       ungetc(ch, fp);
       break;
     }
     buff[i++] = (char)ch;
   }
   buff[i] = '\0';
   return 1;
 }
 945 
 946 
 947 
 948 Glocal long
 949 vf_ccv_conv(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 950 {
 951   long cv_code_point;
 952 
 953   if (ccvi_index < 0)
 954     return code_point;
 955 
 956   cv_code_point = (*ccv_tbl[ccvi_index].conv)(ccvi_index, code_point);
 957 
 958   if (vf_dbg_ccv_map == 1)
 959     printf(">> CCV code conversion: 0x%04lx => 0x%04lx\n", 
 960            code_point, cv_code_point);
 961 
 962   return cv_code_point;
 963 }
 964 
 965 
 966 Private long
 967 ccv_conv_array(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 968 {
 969   int       c1, c2, index; 
 970   CCV_INFO  ccvi;
 971   long      *tbl;
 972 
 973   ccvi = &ccv_tbl[ccvi_index];
 974   c1 = code_point / ccvi->block_size;
 975   c2 = code_point % ccvi->block_size;
 976   if (   (c1 < ccvi->c1min) || (ccvi->c1max < c1)
 977       || (c2 < ccvi->c2min) || (ccvi->c2max < c2) ){
 978     return -1;
 979   }
 980   index = (c1 - ccvi->c1min) * (ccvi->c2max - ccvi->c2min + 1) 
 981           + (c2 - ccvi->c2min);
 982   tbl = (long*) ccvi->arg;
 983   return tbl[index];
 984 }
 985 
 986 Private long
 987 ccv_conv_random_array(int ccvi_index, long code_point)
     /* [<][>][^][v][top][bottom][index][help] */
 988 {
 989   int       c1, c2, index, i; 
 990   CCV_INFO  ccvi;
 991   CCV_RANDOM_ARRAY cra;
 992 
 993   ccvi = &ccv_tbl[ccvi_index];
 994   c1 = code_point / ccvi->block_size;
 995   c2 = code_point % ccvi->block_size;
 996   if (   (c1 < ccvi->c1min) || (ccvi->c1max < c1)
 997       || (c2 < ccvi->c2min) || (ccvi->c2max < c2) ){
 998     return -1;
 999   }
1000   cra = (CCV_RANDOM_ARRAY)ccvi->arg;
1001   for (i = 0; i < ccvi->nblocks; i++){  /** TOO SLOW! **/
1002     /*printf("\n index %d %d", cra->block_index[i], c1);*/
1003     if (cra->block_index[i]+ccvi->c1min == c1)
1004       break;
1005   }
1006   if (i == ccvi->nblocks)
1007     return -1;
1008   index = i * (ccvi->c2max - ccvi->c2min + 1) + (c2 - ccvi->c2min);
1009   return cra->tbl[index];
1010 }
1011 
1012 
1013 
1014 #ifdef DEBUG
1015 
/*
 * Debugging driver (compiled only when DEBUG is defined).
 *
 * Usage: dbg-ccv filename ... - cs1_name cs1_enc cs2_name cs2_enc code_point ...
 *
 * Every argument before "-" is passed to vf_ccv_autoload().  The four
 * names after "-" are passed to vf_ccv_require(), and each remaining
 * argument is parsed as a code point (sscanf "%i", so decimal, octal and
 * hexadecimal notations are accepted), converted and printed.
 *
 * Exits with status 1 when fewer than five arguments are given or when
 * the names after "-" are incomplete.
 */
int
main(int argc, char **argv)
{
  int   ccvi_index;
  int   cc;
  long  cvcc;

  argc--; argv++;

  vf_ccv_init();
  if (argc <= 4)
    exit(1);

  while (argc > 0){
    if (strcmp(argv[0], "-") == 0){
      argc--;
      argv++;
      break;
    }
    vf_ccv_autoload(argv[0]);
    argc--;
    argv++;
  }
  if (argc == 0)
    exit(0);

  /* All four names are needed; without them there is no conversion
     index to hand to vf_ccv_conv(). */
  if (argc < 4){
    fprintf(stderr, "Usage: dbg-ccv filename ... - "
            "cs1_name cs1_enc cs2_name cs2_enc code_point ...\n");
    exit(1);
  }
  ccvi_index = vf_ccv_require(argv[0], argv[1], argv[2], argv[3]);
  argc -= 4;
  argv = &argv[4];

  while (argc > 0){
    if (sscanf(argv[0], "%i", &cc) != 1){
      /* skip arguments that are not numbers instead of converting garbage */
      fprintf(stderr, "dbg-ccv: not a code point: %s\n", argv[0]);
    } else {
      cvcc = vf_ccv_conv(ccvi_index, (long)cc);
      printf("\n  Conv 0x%x ==> 0x%x", cc, (int)cvcc);
    }
    argc--;
    argv++;
  }

  return 0;
}
1060 #endif
1061 
1062 /*EOF*/

/* [<][>][^][v][top][bottom][index][help] */