2013-04-16 549 views
0

我需要将古老的IBM Code Page 437中的字符映射到16位UCS-2代码。转换到UCS-2是一个相当简单的过程:把一个整数值映射到另一个整数值

// Forward table: indexed by the CP437 code, each entry is the matching UCS-2 code.
const unsigned short ibm437_to_ucs[] = { 0x0000, 0x0001, ..., 0x2320, 0x00f7 }; 
... 
// Forward conversion is a single array lookup.
return ibm437_to_ucs[ibm_code]; 

然而,UCS代码是不连续的,所以我不打算再写一个对应的数组来做反方向的转换。对于每个UCS字符,有没有比简单地遍历原始数组更好的方法?

for(int i = 0; i < IBM437_MAX /* 0xff */; i++) 
{ 
    if(ibm437_to_ucs[i] == ucs_code) 
     return i; 
} 
return '?'; 
+1

为什么不用 `std::map`? –

+2

将这个集合细分为几个由连续代码组成的子集? –

+1

补充一下 @AlexeyFrunze 所说的内容:如果您最终得到的范围不止一个,可以使用二分查找来定位代码所属的范围。 – NPE

回答

0

下面演示如何在C中(稍作修改后同样适用于C++)使用查找表和二分查找来实现:

#include <stdio.h> 
#include <stdlib.h> 

// conversion tables are based on 
// http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT 

// Forward table for the upper half of CP437: entry [c - 0x80] holds the
// UTF-16 code for CP437 byte c (0x80..0xFF). Bytes below 0x80 are not
// stored here; Cp437toUtf16 returns them unchanged.
const unsigned short cp437hi_utf16[128] = 
{ 
    0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 
    0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 
    0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 
    0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 
    0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 
    0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 
    0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 
    0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 
    0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 
    0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 
    0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 
    0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, 
    0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 
    0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0 
}; 

// Reverse table: each entry packs the CP437 byte above bit 15 and the
// UTF-16 code in the low 16 bits (e.g. 0xff00a0 is byte 0xff, code 0x00a0).
// Entries are listed in ascending order of the low 16 bits, which is the
// key CompareUtf16toCp437 compares when Utf16toCp437 calls bsearch();
// keep that order when editing the table.
const unsigned long utf16_cp437hi[128] = 
{ 
    0xff00a0, 0xad00a1, 0x9b00a2, 0x9c00a3, 
    0x9d00a5, 0xa600aa, 0xae00ab, 0xaa00ac, 
    0xf800b0, 0xf100b1, 0xfd00b2, 0xe600b5, 
    0xfa00b7, 0xa700ba, 0xaf00bb, 0xac00bc, 
    0xab00bd, 0xa800bf, 0x8e00c4, 0x8f00c5, 
    0x9200c6, 0x8000c7, 0x9000c9, 0xa500d1, 
    0x9900d6, 0x9a00dc, 0xe100df, 0x8500e0, 
    0xa000e1, 0x8300e2, 0x8400e4, 0x8600e5, 
    0x9100e6, 0x8700e7, 0x8a00e8, 0x8200e9, 
    0x8800ea, 0x8900eb, 0x8d00ec, 0xa100ed, 
    0x8c00ee, 0x8b00ef, 0xa400f1, 0x9500f2, 
    0xa200f3, 0x9300f4, 0x9400f6, 0xf600f7, 
    0x9700f9, 0xa300fa, 0x9600fb, 0x8100fc, 
    0x9800ff, 0x9f0192, 0xe20393, 0xe90398, 
    0xe403a3, 0xe803a6, 0xea03a9, 0xe003b1, 
    0xeb03b4, 0xee03b5, 0xe303c0, 0xe503c3, 
    0xe703c4, 0xed03c6, 0xfc207f, 0x9e20a7, 
    0xf92219, 0xfb221a, 0xec221e, 0xef2229, 
    0xf72248, 0xf02261, 0xf32264, 0xf22265, 
    0xa92310, 0xf42320, 0xf52321, 0xc42500, 
    0xb32502, 0xda250c, 0xbf2510, 0xc02514, 
    0xd92518, 0xc3251c, 0xb42524, 0xc2252c, 
    0xc12534, 0xc5253c, 0xcd2550, 0xba2551, 
    0xd52552, 0xd62553, 0xc92554, 0xb82555, 
    0xb72556, 0xbb2557, 0xd42558, 0xd32559, 
    0xc8255a, 0xbe255b, 0xbd255c, 0xbc255d, 
    0xc6255e, 0xc7255f, 0xcc2560, 0xb52561, 
    0xb62562, 0xb92563, 0xd12564, 0xd22565, 
    0xcb2566, 0xcf2567, 0xd02568, 0xca2569, 
    0xd8256a, 0xd7256b, 0xce256c, 0xdf2580, 
    0xdc2584, 0xdb2588, 0xdd258c, 0xde2590, 
    0xb02591, 0xb12592, 0xb22593, 0xfe25a0 
}; 

// Convert one CP437 byte to its UTF-16 code.
// Bytes below 0x80 are returned as-is; bytes from 0x80 upward are looked up
// in cp437hi_utf16, whose index 0 corresponds to byte 0x80.
unsigned short Cp437toUtf16(unsigned char c)
{
    return (c >= 0x80) ? cp437hi_utf16[c - 0x80] : c;
}

// bsearch() comparison callback: orders two unsigned long values by their
// low 16 bits only; every higher bit is ignored.
// Returns +1 if the first key is larger, -1 if it is smaller, 0 if equal.
static int CompareUtf16toCp437(const void* pv1, const void* pv2)
{
    const unsigned long lhs = *(const unsigned long *)pv1 & 0xFFFF;
    const unsigned long rhs = *(const unsigned long *)pv2 & 0xFFFF;

    // Each comparison yields 0 or 1, so the difference is -1, 0 or +1.
    return (lhs > rhs) - (lhs < rhs);
}

// Convert a UTF-16 code to its CP437 byte.
// Codes below 0x80 are returned unchanged. Other codes are looked up in
// utf16_cp437hi with bsearch(), keyed on the low 16 bits of each entry.
// Returns -1 when cp is above 0xFFFF or no table entry matches.
int Utf16toCp437(unsigned long cp)
{
    const size_t entry_size = sizeof(utf16_cp437hi[0]);
    const size_t entry_count = sizeof(utf16_cp437hi) / entry_size;
    const unsigned long *hit;

    if (cp < 0x80)
        return cp;

    if (cp > 0xFFFF)
        return -1;

    hit = bsearch(&cp, utf16_cp437hi, entry_count, entry_size,
                  &CompareUtf16toCp437);

    // The CP437 byte is stored above the 16-bit key in the matched entry.
    return hit ? (int)(*hit >> 16) : -1;
}

// Round-trip self-test: converts every byte value 0x00..0xFF to UTF-16 with
// Cp437toUtf16 and back with Utf16toCp437, printing one line per byte.
// Prints "Failure!" and returns EXIT_FAILURE at the first byte that does not
// map back to itself; otherwise prints "Success!" and returns EXIT_SUCCESS.
int main(void)
{
    unsigned c;

    for (c = 0; c <= 0xFF; c++)
    {
        unsigned long cp = Cp437toUtf16(c);
        int c2 = Utf16toCp437(cp);

        // %X takes an unsigned argument, so convert c2 explicitly
        // (it is -1 when the reverse lookup fails).
        printf("0x%02X -> 0x%04lX -> 0x%02X\n", c, cp, (unsigned)c2);

        // Check the failure value first, then compare like-signed values.
        if (c2 < 0 || (unsigned)c2 != c)
        {
            puts("Failure!");
            return EXIT_FAILURE;
        }
    }

    puts("Success!");
    return EXIT_SUCCESS;
}

输出(ideone):

0x00 -> 0x0000 -> 0x00 
0x01 -> 0x0001 -> 0x01 
0x02 -> 0x0002 -> 0x02 
0x03 -> 0x0003 -> 0x03 
0x04 -> 0x0004 -> 0x04 
0x05 -> 0x0005 -> 0x05 
0x06 -> 0x0006 -> 0x06 
0x07 -> 0x0007 -> 0x07 
0x08 -> 0x0008 -> 0x08 
0x09 -> 0x0009 -> 0x09 
0x0A -> 0x000A -> 0x0A 
0x0B -> 0x000B -> 0x0B 
0x0C -> 0x000C -> 0x0C 
0x0D -> 0x000D -> 0x0D 
0x0E -> 0x000E -> 0x0E 
0x0F -> 0x000F -> 0x0F 
0x10 -> 0x0010 -> 0x10 
0x11 -> 0x0011 -> 0x11 
0x12 -> 0x0012 -> 0x12 
0x13 -> 0x0013 -> 0x13 
0x14 -> 0x0014 -> 0x14 
0x15 -> 0x0015 -> 0x15 
0x16 -> 0x0016 -> 0x16 
0x17 -> 0x0017 -> 0x17 
0x18 -> 0x0018 -> 0x18 
0x19 -> 0x0019 -> 0x19 
0x1A -> 0x001A -> 0x1A 
0x1B -> 0x001B -> 0x1B 
0x1C -> 0x001C -> 0x1C 
0x1D -> 0x001D -> 0x1D 
0x1E -> 0x001E -> 0x1E 
0x1F -> 0x001F -> 0x1F 
0x20 -> 0x0020 -> 0x20 
0x21 -> 0x0021 -> 0x21 
0x22 -> 0x0022 -> 0x22 
0x23 -> 0x0023 -> 0x23 
0x24 -> 0x0024 -> 0x24 
0x25 -> 0x0025 -> 0x25 
0x26 -> 0x0026 -> 0x26 
0x27 -> 0x0027 -> 0x27 
0x28 -> 0x0028 -> 0x28 
0x29 -> 0x0029 -> 0x29 
0x2A -> 0x002A -> 0x2A 
0x2B -> 0x002B -> 0x2B 
0x2C -> 0x002C -> 0x2C 
0x2D -> 0x002D -> 0x2D 
0x2E -> 0x002E -> 0x2E 
0x2F -> 0x002F -> 0x2F 
0x30 -> 0x0030 -> 0x30 
0x31 -> 0x0031 -> 0x31 
0x32 -> 0x0032 -> 0x32 
0x33 -> 0x0033 -> 0x33 
0x34 -> 0x0034 -> 0x34 
0x35 -> 0x0035 -> 0x35 
0x36 -> 0x0036 -> 0x36 
0x37 -> 0x0037 -> 0x37 
0x38 -> 0x0038 -> 0x38 
0x39 -> 0x0039 -> 0x39 
0x3A -> 0x003A -> 0x3A 
0x3B -> 0x003B -> 0x3B 
0x3C -> 0x003C -> 0x3C 
0x3D -> 0x003D -> 0x3D 
0x3E -> 0x003E -> 0x3E 
0x3F -> 0x003F -> 0x3F 
0x40 -> 0x0040 -> 0x40 
0x41 -> 0x0041 -> 0x41 
0x42 -> 0x0042 -> 0x42 
0x43 -> 0x0043 -> 0x43 
0x44 -> 0x0044 -> 0x44 
0x45 -> 0x0045 -> 0x45 
0x46 -> 0x0046 -> 0x46 
0x47 -> 0x0047 -> 0x47 
0x48 -> 0x0048 -> 0x48 
0x49 -> 0x0049 -> 0x49 
0x4A -> 0x004A -> 0x4A 
0x4B -> 0x004B -> 0x4B 
0x4C -> 0x004C -> 0x4C 
0x4D -> 0x004D -> 0x4D 
0x4E -> 0x004E -> 0x4E 
0x4F -> 0x004F -> 0x4F 
0x50 -> 0x0050 -> 0x50 
0x51 -> 0x0051 -> 0x51 
0x52 -> 0x0052 -> 0x52 
0x53 -> 0x0053 -> 0x53 
0x54 -> 0x0054 -> 0x54 
0x55 -> 0x0055 -> 0x55 
0x56 -> 0x0056 -> 0x56 
0x57 -> 0x0057 -> 0x57 
0x58 -> 0x0058 -> 0x58 
0x59 -> 0x0059 -> 0x59 
0x5A -> 0x005A -> 0x5A 
0x5B -> 0x005B -> 0x5B 
0x5C -> 0x005C -> 0x5C 
0x5D -> 0x005D -> 0x5D 
0x5E -> 0x005E -> 0x5E 
0x5F -> 0x005F -> 0x5F 
0x60 -> 0x0060 -> 0x60 
0x61 -> 0x0061 -> 0x61 
0x62 -> 0x0062 -> 0x62 
0x63 -> 0x0063 -> 0x63 
0x64 -> 0x0064 -> 0x64 
0x65 -> 0x0065 -> 0x65 
0x66 -> 0x0066 -> 0x66 
0x67 -> 0x0067 -> 0x67 
0x68 -> 0x0068 -> 0x68 
0x69 -> 0x0069 -> 0x69 
0x6A -> 0x006A -> 0x6A 
0x6B -> 0x006B -> 0x6B 
0x6C -> 0x006C -> 0x6C 
0x6D -> 0x006D -> 0x6D 
0x6E -> 0x006E -> 0x6E 
0x6F -> 0x006F -> 0x6F 
0x70 -> 0x0070 -> 0x70 
0x71 -> 0x0071 -> 0x71 
0x72 -> 0x0072 -> 0x72 
0x73 -> 0x0073 -> 0x73 
0x74 -> 0x0074 -> 0x74 
0x75 -> 0x0075 -> 0x75 
0x76 -> 0x0076 -> 0x76 
0x77 -> 0x0077 -> 0x77 
0x78 -> 0x0078 -> 0x78 
0x79 -> 0x0079 -> 0x79 
0x7A -> 0x007A -> 0x7A 
0x7B -> 0x007B -> 0x7B 
0x7C -> 0x007C -> 0x7C 
0x7D -> 0x007D -> 0x7D 
0x7E -> 0x007E -> 0x7E 
0x7F -> 0x007F -> 0x7F 
0x80 -> 0x00C7 -> 0x80 
0x81 -> 0x00FC -> 0x81 
0x82 -> 0x00E9 -> 0x82 
0x83 -> 0x00E2 -> 0x83 
0x84 -> 0x00E4 -> 0x84 
0x85 -> 0x00E0 -> 0x85 
0x86 -> 0x00E5 -> 0x86 
0x87 -> 0x00E7 -> 0x87 
0x88 -> 0x00EA -> 0x88 
0x89 -> 0x00EB -> 0x89 
0x8A -> 0x00E8 -> 0x8A 
0x8B -> 0x00EF -> 0x8B 
0x8C -> 0x00EE -> 0x8C 
0x8D -> 0x00EC -> 0x8D 
0x8E -> 0x00C4 -> 0x8E 
0x8F -> 0x00C5 -> 0x8F 
0x90 -> 0x00C9 -> 0x90 
0x91 -> 0x00E6 -> 0x91 
0x92 -> 0x00C6 -> 0x92 
0x93 -> 0x00F4 -> 0x93 
0x94 -> 0x00F6 -> 0x94 
0x95 -> 0x00F2 -> 0x95 
0x96 -> 0x00FB -> 0x96 
0x97 -> 0x00F9 -> 0x97 
0x98 -> 0x00FF -> 0x98 
0x99 -> 0x00D6 -> 0x99 
0x9A -> 0x00DC -> 0x9A 
0x9B -> 0x00A2 -> 0x9B 
0x9C -> 0x00A3 -> 0x9C 
0x9D -> 0x00A5 -> 0x9D 
0x9E -> 0x20A7 -> 0x9E 
0x9F -> 0x0192 -> 0x9F 
0xA0 -> 0x00E1 -> 0xA0 
0xA1 -> 0x00ED -> 0xA1 
0xA2 -> 0x00F3 -> 0xA2 
0xA3 -> 0x00FA -> 0xA3 
0xA4 -> 0x00F1 -> 0xA4 
0xA5 -> 0x00D1 -> 0xA5 
0xA6 -> 0x00AA -> 0xA6 
0xA7 -> 0x00BA -> 0xA7 
0xA8 -> 0x00BF -> 0xA8 
0xA9 -> 0x2310 -> 0xA9 
0xAA -> 0x00AC -> 0xAA 
0xAB -> 0x00BD -> 0xAB 
0xAC -> 0x00BC -> 0xAC 
0xAD -> 0x00A1 -> 0xAD 
0xAE -> 0x00AB -> 0xAE 
0xAF -> 0x00BB -> 0xAF 
0xB0 -> 0x2591 -> 0xB0 
0xB1 -> 0x2592 -> 0xB1 
0xB2 -> 0x2593 -> 0xB2 
0xB3 -> 0x2502 -> 0xB3 
0xB4 -> 0x2524 -> 0xB4 
0xB5 -> 0x2561 -> 0xB5 
0xB6 -> 0x2562 -> 0xB6 
0xB7 -> 0x2556 -> 0xB7 
0xB8 -> 0x2555 -> 0xB8 
0xB9 -> 0x2563 -> 0xB9 
0xBA -> 0x2551 -> 0xBA 
0xBB -> 0x2557 -> 0xBB 
0xBC -> 0x255D -> 0xBC 
0xBD -> 0x255C -> 0xBD 
0xBE -> 0x255B -> 0xBE 
0xBF -> 0x2510 -> 0xBF 
0xC0 -> 0x2514 -> 0xC0 
0xC1 -> 0x2534 -> 0xC1 
0xC2 -> 0x252C -> 0xC2 
0xC3 -> 0x251C -> 0xC3 
0xC4 -> 0x2500 -> 0xC4 
0xC5 -> 0x253C -> 0xC5 
0xC6 -> 0x255E -> 0xC6 
0xC7 -> 0x255F -> 0xC7 
0xC8 -> 0x255A -> 0xC8 
0xC9 -> 0x2554 -> 0xC9 
0xCA -> 0x2569 -> 0xCA 
0xCB -> 0x2566 -> 0xCB 
0xCC -> 0x2560 -> 0xCC 
0xCD -> 0x2550 -> 0xCD 
0xCE -> 0x256C -> 0xCE 
0xCF -> 0x2567 -> 0xCF 
0xD0 -> 0x2568 -> 0xD0 
0xD1 -> 0x2564 -> 0xD1 
0xD2 -> 0x2565 -> 0xD2 
0xD3 -> 0x2559 -> 0xD3 
0xD4 -> 0x2558 -> 0xD4 
0xD5 -> 0x2552 -> 0xD5 
0xD6 -> 0x2553 -> 0xD6 
0xD7 -> 0x256B -> 0xD7 
0xD8 -> 0x256A -> 0xD8 
0xD9 -> 0x2518 -> 0xD9 
0xDA -> 0x250C -> 0xDA 
0xDB -> 0x2588 -> 0xDB 
0xDC -> 0x2584 -> 0xDC 
0xDD -> 0x258C -> 0xDD 
0xDE -> 0x2590 -> 0xDE 
0xDF -> 0x2580 -> 0xDF 
0xE0 -> 0x03B1 -> 0xE0 
0xE1 -> 0x00DF -> 0xE1 
0xE2 -> 0x0393 -> 0xE2 
0xE3 -> 0x03C0 -> 0xE3 
0xE4 -> 0x03A3 -> 0xE4 
0xE5 -> 0x03C3 -> 0xE5 
0xE6 -> 0x00B5 -> 0xE6 
0xE7 -> 0x03C4 -> 0xE7 
0xE8 -> 0x03A6 -> 0xE8 
0xE9 -> 0x0398 -> 0xE9 
0xEA -> 0x03A9 -> 0xEA 
0xEB -> 0x03B4 -> 0xEB 
0xEC -> 0x221E -> 0xEC 
0xED -> 0x03C6 -> 0xED 
0xEE -> 0x03B5 -> 0xEE 
0xEF -> 0x2229 -> 0xEF 
0xF0 -> 0x2261 -> 0xF0 
0xF1 -> 0x00B1 -> 0xF1 
0xF2 -> 0x2265 -> 0xF2 
0xF3 -> 0x2264 -> 0xF3 
0xF4 -> 0x2320 -> 0xF4 
0xF5 -> 0x2321 -> 0xF5 
0xF6 -> 0x00F7 -> 0xF6 
0xF7 -> 0x2248 -> 0xF7 
0xF8 -> 0x00B0 -> 0xF8 
0xF9 -> 0x2219 -> 0xF9 
0xFA -> 0x00B7 -> 0xFA 
0xFB -> 0x221A -> 0xFB 
0xFC -> 0x207F -> 0xFC 
0xFD -> 0x00B2 -> 0xFD 
0xFE -> 0x25A0 -> 0xFE 
0xFF -> 0x00A0 -> 0xFF 
Success! 

更新:您也可以不调用bsearch()和回调函数,而是直接内联实现二分查找。如果性能很重要,这样做会更快。

+0

感谢您的回应。将UCS和IBM代码点分别放进同一个整数的高位字和低位字中,这当然是有创意的。我花了一些时间才看懂你做了什么,因为在我习惯的语言里很少需要进行按位操作。但是,我快速搜索了一下,并没有查到'bsearch'内部只是用for循环逐个遍历,还是非线性的? – user2285060

+0

[For,while or recursion](http://en.wikipedia.org/wiki/Binary_search_algorithm)。它是非线性的,即对数时间复杂度。 –

+0

那么,每个C标准库都以不同的方式实现它? – user2285060