2010-05-02 92 views
0

我已经实现了this question中建议的一些更改,并且(非常感谢)它工作得很好,但是......在这个过程中,我似乎打破了后处理声明分配操作符。使用以下代码:调用构造函数的赋值操作符中断

#include <cstdio> 
#include "ucpp" 
main() { 
    ustring a = "test"; 
    ustring b = "ing"; 
    ustring c = "- -"; 
    ustring d = "cafe\xcc\x81"; 
    printf("%s\n", (a + b + c[1] + d).encode()); 
} 

我收到了一个很好的“测试咖啡馆”消息。但是,如果我稍微修改了代码,以便为const char *转换是分别进行,后期声明:

#include <cstdio> 
#include "ucpp" 
main() { 
    ustring a = "test"; 
    ustring b = "ing"; 
    ustring c = "- -"; 
    ustring d; 
    d = "cafe\xcc\x81"; 
    printf("%s\n", (a + b + c[1] + d).encode()); 
} 

的ustring命名d变成空白,和所有的输出被“测试”。我的新代码有三个构造函数,一个是void(可能是一个被错误使用的函数,在operator +函数中使用),一个采用常量字符串&,另一个采用const char *。以下是我的新库代码:

#include <cstdlib> 
#include <cstring> 
class ustring { 
    int * values; 
    long len; 
    public: 
    long length() { 
    return len; 
    } 
    ustring() { 
    len = 0; 
    values = (int *) malloc(0); 
    } 
    ustring(const ustring &input) { 
    len = input.len; 
    values = (int *) malloc(sizeof(int) * len); 
    for (long i = 0; i < len; i++) 
     values[i] = input.values[i]; 
    } 
    ustring operator=(ustring input) { 
    ustring result(input); 
    return result; 
    } 
    ustring(const char * input) { 
    values = (int *) malloc(0); 
    long s = 0;                 // s = number of parsed chars 
    int a, b, c, d, contNeed = 0, cont = 0; 
    for (long i = 0; input[i]; i++) 
     if (input[i] < 0x80) {             // ASCII, direct copy (00-7f) 
     values = (int *) realloc(values, sizeof(int) * ++s); 
     values[s - 1] = input[i]; 
     } else if (input[i] < 0xc0) {            // this is a continuation (80-bf) 
     if (cont == contNeed) {             // no need for continuation, use U+fffd 
      values = (int *) realloc(values, sizeof(int) * ++s); 
      values[s - 1] = 0xfffd; 
     } 
     cont = cont + 1; 
     values[s - 1] = values[s - 1] | ((input[i] & 0x3f) << ((contNeed - cont) * 6)); 
     if (cont == contNeed) cont = contNeed = 0; 
     } else if (input[i] < 0xc2) {            // invalid byte, use U+fffd (c0-c1) 
     values = (int *) realloc(values, sizeof(int) * ++s); 
     values[s - 1] = 0xfffd; 
     } else if (input[i] < 0xe0) {            // start of 2-byte sequence (c2-df) 
     contNeed = 1; 
     values = (int *) realloc(values, sizeof(int) * ++s); 
     values[s - 1] = (input[i] & 0x1f) << 6; 
     } else if (input[i] < 0xf0) {            // start of 3-byte sequence (e0-ef) 
     contNeed = 2; 
     values = (int *) realloc(values, sizeof(int) * ++s); 
     values[s - 1] = (input[i] & 0x0f) << 12; 
     } else if (input[i] < 0xf5) {            // start of 4-byte sequence (f0-f4) 
     contNeed = 3; 
     values = (int *) realloc(values, sizeof(int) * ++s); 
     values[s - 1] = (input[i] & 0x07) << 18; 
     } else {                 // restricted or invalid (f5-ff) 
     values = (int *) realloc(values, sizeof(int) * ++s); 
     values[s - 1] = 0xfffd; 
     } 
    len = s; 
    } 
    ustring operator=(const char * input) { 
    ustring result(input); 
    return result; 
    } 
    ustring operator+(ustring input) { 
    ustring result; 
    result.len = len + input.len; 
    result.values = (int *) malloc(sizeof(int) * result.len); 
    for (long i = 0; i < len; i++) 
     result.values[i] = values[i]; 
    for (long i = 0; i < input.len; i++) 
     result.values[i + len] = input.values[i]; 
    return result; 
    } 
    ustring operator[](long index) { 
    ustring result; 
    result.len = 1; 
    result.values = (int *) malloc(sizeof(int)); 
    result.values[0] = values[index]; 
    return result; 
    } 
    char * encode() { 
    char * r = (char *) malloc(0); 
    long s = 0; 
    for (long i = 0; i < len; i++) { 
     if (values[i] < 0x80) 
     r = (char *) realloc(r, s + 1), 
     r[s + 0] = char(values[i]), 
     s += 1; 
     else if (values[i] < 0x800) 
     r = (char *) realloc(r, s + 2), 
     r[s + 0] = char(values[i] >> 6 | 0x60), 
     r[s + 1] = char(values[i] & 0x3f | 0x80), 
     s += 2; 
     else if (values[i] < 0x10000) 
     r = (char *) realloc(r, s + 3), 
     r[s + 0] = char(values[i] >> 12 | 0xe0), 
     r[s + 1] = char(values[i] >> 6 & 0x3f | 0x80), 
     r[s + 2] = char(values[i] & 0x3f | 0x80), 
     s += 3; 
     else 
     r = (char *) realloc(r, s + 4), 
     r[s + 0] = char(values[i] >> 18 | 0xf0), 
     r[s + 1] = char(values[i] >> 12 & 0x3f | 0x80), 
     r[s + 2] = char(values[i] >> 6 & 0x3f | 0x80), 
     r[s + 3] = char(values[i] & 0x3f | 0x80), 
     s += 4; 
    } 
    return r; 
    } 
}; 
+0

抱歉,但是,什么是值=(int *)malloc(0); – Svisstack 2010-05-02 09:23:08

+1

该语句为ustring分配一个零长度的int块,在UTF-8解析期间由realloc()增加。 – 2010-05-02 09:24:16

回答

3

operator=应该修改*this。返回值(您最好做一个参考)仅用于链接情况:

a = b = c; 
(a = b).foo(); 
//etc. 
+0

非常感谢。我现在清楚地了解差异。 – 2010-05-02 09:29:14

0

两个赋值操作符被破坏,例如,这样的:

ustring operator=(const char * input) { 
    ustring result(input); 
    return result; 
    } 

无助于目标对象。它只是创建一个本地临时并返回。写他们喜欢这个:

ustring& operator=(ustring input) { 
    swap(input); 
    return *this; 
    } 

    ustring& operator=(const char * input) { 
    swap(ustring(input)); 
    return *this; 
    } 

    void swap(ustring& s) { 
    int* tv = values; values = s.values; s.values = tv; 
    long tl = len; len = s.len; s.len = tl; 
    } 
+0

非常感谢您的建议! – 2010-05-02 09:28:05

+0

你的交换看起来很长。 'std :: swap(values,s.values)有什么问题吗? std :: swap(len,s.len);'我会发现明显更具可读性? – 2010-05-02 09:47:15

相关问题