2017-04-16 53 views
0

我的类中有一个字符列表。序列化和反序列化通常按预期工作,但如果列表中包含需要用字节顺序标记(BOM)来描述的字符,就会出问题。示例字符代码是 56256。因此,我创建了一个简单的测试来重现这个问题。序列化和反序列化字符(s)

/// <summary>
/// Writes the char with code 56256 to a UTF-8 stream through Json.NET, reads it
/// back from the same stream and asserts that the value survived the round trip.
/// </summary>
[Test]
public void Utf8CharSerializeAndDeserializeShouldEqual()
{
    // 56256 == 0xDBC0, used directly as a single UTF-16 code unit.
    char utfChar = (char)56256;

    using (var buffer = new MemoryStream())
    {
        // The last argument (leaveOpen) keeps the MemoryStream usable once the writer is disposed.
        using (var writer = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            new JsonSerializer().Serialize(writer, utfChar);
        }

        // Rewind so the reader starts at the first written byte.
        buffer.Position = 0;

        using (var reader = new StreamReader(buffer, true))
        using (var jsonReader = new JsonTextReader(reader))
        {
            char deserializedChar = new JsonSerializer().Deserialize<char>(jsonReader);

            Console.WriteLine($"{(int)utfChar}, {(int)deserializedChar}");
            Assert.AreEqual(utfChar, deserializedChar);
            Assert.AreEqual((int)utfChar, (int)deserializedChar);
        }
    }
}

当字符代码不需要 BOM 时,测试可以正常通过。例如 65(A)就能通过该测试。

+0

你不能用 StreamWriter 随意写入任意码点。当然,BOM 比较特殊,但写入(比如说)一个单独的代理项也没什么不同。如果你*必须*用 StreamWriter 写 JSON(为什么?),那么至少做个实验,看看 JSON 序列化器是如何保存它的 –

+0

当然,我们的 API 支持流写入,不过我们已经用一个小技巧(hack)修改了代码。Json.NET 支持 StreamWriter。我有一个字符列表(List),序列化的字符与反序列化后的字符不相等。这难道不是一个重要的问题吗? –

+0

我不明白你所说的“需要字节顺序标记来描述”或“不需要 BOM”是什么意思。码点 56256(也就是 U+DBC0)是一个高代理码点;它与字节顺序标记没有任何关系。 –

回答

1

你的问题与 Json.NET 无关。问题在于 U+DBC0(十进制 56256)是无效的 Unicode 字符,而且正如文档(documentation)所解释的,你的 StreamWriter 所使用的 Encoding.UTF8 不会对这样的字符进行编码:

Encoding.UTF8 返回一个 UTF8Encoding 对象,该对象使用替换回退,将它无法编码的每个字符串以及无法解码的每个字节替换为问号(“?”)字符。

为了证实这一点,如果在你的测试示例中用 new UTF8Encoding(true, true) 替换 Encoding.UTF8,就会得到以下异常:

EncoderFallbackException: Unable to translate Unicode character \uDBC0 at index 1 to specified code page. 

如果你要序列化无效的 Unicode char 值,就需要手动对它们进行编码,例如使用下面的代码将其编码为字节数组:

/// <summary>
/// Converts <see cref="char"/> values to and from raw bytes (low byte first, then
/// high byte) without going through a text encoding.
/// </summary>
public static partial class TextExtensions
{
    /// <summary>Splits the 16-bit value of <paramref name="c"/> into its low and high bytes.</summary>
    static void ToBytesWithoutEncoding(char c, out byte lower, out byte upper)
    {
        int codeUnit = c;
        lower = (byte)(codeUnit & 0xFF);
        upper = (byte)(codeUnit >> 8);
    }

    /// <summary>Returns a two-element array holding the low byte, then the high byte, of <paramref name="c"/>.</summary>
    public static byte[] ToByteArrayWithoutEncoding(this char c)
    {
        var pair = new byte[2];
        ToBytesWithoutEncoding(c, out pair[0], out pair[1]);
        return pair;
    }

    /// <summary>
    /// Flattens a collection of chars into a byte array, two bytes per char
    /// (low byte first).
    /// </summary>
    /// <returns>The packed bytes, or <c>null</c> when <paramref name="list"/> is <c>null</c>.</returns>
    public static byte[] ToByteArrayWithoutEncoding(this ICollection<char> list)
    {
        if (list == null)
        {
            return null;
        }

        // checked: fail loudly rather than allocate a wrongly sized buffer on overflow.
        var buffer = new byte[checked(list.Count * 2)];
        int offset = 0;
        foreach (char item in list)
        {
            byte low, high;
            ToBytesWithoutEncoding(item, out low, out high);
            buffer[offset] = low;
            buffer[offset + 1] = high;
            offset += 2;
        }

        return buffer;
    }

    /// <summary>Rebuilds a char from the first two bytes of <paramref name="bytes"/>.</summary>
    public static char ToCharWithoutEncoding(this byte[] bytes)
    {
        return ToCharWithoutEncoding(bytes, 0);
    }

    /// <summary>
    /// Rebuilds a char from the byte at <paramref name="position"/> (low byte) and
    /// the byte after it (high byte). A byte that lies past the end of the array
    /// is treated as zero.
    /// </summary>
    /// <returns>The rebuilt char, or <c>default(char)</c> when <paramref name="bytes"/> is <c>null</c>.</returns>
    public static char ToCharWithoutEncoding(this byte[] bytes, int position)
    {
        if (bytes == null)
        {
            return default(char);
        }

        int low = position < bytes.Length ? bytes[position] : 0;
        int high = position + 1 < bytes.Length ? bytes[position + 1] : 0;
        return (char)(low | (high << 8));
    }

    /// <summary>
    /// Converts a byte array into a list of chars, consuming two bytes per char.
    /// A trailing odd byte yields a char built from that byte alone.
    /// </summary>
    /// <returns>The decoded list, or <c>null</c> when <paramref name="bytes"/> is <c>null</c>.</returns>
    public static List<char> ToCharListWithoutEncoding(this byte[] bytes)
    {
        if (bytes == null)
        {
            return null;
        }

        int length = bytes.Length;
        // Capacity is the byte count halved, rounded up.
        var result = new List<char>(length / 2 + length % 2);
        int index = 0;
        while (index < length)
        {
            result.Add(ToCharWithoutEncoding(bytes, index));
            index += 2;
        }

        return result;
    }
}

然后修改您的测试方法如下:

/// <summary>
/// Runs the byte-array based round-trip check for the char with code 56256.
/// </summary>
public void Utf8JsonCharSerializeAndDeserializeShouldEqualFixed()
    {
        // 56256 == 0xDBC0, the same code used by the original failing test.
        const char sample = (char)56256;
        Utf8JsonCharSerializeAndDeserializeShouldEqualFixed(sample);
    }

    /// <summary>
    /// Serializes <paramref name="utfChar"/> through Json.NET as a raw byte array,
    /// deserializes it again and asserts that the char read back equals the input.
    /// </summary>
    /// <param name="utfChar">The char to round-trip.</param>
    public void Utf8JsonCharSerializeAndDeserializeShouldEqualFixed(char utfChar)
    {
        byte[] payload;

        using (var output = new MemoryStream())
        {
            // UTF8Encoding(true, true): emit a BOM and throw on invalid input instead of substituting '?'.
            using (var writer = new StreamWriter(output, new UTF8Encoding(true, true), 1024))
            {
                new JsonSerializer().Serialize(writer, utfChar.ToByteArrayWithoutEncoding());
            }

            // Taken after the writer is disposed so everything has been flushed.
            payload = output.ToArray();
        }

        using (var input = new MemoryStream(payload))
        using (var reader = new StreamReader(input, true))
        using (var jsonReader = new JsonTextReader(reader))
        {
            byte[] roundTripped = new JsonSerializer().Deserialize<byte[]>(jsonReader);
            char deserializedChar = roundTripped.ToCharWithoutEncoding();

            Assert.AreEqual(utfChar, deserializedChar);
            Assert.AreEqual((int)utfChar, (int)deserializedChar);
        }
    }

或者,如果某个容器类中有 List<char> 属性,你可以创建以下转换器:

/// <summary>
/// Json.NET converter that reads and writes a <c>List&lt;char&gt;</c> as a byte array,
/// using the <c>...WithoutEncoding</c> extension methods for the conversion.
/// </summary>
public class CharListConverter : JsonConverter
{
    /// <summary>Returns <c>true</c> only when <paramref name="objectType"/> is exactly <c>List&lt;char&gt;</c>.</summary>
    public override bool CanConvert(Type objectType)
    {
        return typeof(List<char>) == objectType;
    }

    /// <summary>
    /// Returns <c>null</c> for a JSON null token; otherwise deserializes a byte array
    /// and converts it into a list of chars.
    /// </summary>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        return reader.TokenType == JsonToken.Null
            ? null
            : serializer.Deserialize<byte[]>(reader).ToCharListWithoutEncoding();
    }

    /// <summary>Converts the char collection into a byte array and serializes that array.</summary>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var characters = (ICollection<char>)value;
        serializer.Serialize(writer, characters.ToByteArrayWithoutEncoding());
    }
}

并按如下方式应用它:

/// <summary>
/// Example container showing how to attach <c>CharListConverter</c> to a
/// <c>List&lt;char&gt;</c> property through the <c>JsonConverter</c> attribute.
/// </summary>
public class RootObject 
{ 
    /// <summary>Character list that is handled by <c>CharListConverter</c>.</summary>
    [JsonConverter(typeof(CharListConverter))] 
    public List<char> Characters { get; set; } 
} 

在这两种情况下, Json.NET会将字节数组编码为Base64。