2017-08-18 87 views
1

我在用 YAML 序列化一个以类型引用作为成员的类时遇到了问题。我使用的是 ruamel.yaml 的安全加载器(safe loader)。
(标题:YAML - 序列化属性类型)

以下所有内容我都是在 REPL 提示符下逐条运行的(这样可以一次看到多个错误)。

引起故障的调用(初始化代码在下文给出):

# Entered one at a time at the REPL; each call ends in a RepresenterError
# (the last traceback line of each is listed in the output that follows).
Y.dump(A(T1), sys.stdout) 
Y.dump(A(T2), sys.stdout) 
Y.dump(A(T3), sys.stdout) 
Y.dump(A(int), sys.stdout) 

输出如下(仅列出每个回溯的最后一行):

ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T1' objects> 
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T2' objects> 
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T3' objects> 
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__abs__' of 'int' objects> 

初始化代码如下(上面引起故障的调用都依赖于它;我所希望得到的解决方案在代码之后说明):

import sys 
from ruamel.yaml import YAML, yaml_object 

# Single YAML instance (typ="safe", pure=True) used for every registration
# and dump in this question.
Y = YAML(typ="safe",pure=True) 

# ============== 

@yaml_object(Y)
class A(object):
    """Holder for a class reference that should be written to YAML.

    The wrapped class is kept in ``self.type``; ``f()`` instantiates it.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # ``type`` shadows the builtin; the name is kept because it is the
        # constructor's public parameter.
        self.type = type

    def f(self):
        """Return a new instance of the stored class, built without arguments."""
        instance = self.type()
        return instance

class T1(object):
    """Plain class that is only passed to ``A`` as a type; it is not registered."""

@yaml_object(Y)
class T2(object):
    """Second class passed to ``A``; registered on ``Y`` through the decorator."""

class T3(object): 
    """Yet another try""" 
    pass 
# NOTE(review): T3.__class__ is the metaclass (``type``), so this call
# registers ``type`` itself rather than T3 -- consistent with the
# ``!type {}`` that shows up in the output further down. Presumably
# ``Y.register_class(T3)`` was intended; confirm before relying on it.
Y.register_class(T3.__class__) 

任何能让我(安全地)唯一地保存类型的解决方案(我需要生成该类型的对象,并检查传入的对象是否属于某种类型)都将不胜感激。生成我所需类型的函数或类同样会遇到无法序列化的问题。


P.S. 我可能还发现了一个 bug:对同一个有效参数反复(尝试)序列化时,解析器出于某种原因会表现出不同的行为。

# The identical call entered four times in a row at the REPL; per the output
# below, the attempts alternate between a RepresenterError and a dump.
Y.dump(A(str), sys.stdout) 
Y.dump(A(str), sys.stdout) 
Y.dump(A(str), sys.stdout) 
Y.dump(A(str), sys.stdout) 

输出:

>>> Y.dump(A(str), sys.stdout) 
Traceback (most recent call last): 
    File "<stdin>", line 1, in <module> 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 352, in dump 
    return self.dump_all([data], stream, _kw, transform=transform) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 383, in dump_all 
    self.representer.represent(data) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 73, in represent 
    node = self.represent_data(data) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data 
    node = self.yaml_representers[data_types[0]](self, data) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 552, in t_y 
    tag, data, cls, flow_style=representer.default_flow_style) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object 
    return self.represent_mapping(tag, state, flow_style=flow_style) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping 
    node_value = self.represent_data(item_value) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data 
    node = self.yaml_representers[data_types[0]](self, data) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 492, in t_y 
    tag, data, cls, flow_style=representer.default_flow_style) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object 
    return self.represent_mapping(tag, state, flow_style=flow_style) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping 
    node_value = self.represent_data(item_value) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 111, in represent_data 
    node = self.yaml_representers[None](self, data) 
    File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 375, in represent_undefined 
    raise RepresenterError("cannot represent an object: %s" % data) 
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects> 
>>> Y.dump(A(str), sys.stdout) 
!Aclass 
type: !type {} 
>>> Y.dump(A(str), sys.stdout) 
Traceback (most recent call last): 
# same traceback here 
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects> 
>>> Y.dump(A(str), sys.stdout) 
!Aclass 
type: !type {} 
>>> 

回答

1

YAML 期望转储(dump)的是对象,并且最终是通过写出标量字符串来完成转储的。T1 不是一个对象(T2 和 T3 也不是),问题就出在这里。您可以尝试把每个类引用包装进一个对象,并在这些对象上使用标签,但在我看来这只会让事情变得更复杂。

归根结底,这一切都是为了把一个标量表示(即类的字符串表示)写入文件,所以您不妨直接改造 A(),让它转储这个字符串表示,并在加载时再把它读回来:

import sys 
from ruamel.yaml import YAML, yaml_object 
from ruamel.yaml.compat import StringIO 
from ruamel.yaml.scalarstring import DoubleQuotedScalarString 


# Single YAML instance (typ="safe", pure=True); A and T2 are registered on it
# via @yaml_object and T3 via register_class below.
Y = YAML(typ="safe", pure=True) 

# ============== 

@yaml_object(Y)
class A(object):
    """Wrapper around a class reference, serialized as a ``!Aclass`` scalar.

    Only the name of the wrapped class is written to YAML; loading looks the
    name up again and wraps whatever class is found.

    Args:
        type: the class to wrap. The parameter name shadows the builtin but
            is part of the constructor's interface.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent ``node`` (an ``A`` instance) as a tagged scalar.

        The bare class name is written when ``from_yaml`` can resolve it
        through this module's globals. Otherwise ``module.name`` is written,
        so that the dotted branch of ``from_yaml`` can find the class again
        (previously such a class could be dumped but not loaded back).
        """
        wrapped = node.type
        name = wrapped.__name__
        if globals().get(name) is not wrapped:
            # A bare name would fail to load (or load a different object),
            # so qualify it with the module that defines the class.
            name = u'{}.{}'.format(wrapped.__module__, name)
        return representer.represent_scalar(cls.yaml_tag, u'{}'.format(name))

    @classmethod
    def from_yaml(cls, constructor, node):
        """Rebuild an ``A`` from the scalar written by ``to_yaml``.

        Raises:
            KeyError: the module is not in ``sys.modules``, or the bare name
                is not in this module's globals.
            AttributeError: the module has no attribute with that name.

        NOTE: the name comes straight from the document and is resolved
        without any allow-list, so this must not be used on untrusted YAML
        as-is.
        """
        value = node.value
        if '.' in value:  # defined in some other module
            module_name, attr = value.rsplit('.', 1)
            return cls(getattr(sys.modules[module_name], attr))
        return cls(globals()[value])


class T1(object):
    """Plain, unregistered class that ``A`` refers to by name."""


@yaml_object(Y)
class T2(object):
    """Class registered on ``Y`` by the decorator and referenced through ``A``."""


class T3(object):
    """Class registered on ``Y`` with an explicit ``register_class`` call."""


# Register the class itself (not its metaclass) on the shared YAML instance.
Y.register_class(T3)


# Round-trip every class through YAML, printing it before and after.
for t in (T1, T2, T3, DoubleQuotedScalarString):
    print('----------------------')
    s = A(t)
    print('s', s.type)

    x = StringIO()
    Y.dump(s, x)
    dumped = x.getvalue()
    print(dumped)

    d = Y.load(dumped)
    print('d', d.type)

这给:

---------------------- 
s <class '__main__.T1'> 
!Aclass T1 
... 

d <class '__main__.T1'> 
---------------------- 
s <class '__main__.T2'> 
!Aclass T2 
... 

d <class '__main__.T2'> 
---------------------- 
s <class '__main__.T3'> 
!Aclass T3 
... 

d <class '__main__.T3'> 
---------------------- 
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'> 
!Aclass DoubleQuotedScalarString 
... 

d <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'> 

如果 A() 还有其他需要转储/加载的属性,您应该创建一个字典(其中 .type 转换为字符串),然后转储/加载这个字典。

我不认为您发现了一个真正的 bug,您遇到的是在出错之后继续运行所产生的副作用:此时 Y 对象(及其组件)处于未定义状态。捕获错误之后,不应该再重复使用同一个 YAML() 实例。这一点应该在文档中写得更清楚。因此,如果您想在 for 循环里使用 try/except,就应该把 Y = YAML(typ='safe', pure=True) 移到 try 部分里面。

+0

好的,如果我想在我这里保留例如 T2,这似乎可以正常工作。但是,由于我要构造给定类的对象,而 Y 无法判断(至少从您的 A 实现来看不能)某个对象是否可以安全加载,所以我可能会在代码中加载恶意类型。在这个例子里,即使 T1 这个类没有注册,它也被“加载”了。我猜一种可能的解决办法是在 A.from_yaml() 中检查找到的类是否已由 Y 注册(即它是否在 constructor.yaml_constructors 中?)。至于第二部分,您说得有道理。 –

+0

你对安全性的看法是对的,我在回答问题时想到过这一点,但忘了写进去。我会把所有相关的类型放在一个模块中(或者放在子目录下的多个模块中),并从那里导入它们。然后你就可以在 `from_yaml()` 里检查 `node.value` 中的字符串。你也可以创建自己的 `@yaml_type` 装饰器,用它设置一个(唯一的)属性,并在转储和/或加载时检查这个属性。没有必要重用 ruamel.yaml 对待转储对象的注册机制(那本质上是另一回事)。 – Anthon

+0

我在我的答案中做了一个小模型,欢迎随时修改它(或你自己的答案),使其变得安全。编辑:我发布时没看到你的评论,不太确定你的意思。你是指使用 @yaml_type 装饰器在对象中设置一个隐藏属性吗?那又有什么能阻止别人对自己的类做同样的事情,从而冒充我们的类呢? –

0

作为对 Anthon 答案的补充,我开始修改 A.from_yaml 使其更安全,不过 _check_registered() 还没有覆盖所有情况。思路是:凡是 Y 允许加载其实例的类型都予以加载,其他类型一律阻止。请把它看作一个半成品(WIP):

import sys 
from ruamel.yaml import YAML, yaml_object 
from ruamel.yaml.compat import StringIO 
from ruamel.yaml.scalarstring import DoubleQuotedScalarString 


# Single YAML instance (typ="safe", pure=True) shared by the whole snippet.
# NOTE(review): the loop at the end keeps using this instance after it has
# caught an exception; the earlier answer advises against reusing a YAML()
# instance once an error has been raised.
Y = YAML(typ="safe", pure=True) 

# ============== 

@yaml_object(Y)
class A(object):
    """Wrapper around a class reference, dumped as a ``!Aclass`` scalar.

    Loading resolves the class by name and only accepts it when
    ``_check_registered`` lets it through.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # ``type`` shadows the builtin; kept because it is the public
        # parameter name.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Write the wrapped class's ``__name__`` as the scalar value."""
        class_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(cls.yaml_tag, class_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Look the class up by name, vet it, and wrap it in a new ``A``."""
        if '.' not in node.value:
            found = globals()[node.value]
        else:  # in some other module
            module_name, attr = node.value.rsplit('.', 1)
            found = getattr(sys.modules[module_name], attr)
        cls._check_registered(found, constructor, node)
        return cls(found)

    @classmethod
    def _check_registered(cls, t, constructor, node):
        """Raise unless ``t`` has a ``yaml_tag`` known to ``constructor``.

        Only a very basic check that should be made more secure: it looks
        at the tag string, not at the class, so a class that merely reuses
        a registered tag is accepted too.
        """
        if not hasattr(t, "yaml_tag"):
            raise Exception("Error: No attribute 'yaml_tag'!")
        if t.yaml_tag not in constructor.yaml_constructors:
            raise Exception("Error: Tag not registered!")

class T1(object):
    """Class that declares a tag but is never registered on ``Y``."""

    yaml_tag = u"!T1"


@yaml_object(Y)
class T2(object):
    """Registered class whose constructor announces itself on stdout."""

    yaml_tag = u"!T2"

    def __init__(self):
        print("Initializing...")

class T2_bad(object):
    """Unregistered look-alike that reuses T2's tag to impersonate it."""

    # Deliberately the same tag string as T2; this class is not registered.
    yaml_tag = u"!T2"

    def __init__(self):
        print("Evil code here!")


class T3(object):
    """Tagged class registered on ``Y`` with an explicit call."""

    yaml_tag = u"!T3"


# Register the class itself on the shared YAML instance.
Y.register_class(T3)



# Round-trip each candidate class; a failure is printed and the loop moves on.
for t in (T1, T2, T2_bad, T3, DoubleQuotedScalarString):
    try:
        print('----------------------')
        s = A(t)
        print('s', s.type)

        x = StringIO()
        Y.dump(s, x)
        dumped = x.getvalue()
        print(dumped)

        d = Y.load(dumped)
        print('d', d.type)
        # Bare expression on purpose: it instantiates the loaded class so
        # that its __init__ runs. The object reprs in the sample output
        # presumably come from the REPL echoing this expression.
        d.type()
    except Exception as e:
        print(e)

这将返回:

---------------------- 
s <class '__main__.T1'> 
!Aclass T1 
... 

Error: Tag not registered! 
---------------------- 
s <class '__main__.T2'> 
!Aclass T2 
... 

d <class '__main__.T2'> 
Initializing... 
<__main__.T2 object at 0x0000015B8EC82F60> 
---------------------- 
s <class '__main__.T2_bad'> 
!Aclass T2_bad 
... 

d <class '__main__.T2_bad'> 
Evil code here! 
<__main__.T2_bad object at 0x0000015B8EC82EF0> 
---------------------- 
s <class '__main__.T3'> 
!Aclass T3 
... 

d <class '__main__.T3'> 
<__main__.T3 object at 0x0000015B8EC82E10> 
---------------------- 
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'> 
!Aclass DoubleQuotedScalarString 
... 

Error: No attribute 'yaml_tag'! 

可以看出,它仍然不安全(“恶意代码”被执行了),而且不允许使用没有定义 yaml_tag 的类型。欢迎随意修改以解决这些问题。