Coverage for src/amisc/serialize.py: 90%

62 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-05 19:26 +0000

1"""Provides serialization protocols for objects in the package. Serialization in the context of `amisc` 

2means converting an object to a built-in Python object (e.g. string, dictionary, float, etc.). The serialized objects 

3are then easy to convert to binary or text forms for storage or transmission using various protocols (i.e. pickle, 

4json, yaml, etc.). 

5 

6Includes: 

7 

8- `Serializable` — mixin interface for serializing and deserializing objects 

9- `Base64Serializable` — mixin class for serializing objects using base64 encoding 

10- `StringSerializable` — mixin class for serializing objects using string representation 

11- `PickleSerializable` — mixin class for serializing objects using pickle files 

12- `YamlSerializable` — mixin class for serializing an object using Yaml load/dump from string 

13""" 

14from __future__ import annotations 

15 

16import base64 

17import pickle 

18import tempfile 

19from abc import ABC, abstractmethod 

20from dataclasses import dataclass 

21from pathlib import Path 

22from typing import Any 

23 

24import yaml 

25 

26from amisc.utils import parse_function_string 

27 

# Names exported by a star-import of this module.
__all__ = ['Serializable', 'Base64Serializable', 'StringSerializable', 'PickleSerializable', 'YamlSerializable']

# Union of the built-in types that serialized data may take; used as the annotation for
# serialized values in the `Serializable` interface.
_builtin = str | dict | list | int | float | tuple | bool # Generic type for common built-in Python objects

31 

32 

class Serializable(ABC):
    """Abstract mixin that defines the serialize/deserialize contract.

    Subclasses must implement both `serialize` (object -> built-in Python object) and
    `deserialize` (built-in Python object -> object); the class cannot be instantiated otherwise.
    """

    @abstractmethod
    def serialize(self) -> _builtin:
        """Return a built-in Python representation of this object."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def deserialize(cls, serialized_data: _builtin) -> Serializable:
        """Build a `Serializable` object back from its serialized form.

        !!! Note "Passing arguments to deserialize"
            Implementations should generally accept nothing beyond the serialized data. Everything needed to
            rebuild the object should live inside the serialized data itself. If extra arguments are required,
            serialize them together with the object and unpack them inside `deserialize`.

        :param serialized_data: the built-in Python object produced by `serialize`
        """
        raise NotImplementedError

52 

53 

class Base64Serializable(Serializable):
    """Mixin class that serializes an object as base64-encoded pickle text."""

    def serialize(self) -> str:
        """Pickle this object and return the pickled bytes as a base64 string (decoded as utf-8)."""
        pickled = pickle.dumps(self)
        encoded = base64.b64encode(pickled)
        return encoded.decode('utf-8')

    @classmethod
    def deserialize(cls, serialized_data: str) -> Base64Serializable:
        """Decode a base64 string and unpickle the object stored in it.

        The unpickled object is returned as-is; it is not checked against `cls`.

        !!! Warning "Security Risk"
            Unpickling can execute arbitrary code. Only deserialize data from a source you trust.

        :param serialized_data: the base64 string produced by `serialize`
        """
        raw = base64.b64decode(serialized_data)
        return pickle.loads(raw)

62 

63 

class StringSerializable(Serializable):
    """Mixin class that serializes an object through its string representation."""

    def serialize(self) -> str:
        """Return `str(self)` as the serialized form."""
        return str(self)

    @classmethod
    def deserialize(cls, serialized_data: str, trust: bool = False) -> StringSerializable:
        """Rebuild an object from its string representation.

        !!! Warning "Security Risk"
            Only pass `trust=True` when the serialized data comes from a source you trust, because the string
            is then handed directly to `eval`. With the default `trust=False`, the string is instead parsed as a
            class signature like `MyClass(*args, **kwargs)` and the parsed arguments are passed to `cls`.

        :param serialized_data: the string representation of the object
        :param trust: whether to trust the source of the serialized data (i.e. for `eval`)
        :raises ValueError: if `trust` is false and the string cannot be parsed and used to construct `cls`
        """
        # Trusted path: evaluate the string as-is (no extra locals are introduced before this call).
        if trust:
            return eval(serialized_data)

        try:
            # The parsed name is not used; construction always goes through `cls`.
            _name, positional, keywords = parse_function_string(serialized_data)
            return cls(*positional, **keywords)
        except Exception as err:
            raise ValueError(f'String "{serialized_data}" is not a valid class signature.') from err

90 

91 

class PickleSerializable(Serializable):
    """Mixin class that serializes an object by pickling it to a file."""

    def serialize(self, save_path: str | Path = None) -> str:
        """Pickle this object to `save_path` and return the resolved file path.

        :param save_path: the file to write the pickle to (required despite the `None` default)
        :returns: the resolved path of the written file as a posix-style string
        :raises ValueError: if no `save_path` is given
        """
        if save_path is None:
            raise ValueError('Must provide a save path for Pickle serialization.')

        target = Path(save_path)
        with target.open('wb') as stream:
            pickle.dump(self, stream)

        return str(target.resolve().as_posix())

    @classmethod
    def deserialize(cls, serialized_data: str | Path) -> PickleSerializable:
        """Load and return the object pickled in the file at `serialized_data`.

        The unpickled object is returned as-is; it is not checked against `cls`.

        !!! Warning "Security Risk"
            Unpickling can execute arbitrary code. Only load pickle files from a source you trust.

        :param serialized_data: path to the pickle file written by `serialize`
        """
        source = Path(serialized_data)
        with source.open('rb') as stream:
            return pickle.load(stream)

105 

106 

@dataclass
class YamlSerializable(Serializable):
    """Mixin for serializing an object using Yaml load/dump from string.

    Wraps an arbitrary object in the `obj` field; `serialize` dumps that object to a Yaml string and
    `deserialize` loads a Yaml string back into a new wrapper.
    """
    obj: Any  # the wrapped object that is dumped to / loaded from Yaml

    def serialize(self) -> str:
        """Dump the wrapped object to a Yaml string.

        :returns: the Yaml document for `obj`, with leading/trailing whitespace stripped
        """
        # `yaml.dump` returns the document as a string when no stream is passed, so there is no need to
        # round-trip through a temporary file (which costs disk I/O and needs a writable temp directory).
        return yaml.dump(self.obj, allow_unicode=True).strip()

    @classmethod
    def deserialize(cls, yaml_str: str) -> YamlSerializable:
        """Load a Yaml string and wrap the resulting object.

        !!! Warning "Security Risk"
            The string is loaded with `yaml.Loader`, which can construct arbitrary Python objects. Only
            deserialize Yaml from a source you trust.

        :param yaml_str: the Yaml string produced by `serialize`
        :returns: a new instance of `cls` wrapping the loaded object
        """
        # SECURITY: the full (unsafe) loader is kept on purpose so Python-tagged objects still round-trip.
        obj = yaml.load(yaml_str, yaml.Loader)
        # Build through `cls` so subclasses deserialize to their own type rather than the base class.
        return cls(obj=obj)

122 return YamlSerializable(obj=obj)