Skip to content
This repository was archived by the owner on Feb 23, 2026. It is now read-only.

Commit a082f85

Browse files
authored
feat: allow enum strings in json serialization and deserialization (#107)
* feat: allow enum strings in json serialization and deserialization. For protobuf messages that contain enum fields, it is now possible to specify that enum variants should be serialized as names rather than as integers, e.g. json_str = MyMessage.to_json(my_message, use_integers_for_enums=False). Similarly, deserialization from JSON that uses this convention is now supported. This is useful for interoperation with other data sources that use strings to define enum variants in their JSON serialization, and for debugging, where visually inspecting data structures can be helpful and variant names are more informative than numerical values. Note: includes reformatting of many source files due to an update to Black.
1 parent 310dc18 commit a082f85

24 files changed

Lines changed: 332 additions & 79 deletions

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ jobs:
249249
- run:
250250
name: Format files
251251
command: |
252-
black .
252+
black -l 88 .
253253
- run:
254254
name: Check diff
255255
command: |

docs/messages.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,12 @@ via the :meth:`~.Message.to_json` and :meth:`~.Message.from_json` methods.
165165
166166
new_song = Song.from_json(json)
167167
168+
The behavior of JSON serialization can be customized to use strings to
169+
represent enum values.
170+
171+
.. code-block:: python
172+
173+
song = Song(genre=Genre.JAZZ)
174+
json = Song.to_json(song, use_integers_for_enums=False)
175+
176+
assert "JAZZ" in json

proto/_file_info.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import collections.abc
15+
import collections
1616
import inspect
1717
import logging
1818

19+
from google.protobuf import descriptor_pb2
1920
from google.protobuf import descriptor_pool
2021
from google.protobuf import message
2122
from google.protobuf import reflection
@@ -32,6 +33,28 @@ class _FileInfo(
3233
):
3334
registry = {} # Mapping[str, '_FileInfo']
3435

36+
@classmethod
37+
def maybe_add_descriptor(cls, filename, package):
38+
descriptor = cls.registry.get(filename)
39+
if not descriptor:
40+
descriptor = cls.registry[filename] = cls(
41+
descriptor=descriptor_pb2.FileDescriptorProto(
42+
name=filename,
43+
package=package,
44+
syntax="proto3",
45+
),
46+
enums=collections.OrderedDict(),
47+
messages=collections.OrderedDict(),
48+
name=filename,
49+
nested={},
50+
)
51+
52+
return descriptor
53+
54+
@staticmethod
55+
def proto_file_name(name):
56+
return "{0}.proto".format(name).replace(".", "/")
57+
3558
def _get_manifest(self, new_class):
3659
module = inspect.getmodule(new_class)
3760
if hasattr(module, "__protobuf__"):

proto/enums.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
import enum
1616

17+
from google.protobuf import descriptor_pb2
18+
19+
from proto import _file_info
1720
from proto import _package_info
1821
from proto.marshal.rules.enums import EnumRule
1922

@@ -30,9 +33,49 @@ def __new__(mcls, name, bases, attrs):
3033
# this component belongs within the file.
3134
package, marshal = _package_info.compile(name, attrs)
3235

36+
# Determine the local path of this proto component within the file.
37+
local_path = tuple(attrs.get("__qualname__", name).split("."))
38+
39+
# Sanity check: We get the wrong full name if a class is declared
40+
# inside a function local scope; correct this.
41+
if "<locals>" in local_path:
42+
ix = local_path.index("<locals>")
43+
local_path = local_path[: ix - 1] + local_path[ix + 1 :]
44+
45+
# Determine the full name in protocol buffers.
46+
# The C++ proto implementation doesn't like dots in names, so use underscores.
47+
full_name = "_".join((package,) + local_path).lstrip("_")
48+
enum_desc = descriptor_pb2.EnumDescriptorProto(
49+
name=full_name,
50+
# Note: the superclass ctor removes the variants, so get them now.
51+
# Note: proto3 requires that the first variant value be zero.
52+
value=sorted(
53+
(
54+
descriptor_pb2.EnumValueDescriptorProto(name=name, number=number)
55+
# Minor hack to get all the enum variants out.
56+
for name, number in attrs.items()
57+
if isinstance(number, int)
58+
),
59+
key=lambda v: v.number,
60+
),
61+
)
62+
63+
filename = _file_info._FileInfo.proto_file_name(
64+
attrs.get("__module__", name.lower())
65+
)
66+
67+
file_info = _file_info._FileInfo.maybe_add_descriptor(filename, package)
68+
file_info.descriptor.enum_type.add().MergeFrom(enum_desc)
69+
3370
# Run the superclass constructor.
3471
cls = super().__new__(mcls, name, bases, attrs)
3572

73+
# We can't just add a "_meta" element to attrs because the Enum
74+
# machinery doesn't know what to do with a non-int value.
75+
cls._meta = _EnumInfo(full_name=full_name, pb=enum_desc)
76+
77+
file_info.enums[full_name] = cls
78+
3679
# Register the enum with the marshal.
3780
marshal.register(cls, EnumRule(cls))
3881

@@ -44,3 +87,9 @@ class Enum(enum.IntEnum, metaclass=ProtoEnumMeta):
4487
"""An enum object that also builds a protobuf enum descriptor."""
4588

4689
pass
90+
91+
92+
class _EnumInfo:
93+
def __init__(self, *, full_name: str, pb):
94+
self.full_name = full_name
95+
self.pb = pb

proto/fields.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ def descriptor(self):
7878
if isinstance(self.message, str):
7979
if not self.message.startswith(self.package):
8080
self.message = "{package}.{name}".format(
81-
package=self.package, name=self.message,
81+
package=self.package,
82+
name=self.message,
8283
)
8384
type_name = self.message
8485
elif self.message:
@@ -88,19 +89,7 @@ def descriptor(self):
8889
else self.message.meta.full_name
8990
)
9091
elif self.enum:
91-
# Nos decipiat.
92-
#
93-
# As far as the wire format is concerned, enums are int32s.
94-
# Protocol buffers itself also only sends ints; the enum
95-
# objects are simply helper classes for translating names
96-
# and values and it is the user's job to resolve to an int.
97-
#
98-
# Therefore, the non-trivial effort of adding the actual
99-
# enum descriptors seems to add little or no actual value.
100-
#
101-
# FIXME: Eventually, come back and put in the actual enum
102-
# descriptors.
103-
proto_type = ProtoType.INT32
92+
type_name = self.enum._meta.full_name
10493

10594
# Set the descriptor.
10695
self._descriptor = descriptor_pb2.FieldDescriptorProto(

proto/marshal/marshal.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,8 @@ def to_proto(self, proto_type, value, *, strict: bool = False):
209209
raise TypeError(
210210
"Parameter must be instance of the same class; "
211211
"expected {expected}, got {got}".format(
212-
expected=proto_type.__name__, got=pb_value.__class__.__name__,
212+
expected=proto_type.__name__,
213+
got=pb_value.__class__.__name__,
213214
),
214215
)
215216

proto/marshal/rules/dates.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ def to_proto(self, value) -> timestamp_pb2.Timestamp:
4444
return value.timestamp_pb()
4545
if isinstance(value, datetime):
4646
return timestamp_pb2.Timestamp(
47-
seconds=int(value.timestamp()), nanos=value.microsecond * 1000,
47+
seconds=int(value.timestamp()),
48+
nanos=value.microsecond * 1000,
4849
)
4950
return value
5051

proto/marshal/rules/enums.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ def to_python(self, value, *, absent: bool = None):
3636
# the user realizes that an unexpected value came along.
3737
warnings.warn(
3838
"Unrecognized {name} enum value: {value}".format(
39-
name=self._enum.__name__, value=value,
39+
name=self._enum.__name__,
40+
value=value,
4041
)
4142
)
4243
return value

proto/marshal/rules/struct.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,15 @@ def to_python(self, value, *, absent: bool = None):
4343
return str(value.string_value)
4444
if kind == "struct_value":
4545
return self._marshal.to_python(
46-
struct_pb2.Struct, value.struct_value, absent=False,
46+
struct_pb2.Struct,
47+
value.struct_value,
48+
absent=False,
4749
)
4850
if kind == "list_value":
4951
return self._marshal.to_python(
50-
struct_pb2.ListValue, value.list_value, absent=False,
52+
struct_pb2.ListValue,
53+
value.list_value,
54+
absent=False,
5155
)
5256
raise AttributeError
5357

@@ -114,7 +118,9 @@ def to_proto(self, value) -> struct_pb2.Struct:
114118
if isinstance(value, struct_pb2.Struct):
115119
return value
116120
if isinstance(value, maps.MapComposite):
117-
return struct_pb2.Struct(fields={k: v for k, v in value.pb.items()},)
121+
return struct_pb2.Struct(
122+
fields={k: v for k, v in value.pb.items()},
123+
)
118124

119125
# We got a dict (or something dict-like); convert it.
120126
answer = struct_pb2.Struct(

proto/message.py

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ def __new__(mcls, name, bases, attrs):
6868
# Determine the name of the entry message.
6969
msg_name = "{pascal_key}Entry".format(
7070
pascal_key=re.sub(
71-
r"_\w", lambda m: m.group()[1:].upper(), key,
71+
r"_\w",
72+
lambda m: m.group()[1:].upper(),
73+
key,
7274
).replace(key[0], key[0].upper(), 1),
7375
)
7476

@@ -84,20 +86,26 @@ def __new__(mcls, name, bases, attrs):
8486
{
8587
"__module__": attrs.get("__module__", None),
8688
"__qualname__": "{prefix}.{name}".format(
87-
prefix=attrs.get("__qualname__", name), name=msg_name,
89+
prefix=attrs.get("__qualname__", name),
90+
name=msg_name,
8891
),
8992
"_pb_options": {"map_entry": True},
9093
}
9194
)
9295
entry_attrs["key"] = Field(field.map_key_type, number=1)
9396
entry_attrs["value"] = Field(
94-
field.proto_type, number=2, enum=field.enum, message=field.message,
97+
field.proto_type,
98+
number=2,
99+
enum=field.enum,
100+
message=field.message,
95101
)
96102
map_fields[msg_name] = MessageMeta(msg_name, (Message,), entry_attrs)
97103

98104
# Create the repeated field for the entry message.
99105
map_fields[key] = RepeatedField(
100-
ProtoType.MESSAGE, number=field.number, message=map_fields[msg_name],
106+
ProtoType.MESSAGE,
107+
number=field.number,
108+
message=map_fields[msg_name],
101109
)
102110

103111
# Add the new entries to the attrs
@@ -183,24 +191,13 @@ def __new__(mcls, name, bases, attrs):
183191
# Determine the filename.
184192
# We determine an appropriate proto filename based on the
185193
# Python module.
186-
filename = "{0}.proto".format(
187-
new_attrs.get("__module__", name.lower()).replace(".", "/")
194+
filename = _file_info._FileInfo.proto_file_name(
195+
new_attrs.get("__module__", name.lower())
188196
)
189197

190198
# Get or create the information about the file, including the
191199
# descriptor to which the new message descriptor shall be added.
192-
file_info = _file_info._FileInfo.registry.setdefault(
193-
filename,
194-
_file_info._FileInfo(
195-
descriptor=descriptor_pb2.FileDescriptorProto(
196-
name=filename, package=package, syntax="proto3",
197-
),
198-
enums=collections.OrderedDict(),
199-
messages=collections.OrderedDict(),
200-
name=filename,
201-
nested={},
202-
),
203-
)
200+
file_info = _file_info._FileInfo.maybe_add_descriptor(filename, package)
204201

205202
# Ensure any imports that would be necessary are assigned to the file
206203
# descriptor proto being created.
@@ -286,7 +283,13 @@ def pb(cls, obj=None, *, coerce: bool = False):
286283
if coerce:
287284
obj = cls(obj)
288285
else:
289-
raise TypeError("%r is not an instance of %s" % (obj, cls.__name__,))
286+
raise TypeError(
287+
"%r is not an instance of %s"
288+
% (
289+
obj,
290+
cls.__name__,
291+
)
292+
)
290293
return obj._pb
291294

292295
def wrap(cls, pb):
@@ -325,17 +328,24 @@ def deserialize(cls, payload: bytes) -> "Message":
325328
"""
326329
return cls.wrap(cls.pb().FromString(payload))
327330

328-
def to_json(cls, instance) -> str:
331+
def to_json(cls, instance, *, use_integers_for_enums=True) -> str:
329332
"""Given a message instance, serialize it to json
330333
331334
Args:
332335
instance: An instance of this message type, or something
333336
compatible (accepted by the type's constructor).
337+
use_integers_for_enums (Optional(bool)): An option that determines whether enum
338+
values should be represented by strings (False) or integers (True).
339+
Default is True.
334340
335341
Returns:
336342
str: The json string representation of the protocol buffer.
337343
"""
338-
return MessageToJson(cls.pb(instance))
344+
return MessageToJson(
345+
cls.pb(instance),
346+
use_integers_for_enums=use_integers_for_enums,
347+
including_default_value_fields=True,
348+
)
339349

340350
def from_json(cls, payload) -> "Message":
341351
"""Given a json string representing an instance,
@@ -399,7 +409,10 @@ def __init__(self, mapping=None, **kwargs):
399409
# Sanity check: Did we get something not a map? Error if so.
400410
raise TypeError(
401411
"Invalid constructor input for %s: %r"
402-
% (self.__class__.__name__, mapping,)
412+
% (
413+
self.__class__.__name__,
414+
mapping,
415+
)
403416
)
404417
else:
405418
# Can't have side effects on mapping.

0 commit comments

Comments
 (0)