Skip to content
This repository was archived by the owner on Feb 23, 2026. It is now read-only.

Commit 8d2e3a3

Browse files
authored
feat: json serialization and deserialization support stringy enums (#112)
For protobuf messages that contain enum fields, it is now possible to specify that enum variants should be serialized as names rather than as integers, e.g. `json_str = MyMessage.to_json(my_message, use_integers_for_enums=False)`. Similarly, deserialization from JSON that uses this convention is now supported. This is useful for interoperating with other data sources that use strings to represent enum variants in their JSON serialization, and for debugging, where visually inspecting data structures can be helpful and variant names are more informative than numerical values. This is a re-addition of #107, this time with correct support for nested enum definitions. Includes new tests and modifications to existing tests that take into account the fact that enums now share many of the same properties as messages: their definition adds an entry to the file descriptor proto, out-of-order field references to enums require resolution, and out-of-file references require a proto-file dependency definition.
1 parent c01800b commit 8d2e3a3

13 files changed

Lines changed: 373 additions & 78 deletions

noxfile.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,16 @@ def unit(session, proto="python"):
3030
"py.test",
3131
"-W=error",
3232
"--quiet",
33-
"--cov=proto",
34-
"--cov-config=.coveragerc",
35-
"--cov-report=term",
36-
"--cov-report=html",
37-
os.path.join("tests", ""),
33+
*(
34+
session.posargs # Coverage info when running individual tests is annoying.
35+
or [
36+
"--cov=proto",
37+
"--cov-config=.coveragerc",
38+
"--cov-report=term",
39+
"--cov-report=html",
40+
os.path.join("tests", ""),
41+
]
42+
),
3843
)
3944

4045

proto/_file_info.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import collections.abc
15+
import collections
1616
import inspect
1717
import logging
1818

19+
from google.protobuf import descriptor_pb2
1920
from google.protobuf import descriptor_pool
2021
from google.protobuf import message
2122
from google.protobuf import reflection
@@ -27,11 +28,33 @@
2728

2829
class _FileInfo(
2930
collections.namedtuple(
30-
"_FileInfo", ["descriptor", "messages", "enums", "name", "nested"]
31+
"_FileInfo",
32+
["descriptor", "messages", "enums", "name", "nested", "nested_enum"],
3133
)
3234
):
3335
registry = {} # Mapping[str, '_FileInfo']
3436

37+
@classmethod
38+
def maybe_add_descriptor(cls, filename, package):
39+
descriptor = cls.registry.get(filename)
40+
if not descriptor:
41+
descriptor = cls.registry[filename] = cls(
42+
descriptor=descriptor_pb2.FileDescriptorProto(
43+
name=filename, package=package, syntax="proto3",
44+
),
45+
enums=collections.OrderedDict(),
46+
messages=collections.OrderedDict(),
47+
name=filename,
48+
nested={},
49+
nested_enum={},
50+
)
51+
52+
return descriptor
53+
54+
@staticmethod
55+
def proto_file_name(name):
56+
return "{0}.proto".format(name.replace(".", "/"))
57+
3558
def _get_manifest(self, new_class):
3659
module = inspect.getmodule(new_class)
3760
if hasattr(module, "__protobuf__"):
@@ -107,6 +130,13 @@ def generate_file_pb(self, new_class, fallback_salt=""):
107130
for field in proto_plus_message._meta.fields.values():
108131
if field.message and isinstance(field.message, str):
109132
field.message = self.messages[field.message]
133+
elif field.enum and isinstance(field.enum, str):
134+
field.enum = self.enums[field.enum]
135+
136+
# Same thing for enums
137+
for full_name, proto_plus_enum in self.enums.items():
138+
descriptor = pool.FindEnumTypeByName(full_name)
139+
proto_plus_enum._meta.pb = descriptor
110140

111141
# We no longer need to track this file's info; remove it from
112142
# the module's registry and from this object.
@@ -130,14 +160,16 @@ def ready(self, new_class):
130160
"""
131161
# If there are any nested descriptors that have not been assigned to
132162
# the descriptors that should contain them, then we are not ready.
133-
if len(self.nested):
163+
if len(self.nested) or len(self.nested_enum):
134164
return False
135165

136166
# If there are any unresolved fields (fields with a composite message
137167
# declared as a string), ensure that the corresponding message is
138168
# declared.
139169
for field in self.unresolved_fields:
140-
if field.message not in self.messages:
170+
if (field.message and field.message not in self.messages) or (
171+
field.enum and field.enum not in self.enums
172+
):
141173
return False
142174

143175
# If the module in which this class is defined provides a
@@ -156,5 +188,7 @@ def unresolved_fields(self):
156188
"""Return fields with referencing message types as strings."""
157189
for proto_plus_message in self.messages.values():
158190
for field in proto_plus_message._meta.fields.values():
159-
if field.message and isinstance(field.message, str):
191+
if (field.message and isinstance(field.message, str)) or (
192+
field.enum and isinstance(field.enum, str)
193+
):
160194
yield field

proto/enums.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
import enum
1616

17+
from google.protobuf import descriptor_pb2
18+
19+
from proto import _file_info
1720
from proto import _package_info
1821
from proto.marshal.rules.enums import EnumRule
1922

@@ -30,12 +33,58 @@ def __new__(mcls, name, bases, attrs):
3033
# this component belongs within the file.
3134
package, marshal = _package_info.compile(name, attrs)
3235

36+
# Determine the local path of this proto component within the file.
37+
local_path = tuple(attrs.get("__qualname__", name).split("."))
38+
39+
# Sanity check: We get the wrong full name if a class is declared
40+
# inside a function local scope; correct this.
41+
if "<locals>" in local_path:
42+
ix = local_path.index("<locals>")
43+
local_path = local_path[: ix - 1] + local_path[ix + 1 :]
44+
45+
# Determine the full name in protocol buffers.
46+
full_name = ".".join((package,) + local_path).lstrip(".")
47+
filename = _file_info._FileInfo.proto_file_name(
48+
attrs.get("__module__", name.lower())
49+
)
50+
enum_desc = descriptor_pb2.EnumDescriptorProto(
51+
name=name,
52+
# Note: the superclass ctor removes the variants, so get them now.
53+
# Note: proto3 requires that the first variant value be zero.
54+
value=sorted(
55+
(
56+
descriptor_pb2.EnumValueDescriptorProto(name=name, number=number)
57+
# Minor hack to get all the enum variants out.
58+
for name, number in attrs.items()
59+
if isinstance(number, int)
60+
),
61+
key=lambda v: v.number,
62+
),
63+
)
64+
65+
file_info = _file_info._FileInfo.maybe_add_descriptor(filename, package)
66+
if len(local_path) == 1:
67+
file_info.descriptor.enum_type.add().MergeFrom(enum_desc)
68+
else:
69+
file_info.nested_enum[local_path] = enum_desc
70+
3371
# Run the superclass constructor.
3472
cls = super().__new__(mcls, name, bases, attrs)
3573

74+
# We can't just add a "_meta" element to attrs because the Enum
75+
# machinery doesn't know what to do with a non-int value.
76+
# The pb is set later, in generate_file_pb
77+
cls._meta = _EnumInfo(full_name=full_name, pb=None)
78+
79+
file_info.enums[full_name] = cls
80+
3681
# Register the enum with the marshal.
3782
marshal.register(cls, EnumRule(cls))
3883

84+
# Generate the descriptor for the file if it is ready.
85+
if file_info.ready(new_class=cls):
86+
file_info.generate_file_pb(new_class=cls, fallback_salt=full_name)
87+
3988
# Done; return the class.
4089
return cls
4190

@@ -44,3 +93,9 @@ class Enum(enum.IntEnum, metaclass=ProtoEnumMeta):
4493
"""An enum object that also builds a protobuf enum descriptor."""
4594

4695
pass
96+
97+
98+
class _EnumInfo:
99+
def __init__(self, *, full_name: str, pb):
100+
self.full_name = full_name
101+
self.pb = pb

proto/fields.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ def __init__(
7272
def descriptor(self):
7373
"""Return the descriptor for the field."""
7474
if not self._descriptor:
75-
proto_type = self.proto_type
7675
# Resolve the message type, if any, to a string.
7776
type_name = None
7877
if isinstance(self.message, str):
@@ -85,29 +84,23 @@ def descriptor(self):
8584
type_name = (
8685
self.message.DESCRIPTOR.full_name
8786
if hasattr(self.message, "DESCRIPTOR")
88-
else self.message.meta.full_name
87+
else self.message._meta.full_name
8988
)
89+
elif isinstance(self.enum, str):
90+
if not self.enum.startswith(self.package):
91+
self.enum = "{package}.{name}".format(
92+
package=self.package, name=self.enum,
93+
)
94+
type_name = self.enum
9095
elif self.enum:
91-
# Nos decipiat.
92-
#
93-
# As far as the wire format is concerned, enums are int32s.
94-
# Protocol buffers itself also only sends ints; the enum
95-
# objects are simply helper classes for translating names
96-
# and values and it is the user's job to resolve to an int.
97-
#
98-
# Therefore, the non-trivial effort of adding the actual
99-
# enum descriptors seems to add little or no actual value.
100-
#
101-
# FIXME: Eventually, come back and put in the actual enum
102-
# descriptors.
103-
proto_type = ProtoType.INT32
96+
type_name = self.enum._meta.full_name
10497

10598
# Set the descriptor.
10699
self._descriptor = descriptor_pb2.FieldDescriptorProto(
107100
name=self.name,
108101
number=self.number,
109102
label=3 if self.repeated else 1,
110-
type=proto_type,
103+
type=self.proto_type,
111104
type_name=type_name,
112105
json_name=self.json_name,
113106
proto3_optional=self.optional,

proto/message.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ def __new__(mcls, name, bases, attrs):
145145
field_msg = field.message
146146
if hasattr(field_msg, "pb") and callable(field_msg.pb):
147147
field_msg = field_msg.pb()
148-
149148
# Sanity check: The field's message may not yet be defined if
150149
# it was a Message defined in the same file, and the file
151150
# descriptor proto has not yet been generated.
@@ -154,7 +153,13 @@ def __new__(mcls, name, bases, attrs):
154153
# correctly when the file descriptor is created later.
155154
if field_msg:
156155
proto_imports.add(field_msg.DESCRIPTOR.file.name)
157-
symbol_database.Default().RegisterMessage(field_msg)
156+
157+
# Same thing, but for enums.
158+
elif field.enum and not isinstance(field.enum, str):
159+
field_enum = field.enum._meta.pb
160+
161+
if field_enum:
162+
proto_imports.add(field_enum.file.name)
158163

159164
# Increment the field index counter.
160165
index += 1
@@ -183,24 +188,13 @@ def __new__(mcls, name, bases, attrs):
183188
# Determine the filename.
184189
# We determine an appropriate proto filename based on the
185190
# Python module.
186-
filename = "{0}.proto".format(
187-
new_attrs.get("__module__", name.lower()).replace(".", "/")
191+
filename = _file_info._FileInfo.proto_file_name(
192+
new_attrs.get("__module__", name.lower())
188193
)
189194

190195
# Get or create the information about the file, including the
191196
# descriptor to which the new message descriptor shall be added.
192-
file_info = _file_info._FileInfo.registry.setdefault(
193-
filename,
194-
_file_info._FileInfo(
195-
descriptor=descriptor_pb2.FileDescriptorProto(
196-
name=filename, package=package, syntax="proto3",
197-
),
198-
enums=collections.OrderedDict(),
199-
messages=collections.OrderedDict(),
200-
name=filename,
201-
nested={},
202-
),
203-
)
197+
file_info = _file_info._FileInfo.maybe_add_descriptor(filename, package)
204198

205199
# Ensure any imports that would be necessary are assigned to the file
206200
# descriptor proto being created.
@@ -227,6 +221,11 @@ def __new__(mcls, name, bases, attrs):
227221
for child_path in child_paths:
228222
desc.nested_type.add().MergeFrom(file_info.nested.pop(child_path))
229223

224+
# Same thing, but for enums
225+
child_paths = [p for p in file_info.nested_enum.keys() if local_path == p[:-1]]
226+
for child_path in child_paths:
227+
desc.enum_type.add().MergeFrom(file_info.nested_enum.pop(child_path))
228+
230229
# Add the descriptor to the file if it is a top-level descriptor,
231230
# or to a "holding area" for nested messages otherwise.
232231
if len(local_path) == 1:
@@ -325,17 +324,24 @@ def deserialize(cls, payload: bytes) -> "Message":
325324
"""
326325
return cls.wrap(cls.pb().FromString(payload))
327326

328-
def to_json(cls, instance) -> str:
327+
def to_json(cls, instance, *, use_integers_for_enums=True) -> str:
329328
"""Given a message instance, serialize it to json
330329
331330
Args:
332331
instance: An instance of this message type, or something
333332
compatible (accepted by the type's constructor).
333+
use_integers_for_enums (Optional(bool)): An option that determines whether enum
334+
values should be represented by strings (False) or integers (True).
335+
Default is True.
334336
335337
Returns:
336338
str: The json string representation of the protocol buffer.
337339
"""
338-
return MessageToJson(cls.pb(instance))
340+
return MessageToJson(
341+
cls.pb(instance),
342+
use_integers_for_enums=use_integers_for_enums,
343+
including_default_value_fields=True,
344+
)
339345

340346
def from_json(cls, payload) -> "Message":
341347
"""Given a json string representing an instance,

tests/clam.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (C) 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import proto
16+
17+
__protobuf__ = proto.module(package="ocean.clam.v1", manifest={"Clam", "Species",},)
18+
19+
20+
class Species(proto.Enum):
21+
UNKNOWN = 0
22+
SQUAMOSA = 1
23+
DURASA = 2
24+
GIGAS = 3
25+
26+
27+
class Clam(proto.Message):
28+
species = proto.Field(proto.ENUM, number=1, enum="Species")
29+
mass_kg = proto.Field(proto.DOUBLE, number=2)

tests/mollusc.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright (C) 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import proto
16+
import zone
17+
18+
__protobuf__ = proto.module(package="ocean.mollusc.v1", manifest={"Mollusc",},)
19+
20+
21+
class Mollusc(proto.Message):
22+
zone = proto.Field(zone.Zone, number=1)

0 commit comments

Comments
 (0)