Skip to content
This repository was archived by the owner on Feb 24, 2026. It is now read-only.

Commit 1ce2621

Browse files
authored
fix: ProtoSchemaConver's problem when converting fields reference same… (#428)
* fix: ProtoSchemaConver's problem when converting fields reference same enum types, the generated proto is not buildable. modified: google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1alpha2/ProtoSchemaConverter.java modified: google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1alpha2/ProtoSchemaConverterTest.java * fix review comments
1 parent e2156fc commit 1ce2621

File tree

3 files changed

+139
-51
lines changed

3 files changed

+139
-51
lines changed

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1alpha2/ProtoSchemaConverter.java

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import com.google.api.gax.rpc.InvalidArgumentException;
2020
import com.google.cloud.bigquery.storage.v1alpha2.ProtoBufProto.ProtoSchema;
2121
import com.google.protobuf.DescriptorProtos.DescriptorProto;
22+
import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
2223
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
2324
import com.google.protobuf.Descriptors.Descriptor;
2425
import com.google.protobuf.Descriptors.FieldDescriptor;
@@ -29,51 +30,95 @@
2930
// protobuf::DescriptorProto
3031
// that can be reconstructed by the backend.
3132
public class ProtoSchemaConverter {
32-
private static class StructName {
33-
public String getName() {
34-
count++;
35-
return count == 1 ? "__ROOT__" : "__S" + count;
36-
}
37-
38-
private int count = 0;
39-
}
40-
4133
private static ProtoSchema convertInternal(
42-
Descriptor input, List<String> visitedTypes, StructName structName) {
34+
Descriptor input,
35+
Set<String> visitedTypes,
36+
Set<String> enumTypes,
37+
Set<String> structTypes,
38+
DescriptorProto.Builder rootProtoSchema) {
4339
DescriptorProto.Builder resultProto = DescriptorProto.newBuilder();
44-
resultProto.setName(structName.getName());
40+
if (rootProtoSchema == null) {
41+
rootProtoSchema = resultProto;
42+
}
43+
String protoName = input.getFullName();
44+
protoName = protoName.replace('.', '_');
45+
resultProto.setName(protoName);
46+
Set<String> localEnumTypes = new HashSet<String>();
4547
visitedTypes.add(input.getFullName());
4648
for (int i = 0; i < input.getFields().size(); i++) {
4749
FieldDescriptor inputField = input.getFields().get(i);
4850
FieldDescriptorProto.Builder resultField = inputField.toProto().toBuilder();
4951
if (inputField.getType() == FieldDescriptor.Type.GROUP
5052
|| inputField.getType() == FieldDescriptor.Type.MESSAGE) {
51-
if (visitedTypes.contains(inputField.getMessageType().getFullName())) {
52-
throw new InvalidArgumentException(
53-
"Recursive type is not supported:" + inputField.getMessageType().getFullName(),
54-
null,
55-
GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT),
56-
false);
53+
String msgFullName = inputField.getMessageType().getFullName();
54+
msgFullName = msgFullName.replace('.', '_');
55+
if (structTypes.contains(msgFullName)) {
56+
resultField.setTypeName(msgFullName);
57+
} else {
58+
if (visitedTypes.contains(msgFullName)) {
59+
throw new InvalidArgumentException(
60+
"Recursive type is not supported:" + inputField.getMessageType().getFullName(),
61+
null,
62+
GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT),
63+
false);
64+
}
65+
visitedTypes.add(msgFullName);
66+
rootProtoSchema.addNestedType(
67+
convertInternal(
68+
inputField.getMessageType(),
69+
visitedTypes,
70+
enumTypes,
71+
structTypes,
72+
rootProtoSchema)
73+
.getProtoDescriptor());
74+
visitedTypes.remove(msgFullName);
75+
resultField.setTypeName(
76+
rootProtoSchema.getNestedType(rootProtoSchema.getNestedTypeCount() - 1).getName());
5777
}
58-
resultProto.addNestedType(
59-
convertInternal(inputField.getMessageType(), visitedTypes, structName)
60-
.getProtoDescriptor());
61-
visitedTypes.remove(inputField.getMessageType().getFullName());
62-
resultField.setTypeName(
63-
resultProto.getNestedType(resultProto.getNestedTypeCount() - 1).getName());
6478
}
6579
if (inputField.getType() == FieldDescriptor.Type.ENUM) {
66-
resultProto.addEnumType(inputField.getEnumType().toProto());
67-
resultField.setTypeName(
68-
resultProto.getEnumType(resultProto.getEnumTypeCount() - 1).getName());
80+
String enumFullName = inputField.getEnumType().getFullName();
81+
// If the enum is defined within the current message, we don't want to
82+
// pull it out to the top since then the same enum values will not be
83+
// allowed if the value collides with other enums.
84+
if (enumFullName.startsWith(input.getFullName())) {
85+
String enumName = inputField.getEnumType().getName();
86+
if (localEnumTypes.contains(enumName)) {
87+
resultField.setTypeName(enumName);
88+
} else {
89+
resultProto.addEnumType(inputField.getEnumType().toProto());
90+
resultField.setTypeName(enumName);
91+
localEnumTypes.add(enumName);
92+
}
93+
} else {
94+
// If the enum is defined elsewhere, then redefine it at the top
95+
// message scope. There is a problem that different enum values might
96+
// be OK when living under its original scope, but when they all live
97+
// in top scope, their values cannot collide. Say if thers A.Color with
98+
// RED and B.Color with RED, if they are redefined here, the RED will
99+
// collide with each other and thus not allowed.
100+
enumFullName = enumFullName.replace('.', '_');
101+
if (enumTypes.contains(enumFullName)) {
102+
resultField.setTypeName(enumFullName);
103+
} else {
104+
EnumDescriptorProto enumType =
105+
inputField.getEnumType().toProto().toBuilder().setName(enumFullName).build();
106+
resultProto.addEnumType(enumType);
107+
resultField.setTypeName(enumFullName);
108+
enumTypes.add(enumFullName);
109+
}
110+
}
69111
}
70112
resultProto.addField(resultField);
71113
}
114+
structTypes.add(protoName);
72115
return ProtoSchema.newBuilder().setProtoDescriptor(resultProto.build()).build();
73116
}
74117

75118
public static ProtoSchema convert(Descriptor descriptor) {
76-
ArrayList<String> visitedTypes = new ArrayList<String>();
77-
return convertInternal(descriptor, visitedTypes, new StructName());
119+
Set<String> visitedTypes = new HashSet<String>();
120+
Set<String> enumTypes = new HashSet<String>();
121+
Set<String> structTypes = new HashSet<String>();
122+
return convertInternal(descriptor, visitedTypes, enumTypes, structTypes, null);
78123
}
79124
}

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1alpha2/ProtoSchemaConverterTest.java

Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
import com.google.api.gax.rpc.InvalidArgumentException;
1919
import com.google.cloud.bigquery.storage.test.Test.*;
20+
import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
21+
import com.google.protobuf.Descriptors;
2022
import org.junit.*;
2123

2224
public class ProtoSchemaConverterTest {
@@ -26,7 +28,7 @@ public void convertSimple() {
2628
ProtoBufProto.ProtoSchema protoSchema =
2729
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
2830
Assert.assertEquals(
29-
"name: \"__ROOT__\"\n"
31+
"name: \"com_google_cloud_bigquery_storage_test_AllSupportedTypes\"\n"
3032
+ "field {\n"
3133
+ " name: \"int32_value\"\n"
3234
+ " number: 1\n"
@@ -74,7 +76,7 @@ public void convertSimple() {
7476
+ " number: 8\n"
7577
+ " label: LABEL_OPTIONAL\n"
7678
+ " type: TYPE_ENUM\n"
77-
+ " type_name: \"TestEnum\"\n"
79+
+ " type_name: \"com_google_cloud_bigquery_storage_test_TestEnum\"\n"
7880
+ "}\n"
7981
+ "field {\n"
8082
+ " name: \"string_value\"\n"
@@ -83,7 +85,7 @@ public void convertSimple() {
8385
+ " type: TYPE_STRING\n"
8486
+ "}\n"
8587
+ "enum_type {\n"
86-
+ " name: \"TestEnum\"\n"
88+
+ " name: \"com_google_cloud_bigquery_storage_test_TestEnum\"\n"
8789
+ " value {\n"
8890
+ " name: \"TestEnum0\"\n"
8991
+ " number: 0\n"
@@ -102,47 +104,38 @@ public void convertNested() {
102104
ProtoBufProto.ProtoSchema protoSchema =
103105
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
104106
Assert.assertEquals(
105-
"name: \"__ROOT__\"\n"
107+
"name: \"com_google_cloud_bigquery_storage_test_ComplicateType\"\n"
106108
+ "field {\n"
107109
+ " name: \"nested_repeated_type\"\n"
108110
+ " number: 1\n"
109111
+ " label: LABEL_REPEATED\n"
110112
+ " type: TYPE_MESSAGE\n"
111-
+ " type_name: \"__S2\"\n"
113+
+ " type_name: \"com_google_cloud_bigquery_storage_test_NestedType\"\n"
112114
+ "}\n"
113115
+ "field {\n"
114116
+ " name: \"inner_type\"\n"
115117
+ " number: 2\n"
116118
+ " label: LABEL_OPTIONAL\n"
117119
+ " type: TYPE_MESSAGE\n"
118-
+ " type_name: \"__S4\"\n"
120+
+ " type_name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
119121
+ "}\n"
120122
+ "nested_type {\n"
121-
+ " name: \"__S2\"\n"
123+
+ " name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
122124
+ " field {\n"
123-
+ " name: \"inner_type\"\n"
125+
+ " name: \"value\"\n"
124126
+ " number: 1\n"
125127
+ " label: LABEL_REPEATED\n"
126-
+ " type: TYPE_MESSAGE\n"
127-
+ " type_name: \"__S3\"\n"
128-
+ " }\n"
129-
+ " nested_type {\n"
130-
+ " name: \"__S3\"\n"
131-
+ " field {\n"
132-
+ " name: \"value\"\n"
133-
+ " number: 1\n"
134-
+ " label: LABEL_REPEATED\n"
135-
+ " type: TYPE_STRING\n"
136-
+ " }\n"
128+
+ " type: TYPE_STRING\n"
137129
+ " }\n"
138130
+ "}\n"
139131
+ "nested_type {\n"
140-
+ " name: \"__S4\"\n"
132+
+ " name: \"com_google_cloud_bigquery_storage_test_NestedType\"\n"
141133
+ " field {\n"
142-
+ " name: \"value\"\n"
134+
+ " name: \"inner_type\"\n"
143135
+ " number: 1\n"
144136
+ " label: LABEL_REPEATED\n"
145-
+ " type: TYPE_STRING\n"
137+
+ " type: TYPE_MESSAGE\n"
138+
+ " type_name: \"com_google_cloud_bigquery_storage_test_InnerType\"\n"
146139
+ " }\n"
147140
+ "}\n",
148141
protoSchema.getProtoDescriptor().toString());
@@ -156,7 +149,46 @@ public void convertRecursive() {
156149
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
157150
Assert.fail("No exception raised");
158151
} catch (InvalidArgumentException e) {
159-
// Expected exception
152+
Assert.assertEquals(
153+
"Recursive type is not supported:com.google.cloud.bigquery.storage.test.ContainsRecursive",
154+
e.getMessage());
155+
}
156+
}
157+
158+
@Test
159+
public void convertRecursiveTopMessage() {
160+
try {
161+
RecursiveTypeTopMessage testProto = RecursiveTypeTopMessage.newBuilder().build();
162+
ProtoBufProto.ProtoSchema protoSchema =
163+
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
164+
Assert.fail("No exception raised");
165+
} catch (InvalidArgumentException e) {
166+
Assert.assertEquals(
167+
"Recursive type is not supported:com.google.cloud.bigquery.storage.test.RecursiveTypeTopMessage",
168+
e.getMessage());
169+
}
170+
}
171+
172+
@Test
173+
public void convertDuplicateType() {
174+
DuplicateType testProto = DuplicateType.newBuilder().build();
175+
ProtoBufProto.ProtoSchema protoSchema =
176+
ProtoSchemaConverter.convert(testProto.getDescriptorForType());
177+
178+
FileDescriptorProto fileDescriptorProto =
179+
FileDescriptorProto.newBuilder()
180+
.setName("foo.proto")
181+
.addMessageType(protoSchema.getProtoDescriptor())
182+
.build();
183+
try {
184+
Descriptors.FileDescriptor fs =
185+
Descriptors.FileDescriptor.buildFrom(
186+
fileDescriptorProto, new Descriptors.FileDescriptor[0]);
187+
Descriptors.Descriptor type =
188+
fs.findMessageTypeByName(protoSchema.getProtoDescriptor().getName());
189+
Assert.assertEquals(4, type.getFields().size());
190+
} catch (Descriptors.DescriptorValidationException ex) {
191+
Assert.fail("Got unexpected exception: " + ex.getMessage());
160192
}
161193
}
162194
}

google-cloud-bigquerystorage/src/test/proto/test.proto

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ message RecursiveType {
5555
optional ContainsRecursive field = 2;
5656
}
5757

58+
message RecursiveTypeTopMessage {
59+
optional RecursiveTypeTopMessage field = 2;
60+
}
61+
5862
message FooType {
5963
optional string foo = 1;
6064
}
65+
66+
message DuplicateType {
67+
optional TestEnum f1 = 1;
68+
optional TestEnum f2 = 2;
69+
optional ComplicateType f3 = 3;
70+
optional ComplicateType f4 = 4;
71+
}

0 commit comments

Comments
 (0)