Skip to content

Commit 27c517a

Browse files
authored
fix: Resolve nested JSON schemas in documentation generation (#69)
* Traverse `anyOf` & `oneOf` types when generating docs from JSON schema * Properly handle new roots (when `$id` is defined)
1 parent 4b31999 commit 27c517a

File tree

7 files changed

+414
-73
lines changed

7 files changed

+414
-73
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Table of contents
2+
3+
* [`FileSpec`](#FileSpec)
4+
* [`CSVSpec`](#CSVSpec)
5+
* [`JSONSpec`](#JSONSpec)
6+
* [`ParquetSpec`](#ParquetSpec)
7+
8+
## <a name="FileSpec"></a>FileSpec
9+
10+
* `format` (`string`) (required) (possible values: `csv`, `json`, `parquet`)
11+
12+
Output format.
13+
14+
* `format_spec` ([`CSVSpec`](#CSVSpec), [`JSONSpec`](#JSONSpec) or [`ParquetSpec`](#ParquetSpec)) (nullable)
15+
16+
* `compression` (`string`) (possible values: ` `, `gzip`)
17+
18+
Compression type.
19+
Empty or missing stands for no compression.
20+
21+
### <a name="CSVSpec"></a>CSVSpec
22+
23+
CloudQuery CSV file output spec.
24+
25+
* `skip_header` (`boolean`) (default: `false`)
26+
27+
Specifies if the first line of a file should be the header.
28+
29+
* `delimiter` (`string`) ([pattern](https://json-schema.org/draft/2020-12/json-schema-validation#section-6.3.3): `^.$`) (default: `,`)
30+
31+
Character that will be used as the delimiter.
32+
33+
### <a name="JSONSpec"></a>JSONSpec
34+
35+
CloudQuery JSON file output spec.
36+
37+
(`object`)
38+
39+
### <a name="ParquetSpec"></a>ParquetSpec
40+
41+
CloudQuery Parquet file output spec.
42+
43+
(`object`)

jsonschema/docs/docs.go

Lines changed: 107 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"encoding/json"
55
"fmt"
66
"math/big"
7-
"regexp"
87
"slices"
98
"strconv"
109
"strings"
@@ -19,19 +18,18 @@ func Generate(schema []byte, headerLevel int) (string, error) {
1918
}
2019

2120
buff := new(strings.Builder)
22-
toc, err := generate(root.Definitions, unwrapRef(root.Ref), headerLevel, buff)
21+
toc, err := generate(&root, headerLevel, buff)
2322
return toc + "\n\n" + buff.String(), err
2423
}
2524

26-
type reference struct {
27-
key string
28-
level int
29-
}
30-
31-
func generate(definitions jsonschema.Definitions, ref string, level int, buff *strings.Builder) (toc string, err error) {
32-
processed := make(map[string]struct{}, len(definitions))
33-
references := make([]reference, 1, len(definitions))
34-
references[0] = reference{key: ref, level: level + 1} // +1 as toc is on the level
25+
func generate(root *jsonschema.Schema, level int, buff *strings.Builder) (toc string, err error) {
26+
processed := make(map[refKey]struct{}, len(root.Definitions))
27+
references := make([]reference, 1, len(root.Definitions))
28+
references[0] = reference{
29+
key: refKey{id: root.ID, key: root.Ref},
30+
level: level + 1,
31+
definitions: root.Definitions,
32+
} // +1 as toc is on the level
3533

3634
toc = strings.Repeat("#", level) + " Table of contents\n"
3735
var curr reference
@@ -46,20 +44,20 @@ func generate(definitions jsonschema.Definitions, ref string, level int, buff *s
4644
}
4745
processed[curr.key] = struct{}{}
4846

49-
currSchema, ok := definitions[curr.key]
50-
if !ok {
51-
return toc, fmt.Errorf("missing definition for key %q, possibly incomplete schema", curr.key)
47+
currSchema, err := curr.schema()
48+
if err != nil {
49+
return toc, err
5250
}
5351

5452
// we prepend references to make the docs more localized
5553
references = append(writeDefinition(curr, currSchema, buff), references...)
56-
toc += "\n" + strings.Repeat(" ", curr.level-level-1) + "* " + linkTo(curr.key)
54+
toc += "\n" + strings.Repeat(" ", curr.level-level-1) + "* " + curr.key.link()
5755
}
5856
return toc, nil
5957
}
6058

6159
func writeDefinition(ref reference, sc *jsonschema.Schema, buff *strings.Builder) []reference {
62-
buff.WriteString(header(ref))
60+
buff.WriteString(ref.header())
6361
buff.WriteString("\n")
6462

6563
if len(sc.Title) > 0 {
@@ -72,34 +70,23 @@ func writeDefinition(ref reference, sc *jsonschema.Schema, buff *strings.Builder
7270

7371
if sc.Properties.Len() == 0 {
7472
buff.WriteString("\n")
75-
newRef := writeInlineDefinition(sc, slices.Contains(sc.Required, ref.key), buff)
76-
if len(newRef) > 0 {
77-
return []reference{{key: newRef, level: ref.level + 1}}
78-
}
79-
return nil
73+
return ref.newReferences(sc, writeInlineDefinition(sc, false, buff))
8074
}
8175

8276
refs := make([]reference, 0, sc.Properties.Len()) // prealloc to some meaningful len
8377
for prop := sc.Properties.Oldest(); prop != nil; prop = prop.Next() {
8478
buff.WriteString("\n")
85-
newRef := docProperty(prop.Key, prop.Value, slices.Contains(sc.Required, prop.Key), buff)
86-
if len(newRef) > 0 {
87-
refs = append(refs, reference{key: newRef, level: ref.level + 1})
88-
}
79+
refs = append(refs, ref.newReferences(sc, docProperty(prop.Key, prop.Value, slices.Contains(sc.Required, prop.Key), buff))...)
8980
}
9081

9182
return refs
9283
}
9384

94-
func writeInlineDefinition(sc *jsonschema.Schema, required bool, buff *strings.Builder) (ref string) {
85+
// writeInlineDefinition writes the definition of sc into buff by delegating to
// writeProperty, and returns the reference keys that writeProperty reports.
func writeInlineDefinition(sc *jsonschema.Schema, required bool, buff *strings.Builder) (refs []refKey) {
	refs = writeProperty(sc, required, buff)
	return refs
}
9788

98-
func header(ref reference) string {
99-
return strings.Repeat("#", min(ref.level, 6)) + ` <a name="` + anchorValue(ref.key) + `"></a>` + trimClashingSuffix(ref.key)
100-
}
101-
102-
func docProperty(key string, property *jsonschema.Schema, required bool, buff *strings.Builder) (ref string) {
89+
func docProperty(key string, property *jsonschema.Schema, required bool, buff *strings.Builder) (refs []refKey) {
10390
buff.WriteString("* `" + key + "`")
10491
sc, _ := unwrapNullable(property)
10592

@@ -116,9 +103,9 @@ func docProperty(key string, property *jsonschema.Schema, required bool, buff *s
116103
}
117104

118105
// writeProperty starts off with the type definition without any line breaks & prefixes
119-
func writeProperty(property *jsonschema.Schema, required bool, buff *strings.Builder) (ref string) {
106+
func writeProperty(property *jsonschema.Schema, required bool, buff *strings.Builder) (refs []refKey) {
120107
sc, nullable := unwrapNullable(property)
121-
propType, ref := propertyType(sc)
108+
propType, refs := propertyType(sc)
122109
buff.WriteString(propType)
123110
if nullable {
124111
buff.WriteString(" (nullable)")
@@ -133,7 +120,7 @@ func writeProperty(property *jsonschema.Schema, required bool, buff *strings.Bui
133120

134121
writeDescription(sc, buff)
135122

136-
return ref
123+
return refs
137124
}
138125

139126
func writeDescription(sc *jsonschema.Schema, buff *strings.Builder) {
@@ -144,6 +131,8 @@ func writeDescription(sc *jsonschema.Schema, buff *strings.Builder) {
144131
buff.WriteString("\n ")
145132
buff.WriteString(strings.ReplaceAll(sc.Description, "\n", "\n "))
146133
buff.WriteString("\n")
134+
135+
sc.Description = "" // already used
147136
}
148137

149138
func writeValueAnnotations(sc *jsonschema.Schema, buff *strings.Builder) {
@@ -171,7 +160,7 @@ func writeValueAnnotations(sc *jsonschema.Schema, buff *strings.Builder) {
171160
if i > 0 {
172161
buff.WriteString(", ")
173162
}
174-
_, _ = fmt.Fprintf(buff, "`%v`", e)
163+
_, _ = fmt.Fprintf(buff, "`%v`", anyValue(e))
175164
}
176165
buff.WriteString(")")
177166
}
@@ -183,6 +172,11 @@ func writeValueAnnotations(sc *jsonschema.Schema, buff *strings.Builder) {
183172

184173
func anyValue(a any) string {
185174
switch a := a.(type) {
175+
case string:
176+
if len(a) == 0 {
177+
// Markdown needs at least 1 space to represent empty string
178+
return " "
179+
}
186180
case float32:
187181
if float32(int64(a)) == a {
188182
return fmt.Sprintf("%d", int64(a))
@@ -265,43 +259,53 @@ func unwrapNullable(sc *jsonschema.Schema) (*jsonschema.Schema, bool) {
265259
return sc, false
266260
}
267261

268-
func propertyType(sc *jsonschema.Schema) (_type string, ref string) {
269-
_type, ref = propertyTypeNoSuffix(sc)
270-
_type = "`" + _type + "`" // backticks for type name
271-
if len(ref) > 0 {
272-
_type = `[` + _type + `](#` + anchorValue(ref) + `)` // link
262+
func propertyType(sc *jsonschema.Schema) (_type string, refs []refKey) {
263+
types := propertyTypeNoSuffix(sc)
264+
265+
if len(types) == 1 {
266+
t := types[0]
267+
return "(" + t.printable() + ")", t.refs()
273268
}
274-
_type = `(` + _type + `)` // wrap in brackets
275-
return _type, ref
269+
270+
parts := make([]string, len(types)) // >1 part ~ oneOf/anyOf
271+
for i, t := range types {
272+
parts[i] = t.printable()
273+
refs = append(refs, t.refs()...)
274+
}
275+
276+
return "(" + strings.Join(parts[:len(parts)-1], ", ") + " or " + parts[len(parts)-1] + ")", refs
276277
}
277278

278-
func propertyTypeNoSuffix(sc *jsonschema.Schema) (_type string, ref string) {
279+
func propertyTypeNoSuffix(sc *jsonschema.Schema) []typeReference {
279280
sc, _ = unwrapNullable(sc)
280281

281282
if isAnything(sc) {
282-
return "anything", ""
283+
return []typeReference{{name: "anything"}}
283284
}
284285

285-
if ref = unwrapRef(sc.Ref); len(ref) > 0 {
286-
return trimClashingSuffix(ref), ref
286+
if len(sc.Ref) > 0 {
287+
ref := refKey{key: sc.Ref}
288+
return []typeReference{{name: ref.name(), ref: &ref}}
287289
}
288290

289-
if _type, ref, ok := mapType(sc); ok {
290-
return _type, ref
291+
if _types, ok := mapType(sc); ok {
292+
return _types
291293
}
292294

293-
if sc.Type != "array" {
294-
return sc.Type, ""
295+
if _types, ok := arrayType(sc); ok {
296+
return _types
295297
}
296298

297-
// arrays are a bit tricky
298-
item, nullable := unwrapNullable(sc.Items)
299-
pfx := "[]"
300-
if nullable {
301-
pfx += "*"
299+
if _types, ok := anyOfType(sc); ok {
300+
return _types
301+
}
302+
303+
if _types, ok := oneOfType(sc); ok {
304+
return _types
302305
}
303-
_type, ref = propertyTypeNoSuffix(item)
304-
return pfx + _type, ref
306+
307+
// default case
308+
return []typeReference{{name: sc.Type}}
305309
}
306310

307311
func isAnything(sc *jsonschema.Schema) bool {
@@ -312,32 +316,62 @@ func isAnything(sc *jsonschema.Schema) bool {
312316
return string(data) == "true"
313317
}
314318

315-
func mapType(sc *jsonschema.Schema) (_type string, ref string, ok bool) {
316-
if sc.Type != "object" || sc.AdditionalProperties == nil {
317-
return "", "", false
319+
func isNothing(sc *jsonschema.Schema) bool {
320+
data, err := json.Marshal(sc)
321+
if err != nil {
322+
panic(err)
323+
}
324+
return string(data) == "false"
325+
}
326+
327+
func mapType(sc *jsonschema.Schema) (refs []typeReference, ok bool) {
328+
if sc.Type != "object" || sc.AdditionalProperties == nil || isNothing(sc.AdditionalProperties) {
329+
return nil, false
318330
}
319331
pfx := `map[string]`
320-
_type, ref = propertyTypeNoSuffix(sc.AdditionalProperties)
321-
return pfx + _type, ref, true
332+
refs = propertyTypeNoSuffix(sc.AdditionalProperties)
333+
for i := range refs {
334+
refs[i].name = pfx + refs[i].name
335+
}
336+
return refs, true
322337
}
323338

324-
func unwrapRef(ref string) string {
325-
return strings.TrimPrefix(ref, "#/$defs/")
339+
func arrayType(sc *jsonschema.Schema) (refs []typeReference, ok bool) {
340+
if sc.Type != "array" {
341+
return nil, false
342+
}
343+
item, nullable := unwrapNullable(sc.Items)
344+
pfx := "[]"
345+
if nullable {
346+
pfx += "*"
347+
}
348+
refs = propertyTypeNoSuffix(item)
349+
for i := range refs {
350+
refs[i].name = pfx + refs[i].name
351+
}
352+
return refs, true
326353
}
327354

328-
func trimClashingSuffix(ref string) string {
329-
clashingRef := regexp.MustCompile(`^(.+)[_-]\d+$`)
330-
if !clashingRef.MatchString(ref) {
331-
return ref
355+
func oneOfType(sc *jsonschema.Schema) (refs []typeReference, ok bool) {
356+
if len(sc.OneOf) == 0 {
357+
return nil, false
332358
}
333359

334-
return clashingRef.FindStringSubmatch(ref)[1]
360+
return ofTypes(sc.OneOf), true
335361
}
336362

337-
func linkTo(key string) string {
338-
return "[`" + trimClashingSuffix(key) + "`](#" + anchorValue(key) + ")"
363+
func anyOfType(sc *jsonschema.Schema) (refs []typeReference, ok bool) {
364+
if len(sc.AnyOf) == 0 {
365+
return nil, false
366+
}
367+
368+
return ofTypes(sc.AnyOf), true
339369
}
340370

341-
func anchorValue(key string) string {
342-
return strings.ReplaceAll(key, "_", "-")
371+
func ofTypes(types []*jsonschema.Schema) []typeReference {
372+
refs := make([]typeReference, 0, len(types))
373+
for _, t := range types {
374+
refs = append(refs, propertyTypeNoSuffix(t)...)
375+
}
376+
return refs
343377
}

jsonschema/docs/docs_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,7 @@ func TestGCP(t *testing.T) {
4040
func TestClickHouse(t *testing.T) {
4141
genSnapshot(t, "testdata/clickhouse.json")
4242
}
43+
44+
func TestFiletypes(t *testing.T) {
45+
genSnapshot(t, "testdata/filetypes.json")
46+
}

jsonschema/docs/ref_key.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package docs
2+
3+
import (
4+
"regexp"
5+
"strings"
6+
7+
"github.com/invopop/jsonschema"
8+
)
9+
10+
type refKey struct {
11+
id jsonschema.ID // $id of the schema, differs for nested schemas
12+
key string // key in definitions map
13+
}
14+
15+
func (r refKey) unwrap() string {
16+
return strings.TrimPrefix(r.key, "#/$defs/")
17+
}
18+
19+
func (r refKey) name() string {
20+
clashingRef := regexp.MustCompile(`^(.+)[_-]\d+$`)
21+
key := r.unwrap()
22+
23+
match := clashingRef.FindStringSubmatch(key)
24+
if len(match) > 1 {
25+
return match[1]
26+
}
27+
return key
28+
}
29+
30+
func (r refKey) anchor() string {
31+
return strings.ReplaceAll(r.unwrap(), "_", "-")
32+
}
33+
34+
func (r refKey) link() string {
35+
return "[`" + r.name() + "`](#" + r.anchor() + ")"
36+
}

0 commit comments

Comments
 (0)