-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.c
More file actions
768 lines (742 loc) · 17.1 KB
/
main.c
File metadata and controls
768 lines (742 loc) · 17.1 KB
Edit and raw actions
OlderNewer
1
#define _XOPEN_SOURCE 600
2
#include <assert.h>
3
#include <ctype.h>
4
#include <errno.h>
5
#include <limits.h>
6
#include <stdbool.h>
7
#include <stdio.h>
8
#include <stdlib.h>
9
#include <string.h>
10
#include <time.h>
11
#include <unistd.h>
12
#include "str.h"
13
#include "unicode.h"
14
#include "util.h"
15
16
/* Redundant redeclaration: strstr() is already declared by <string.h>, which this file includes. */
char *strstr(const char *haystack, const char *needle);
17
/* Redundant redeclaration: strerror() is already declared by <string.h>, which this file includes. */
char *strerror(int errnum);
18
19
static struct str *parse_section(struct parser *p) {
20
struct str *section = str_create();
21
uint32_t ch;
22
char *subsection;
23
while ((ch = parser_getch(p)) != UTF8_INVALID) {
24
if (ch < 0x80 && isalnum((unsigned char)ch)) {
25
int ret = str_append_ch(section, ch);
26
assert(ret != -1);
27
} else if (ch == ')') {
28
if (section->len == 0) {
29
break;
30
}
31
int sec = strtol(section->str, &subsection, 10);
32
if (section->str == subsection) {
33
parser_fatal(p, "Expected section digit");
34
break;
35
}
36
if (sec < 0 || sec > 9) {
37
parser_fatal(p, "Expected section between 0 and 9");
38
break;
39
}
40
return section;
41
} else {
42
parser_fatal(p, "Expected alphanumerical character or )");
43
break;
44
}
45
};
46
parser_fatal(p, "Expected manual section");
47
return NULL;
48
}
49
50
static struct str *parse_extra(struct parser *p) {
51
struct str *extra = str_create();
52
int ret = str_append_ch(extra, '"');
53
assert(ret != -1);
54
uint32_t ch;
55
while ((ch = parser_getch(p)) != UTF8_INVALID) {
56
if (ch == '"') {
57
ret = str_append_ch(extra, ch);
58
assert(ret != -1);
59
return extra;
60
} else if (ch == '\n') {
61
parser_fatal(p, "Unclosed extra preamble field");
62
break;
63
} else {
64
ret = str_append_ch(extra, ch);
65
assert(ret != -1);
66
}
67
}
68
str_free(extra);
69
return NULL;
70
}
71
72
static void parse_preamble(struct parser *p) {
73
struct str *name = str_create();
74
int ex = 0;
75
struct str *extras[2] = { NULL };
76
struct str *section = NULL;
77
uint32_t ch;
78
time_t date_time;
79
char date[256];
80
char *source_date_epoch = getenv("SOURCE_DATE_EPOCH");
81
if (source_date_epoch != NULL) {
82
unsigned long long epoch;
83
char *endptr;
84
errno = 0;
85
epoch = strtoull(source_date_epoch, &endptr, 10);
86
if ((errno == ERANGE && (epoch == ULLONG_MAX || epoch == 0))
87
|| (errno != 0 && epoch == 0)) {
88
fprintf(stderr, "$SOURCE_DATE_EPOCH: strtoull: %s\n",
89
strerror(errno));
90
exit(EXIT_FAILURE);
91
}
92
if (endptr == source_date_epoch) {
93
fprintf(stderr, "$SOURCE_DATE_EPOCH: No digits were found: %s\n",
94
endptr);
95
exit(EXIT_FAILURE);
96
}
97
if (*endptr != '\0') {
98
fprintf(stderr, "$SOURCE_DATE_EPOCH: Trailing garbage: %s\n",
99
endptr);
100
exit(EXIT_FAILURE);
101
}
102
if (epoch > ULONG_MAX) {
103
fprintf(stderr, "$SOURCE_DATE_EPOCH: value must be smaller than or "
104
"equal to %lu but was found to be: %llu \n",
105
ULONG_MAX, epoch);
106
exit(EXIT_FAILURE);
107
}
108
date_time = epoch;
109
} else {
110
date_time = time(NULL);
111
}
112
struct tm *date_tm = gmtime(&date_time);
113
strftime(date, sizeof(date), "%F", date_tm);
114
while ((ch = parser_getch(p)) != UTF8_INVALID) {
115
if ((ch < 0x80 && isalnum((unsigned char)ch))
116
|| ch == '_' || ch == '-' || ch == '.') {
117
int ret = str_append_ch(name, ch);
118
assert(ret != -1);
119
} else if (ch == '(') {
120
section = parse_section(p);
121
} else if (ch == '"') {
122
if (ex == 2) {
123
parser_fatal(p, "Too many extra preamble fields");
124
}
125
extras[ex++] = parse_extra(p);
126
} else if (ch == '\n') {
127
if (name->len == 0) {
128
parser_fatal(p, "Expected preamble");
129
}
130
if (section == NULL) {
131
parser_fatal(p, "Expected manual section");
132
}
133
char *ex2 = extras[0] != NULL ? extras[0]->str : NULL;
134
char *ex3 = extras[1] != NULL ? extras[1]->str : NULL;
135
fprintf(p->output, ".TH \"%s\" \"%s\" \"%s\"", name->str, section->str, date);
136
/* ex2 and ex3 are already double-quoted */
137
if (ex2) {
138
fprintf(p->output, " %s", ex2);
139
}
140
if (ex3) {
141
fprintf(p->output, " %s", ex3);
142
}
143
fprintf(p->output, "\n");
144
break;
145
} else if (section == NULL) {
146
parser_fatal(p, "Name characters must be A-Z, a-z, 0-9, `-`, `_`, or `.`");
147
}
148
}
149
str_free(name);
150
for (int i = 0; i < 2; ++i) {
151
if (extras[i] != NULL) {
152
str_free(extras[i]);
153
}
154
}
155
}
156
157
static void parse_format(struct parser *p, enum formatting fmt) {
158
char formats[FORMAT_LAST] = {
159
[FORMAT_BOLD] = 'B',
160
[FORMAT_UNDERLINE] = 'I',
161
};
162
char error[512];
163
if (p->flags) {
164
if ((p->flags & ~fmt)) {
165
snprintf(error, sizeof(error), "Cannot nest inline formatting "
166
"(began with %c at %d:%d)",
167
p->flags == FORMAT_BOLD ? '*' : '_',
168
p->fmt_line, p->fmt_col);
169
parser_fatal(p, error);
170
}
171
fprintf(p->output, "\\fR");
172
} else {
173
fprintf(p->output, "\\f%c", formats[fmt]);
174
p->fmt_line = p->line;
175
p->fmt_col = p->col;
176
}
177
p->flags ^= fmt;
178
}
179
180
static bool parse_linebreak(struct parser *p) {
181
uint32_t plus = parser_getch(p);
182
if (plus != '+') {
183
fprintf(p->output, "+");
184
parser_pushch(p, plus);
185
return false;
186
}
187
uint32_t lf = parser_getch(p);
188
if (lf != '\n') {
189
fprintf(p->output, "+");
190
parser_pushch(p, lf);
191
parser_pushch(p, plus);
192
return false;
193
}
194
uint32_t ch = parser_getch(p);
195
if (ch == '\n') {
196
parser_fatal(
197
p, "Explicit line breaks cannot be followed by a blank line");
198
}
199
parser_pushch(p, ch);
200
fprintf(p->output, "\n.br\n");
201
return true;
202
}
203
204
static void parse_text(struct parser *p) {
205
uint32_t ch, next, last = ' ';
206
int i = 0;
207
while ((ch = parser_getch(p)) != UTF8_INVALID) {
208
switch (ch) {
209
case '\\':
210
ch = parser_getch(p);
211
if (ch == UTF8_INVALID) {
212
parser_fatal(p, "Unexpected EOF");
213
} else if (ch == '\\') {
214
fprintf(p->output, "\\\\");
215
} else {
216
utf8_fputch(p->output, ch);
217
}
218
break;
219
case '*':
220
parse_format(p, FORMAT_BOLD);
221
break;
222
case '_':
223
next = parser_getch(p);
224
if (!isalnum((unsigned char)last) || (
225
(p->flags & FORMAT_UNDERLINE) &&
226
!isalnum((unsigned char)next))) {
227
parse_format(p, FORMAT_UNDERLINE);
228
} else {
229
utf8_fputch(p->output, ch);
230
}
231
if (next == UTF8_INVALID) {
232
return;
233
}
234
parser_pushch(p, next);
235
break;
236
case '+':
237
if (parse_linebreak(p)) {
238
last = '\n';
239
}
240
break;
241
case '\n':
242
utf8_fputch(p->output, ch);
243
return;
244
case '.':
245
if (!i) {
246
// Escape . if it's the first character
247
fprintf(p->output, "\\&.\\&");
248
break;
249
}
250
/* fallthrough */
251
case '!':
252
case '?':
253
last = ch;
254
utf8_fputch(p->output, ch);
255
// Suppress sentence spacing
256
fprintf(p->output, "\\&");
257
break;
258
default:
259
last = ch;
260
utf8_fputch(p->output, ch);
261
break;
262
}
263
++i;
264
}
265
}
266
267
static void parse_heading(struct parser *p) {
268
uint32_t ch;
269
int level = 1;
270
while ((ch = parser_getch(p)) != UTF8_INVALID) {
271
if (ch == '#') {
272
++level;
273
} else if (ch == ' ') {
274
break;
275
} else {
276
parser_fatal(p, "Invalid start of heading (probably needs a space)");
277
}
278
}
279
switch (level) {
280
case 1:
281
fprintf(p->output, ".SH ");
282
break;
283
case 2:
284
fprintf(p->output, ".SS ");
285
break;
286
default:
287
parser_fatal(p, "Only headings up to two levels deep are permitted");
288
break;
289
}
290
while ((ch = parser_getch(p)) != UTF8_INVALID) {
291
utf8_fputch(p->output, ch);
292
if (ch == '\n') {
293
break;
294
}
295
}
296
}
297
298
static int parse_indent(struct parser *p, int *indent, bool write) {
299
int i = 0;
300
uint32_t ch;
301
while ((ch = parser_getch(p)) == '\t') {
302
++i;
303
}
304
parser_pushch(p, ch);
305
if ((ch == '\n' || ch == UTF8_INVALID) && *indent != 0) {
306
// Don't change indent when we encounter empty lines or EOF
307
return *indent;
308
}
309
if (write) {
310
if ((i - *indent) > 1) {
311
parser_fatal(p, "Indented by an amount greater than 1");
312
} else if (i < *indent) {
313
for (int j = *indent; i < j; --j) {
314
roff_macro(p, "RE", NULL);
315
}
316
} else if (i == *indent + 1) {
317
fprintf(p->output, ".RS 4\n");
318
}
319
}
320
*indent = i;
321
return i;
322
}
323
324
static void list_header(struct parser *p, int *num) {
325
fprintf(p->output, ".RS 4\n");
326
fprintf(p->output, ".ie n \\{\\\n");
327
if (*num == -1) {
328
fprintf(p->output, "\\h'-0%d'%s\\h'+03'\\c\n",
329
*num >= 10 ? 5 : 4, "\\(bu");
330
} else {
331
fprintf(p->output, "\\h'-0%d'%d.\\h'+03'\\c\n",
332
*num >= 10 ? 5 : 4, *num);
333
}
334
fprintf(p->output, ".\\}\n");
335
fprintf(p->output, ".el \\{\\\n");
336
if (*num == -1) {
337
fprintf(p->output, ".IP %s 4\n", "\\(bu");
338
} else {
339
fprintf(p->output, ".IP %d. 4\n", *num);
340
*num = *num + 1;
341
}
342
fprintf(p->output, ".\\}\n");
343
}
344
345
static void parse_list(struct parser *p, int *indent, int num) {
346
uint32_t ch;
347
if ((ch = parser_getch(p)) != ' ') {
348
parser_fatal(p, "Expected space before start of list entry");
349
}
350
list_header(p, &num);
351
parse_text(p);
352
do {
353
parse_indent(p, indent, true);
354
if ((ch = parser_getch(p)) == UTF8_INVALID) {
355
break;
356
}
357
switch (ch) {
358
case ' ':
359
if ((ch = parser_getch(p)) != ' ') {
360
parser_fatal(p, "Expected two spaces for list entry continuation");
361
}
362
parse_text(p);
363
break;
364
case '-':
365
case '.':
366
if ((ch = parser_getch(p)) != ' ') {
367
parser_fatal(p, "Expected space before start of list entry");
368
}
369
roff_macro(p, "RE", NULL);
370
list_header(p, &num);
371
parse_text(p);
372
break;
373
default:
374
fprintf(p->output, "\n");
375
parser_pushch(p, ch);
376
goto ret;
377
}
378
} while (ch != UTF8_INVALID);
379
ret:
380
roff_macro(p, "RE", NULL);
381
}
382
383
static void parse_literal(struct parser *p, int *indent) {
384
uint32_t ch;
385
if ((ch = parser_getch(p)) != '`' ||
386
(ch = parser_getch(p)) != '`' ||
387
(ch = parser_getch(p)) != '\n') {
388
parser_fatal(p, "Expected ``` and a newline to begin literal block");
389
}
390
int stops = 0;
391
roff_macro(p, "nf", NULL);
392
fprintf(p->output, ".RS 4\n");
393
bool check_indent = true;
394
do {
395
if (check_indent) {
396
int _indent = *indent;
397
parse_indent(p, &_indent, false);
398
if (_indent < *indent) {
399
parser_fatal(p, "Cannot deindent in literal block");
400
}
401
while (_indent > *indent) {
402
--_indent;
403
fprintf(p->output, "\t");
404
}
405
check_indent = false;
406
}
407
if ((ch = parser_getch(p)) == UTF8_INVALID) {
408
break;
409
}
410
if (ch == '`') {
411
if (++stops == 3) {
412
if ((ch = parser_getch(p)) != '\n') {
413
parser_fatal(p, "Expected literal block to end with newline");
414
}
415
roff_macro(p, "fi", NULL);
416
roff_macro(p, "RE", NULL);
417
return;
418
}
419
} else {
420
while (stops != 0) {
421
fputc('`', p->output);
422
--stops;
423
}
424
switch (ch) {
425
case '.':
426
fprintf(p->output, "\\&.");
427
break;
428
case '\\':
429
ch = parser_getch(p);
430
if (ch == UTF8_INVALID) {
431
parser_fatal(p, "Unexpected EOF");
432
} else if (ch == '\\') {
433
fprintf(p->output, "\\\\");
434
} else {
435
utf8_fputch(p->output, ch);
436
}
437
break;
438
case '\n':
439
check_indent = true;
440
/* fallthrough */
441
default:
442
utf8_fputch(p->output, ch);
443
break;
444
}
445
}
446
} while (ch != UTF8_INVALID);
447
}
448
449
/*
 * Column alignment of a table cell. The plain variants are written to the
 * tbl format line as "l", "c" and "r"; the *_EXPAND variants as "lx", "cx"
 * and "rx". ALIGN_LEFT is 0, so a zero-initialised cell is left-aligned.
 */
enum table_align {
	ALIGN_LEFT = 0,
	ALIGN_CENTER,
	ALIGN_RIGHT,
	ALIGN_LEFT_EXPAND,
	ALIGN_CENTER_EXPAND,
	ALIGN_RIGHT_EXPAND,
};
457
458
/* One table row: a singly linked list of cells, chained to the next row. */
struct table_row {
	struct table_cell *cell; // first cell of this row
	struct table_row *next;  // following row, or NULL for the last row
};
462
463
struct table_cell {
464
enum table_align align;
465
struct str *contents;
466
struct table_cell *next;
467
};
468
469
static void parse_table(struct parser *p, uint32_t style) {
470
struct table_row *table = NULL;
471
struct table_row *currow = NULL, *prevrow = NULL;
472
struct table_cell *curcell = NULL;
473
int column = 0;
474
uint32_t ch;
475
parser_pushch(p, '|');
476
477
do {
478
if ((ch = parser_getch(p)) == UTF8_INVALID) {
479
break;
480
}
481
switch (ch) {
482
case '\n':
483
goto commit_table;
484
case '|':
485
prevrow = currow;
486
currow = calloc(1, sizeof(struct table_row));
487
if (prevrow) {
488
// TODO: Verify the number of columns match
489
prevrow->next = currow;
490
}
491
curcell = calloc(1, sizeof(struct table_cell));
492
currow->cell = curcell;
493
column = 0;
494
if (!table) {
495
table = currow;
496
}
497
break;
498
case ':':
499
if (!currow) {
500
parser_fatal(p, "Cannot start a column without "
501
"starting a row first");
502
} else {
503
struct table_cell *prev = curcell;
504
curcell = calloc(1, sizeof(struct table_cell));
505
if (prev) {
506
prev->next = curcell;
507
}
508
++column;
509
}
510
break;
511
case ' ':
512
goto continue_cell;
513
default:
514
parser_fatal(p, "Expected either '|' or ':'");
515
break;
516
}
517
if ((ch = parser_getch(p)) == UTF8_INVALID) {
518
break;
519
}
520
switch (ch) {
521
case '[':
522
curcell->align = ALIGN_LEFT;
523
break;
524
case '-':
525
curcell->align = ALIGN_CENTER;
526
break;
527
case ']':
528
curcell->align = ALIGN_RIGHT;
529
break;
530
case '<':
531
curcell->align = ALIGN_LEFT_EXPAND;
532
break;
533
case '=':
534
curcell->align = ALIGN_CENTER_EXPAND;
535
break;
536
case '>':
537
curcell->align = ALIGN_RIGHT_EXPAND;
538
break;
539
case ' ':
540
if (prevrow) {
541
struct table_cell *pcell = prevrow->cell;
542
for (int i = 0; i <= column && pcell; ++i, pcell = pcell->next) {
543
if (i == column) {
544
curcell->align = pcell->align;
545
break;
546
}
547
}
548
} else {
549
parser_fatal(p, "No previous row to infer alignment from");
550
}
551
break;
552
default:
553
parser_fatal(p, "Expected one of '[', '-', ']', or ' '");
554
break;
555
}
556
curcell->contents = str_create();
557
continue_cell:
558
switch (ch = parser_getch(p)) {
559
case ' ':
560
// Read out remainder of the text
561
while ((ch = parser_getch(p)) != UTF8_INVALID) {
562
switch (ch) {
563
case '\n':
564
goto commit_cell;
565
default:;
566
int ret = str_append_ch(curcell->contents, ch);
567
assert(ret != -1);
568
break;
569
}
570
}
571
break;
572
case '\n':
573
goto commit_cell;
574
default:
575
parser_fatal(p, "Expected ' ' or a newline");
576
break;
577
}
578
commit_cell:
579
if (strstr(curcell->contents->str, "T{")
580
|| strstr(curcell->contents->str, "T}")) {
581
parser_fatal(p, "Cells cannot contain T{ or T} "
582
"due to roff limitations");
583
}
584
} while (ch != UTF8_INVALID);
585
commit_table:
586
587
if (ch == UTF8_INVALID) {
588
return;
589
}
590
591
roff_macro(p, "TS", NULL);
592
593
switch (style) {
594
case '[':
595
fprintf(p->output, "allbox;");
596
break;
597
case ']':
598
fprintf(p->output, "box;");
599
break;
600
}
601
602
// Print alignments first
603
currow = table;
604
while (currow) {
605
curcell = currow->cell;
606
while (curcell) {
607
char *align = "";
608
switch (curcell->align) {
609
case ALIGN_LEFT:
610
align = "l";
611
break;
612
case ALIGN_CENTER:
613
align = "c";
614
break;
615
case ALIGN_RIGHT:
616
align = "r";
617
break;
618
case ALIGN_LEFT_EXPAND:
619
align = "lx";
620
break;
621
case ALIGN_CENTER_EXPAND:
622
align = "cx";
623
break;
624
case ALIGN_RIGHT_EXPAND:
625
align = "rx";
626
break;
627
}
628
fprintf(p->output, "%s%s", align, curcell->next ? " " : "");
629
curcell = curcell->next;
630
}
631
fprintf(p->output, "%s\n", currow->next ? "" : ".");
632
currow = currow->next;
633
}
634
635
// Then contents
636
currow = table;
637
while (currow) {
638
curcell = currow->cell;
639
fprintf(p->output, "T{\n");
640
while (curcell) {
641
parser_pushstr(p, curcell->contents->str);
642
parse_text(p);
643
if (curcell->next) {
644
fprintf(p->output, "\nT}\tT{\n");
645
} else {
646
fprintf(p->output, "\nT}");
647
}
648
struct table_cell *prev = curcell;
649
curcell = curcell->next;
650
str_free(prev->contents);
651
free(prev);
652
}
653
fprintf(p->output, "\n");
654
struct table_row *prev = currow;
655
currow = currow->next;
656
free(prev);
657
}
658
659
roff_macro(p, "TE", NULL);
660
fprintf(p->output, ".sp 1\n");
661
}
662
663
static void parse_document(struct parser *p) {
664
uint32_t ch;
665
int indent = 0;
666
do {
667
parse_indent(p, &indent, true);
668
if ((ch = parser_getch(p)) == UTF8_INVALID) {
669
break;
670
}
671
switch (ch) {
672
case ';':
673
if ((ch = parser_getch(p)) != ' ') {
674
parser_fatal(p, "Expected space after ; to begin comment");
675
}
676
do {
677
ch = parser_getch(p);
678
} while (ch != UTF8_INVALID && ch != '\n');
679
break;
680
case '#':
681
if (indent != 0) {
682
parser_pushch(p, ch);
683
parse_text(p);
684
break;
685
}
686
parse_heading(p);
687
break;
688
case '-':
689
parse_list(p, &indent, -1);
690
break;
691
case '.':
692
if ((ch = parser_getch(p)) == ' ') {
693
parser_pushch(p, ch);
694
parse_list(p, &indent, 1);
695
} else {
696
parser_pushch(p, ch);
697
parse_text(p);
698
}
699
break;
700
case '`':
701
parse_literal(p, &indent);
702
break;
703
case '[':
704
case '|':
705
case ']':
706
if (indent != 0) {
707
parser_fatal(p, "Tables cannot be indented");
708
}
709
parse_table(p, ch);
710
break;
711
case ' ':
712
parser_fatal(p, "Tabs are required for indentation");
713
break;
714
case '\n':
715
if (p->flags) {
716
char error[512];
717
snprintf(error, sizeof(error), "Expected %c before starting "
718
"new paragraph (began with %c at %d:%d)",
719
p->flags == FORMAT_BOLD ? '*' : '_',
720
p->flags == FORMAT_BOLD ? '*' : '_',
721
p->fmt_line, p->fmt_col);
722
parser_fatal(p, error);
723
}
724
roff_macro(p, "P", NULL);
725
break;
726
default:
727
parser_pushch(p, ch);
728
parse_text(p);
729
break;
730
}
731
} while (ch != UTF8_INVALID);
732
}
733
734
static void output_scdoc_preamble(struct parser *p) {
735
fprintf(p->output, ".\\\" Generated by scdoc " VERSION "\n");
736
fprintf(p->output, ".\\\" Complete documentation for this program is not "
737
"available as a GNU info page\n");
738
// Fix weird quotation marks
739
// http://bugs.debian.org/507673
740
// http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
741
fprintf(p->output, ".ie \\n(.g .ds Aq \\(aq\n");
742
fprintf(p->output, ".el .ds Aq '\n");
743
// Disable hyphenation:
744
roff_macro(p, "nh", NULL);
745
// Disable justification:
746
roff_macro(p, "ad l", NULL);
747
fprintf(p->output, ".\\\" Begin generated content:\n");
748
}
749
750
int main(int argc, char **argv) {
751
if (argc == 2 && strcmp(argv[1], "-v") == 0) {
752
printf("scdoc " VERSION "\n");
753
return 0;
754
} else if (argc > 1) {
755
fprintf(stderr, "Usage: scdoc < input.scd > output.roff\n");
756
return 1;
757
}
758
struct parser p = {
759
.input = stdin,
760
.output = stdout,
761
.line = 1,
762
.col = 1
763
};
764
output_scdoc_preamble(&p);
765
parse_preamble(&p);
766
parse_document(&p);
767
return 0;
768
}