|
1 | 1 | package com.adobe.epubcheck.ocf; |
2 | 2 |
|
| 3 | +import java.util.LinkedHashSet; |
3 | 4 | import java.util.Set; |
| 5 | +import java.util.stream.Collectors; |
| 6 | + |
| 7 | +import org.w3c.epubcheck.core.Checker; |
4 | 8 |
|
5 | 9 | import com.adobe.epubcheck.api.EPUBLocation; |
6 | 10 | import com.adobe.epubcheck.api.Report; |
7 | 11 | import com.adobe.epubcheck.messages.MessageId; |
8 | 12 | import com.adobe.epubcheck.opf.ValidationContext; |
9 | 13 | import com.adobe.epubcheck.util.EPUBVersion; |
10 | | -import com.google.common.collect.ImmutableSet; |
| 14 | +import com.google.common.base.Preconditions; |
| 15 | +import com.google.common.collect.ImmutableMap; |
| 16 | +import com.ibm.icu.lang.UCharacter; |
| 17 | +import com.ibm.icu.text.UCharacterIterator; |
| 18 | +import com.ibm.icu.text.UForwardCharacterIterator; |
| 19 | +import com.ibm.icu.text.UnicodeSet; |
11 | 20 |
|
12 | | -//FIXME 2022 update related PKG-* messages to contain the file name string |
13 | | -public final class OCFFilenameChecker |
| 21 | +public final class OCFFilenameChecker implements Checker |
14 | 22 | { |
15 | | - private static final Set<String> RESTRICTED_30_CHARACTER_SET = ImmutableSet.of("PRIVATE_USE_AREA", |
16 | | - "ARABIC_PRESENTATION_FORMS_A", "SPECIALS", "SUPPLEMENTARY_PRIVATE_USE_AREA_A", |
17 | | - "SUPPLEMENTARY_PRIVATE_USE_AREA_B", "VARIATION_SELECTORS_SUPPLEMENT", "TAGS"); |
| 23 | + |
| 24 | + private static final UnicodeSet ASCII = new UnicodeSet("[:ascii:]").freeze(); |
| 25 | + |
| 26 | + private static final UnicodeSet DISALLOWED_EPUB2 = new UnicodeSet() |
| 27 | + // .add(0x002F)// SOLIDUS '/' -- allowed as path separator |
| 28 | + .add(0x0022)// QUOTATION MARK '"' |
| 29 | + .add(0x002A)// ASTERISK '*' |
| 30 | + // .add(0x002E)// FULL STOP '.' -- only disallowed as the last character |
| 31 | + .add(0x003A)// COLON ':' |
| 32 | + .add(0x003C)// LESS-THAN SIGN '<' |
| 33 | + .add(0x003E)// GREATER-THAN SIGN '>' |
| 34 | + .add(0x003F)// QUESTION MARK '?' |
| 35 | + .add(0x005C)// REVERSE SOLIDUS '\' |
| 36 | + .freeze(); |
| 37 | + |
| 38 | + private static final ImmutableMap<String, UnicodeSet> DISALLOWED_EPUB3 = new ImmutableMap.Builder<String, UnicodeSet>() |
| 39 | + .put("ASCII", new UnicodeSet() // |
| 40 | + .addAll(DISALLOWED_EPUB2)// all disallowed in EPUB 2.0.1 |
| 41 | + .add(0x007C) // VERTICAL LINE '|' |
| 42 | + .freeze()) |
| 43 | + .put("NON CHARACTER", new UnicodeSet("[:Noncharacter_Code_Point=Yes:]")// |
| 44 | + .freeze()) |
| 45 | + .put("CONTROL", new UnicodeSet().add(0x007F) // DEL |
| 46 | + .addAll(0x0000, 0x001F) // C0 range |
| 47 | + .addAll(0x0080, 0x009F) // C1 range |
| 48 | + .freeze()) |
| 49 | + .put("PRIVATE USE", new UnicodeSet() // |
| 50 | + .addAll(0xE000, 0xF8FF) // Private Use Area |
| 51 | + .addAll(0xF0000, 0xFFFFF) // Supplementary Private Use Area-A |
| 52 | + .addAll(0x100000, 0x10FFFF) // Supplementary Private Use Area-B |
| 53 | + .freeze()) |
| 54 | + .put("SPECIALS", new UnicodeSet() // |
| 55 | + .addAll(0xFFF0, 0xFFFF) // Specials Blocks |
| 56 | + .freeze()) |
| 57 | + .put("DEPRECATED", new UnicodeSet() // |
| 58 | + .add(0xE0001)// LANGUAGE TAG |
| 59 | + // .add(0xE007F)// CANCEL TAG -- reinstated in Emoji tag sequences |
| 60 | + .freeze()) |
| 61 | + .build(); |
| 62 | + |
| 63 | + private static String toString(int codepoint, String setName) |
| 64 | + { |
| 65 | + assert setName != null; |
| 66 | + StringBuilder result = new StringBuilder().append(String.format("U+%04X ", codepoint)); |
| 67 | + if ("ASCII".equals(setName)) |
| 68 | + { |
| 69 | + result.append('(').append(UCharacter.toString(codepoint)).append(')'); |
| 70 | + } |
| 71 | + else |
| 72 | + { |
| 73 | + String characterName = UCharacter.getName(codepoint); |
| 74 | + if (characterName != null) |
| 75 | + { |
| 76 | + result.append(characterName).append(' '); |
| 77 | + } |
| 78 | + result.append('(').append(setName).append(')'); |
| 79 | + } |
| 80 | + return result.toString(); |
| 81 | + } |
18 | 82 |
|
19 | 83 | private final Report report; |
20 | 84 | private final EPUBVersion version; |
21 | 85 | private final EPUBLocation location; |
| 86 | + private final String filename; |
| 87 | + |
| 88 | + public OCFFilenameChecker(String filename, ValidationContext context) |
| 89 | + { |
| 90 | + this(filename, context, null); |
| 91 | + } |
22 | 92 |
|
23 | | - public OCFFilenameChecker(ValidationContext context) |
| 93 | + public OCFFilenameChecker(String filename, ValidationContext context, EPUBLocation location) |
24 | 94 | { |
| 95 | + Preconditions.checkArgument(filename != null); |
| 96 | + Preconditions.checkArgument(context != null); |
| 97 | + this.filename = filename; |
25 | 98 | this.report = context.report; |
26 | 99 | this.version = context.version; |
27 | | - this.location = EPUBLocation.of(context); |
| 100 | + this.location = (location != null) ? location : EPUBLocation.of(context); |
28 | 101 | } |
29 | 102 |
|
30 | | - public String checkCompatiblyEscaped(final String str) |
| 103 | + @Override |
| 104 | + public void check() |
31 | 105 | { |
32 | | - // don't check remote resources |
33 | | - if (str.matches("^[^:/?#]+://.*")) |
34 | | - { |
35 | | - return ""; |
36 | | - } |
37 | | - |
38 | | - // the test string will be used to compare test result |
39 | | - String test = checkNonAsciiFilename(str); |
40 | | - |
41 | | - if (str.endsWith(".")) |
42 | | - { |
43 | | - report.message(MessageId.PKG_011, location, str); |
44 | | - test += "."; |
45 | | - } |
46 | | - |
47 | | - boolean spaces = false; |
48 | | - final char[] ascciGraphic = new char[] { '<', '>', '"', '{', '}', '|', '^', '`', '*', |
49 | | - '?' /* , ':','/', '\\' */ }; |
50 | | - String result = ""; |
51 | | - char[] chars = str.toCharArray(); |
52 | | - for (char c : chars) |
| 106 | + // Iterate through the code points to search disallowed characters |
| 107 | + UCharacterIterator chars = UCharacterIterator.getInstance(filename); |
| 108 | + final Set<String> disallowed = new LinkedHashSet<>(); |
| 109 | + boolean hasSpaces = false; |
| 110 | + boolean isASCIIOnly = true; |
| 111 | + int codepoint; |
| 112 | + while ((codepoint = chars.nextCodePoint()) != UForwardCharacterIterator.DONE) |
53 | 113 | { |
54 | | - for (char a : ascciGraphic) |
| 114 | + // Check if the string has non-ASCII characters |
| 115 | + isASCIIOnly = isASCIIOnly && ASCII.contains(codepoint); |
| 116 | + // Check if the string has space characters |
| 117 | + hasSpaces = hasSpaces || UCharacter.isUWhiteSpace(codepoint); |
| 118 | + // Check for disallowed characters |
| 119 | + switch (version) |
55 | 120 | { |
56 | | - if (c == a) |
| 121 | + case VERSION_2: |
| 122 | + if (DISALLOWED_EPUB2.contains(codepoint)) |
57 | 123 | { |
58 | | - result += "\"" + Character.toString(c) + "\","; |
59 | | - test += Character.toString(c); |
| 124 | + disallowed.add(toString(codepoint, "ASCII")); |
60 | 125 | } |
61 | | - } |
62 | | - if (Character.isSpaceChar(c)) |
63 | | - { |
64 | | - spaces = true; |
65 | | - test += Character.toString(c); |
| 126 | + break; |
| 127 | + default: |
| 128 | + for (String name : DISALLOWED_EPUB3.keySet()) |
| 129 | + { |
| 130 | + if (DISALLOWED_EPUB3.get(name).contains(codepoint)) |
| 131 | + { |
| 132 | + disallowed.add(toString(codepoint, name)); |
| 133 | + break; |
| 134 | + } |
| 135 | + } |
| 136 | + break; |
66 | 137 | } |
67 | 138 | } |
68 | | - if (result.length() > 1) |
| 139 | + // Check that FULL STOP is not used as the last character |
| 140 | + if (chars.previousCodePoint() == 0x002E) |
69 | 141 | { |
70 | | - result = result.substring(0, result.length() - 1); |
71 | | - report.message(MessageId.PKG_009, location, str, result); |
| 142 | + report.message(MessageId.PKG_011, location, filename); |
72 | 143 | } |
73 | | - if (spaces) |
| 144 | + // Report if disallowed characters were found |
| 145 | + if (!disallowed.isEmpty()) |
74 | 146 | { |
75 | | - report.message(MessageId.PKG_010, location, str); |
| 147 | + report.message(MessageId.PKG_009, location, filename, |
| 148 | + disallowed.stream().collect(Collectors.joining(", "))); |
76 | 149 | } |
77 | | - |
78 | | - if (version == EPUBVersion.VERSION_3) |
| 150 | + // Report whitespace characters |
| 151 | + if (hasSpaces) |
79 | 152 | { |
80 | | - checkCompatiblyEscaped30(str, test); |
| 153 | + report.message(MessageId.PKG_010, location, filename); |
81 | 154 | } |
82 | | - return test; |
83 | | - } |
84 | | - |
85 | | - private String checkNonAsciiFilename(final String str) |
86 | | - { |
87 | | - String nonAscii = str.replaceAll("[\\p{ASCII}]", ""); |
88 | | - if (nonAscii.length() > 0) |
| 155 | + // Report non-ASCII characters as usage |
| 156 | + if (!isASCIIOnly) |
89 | 157 | { |
90 | | - report.message(MessageId.PKG_012, location, str, nonAscii); |
| 158 | + report.message(MessageId.PKG_012, location, filename); |
91 | 159 | } |
92 | | - return nonAscii; |
93 | 160 | } |
94 | 161 |
|
95 | | - private String checkCompatiblyEscaped30(String str, String test) |
96 | | - { |
97 | | - String result = ""; |
98 | | - |
99 | | - char[] chars = str.toCharArray(); |
100 | | - for (char c : chars) |
101 | | - { |
102 | | - if (Character.isISOControl(c)) |
103 | | - { |
104 | | - result += "\"" + Character.toString(c) + "\","; |
105 | | - test += Character.toString(c); |
106 | | - } |
107 | | - |
108 | | - // DEL (U+007F) |
109 | | - if (c == '\u007F') |
110 | | - { |
111 | | - result += "\"" + Character.toString(c) + "\","; |
112 | | - test += Character.toString(c); |
113 | | - } |
114 | | - String unicodeType = Character.UnicodeBlock.of(c).toString(); |
115 | | - if (RESTRICTED_30_CHARACTER_SET.contains(unicodeType)) |
116 | | - { |
117 | | - result += "\"" + Character.toString(c) + "\","; |
118 | | - } |
119 | | - } |
120 | | - if (result.length() > 1) |
121 | | - { |
122 | | - result = result.substring(0, result.length() - 1); |
123 | | - report.message(MessageId.PKG_009, location, str, result); |
124 | | - } |
125 | | - return test; |
126 | | - } |
127 | 162 | } |
0 commit comments