3131import java .util .Map ;
3232import java .util .Queue ;
3333import java .util .Set ;
34- import java .util .regex .Pattern ;
3534
35+ import org .w3c .epubcheck .constants .MIMEType ;
36+ import org .w3c .epubcheck .url .URLFragment ;
3637import org .w3c .epubcheck .url .URLUtils ;
3738
3839import com .adobe .epubcheck .api .EPUBLocation ;
@@ -126,6 +127,7 @@ public static final class Builder
126127 private OPFItem item = null ;
127128 private boolean hasItemFallback = false ;
128129 private boolean hasImageFallback = false ;
130+ public String mimetype ;
129131
130132 public Builder url (URL url )
131133 {
@@ -137,6 +139,13 @@ public Builder item(OPFItem item)
137139 {
138140 this .url = item .getURL ();
139141 this .item = item ;
142+ this .mimetype = item .getMimeType ();
143+ return this ;
144+ }
145+
/**
 * Stores the given MIME type string on this builder and returns the same
 * builder instance.
 *
 * Note: item(OPFItem) also assigns this field (from item.getMimeType()), so
 * whichever of the two calls runs last determines the stored value.
 *
 * @param mimetype
 *          the MIME type string to record on this builder
 * @return this builder
 */
146+ public Builder mimetype (String mimetype )
147+ {
148+ this .mimetype = mimetype ;
140149 return this ;
141150 }
142151
@@ -231,8 +240,6 @@ public boolean isInSpine()
231240 }
232241 }
233242
234- private static final Pattern REGEX_SVG_VIEW = Pattern .compile ("svgView\\ (.*\\ )" );
235-
236243 private final Map <URL , Resource > resources = new HashMap <URL , Resource >();
237244
238245 private final Set <URL > undeclared = new HashSet <URL >();
@@ -281,7 +288,7 @@ public Optional<OPFItem> getResource(URL url)
281288 * @param path
282289 * the path to a publication resource
283290 * @return an immutable {@link EnumSet} containing the types of references to
284- * {@code path}.
291+ * {@code path}.
285292 */
286293 public Set <Type > getTypes (URL resource )
287294 {
@@ -413,9 +420,15 @@ public void checkReferences()
413420 private void checkReference (URLReference reference )
414421 {
415422 Resource hostResource = resources .get (reference .location .url );
416- Resource targetResource = resources .get (reference .targetDoc );
423+
424+ // Retrieve the Resource instance representing the targeted document.
417425 // If the resource was not declared in the manifest,
418426 // we build a new Resource object for the data URL.
427+ Resource targetResource = resources .get (reference .targetDoc );
428+ String targetMimetype = (targetResource != null ) ? targetResource .getMimeType () : "" ;
429+
430+ // Parse the URL fragment
431+ URLFragment fragment = URLFragment .parse (reference .url , targetMimetype );
419432
420433 // Check remote resources
421434 if (container .isRemote (reference .url )
@@ -470,15 +483,18 @@ else if (!undeclared.contains(reference.targetDoc)
470483 return ;
471484 }
472485
473- String mimetype = targetResource .getMimeType ();
474-
475486 // Type-specific checks
476487 switch (reference .type )
477488 {
478489 case HYPERLINK :
490+ if ("epubcfi" .equals (fragment .getScheme ()))
491+ {
492+ break ; // EPUB CFI is not supported
493+ }
479494 // if mimeType is null, we should have reported an error already
480- if (!OPFChecker .isBlessedItemType (mimetype , version )
481- && !OPFChecker .isDeprecatedBlessedItemType (mimetype ) && !targetResource .hasItemFallback ())
495+ if (!OPFChecker .isBlessedItemType (targetMimetype , version )
496+ && !OPFChecker .isDeprecatedBlessedItemType (targetMimetype )
497+ && !targetResource .hasItemFallback ())
482498 {
483499 report .message (MessageId .RSC_010 ,
484500 reference .location .context (container .relativize (reference .url )));
@@ -494,31 +510,35 @@ else if (!undeclared.contains(reference.targetDoc)
494510 case IMAGE :
495511 case PICTURE_SOURCE :
496512 case PICTURE_SOURCE_FOREIGN :
497- if (reference .url .fragment () != null && !mimetype .equals ("image/svg+xml" ))
513+ if ("epubcfi" .equals (fragment .getScheme ()))
514+ {
515+ break ; // EPUB CFI is not supported
516+ }
517+ if (fragment .exists () && !MIMEType .SVG .is (targetMimetype ))
498518 {
499519 report .message (MessageId .RSC_009 ,
500520 reference .location .context (container .relativize (reference .url )));
501521 return ;
502522 }
503523 // if mimeType is null, we should have reported an error already
504- if (!OPFChecker .isBlessedImageType (mimetype , version ))
524+ if (!OPFChecker .isBlessedImageType (targetMimetype , version ))
505525 {
506526 if (version == EPUBVersion .VERSION_3 && reference .type == Type .PICTURE_SOURCE )
507527 {
508528 report .message (MessageId .MED_007 , reference .location ,
509- container .relativize (reference .targetDoc ), mimetype );
529+ container .relativize (reference .targetDoc ), targetMimetype );
510530 return ;
511531 }
512532 else if (reference .type == Type .IMAGE && !targetResource .hasImageFallback ())
513533 {
514534 report .message (MessageId .MED_003 , reference .location ,
515- container .relativize (reference .targetDoc ), mimetype );
535+ container .relativize (reference .targetDoc ), targetMimetype );
516536 }
517537 }
518538 break ;
519539 case SEARCH_KEY :
520540 // TODO update when we support EPUB CFI
521- if ((reference . url . fragment () == null || !reference . url . fragment (). startsWith ( "epubcfi(" ))
541+ if ((! fragment . exists () || !"epubcfi" . equals ( fragment . getScheme () ))
522542 && !targetResource .isInSpine ())
523543 {
524544 report .message (MessageId .RSC_021 , reference .location ,
@@ -527,7 +547,7 @@ else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback())
527547 }
528548 break ;
529549 case STYLESHEET :
530- if (reference . url . fragment () != null )
550+ if (fragment . exists () )
531551 {
532552 report .message (MessageId .RSC_013 ,
533553 reference .location .context (container .relativize (reference .url )));
@@ -551,7 +571,7 @@ else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback())
551571 case SVG_CLIP_PATH :
552572 case SVG_PAINT :
553573 case SVG_SYMBOL :
554- if (reference . url . fragment () == null )
574+ if (! fragment . exists () )
555575 {
556576 report .message (MessageId .RSC_015 , reference .location .context (reference .url ));
557577 return ;
@@ -562,32 +582,32 @@ else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback())
562582 }
563583
564584 // Fragment integrity checks
565- String fragment = reference .url .fragment ();
566- if (fragment != null && !fragment .isEmpty ())
585+ if (fragment .exists () && !fragment .isEmpty ())
567586 {
568587 // EPUB CFI
569- if (fragment . startsWith ( "epubcfi(" ))
588+ if ("epubcfi" . equals ( fragment . getScheme () ))
570589 {
590+ // FIXME HOT should warn if in MO
571591 // FIXME epubcfi currently not supported (see issue 150).
572592 return ;
573593 }
574594 // Media fragments in Data Navigation Documents
575- else if (fragment .contains ( "=" ) && hostResource != null && hostResource .hasItem ()
595+ else if (fragment .isMediaFragment ( ) && hostResource != null && hostResource .hasItem ()
576596 && hostResource .getItem ().getProperties ()
577597 .contains (PackageVocabs .ITEM_VOCAB .get (PackageVocabs .ITEM_PROPERTIES .DATA_NAV )))
578598 {
579599 // Ignore.
580600 return ;
581601 }
582- // SVG view fragments are ignored
583- else if (mimetype . equals ( "image/svg+xml" ) && REGEX_SVG_VIEW . matcher ( fragment ). matches ())
602+ // Non-ID-based fragments are ignored
603+ else if (fragment . getId (). isEmpty ())
584604 {
585605 return ;
586606 }
587607 // Fragment Identifier (by default)
588608 else if (!container .isRemote (reference .targetDoc ))
589609 {
590- ID anchor = targetResource .ids .get (fragment );
610+ ID anchor = targetResource .ids .get (fragment . getId () );
591611 if (anchor == null )
592612 {
593613 report .message (MessageId .RSC_012 , reference .location .context (reference .url .toString ()));
@@ -674,7 +694,8 @@ private void checkReadingOrder(Queue<URLReference> references, int lastSpinePosi
674694 }
675695
676696 // check that the fragment is in document order
677- int targetAnchorPosition = res .getIDPosition (ref .url .fragment ());
697+ URLFragment fragment = URLFragment .parse (ref .url , res .getMimeType ());
698+ int targetAnchorPosition = res .getIDPosition (fragment .getId ());
678699 if (targetAnchorPosition < lastAnchorPosition )
679700 {
680701 String orderContext = LocalizedMessages .getInstance (locale ).getSuggestion (MessageId .NAV_011 ,
0 commit comments