-
Notifications
You must be signed in to change notification settings - Fork 12
Closed
Labels
Description
Given this FoLiA file F1:
<?xml version="1.0" encoding="UTF-8"?>
<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="issue100" version="2.5">
<metadata>
<annotations>
<text-annotation set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/text.foliaset.ttl"/>
<division-annotation/>
<sentence-annotation/>
</annotations>
</metadata>
<text xml:id="text">
<div xml:id="div">
<s xml:id="s">
<t>Een Bug? Nee toch?</t>
</s>
</div>
</text>
</FoLiA>

Running frog on this file gives:
$ frog --skip=pmnla -x F1 -X F2
File F2:
<?xml version="1.0" encoding="UTF-8"?>
<FoLiA xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://ilk.uvt.nl/folia" xml:id="issue100" generator="libfolia-v2.15" version="2.5">
<metadata type="native">
<annotations>
<text-annotation set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/text.foliaset.ttl"/>
<division-annotation/>
<sentence-annotation/>
<token-annotation alias="tokconfig-nld" set="https://raw.githubusercontent.com/LanguageMachines/uctodata/master/setdefinitions/tokconfig-nld.foliaset.ttl">
<annotator processor="ucto.1"/>
</token-annotation>
<paragraph-annotation>
<annotator processor="ucto.1"/>
</paragraph-annotation>
<quote-annotation>
<annotator processor="ucto.1"/>
</quote-annotation>
<pos-annotation set="http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn">
<annotator processor="tagger.1"/>
</pos-annotation>
<chunking-annotation set="http://ilk.uvt.nl/folia/sets/frog-chunker-nl">
<annotator processor="IOB.1"/>
</chunking-annotation>
</annotations>
<provenance>
<processor xml:id="frog.1" begindatetime="2023-05-02T08:40:42" command="frog --skip=pmnla -x F1 -X F2" folia_version="2.5.1" host="kobus" name="frog" user="sloot" version="0.29">
<processor xml:id="frog.1.generator" folia_version="2.5.1" name="libfolia" type="generator" version="2.15"/>
<processor xml:id="ucto.1" begindatetime="2023-05-02T08:40:42" name="ucto" version="0.30">
<processor xml:id="uctodata.1" name="uctodata" type="datasource" version="0.9.1">
<processor xml:id="uctodata.1.1" name="tokconfig-nld" type="datasource" version="0.2"/>
</processor>
</processor>
<processor xml:id="tagger.1" begindatetime="2023-05-02T08:40:42" name="tagger" version="2.0"/>
<processor xml:id="IOB.1" begindatetime="2023-05-02T08:40:42" name="IOB" version="2.0"/>
</processor>
</provenance>
<meta id="language">nld</meta>
</metadata>
<text xml:id="text">
<div xml:id="div">
<s xml:id="s">
<t>Een Bug? Nee toch?</t>
<w xml:id="s.w.1" class="WORD">
<t>Een</t>
<pos class="LID(onbep,stan,agr)" confidence="0.981771" head="LID">
<feat class="onbep" subset="lwtype"/>
<feat class="stan" subset="naamval"/>
<feat class="agr" subset="npagr"/>
</pos>
</w>
<w xml:id="s.w.2" class="WORD" space="no">
<t>Bug</t>
<pos class="SPEC(vreemd)" confidence="1.0" head="SPEC">
<feat class="vreemd" subset="spectype"/>
</pos>
</w>
<w xml:id="s.w.3" class="PUNCTUATION">
<t>?</t>
<pos class="LET()" confidence="1.0" head="LET"/>
</w>
<chunking xml:id="s.chunking.1">
<chunk xml:id="s.chunking.1.chunk.1" class="NP" confidence="1.0">
<wref id="s.w.1" t="Een"/>
<wref id="s.w.2" t="Bug"/>
</chunk>
<chunk xml:id="s.chunking.1.chunk.2" class="TSW" confidence="1.0">
<wref id="s.w.4" t="Nee"/>
</chunk>
<chunk xml:id="s.chunking.1.chunk.3" class="ADVP" confidence="1.0">
<wref id="s.w.5" t="toch"/>
</chunk>
</chunking>
<w xml:id="s.w.4" class="WORD">
<t>Nee</t>
<pos class="TSW()" confidence="0.978799" head="TSW"/>
</w>
<w xml:id="s.w.5" class="WORD" space="no">
<t>toch</t>
<pos class="BW()" confidence="0.998829" head="BW"/>
</w>
<w xml:id="s.w.6" class="PUNCTUATION">
<t>?</t>
<pos class="LET()" confidence="1.0" head="LET"/>
</w>
</s>
</div>
</text>
</FoLiA>

This is INVALID!
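The chunking information is interleaved with the words in the sentence, leading to forward references to s.w.4 and above (the <chunking> layer is written before the <w> elements s.w.4 and s.w.5 that its <wref> elements point to).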
This is NOT supported:
folialint:
F2 failed: XML error: Unresolvable id s.w.4 in WordReference
foliavalidator:
VALIDATION ERROR on full parse by library (stage 2/3), in F2
ParseError: FoLiA exception in handling of <wref> @ line 66 (in parent <chunk> @ parent line 65) : [InvalidReference] s.w.4